Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
"tests/unit/locators/serializers/test_locator_string_representation.py" = ["E501"]
"tests/unit/locators/test_locators.py" = ["E501"]
"tests/unit/utils/test_image_utils.py" = ["E501"]
"playground.py" = ["F401", "E501"]

[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"
Expand Down
17 changes: 17 additions & 0 deletions src/askui/model_providers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
- `AnthropicVlmProvider` — VLM via direct Anthropic API
- `AnthropicImageQAProvider` — image Q&A via direct Anthropic API
- `GoogleImageQAProvider` — image Q&A via Google Gemini API (direct, no proxy)
- `OpenAIVlmProvider` — VLM via any OpenAI-compatible API
- `OpenAIImageQAProvider` — image Q&A via any OpenAI-compatible API
- `OllamaVlmProvider` — VLM via local Ollama instance (OpenAI-compatible)
- `OllamaImageQAProvider` — image Q&A via local Ollama instance (OpenAI-compatible)
- `OpenAICompatibleVlmProvider` — VLM via OpenAI-compatible API with fixed URL
"""

from askui.model_providers.anthropic_image_qa_provider import AnthropicImageQAProvider
Expand All @@ -22,6 +27,13 @@
from askui.model_providers.detection_provider import DetectionProvider
from askui.model_providers.google_image_qa_provider import GoogleImageQAProvider
from askui.model_providers.image_qa_provider import ImageQAProvider
from askui.model_providers.ollama_image_qa_provider import OllamaImageQAProvider
from askui.model_providers.ollama_vlm_provider import OllamaVlmProvider
from askui.model_providers.openai_compatible_vlm_provider import (
OpenAICompatibleVlmProvider,
)
from askui.model_providers.openai_image_qa_provider import OpenAIImageQAProvider
from askui.model_providers.openai_vlm_provider import OpenAIVlmProvider
from askui.model_providers.vlm_provider import VlmProvider
from askui.utils.model_pricing import ModelPricing

Expand All @@ -35,5 +47,10 @@
"GoogleImageQAProvider",
"ImageQAProvider",
"ModelPricing",
"OllamaImageQAProvider",
"OllamaVlmProvider",
"OpenAIImageQAProvider",
"OpenAIVlmProvider",
"OpenAICompatibleVlmProvider",
"VlmProvider",
]
49 changes: 49 additions & 0 deletions src/askui/model_providers/ollama_image_qa_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""OllamaImageQAProvider — image Q&A via a local Ollama instance."""

from openai import OpenAI

from askui.model_providers.openai_image_qa_provider import OpenAIImageQAProvider

_DEFAULT_BASE_URL = "http://localhost:11434/v1"
_DEFAULT_MODEL_ID = "qwen3.5"


class OllamaImageQAProvider(OpenAIImageQAProvider):
    """Image Q&A against a locally running Ollama server.

    Convenience subclass of `OpenAIImageQAProvider` pre-configured with
    Ollama's OpenAI-compatible endpoint, a placeholder API key, and a
    default model.

    Args:
        model_id (str, optional): Name of the Ollama model to query.
            Defaults to ``"qwen3.5"``.
        base_url (str, optional): URL of Ollama's OpenAI-compatible API.
            Defaults to ``"http://localhost:11434/v1"``.
        client (`OpenAI` | None, optional): Pre-built OpenAI client; when
            given, ``base_url`` is not used.

    Example:
        ```python
        from askui import AgentSettings, ComputerAgent
        from askui.model_providers import OllamaImageQAProvider

        agent = ComputerAgent(settings=AgentSettings(
            image_qa_provider=OllamaImageQAProvider(
                model_id="llava",
            )
        ))
        ```
    """

    def __init__(
        self,
        model_id: str = _DEFAULT_MODEL_ID,
        base_url: str = _DEFAULT_BASE_URL,
        client: OpenAI | None = None,
    ) -> None:
        # Ollama performs no authentication, but the OpenAI SDK insists on
        # a non-empty key, so a placeholder value is supplied.
        super().__init__(
            model_id=model_id,
            base_url=base_url,
            api_key="ollama",
            client=client,
        )
49 changes: 49 additions & 0 deletions src/askui/model_providers/ollama_vlm_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""OllamaVlmProvider — VLM access via a local Ollama instance."""

from openai import OpenAI

from askui.model_providers.openai_vlm_provider import OpenAIVlmProvider

_DEFAULT_BASE_URL = "http://localhost:11434/v1"
_DEFAULT_MODEL_ID = "qwen3.5"


class OllamaVlmProvider(OpenAIVlmProvider):
    """VLM access against a locally running Ollama server.

    Convenience subclass of `OpenAIVlmProvider` pre-configured with
    Ollama's OpenAI-compatible endpoint, a placeholder API key, and a
    default model.

    Args:
        model_id (str, optional): Name of the Ollama model to query.
            Defaults to ``"qwen3.5"``.
        base_url (str, optional): URL of Ollama's OpenAI-compatible API.
            Defaults to ``"http://localhost:11434/v1"``.
        client (`OpenAI` | None, optional): Pre-built OpenAI client; when
            given, ``base_url`` is not used.

    Example:
        ```python
        from askui import AgentSettings, ComputerAgent
        from askui.model_providers import OllamaVlmProvider

        agent = ComputerAgent(settings=AgentSettings(
            vlm_provider=OllamaVlmProvider(
                model_id="qwen3.5",
            )
        ))
        ```
    """

    def __init__(
        self,
        model_id: str = _DEFAULT_MODEL_ID,
        base_url: str = _DEFAULT_BASE_URL,
        client: OpenAI | None = None,
    ) -> None:
        # Ollama performs no authentication, but the OpenAI SDK insists on
        # a non-empty key, so a placeholder value is supplied.
        super().__init__(
            model_id=model_id,
            base_url=base_url,
            api_key="ollama",
            client=client,
        )
59 changes: 59 additions & 0 deletions src/askui/model_providers/openai_compatible_vlm_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""OpenAICompatibleVlmProvider — VLM access via a fixed endpoint URL."""

import httpx
from openai import OpenAI

from askui.model_providers.openai_vlm_provider import OpenAIVlmProvider


class OpenAICompatibleVlmProvider(OpenAIVlmProvider):
    """VLM provider for OpenAI-compatible APIs that require an exact endpoint URL.

    The OpenAI SDK always appends ``/chat/completions`` to ``base_url``,
    which breaks endpoints that already include the full path (e.g. RunPod,
    custom proxies, serverless deployments). This provider works around
    the issue by installing an httpx event hook that rewrites every
    outgoing request URL to the exact ``endpoint_url``.

    Note:
        The hook rewrites *every* request made through the internal client,
        so this provider only works with deployments where chat completions
        is the sole endpoint needed. The internally created ``httpx.Client``
        is owned by the OpenAI client for the provider's lifetime and is not
        explicitly closed.

    Args:
        endpoint_url (str): Full endpoint URL including the path
            (e.g. ``"https://my-host/v1/chat/completions"``).
        model_id (str | None, optional): Model name expected by the
            deployment. Defaults to ``None``.
        api_key (str | None, optional): API key for the endpoint.

    Example:
        ```python
        from askui import AgentSettings, ComputerAgent
        from askui.model_providers import OpenAICompatibleVlmProvider

        agent = ComputerAgent(settings=AgentSettings(
            vlm_provider=OpenAICompatibleVlmProvider(
                endpoint_url="https://my-host/v1/chat/completions",
                model_id="my-model",
                api_key="...",
            )
        ))
        ```
    """

    def __init__(
        self,
        endpoint_url: str,
        model_id: str | None = None,
        api_key: str | None = None,
    ) -> None:
        def _rewrite_url(request: httpx.Request) -> None:
            # Undo the ``/chat/completions`` suffix the OpenAI SDK appends by
            # forcing the request back to the exact endpoint. The ``Host``
            # header set at request construction stays valid because
            # ``base_url`` below is the same URL (same host).
            request.url = httpx.URL(endpoint_url)

        http_client = httpx.Client(event_hooks={"request": [_rewrite_url]})

        # ``base_url`` is still required by the SDK even though the hook
        # above determines the final URL.
        client = OpenAI(
            api_key=api_key,
            base_url=endpoint_url,
            http_client=http_client,
        )

        super().__init__(
            model_id=model_id,
            client=client,
        )
78 changes: 78 additions & 0 deletions src/askui/model_providers/openai_image_qa_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""OpenAIImageQAProvider — image Q&A via any OpenAI-compatible API."""

from functools import cached_property
from typing import Type

from openai import OpenAI
from typing_extensions import override

from askui.model_providers.image_qa_provider import ImageQAProvider
from askui.models.openai.get_model import OpenAIGetModel
from askui.models.shared.settings import GetSettings
from askui.models.types.response_schemas import ResponseSchema
from askui.utils.source_utils import Source


class OpenAIImageQAProvider(ImageQAProvider):
    """Image Q&A provider for any OpenAI-compatible API.

    Compatible with OpenAI itself as well as Ollama, vLLM, LM Studio,
    Together AI, and any other service exposing an OpenAI-style
    ``/v1/chat/completions`` endpoint.

    Args:
        model_id (str): Model name to use.
        api_key (str | None, optional): API key. Reads ``OPENAI_API_KEY``
            from the environment if not provided.
        base_url (str | None, optional): Base URL for the API. Defaults
            to the OpenAI API (``https://api.openai.com/v1``).
        client (`OpenAI` | None, optional): Pre-configured OpenAI client.
            If provided, ``api_key`` and ``base_url`` are ignored.

    Example:
        ```python
        from askui import AgentSettings, ComputerAgent
        from askui.model_providers import OpenAIImageQAProvider

        agent = ComputerAgent(settings=AgentSettings(
            image_qa_provider=OpenAIImageQAProvider(
                model_id="gpt-4o",
                api_key="sk-...",
            )
        ))
        ```
    """

    def __init__(
        self,
        model_id: str,
        api_key: str | None = None,
        base_url: str | None = None,
        client: OpenAI | None = None,
    ) -> None:
        self._model_id = model_id
        # A caller-supplied client wins; otherwise build one from the
        # given credentials (the SDK applies its own env-var fallbacks).
        self._client = client or OpenAI(api_key=api_key, base_url=base_url)

    @cached_property
    def _get_model(self) -> OpenAIGetModel:
        """Build the underlying `OpenAIGetModel` on first access and cache it."""
        return OpenAIGetModel(model_id=self._model_id, client=self._client)

    @override
    def query(
        self,
        query: str,
        source: Source,
        response_schema: Type[ResponseSchema] | None,
        get_settings: GetSettings,
    ) -> ResponseSchema | str:
        """Answer ``query`` about ``source``, delegating to the cached model."""
        return self._get_model.get(
            query=query,
            source=source,
            response_schema=response_schema,
            get_settings=get_settings,
        )
Loading
Loading