Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,7 @@ CELERY_SINGLE_PROCESS=1

BOOST_ENDPOINT_THROTTLE_INFO=60/minute
BOOST_ENDPOINT_THROTTLE_ADD_OR_UPDATE=10/hour

# Comma-separated hostnames allowed for git clone URLs (HTTPS only).
# Default when unset: github.com. Set to an empty value to disable host allowlisting (not recommended).
BOOST_ALLOWED_CLONE_HOSTS=github.com
5 changes: 3 additions & 2 deletions docs/boost-endpoint-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ Top-level entry point called by the Celery task. Creates a temporary directory,

For each submodule the following steps run in order:

1. **Path validation** — the submodule name is checked against the temp directory root to prevent path traversal.
1. **Path validation** — the submodule name is checked against the temp directory root to prevent path traversal. Organization and submodule names must match `^[A-Za-z0-9._-]+$`. Clone URLs are validated before any `git` subprocess runs: only `https://` URLs are allowed (SCP-style SSH URLs are normalized to HTTPS for the checks); resolved addresses must not be private, loopback, or link-local; and the hostname must appear in `ALLOWED_CLONE_HOSTS` (default `github.com`, overridable via `BOOST_ALLOWED_CLONE_HOSTS`).

2. **Clone** — `git clone -b local-{lang_code} --depth 1 https://github.com/{organization}/{submodule}.git` into a temporary subdirectory. A 300-second timeout applies. Clone failure is recorded in `errors` and processing stops for that submodule.

Expand Down Expand Up @@ -404,7 +404,8 @@ HTTP `400` responses and submodule `errors` lists use the same object schema. Va
| `required_field` | Missing `organization`, `version`, or `add_or_update`; empty `add_or_update` dict |
| `invalid_language_code` | Non-string or blank language key in `add_or_update` |
| `invalid_submodule_list` | Submodule value is not a non-empty list |
| `invalid_submodule` | Submodule name fails path validation |
| `invalid_submodule` | Submodule name fails path or segment validation |
| `invalid_clone_url` | Organization or submodule name, or the resolved clone URL, fails SSRF checks (bad scheme, private IP, or host not in `ALLOWED_CLONE_HOSTS`) |
| `clone_failed` | `git clone` failed or timed out |
| `no_documentation_files` | No supported doc files found after scan |
| `permission_denied` | Missing `project.add` or `project.edit` |
Expand Down
2 changes: 2 additions & 0 deletions docs/deployment-runbook.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ Runtime plugin env vars (set in `.env`, read by `settings_override.py` at boot):
|----------|-------------------|-------|
| `BOOST_ENDPOINT_THROTTLE_INFO` | `60/minute` | Scoped rate for `GET /boost-endpoint/info/` |
| `BOOST_ENDPOINT_THROTTLE_ADD_OR_UPDATE` | `10/hour` | Scoped rate for `POST /boost-endpoint/add-or-update/` |
| `BOOST_ALLOWED_CLONE_HOSTS` | `github.com` | Comma-separated hostnames permitted for git clone URLs (HTTPS only; SSRF mitigation) |

### Weblate environment variables

Expand All @@ -110,6 +111,7 @@ Key variables (full reference in `.env.example`):
| `CELERY_SINGLE_PROCESS` | `1` | `.env` | Weblate Celery worker process count; increase when tasks queue |
| `BOOST_ENDPOINT_THROTTLE_INFO` | `60/minute` | `.env` | Plugin rate limit (see above) |
| `BOOST_ENDPOINT_THROTTLE_ADD_OR_UPDATE` | `10/hour` | `.env` | Plugin rate limit (see above) |
| `BOOST_ALLOWED_CLONE_HOSTS` | `github.com` | `.env` | Hostnames allowed for git clone URLs (see above) |
| `WEBLATE_EMAIL_HOST` | `smtp.example.com` | `.env` | SMTP server; set user/password for production |
| `WEBLATE_GITHUB_USERNAME` | — | `.env` | GitHub account for VCS; required with token for add-or-update |
| `WEBLATE_GITHUB_TOKEN` | — | `.env` | GitHub PAT (`repo` scope); rotate via pre-deploy checklist |
Expand Down
1 change: 1 addition & 0 deletions src/boost_weblate/endpoint/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class BoostEndpointErrorCode(StrEnum):
"""Stable machine-readable error codes for Boost endpoint failures."""

INVALID_SUBMODULE = "invalid_submodule"
INVALID_CLONE_URL = "invalid_clone_url"
CLONE_FAILED = "clone_failed"
NO_DOCUMENTATION_FILES = "no_documentation_files"
PERMISSION_DENIED = "permission_denied"
Expand Down
111 changes: 92 additions & 19 deletions src/boost_weblate/endpoint/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,35 @@

from __future__ import annotations

from enum import StrEnum
from typing import Any

from django.core.exceptions import ValidationError
from rest_framework import serializers

from boost_weblate.endpoint.errors import (
BoostEndpointErrorCode,
boost_validation_errors,
to_error_dict,
)
from boost_weblate.endpoint.validators import validate_repo_segment


class DrfValidationCode(StrEnum):
"""DRF ``ErrorDetail.code`` values mapped by this serializer."""

REQUIRED = "required"
NOT_A_LIST = "not_a_list"
EMPTY = "empty"


class RequestField(StrEnum):
"""Top-level add-or-update request field names."""

ORGANIZATION = "organization"
ADD_OR_UPDATE = "add_or_update"
VERSION = "version"
EXTENSIONS = "extensions"


class AddOrUpdateRequestSerializer(serializers.Serializer):
Expand Down Expand Up @@ -53,6 +73,7 @@ class AddOrUpdateRequestSerializer(serializers.Serializer):
def __init__(self, *args: Any, **kwargs: Any) -> None:
    """Initialise the serializer and its structured-error bookkeeping.

    All positional and keyword arguments are forwarded unchanged to the
    parent serializer.
    """
    super().__init__(*args, **kwargs)
    # Structured error entries recorded by the custom ``validate_*`` hooks.
    self._custom_validation_errors: list[dict[str, Any]] = []
    # Fields whose failures are already described by the entries above.
    self._custom_error_fields: set[str] = set()
    # Final structured error list, filled in by ``is_valid``.
    self._structured_errors: list[dict[str, Any]] = []

@property
Expand All @@ -61,6 +82,7 @@ def structured_errors(self) -> list[dict[str, Any]]:

def is_valid(self, *, raise_exception: bool = False) -> bool:
self._custom_validation_errors = []
self._custom_error_fields = set()
valid = super().is_valid(raise_exception=False)
if not valid:
self._structured_errors = self._to_structured_errors()
Expand All @@ -73,16 +95,14 @@ def is_valid(self, *, raise_exception: bool = False) -> bool:
def _to_structured_errors(self) -> list[dict[str, Any]]:
    """Combine custom and DRF field errors into one structured list.

    Returns:
        The custom validation entries first, followed by one entry per
        flattened DRF error for every field that has no custom entry.
    """
    structured = list(self._custom_validation_errors)
    for field, messages in self.errors.items():
        # Fields handled by a custom validator already contributed their
        # own structured entries; skip them so they are not reported twice.
        if field in self._custom_error_fields:
            continue
        for subfield, message, drf_code in self._flatten_field_errors(messages):
            code = self._code_for_drf_error(field, drf_code, subfield=subfield)
            metadata: dict[str, Any] = {"field": field}
            if drf_code is not None:
                metadata["drf_code"] = drf_code
            # Under ``add_or_update`` the nested key is the language code.
            if subfield and field == RequestField.ADD_OR_UPDATE:
                metadata["language"] = subfield
            structured.append(to_error_dict(code, message, **metadata))
    return structured
Expand All @@ -93,7 +113,7 @@ def _message_and_drf_code(err: Any) -> tuple[str, str | None]:

@staticmethod
def _flatten_field_errors(
field: str, messages: Any
messages: Any,
) -> list[tuple[str | None, str, str | None]]:
"""Flatten nested DRF errors into (subfield, message, drf_code) triplets."""
results: list[tuple[str | None, str, str | None]] = []
Expand All @@ -104,7 +124,7 @@ def _flatten_field_errors(
for msg in value:
if isinstance(msg, dict) or hasattr(msg, "items"):
nested = AddOrUpdateRequestSerializer._flatten_field_errors(
field, msg
msg
)
results.extend(
(key_str if sub is None else sub, message, drf_code)
Expand All @@ -116,9 +136,7 @@ def _flatten_field_errors(
)
results.append((key_str, message, drf_code))
elif isinstance(value, dict) or hasattr(value, "items"):
nested = AddOrUpdateRequestSerializer._flatten_field_errors(
field, value
)
nested = AddOrUpdateRequestSerializer._flatten_field_errors(value)
results.extend(
(key_str if sub is None else sub, message, drf_code)
for sub, message, drf_code in nested
Expand All @@ -143,16 +161,35 @@ def _code_for_drf_error(
*,
subfield: str | None = None,
) -> BoostEndpointErrorCode:
if drf_code == "required":
if drf_code == DrfValidationCode.REQUIRED:
return BoostEndpointErrorCode.REQUIRED_FIELD
if drf_code == "not_a_list":
if drf_code == DrfValidationCode.NOT_A_LIST:
return BoostEndpointErrorCode.INVALID_SUBMODULE_LIST
if drf_code == "empty":
if field == "add_or_update" and subfield:
if drf_code == DrfValidationCode.EMPTY:
if field == RequestField.ADD_OR_UPDATE and subfield:
return BoostEndpointErrorCode.INVALID_SUBMODULE_LIST
return BoostEndpointErrorCode.REQUIRED_FIELD
return BoostEndpointErrorCode.REQUIRED_FIELD

def validate_organization(self, value: str) -> str:
    """Reject organization names that would produce unsafe clone URLs.

    Args:
        value: Organization name taken from the request payload.

    Returns:
        The result of ``validate_repo_segment`` for an accepted name.

    Raises:
        serializers.ValidationError: If ``validate_repo_segment`` rejects the
            name. An ``INVALID_CLONE_URL`` structured entry is recorded
            before the error is raised.
    """
    try:
        return validate_repo_segment(value, field="organization")
    except ValidationError as exc:
        # ``str()`` of a Django ValidationError is the repr of its message
        # list ("['...']"); join the messages for a readable API message.
        message = "; ".join(exc.messages)
        self._custom_error_fields.add(RequestField.ORGANIZATION)
        self._custom_validation_errors.extend(
            boost_validation_errors(
                [
                    (
                        BoostEndpointErrorCode.INVALID_CLONE_URL,
                        message,
                        {"field": RequestField.ORGANIZATION},
                    )
                ]
            )
        )
        raise serializers.ValidationError(message) from exc

Comment thread
whisper67265 marked this conversation as resolved.
def validate_extensions(self, value: list[str] | None) -> list[str] | None:
"""Strip entries and remove blanks so all-empty input does not filter files."""
if value is None:
Expand All @@ -171,7 +208,10 @@ def validate_add_or_update(self, value: dict[str, Any]) -> dict[str, Any]:
"add_or_update: each key must be a non-empty language "
f"code; got {repr(lang_code)}"
),
{"field": "add_or_update", "language": str(lang_code)},
{
"field": RequestField.ADD_OR_UPDATE,
"language": str(lang_code),
},
)
)
continue
Expand All @@ -184,7 +224,7 @@ def validate_add_or_update(self, value: dict[str, Any]) -> dict[str, Any]:
f"submodule names; key {lang_code!r} is not a list "
f"(got {type(submodules).__name__})."
),
{"field": "add_or_update", "language": lang_code},
{"field": RequestField.ADD_OR_UPDATE, "language": lang_code},
)
)
elif len(submodules) == 0:
Expand All @@ -195,10 +235,43 @@ def validate_add_or_update(self, value: dict[str, Any]) -> dict[str, Any]:
"add_or_update: each value must be a non-empty list of "
f"submodule names; key {lang_code!r} has an empty list."
),
{"field": "add_or_update", "language": lang_code},
{"field": RequestField.ADD_OR_UPDATE, "language": lang_code},
)
)
else:
for submodule in submodules:
if not isinstance(submodule, str):
items.append(
(
BoostEndpointErrorCode.INVALID_SUBMODULE_LIST,
(
"add_or_update: each submodule name must be a "
f"string; key {lang_code!r} has "
f"{type(submodule).__name__}."
),
{
"field": RequestField.ADD_OR_UPDATE,
"language": lang_code,
},
)
)
break
try:
validate_repo_segment(submodule, field="submodule")
except ValidationError as exc:
items.append(
(
BoostEndpointErrorCode.INVALID_SUBMODULE,
str(exc),
{
"field": RequestField.ADD_OR_UPDATE,
"language": lang_code,
"submodule": submodule,
},
)
)
if items:
self._custom_validation_errors = boost_validation_errors(items)
raise serializers.ValidationError({"add_or_update": "invalid"})
self._custom_error_fields.add(RequestField.ADD_OR_UPDATE)
self._custom_validation_errors.extend(boost_validation_errors(items))
raise serializers.ValidationError({RequestField.ADD_OR_UPDATE: "invalid"})
return value
49 changes: 44 additions & 5 deletions src/boost_weblate/endpoint/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

from django.conf import settings
from django.contrib.messages import get_messages
from django.core.exceptions import ValidationError
from django.db import transaction
from weblate.formats.models import FILE_FORMATS
from weblate.lang.models import Language
Expand All @@ -50,6 +51,11 @@
append_error,
to_error_dict,
)
from boost_weblate.endpoint.validators import (
github_https_clone_url,
github_ssh_repo_url,
validate_repo_segment,
)

if TYPE_CHECKING:
from weblate.lang.models import LanguageQuerySet
Expand Down Expand Up @@ -311,7 +317,7 @@ def get_supported_extensions(self) -> set[str]:

def clone_repository(self, submodule: str, target_dir: str, branch: str) -> bool:
"""Clone a git repository to target directory."""
repo_url = f"https://github.com/{self.organization}/{submodule}.git"
repo_url = github_https_clone_url(self.organization, submodule)

try:
LOGGER.info("Cloning %s to %s", repo_url, target_dir)
Expand Down Expand Up @@ -508,7 +514,15 @@ def create_or_update_component(
return None, False

# Single clone per repo: first component gets real repo, others use weblate://
real_repo = f"git@github.com:{self.organization}/{submodule}.git"
try:
real_repo = github_ssh_repo_url(self.organization, submodule)
except ValidationError as exc:
LOGGER.error(
"Invalid repo URL for %s/%s: %s", self.organization, submodule, exc
)
report_error(cause="Component creation/update")
return None, False

repo_owner = (
Component.objects.filter(project=project, repo=real_repo)
.order_by("slug")
Expand Down Expand Up @@ -902,6 +916,19 @@ def process_submodule(
"errors": [],
}

try:
validate_repo_segment(self.organization, field="organization")
validate_repo_segment(submodule, field="submodule")
except ValidationError as exc:
append_error(
result,
BoostEndpointErrorCode.INVALID_CLONE_URL,
str(exc),
submodule=submodule,
organization=self.organization,
)
return result

# Create temp directory for this submodule
temp_submodule_dir = os.path.join(temp_dir, submodule)
resolved = Path(temp_submodule_dir).resolve()
Expand All @@ -919,9 +946,21 @@ def process_submodule(
os.makedirs(temp_submodule_dir, exist_ok=True)

# Clone repository
if not self.clone_repository(
submodule, temp_submodule_dir, f"local-{self.lang_code}"
):
try:
cloned = self.clone_repository(
submodule, temp_submodule_dir, f"local-{self.lang_code}"
)
except ValidationError as exc:
append_error(
result,
BoostEndpointErrorCode.INVALID_CLONE_URL,
str(exc),
submodule=submodule,
organization=self.organization,
)
return result

if not cloned:
append_error(
result,
BoostEndpointErrorCode.CLONE_FAILED,
Expand Down
Loading
Loading