From 11f41dfabf276e5ee7c6800d6dca16496b02721d Mon Sep 17 00:00:00 2001 From: Martin Varga Date: Fri, 1 May 2026 14:03:39 +0200 Subject: [PATCH 1/6] Improve DB query logic in 3 endpoints which were causing performance issues GET /v1/resource/history GET /v1/project/ POST /v1/project/by_names were causing N+1 query issues --- server/mergin/sync/interfaces.py | 6 + server/mergin/sync/models.py | 34 +++- server/mergin/sync/public_api.yaml | 5 - server/mergin/sync/public_api_controller.py | 181 ++++++++++++++---- server/mergin/sync/workspace.py | 8 + .../mergin/tests/test_project_controller.py | 1 - 6 files changed, 187 insertions(+), 48 deletions(-) diff --git a/server/mergin/sync/interfaces.py b/server/mergin/sync/interfaces.py index bb2e9843..4f30c2bc 100644 --- a/server/mergin/sync/interfaces.py +++ b/server/mergin/sync/interfaces.py @@ -110,6 +110,12 @@ def get_by_name(self, name): """ pass + def get_by_names(self, names): + """ + Return list of workspaces whose names are in the given collection. + """ + pass + @abstractmethod def get_by_project(self, project): """ diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py index 5f4aa967..ca1ab926 100644 --- a/server/mergin/sync/models.py +++ b/server/mergin/sync/models.py @@ -18,7 +18,9 @@ from blinker import signal from flask_login import current_user from pygeodiff import GeoDiff +from functools import cached_property from sqlalchemy import text, null, desc, nullslast, tuple_ +from sqlalchemy.orm import contains_eager, joinedload, load_only from sqlalchemy.dialects.postgresql import ARRAY, BIGINT, UUID, JSONB, ENUM, insert from sqlalchemy.types import String from sqlalchemy.ext.hybrid import hybrid_property @@ -658,7 +660,7 @@ def __init__( def path(self) -> str: return self.file.path - @property + @cached_property def diff(self) -> Optional[FileDiff]: """Diff file pushed with UPDATE_DIFF change type. @@ -713,9 +715,36 @@ def changes( if not (is_versioned_file(file) and since is not None and to is not None): return [] - history = [] + # when since=1 the range spans the entire project history; narrow it to + # the most recent CREATE/DELETE so we don't load records from previous + # file lifecycles that the Python break would discard anyway + if since == 1: + boundary = ( + FileHistory.query.join(ProjectFilePath) + .filter( + ProjectFilePath.project_id == project_id, + ProjectFilePath.path == file, + FileHistory.project_version_name <= to, + FileHistory.change.in_( + [PushChangeType.CREATE.value, PushChangeType.DELETE.value] + ), + ) + .order_by(desc(FileHistory.project_version_name)) + .with_entities(FileHistory.project_version_name) + .first() + ) + since = boundary[0] if boundary else since + full_history = ( FileHistory.query.join(ProjectFilePath) + .join(FileHistory.version) + .join(ProjectVersion.project) + .options( + contains_eager(FileHistory.version) + .load_only(ProjectVersion.name, ProjectVersion.project_id) + .contains_eager(ProjectVersion.project) + .load_only(Project.storage_params) + ) .filter( ProjectFilePath.project_id == project_id, FileHistory.project_version_name <= to, @@ -726,6 +755,7 @@ def changes( .all() ) + history = [] for item in full_history: history.append(item) diff --git a/server/mergin/sync/public_api.yaml b/server/mergin/sync/public_api.yaml index 157e8262..7f5749d1 100644 --- a/server/mergin/sync/public_api.yaml +++ b/server/mergin/sync/public_api.yaml @@ -1124,11 +1124,6 @@ components: - added - updated - removed - expiration: - nullable: true - type: string - format: date-time - example: 2019-02-26T08:47:58.636074Z UploadFileInfo: allOf: - $ref: "#/components/schemas/FileInfo" diff --git a/server/mergin/sync/public_api_controller.py b/server/mergin/sync/public_api_controller.py index 8f142e71..a3457e0a 100644 --- a/server/mergin/sync/public_api_controller.py +++ b/server/mergin/sync/public_api_controller.py @@ -24,8 +24,9 @@ ) from pygeodiff import GeoDiffLibError from flask_login import current_user -from sqlalchemy import and_, desc, asc +from sqlalchemy import and_, desc, asc, tuple_ from sqlalchemy.exc import IntegrityError, SQLAlchemyError +from sqlalchemy.orm import contains_eager, joinedload, load_only from gevent import sleep import base64 from werkzeug.exceptions import HTTPException, Conflict @@ -37,6 +38,7 @@ from ..auth.models import User from .models import ( FileSyncErrorType, + FileDiff, Project, ProjectVersion, Upload, @@ -397,19 +399,73 @@ def get_project(project_name, namespace, since="", version=None): # noqa: E501 abort(400, "Parameters 'since' and 'version' are mutually exclusive") elif since: data = ProjectSchema(exclude=["storage_params"]).dump(project) - # append history for versioned files + since_version = ProjectVersion.from_v_name(since) + versioned_paths = [f.path for f in project.files if is_versioned_file(f.path)] + + # load history for all versioned files in one query; only the columns + # actually used downstream are fetched from the joined tables + all_history = ( + FileHistory.query.join(ProjectFilePath) + .join(FileHistory.version) + .options( + contains_eager(FileHistory.file).load_only( + ProjectFilePath.path, ProjectFilePath.project_id + ), + contains_eager(FileHistory.version).load_only(ProjectVersion.name), + ) + .filter( + ProjectFilePath.project_id == project.id, + FileHistory.project_version_name.between( + since_version, project.latest_version + ), + ProjectFilePath.path.in_(versioned_paths), + ) + .order_by(FileHistory.file_path_id, desc(FileHistory.project_version_name)) + .all() + ) + + # partition by file and apply stop-at-CREATE logic, matching FileHistory.changes behaviour + history_by_file: dict = {} + for item in all_history: + fid = item.file_path_id + file_history = history_by_file.setdefault(fid, []) + if file_history and file_history[-1].change in ( + PushChangeType.CREATE.value, + PushChangeType.DELETE.value, + ): + continue + file_history.append(item) + + # batch-load all FileDiff records needed across all files in one query + update_diff_items = [ + i + for items in history_by_file.values() + for i in items + if i.change == PushChangeType.UPDATE_DIFF.value + ] + if update_diff_items: + diffs = FileDiff.query.filter( + FileDiff.file_path_id.in_({i.file_path_id for i in update_diff_items}), + FileDiff.rank == 0, + FileDiff.version.in_( + [i.project_version_name for i in update_diff_items] + ), + ).all() + diff_map = {(d.file_path_id, d.version): d for d in diffs} + for item in update_diff_items: + item.__dict__["diff"] = diff_map.get( + (item.file_path_id, item.project_version_name) + ) + + path_to_file_id = {i.file.path: i.file_path_id for i in all_history} files = [] for f in project.files: history_field = {} - for item in FileHistory.changes( - project.id, - f.path, - ProjectVersion.from_v_name(since), - project.latest_version, - ): - history_field[ProjectVersion.to_v_name(item.version.name)] = ( - FileHistorySchema(exclude=("mtime",)).dump(item) - ) + if is_versioned_file(f.path): + for item in history_by_file.get(path_to_file_id.get(f.path), []): + history_field[ProjectVersion.to_v_name(item.version.name)] = ( + FileHistorySchema(exclude=("mtime", "expiration")).dump(item) + ) files.append({**asdict(f), "history": history_field}) data["files"] = files elif version: @@ -470,38 +526,63 @@ def get_projects_by_names(): # noqa: E501 list_of_projects = request.json.get("projects", []) if len(list_of_projects) > 50: abort(400, "Too many projects") + + # batch-resolve workspaces by name (one DB query for DB-backed handlers) + unique_ws_names = { + key.split("/")[0] for key in list_of_projects if len(key.split("/")) == 2 + } + workspaces_by_name = { + ws.name: ws for ws in current_app.ws_handler.get_by_names(unique_ws_names) + } + results = {} - for project in list_of_projects: - projects = projects_query(ProjectPermissions.Read, as_admin=False) - splitted = project.split("/") - if len(splitted) != 2: - results[project] = {"error": 404} + valid_projects = [] # list of (key, workspace, project_name) + for key in list_of_projects: + parts = key.split("/") + if len(parts) != 2: + results[key] = {"error": 404} continue - ws = splitted[0] - name = splitted[1] - workspace = current_app.ws_handler.get_by_name(ws) + workspace = workspaces_by_name.get(parts[0]) if not workspace: - results[project] = {"error": 404} + results[key] = {"error": 404} continue - result = projects.filter( - Project.workspace_id == workspace.id, Project.name == name - ).first() - if result: - users_map = { - u.id: u.username - for u in User.query.select_from(ProjectUser) - .join(User) - .filter(ProjectUser.project_id == result.id) - .all() - } - workspaces_map = {workspace.id: workspace.name} - ctx = {"users_map": users_map, "workspaces_map": workspaces_map} - results[project] = ProjectListSchema(context=ctx).dump(result) - else: - if not current_user or not current_user.is_authenticated: - results[project] = {"error": 401} + valid_projects.append((key, workspace, parts[1])) + + if valid_projects: + # batch-fetch all requested projects in one query + ws_name_pairs = [(ws.id, name) for _, ws, name in valid_projects] + found_projects = ( + projects_query(ProjectPermissions.Read, as_admin=False) + .filter(tuple_(Project.workspace_id, Project.name).in_(ws_name_pairs)) + .all() + ) + found_map = {(p.workspace_id, p.name): p for p in found_projects} + + # batch-fetch all project members in one query + users_map = { + u.id: u.username + for u in User.query.select_from(ProjectUser) + .join(User) + .filter(ProjectUser.project_id.in_([p.id for p in found_projects])) + .all() + } + ws_ids = {p.workspace_id for p in found_projects} + workspaces_map = { + w.id: w.name for w in current_app.ws_handler.get_by_ids(ws_ids) + } + ctx = {"users_map": users_map, "workspaces_map": workspaces_map} + + for key, workspace, name in valid_projects: + result = found_map.get((workspace.id, name)) + if result: + results[key] = ProjectListSchema(context=ctx).dump(result) else: - results[project] = {"error": 404} + results[key] = ( + {"error": 401} + if not current_user or not current_user.is_authenticated + else {"error": 404} + ) + return results, 200 @@ -1191,10 +1272,30 @@ def get_resource_history(project_name, namespace, path): # noqa: E501 ) data = ProjectFileSchema().dump(fh) + history = FileHistory.changes(project.id, path, 1, project.latest_version) + + # batch-load all rank-0 FileDiff records needed for the history in one query + diff_map = {} + if history: + update_diff_versions = [ + i.project_version_name + for i in history + if i.change == PushChangeType.UPDATE_DIFF.value + ] + if update_diff_versions: + diffs = FileDiff.query.filter( + FileDiff.file_path_id == history[0].file_path_id, + FileDiff.rank == 0, + FileDiff.version.in_(update_diff_versions), + ).all() + diff_map = {d.version: d for d in diffs} + history_field = {} - for item in FileHistory.changes(project.id, path, 1, project.latest_version): + for item in history: + if item.change == PushChangeType.UPDATE_DIFF.value: + item.__dict__["diff"] = diff_map.get(item.project_version_name) history_field[ProjectVersion.to_v_name(item.version.name)] = FileHistorySchema( - exclude=("mtime",) + exclude=("mtime", "expiration") ).dump(item) data["history"] = history_field diff --git a/server/mergin/sync/workspace.py b/server/mergin/sync/workspace.py index e7575e46..4c9866c5 100644 --- a/server/mergin/sync/workspace.py +++ b/server/mergin/sync/workspace.py @@ -144,6 +144,14 @@ def get_by_name(self, name): return return self.factory_method() + def get_by_names(self, names): + result = [] + for name in set(names): + ws = self.get_by_name(name) + if ws: + result.append(ws) + return result + def get_by_project(self, project): return self.factory_method() diff --git a/server/mergin/tests/test_project_controller.py b/server/mergin/tests/test_project_controller.py index 60c36ee2..f8a99188 100644 --- a/server/mergin/tests/test_project_controller.py +++ b/server/mergin/tests/test_project_controller.py @@ -166,7 +166,6 @@ def test_file_history(client, diff_project): assert "v1" not in history assert "v3" in history assert "location" not in history["v7"] - assert "expiration" in history["v7"] def test_get_paginated_projects(client): From 6e4b3930e370188d3fa6afa74672e583ec2e3e1d Mon Sep 17 00:00:00 2001 From: Martin Varga Date: Tue, 5 May 2026 08:33:47 +0200 Subject: [PATCH 2/6] More fixes to avoid N+1, mostly in serialization schemas Use joins or batch precalculation and cached_properties to avoid multiple unbound DB queries. --- server/mergin/sync/models.py | 16 +++- server/mergin/sync/public_api_controller.py | 82 ++++++++++++++++++--- server/mergin/sync/schemas.py | 38 ++++------ 3 files changed, 103 insertions(+), 33 deletions(-) diff --git a/server/mergin/sync/models.py b/server/mergin/sync/models.py index ca1ab926..3b32cda9 100644 --- a/server/mergin/sync/models.py +++ b/server/mergin/sync/models.py @@ -6,6 +6,7 @@ import json import logging import os +import re import threading import time import uuid @@ -138,6 +139,12 @@ def workspace(self): project_workspace = current_app.ws_handler.get(self.workspace_id) return project_workspace + @cached_property + def _has_conflict(self) -> bool: + """True if any current project file matches a known conflict-copy pattern.""" + pattern = r"(\.gpkg|\.qgs|.qgz)(.*conflict.*)|( \(.*conflict.*)" + return any(re.search(pattern, f.path) for f in self.files) + def get_latest_files_cache(self) -> List[int]: """Get latest file history ids either from cached table or calculate them on the fly""" if self.latest_project_files.file_history_ids is not None: @@ -740,10 +747,11 @@ def changes( .join(FileHistory.version) .join(ProjectVersion.project) .options( + contains_eager(FileHistory.file).load_only(ProjectFilePath.path), contains_eager(FileHistory.version) .load_only(ProjectVersion.name, ProjectVersion.project_id) .contains_eager(ProjectVersion.project) - .load_only(Project.storage_params) + .load_only(Project.storage_params), ) .filter( ProjectFilePath.project_id == project_id, @@ -1811,11 +1819,15 @@ def diff_summary(self): def changes_count(self) -> Dict: """Return number of changes by type""" - query = f"SELECT change, COUNT(change) FROM file_history WHERE version_id = :version_id GROUP BY change;" + query = "SELECT change, COUNT(change) FROM file_history WHERE version_id = :version_id GROUP BY change;" params = {"version_id": self.id} result = db.session.execute(text(query), params).fetchall() return {row[0]: row[1] for row in result} + @cached_property + def _changes_count(self) -> Dict: + return self.changes_count() + @property def zip_path(self): return os.path.join( diff --git a/server/mergin/sync/public_api_controller.py b/server/mergin/sync/public_api_controller.py index a3457e0a..90c71945 100644 --- a/server/mergin/sync/public_api_controller.py +++ b/server/mergin/sync/public_api_controller.py @@ -24,9 +24,10 @@ ) from pygeodiff import GeoDiffLibError from flask_login import current_user -from sqlalchemy import and_, desc, asc, tuple_ +import re +from sqlalchemy import and_, desc, asc, text, tuple_ from sqlalchemy.exc import IntegrityError, SQLAlchemyError -from sqlalchemy.orm import contains_eager, joinedload, load_only +from sqlalchemy.orm import contains_eager, joinedload, load_only, selectinload from gevent import sleep import base64 from werkzeug.exceptions import HTTPException, Conflict @@ -502,6 +503,8 @@ def get_paginated_project_versions( project = require_project(namespace, project_name, ProjectPermissions.Read) query = ProjectVersion.query.filter( and_(ProjectVersion.project_id == project.id, ProjectVersion.name != 0) + ).options( + joinedload(ProjectVersion.project).load_only(Project.name, Project.workspace_id) ) query = ( query.order_by(desc(ProjectVersion.name)) @@ -511,11 +514,59 @@ def get_paginated_project_versions( paginate = query.paginate(page=page, per_page=per_page) result = paginate.items total = paginate.total - versions = ProjectVersionListSchema(many=True).dump(result) + + # batch-resolve workspace names for the page + ws_ids = {v.project.workspace_id for v in result} + workspaces_map = {w.id: w.name for w in current_app.ws_handler.get_by_ids(ws_ids)} + + # batch-compute change counts for all versions in the page in one query + if result: + version_ids = [v.id for v in result] + rows = db.session.execute( + text( + "SELECT version_id, change, COUNT(change) AS cnt" + " FROM file_history" + " WHERE version_id = ANY(:ids)" + " GROUP BY version_id, change" + ), + {"ids": version_ids}, + ).fetchall() + counts_map = {} + for row in rows: + counts_map.setdefault(row.version_id, {})[row.change] = row.cnt + for v in result: + v.__dict__["_changes_count"] = counts_map.get(v.id, {}) + + ctx = {"workspaces_map": workspaces_map} + versions = ProjectVersionListSchema(many=True, context=ctx).dump(result) data = {"versions": versions, "count": total} return data, 200 +def _precompute_has_conflict(projects): + """Pre-populate _has_conflict on each project using a single SQL query.""" + if not projects: + return + conflict_regex = r"(\.gpkg|\.qgs|.qgz)(.*conflict.*)|( \(.*conflict.*)" + rows = db.session.execute( + text( + """ + SELECT DISTINCT lpf.project_id + FROM latest_project_files lpf + CROSS JOIN unnest(lpf.file_history_ids) AS fh_id + JOIN file_history fh ON fh.id = fh_id + JOIN project_file_path fp ON fp.id = fh.file_path_id + WHERE lpf.project_id = ANY(:project_ids) + AND fp.path ~ :pattern + """ + ), + {"project_ids": [p.id for p in projects], "pattern": conflict_regex}, + ).fetchall() + conflict_ids = {row.project_id for row in rows} + for p in projects: + p.__dict__["_has_conflict"] = p.id in conflict_ids + + def get_projects_by_names(): # noqa: E501 """List mergin projects specified by list of projects with namespaces and names @@ -529,10 +580,13 @@ def get_projects_by_names(): # noqa: E501 # batch-resolve workspaces by name (one DB query for DB-backed handlers) unique_ws_names = { - key.split("/")[0] for key in list_of_projects if len(key.split("/")) == 2 + key.split("/")[0].lower() + for key in list_of_projects + if len(key.split("/")) == 2 } workspaces_by_name = { - ws.name: ws for ws in current_app.ws_handler.get_by_names(unique_ws_names) + ws.name.lower(): ws + for ws in current_app.ws_handler.get_by_names(unique_ws_names) } results = {} @@ -542,17 +596,19 @@ def get_projects_by_names(): # noqa: E501 if len(parts) != 2: results[key] = {"error": 404} continue - workspace = workspaces_by_name.get(parts[0]) + workspace = workspaces_by_name.get(parts[0].lower()) if not workspace: results[key] = {"error": 404} continue valid_projects.append((key, workspace, parts[1])) if valid_projects: - # batch-fetch all requested projects in one query + # batch-fetch all requested projects, eagerly loading project_users so + # members_by_role / get_role don't trigger per-project lazy loads ws_name_pairs = [(ws.id, name) for _, ws, name in valid_projects] found_projects = ( projects_query(ProjectPermissions.Read, as_admin=False) + .options(selectinload(Project.project_users)) .filter(tuple_(Project.workspace_id, Project.name).in_(ws_name_pairs)) .all() ) @@ -570,6 +626,9 @@ def get_projects_by_names(): # noqa: E501 workspaces_map = { w.id: w.name for w in current_app.ws_handler.get_by_ids(ws_ids) } + + _precompute_has_conflict(found_projects) + ctx = {"users_map": users_map, "workspaces_map": workspaces_map} for key, workspace, name in valid_projects: @@ -602,9 +661,11 @@ def get_projects_by_uuids(uuids): # noqa: E501 projects = ( projects_query(ProjectPermissions.Read, as_admin=False) + .options(selectinload(Project.project_users)) .filter(Project.id.in_(proj_ids)) .all() ) + _precompute_has_conflict(projects) ws_ids = set([p.workspace_id for p in projects]) projects_ids = [p.id for p in projects] users_map = { @@ -686,9 +747,12 @@ def get_paginated_projects( public, only_public, ) - pagination = projects.paginate(page=page, per_page=per_page) + pagination = projects.options(selectinload(Project.project_users)).paginate( + page=page, per_page=per_page + ) result = pagination.items total = pagination.total + _precompute_has_conflict(result) # create user map id:username passed to project schema to minimize queries to db projects_ids = [p.id for p in result] @@ -699,7 +763,7 @@ def get_paginated_projects( .filter(ProjectUser.project_id.in_(projects_ids)) .all() } - ws_ids = [p.workspace_id for p in projects] + ws_ids = [p.workspace_id for p in result] workspaces_map = {w.id: w.name for w in current_app.ws_handler.get_by_ids(ws_ids)} ctx = {"users_map": users_map, "workspaces_map": workspaces_map} sleep( diff --git a/server/mergin/sync/schemas.py b/server/mergin/sync/schemas.py index da18f7db..c2184c62 100644 --- a/server/mergin/sync/schemas.py +++ b/server/mergin/sync/schemas.py @@ -2,7 +2,6 @@ # # SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial -import re from marshmallow import fields, ValidationError, Schema, post_dump from flask_login import current_user from flask import current_app @@ -192,7 +191,7 @@ class ProjectListSchema(ma.SQLAlchemyAutoSchema): id = fields.UUID() name = fields.Str() namespace = fields.Method("get_workspace_name") - access = fields.Function(lambda obj: ProjectAccessSchema().dump(obj)) + access = fields.Method("get_access") permissions = fields.Function(project_user_permissions) version = fields.Function(lambda obj: ProjectVersion.to_v_name(obj.latest_version)) updated = fields.Method("get_updated") @@ -200,22 +199,14 @@ class ProjectListSchema(ma.SQLAlchemyAutoSchema): creator = fields.Integer(attribute="creator_id") disk_usage = fields.Integer() tags = fields.List(fields.Str()) - has_conflict = fields.Method("get_has_conflict") + has_conflict = fields.Function(lambda obj: obj._has_conflict) + + def get_access(self, obj): + return ProjectAccessSchema(context=self.context).dump(obj) def get_updated(self, obj): return obj.updated if obj.updated else obj.created - def get_has_conflict(self, obj): - """Check if there is any conflict file in project generated by client - Patterns to check: - - file.[gpkg|qgs|qgz]_conflict_copy (older convention) - - file.gpkg_rebase_conflicts (older convention) - - file (conflicted copy, user vx).* - - file (edit conflict, user vx).json - """ - regex = r"(\.gpkg|\.qgs|.qgz)(.*conflict.*)|( \(.*conflict.*)" - return any(re.search(regex, file.path) for file in obj.files) - def get_workspace_name(self, obj): """Discover ProjectListSchema workspace name""" try: @@ -368,22 +359,25 @@ class ProjectAccessDetailSchema(Schema): class ProjectVersionListSchema(ma.SQLAlchemyAutoSchema): project_name = fields.Function(lambda obj: obj.project.name) - namespace = fields.Function(lambda obj: obj.project.workspace.name) + namespace = fields.Method("get_namespace") name = fields.Function(lambda obj: ProjectVersion.to_v_name(obj.name)) author = fields.String(attribute="author.username") created = DateTimeWithZ() changes = fields.Method("_changes") project_size = fields.Integer() + def get_namespace(self, obj): + workspaces_map = self.context.get("workspaces_map", {}) + return workspaces_map.get(obj.project.workspace_id, "") + def _changes(self, obj): - result = obj.changes_count() - data = { - "added": result.get(PushChangeType.CREATE.value, 0), - "updated": result.get(PushChangeType.UPDATE.value, 0), - "updated_diff": result.get(PushChangeType.UPDATE_DIFF.value, 0), - "removed": result.get(PushChangeType.DELETE.value, 0), + counts = obj._changes_count + return { + "added": counts.get(PushChangeType.CREATE.value, 0), + "updated": counts.get(PushChangeType.UPDATE.value, 0), + "updated_diff": counts.get(PushChangeType.UPDATE_DIFF.value, 0), + "removed": counts.get(PushChangeType.DELETE.value, 0), } - return data class Meta: model = ProjectVersion From 28a2c83a4c81633ac2c669fe278fe22a1b98c71a Mon Sep 17 00:00:00 2001 From: Martin Varga Date: Tue, 5 May 2026 09:05:54 +0200 Subject: [PATCH 3/6] Fix tests --- server/mergin/tests/test_project_controller.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/mergin/tests/test_project_controller.py b/server/mergin/tests/test_project_controller.py index f8a99188..d1f1afd6 100644 --- a/server/mergin/tests/test_project_controller.py +++ b/server/mergin/tests/test_project_controller.py @@ -2166,7 +2166,12 @@ def test_project_conflict_files(diff_project, file): } ] } + project_id = diff_project.id _ = add_project_version(diff_project, changes) + # expunge so the identity map releases the instance; re-query gives a fresh + # object without the stale cached_property value + db.session.expunge(diff_project) + diff_project = db.session.get(Project, project_id) project_info = ProjectListSchema(only=("has_conflict",), context=ctx).dump( diff_project ) From 398a9891eecd5fc5f17dc8664ff60b8836359976 Mon Sep 17 00:00:00 2001 From: Martin Varga Date: Wed, 13 May 2026 10:59:32 +0200 Subject: [PATCH 4/6] Do not log xml error responses --- server/mergin/app.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/server/mergin/app.py b/server/mergin/app.py index 41938d31..e5eb42d5 100644 --- a/server/mergin/app.py +++ b/server/mergin/app.py @@ -389,13 +389,16 @@ def handle_exception(e): def log_bad_request(response): """Log bad requests for easier debugging""" if response.status_code == 400: - json_body = response.get_json(silent=True) - if json_body and json_body.get("detail"): - # default response from connexion (check against swagger.yaml) - logging.warning(f'HTTP 400: {json_body["detail"]}') + if "xml" in response.content_type: + pass # QGIS proxy errors are already logged at the source else: - # either WTF form validation error or custom validation with abort(400) - logging.warning(f"HTTP 400: {response.data}") + json_body = response.get_json(silent=True) + if json_body and json_body.get("detail"): + # default response from connexion (check against swagger.yaml) + logging.warning(f'HTTP 400: {json_body["detail"]}') + else: + # either WTF form validation error or custom validation with abort(400) + logging.warning(f"HTTP 400: {response.data}") elif response.status_code == 409: # request which would result in conflict, e.g. creating the same project again logging.warning(f"HTTP 409: {response.data}") From 1f2d949f164e3f93dfb8e367b3605db58b5be884 Mon Sep 17 00:00:00 2001 From: Martin Varga Date: Wed, 27 May 2026 09:40:05 +0200 Subject: [PATCH 5/6] Fix case sensitivity issue in project lookup Remove .lower() transformation for workspace name lookup which was causing regression. --- server/mergin/sync/public_api_controller.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/server/mergin/sync/public_api_controller.py b/server/mergin/sync/public_api_controller.py index 90c71945..8dbe1237 100644 --- a/server/mergin/sync/public_api_controller.py +++ b/server/mergin/sync/public_api_controller.py @@ -580,15 +580,11 @@ def get_projects_by_names(): # noqa: E501 # batch-resolve workspaces by name (one DB query for DB-backed handlers) unique_ws_names = { - key.split("/")[0].lower() - for key in list_of_projects - if len(key.split("/")) == 2 + key.split("/")[0] for key in list_of_projects if len(key.split("/")) == 2 } workspaces_by_name = { - ws.name.lower(): ws - for ws in current_app.ws_handler.get_by_names(unique_ws_names) + ws.name: ws for ws in current_app.ws_handler.get_by_names(unique_ws_names) } - results = {} valid_projects = [] # list of (key, workspace, project_name) for key in list_of_projects: @@ -596,7 +592,7 @@ def get_projects_by_names(): # noqa: E501 if len(parts) != 2: results[key] = {"error": 404} continue - workspace = workspaces_by_name.get(parts[0].lower()) + workspace = workspaces_by_name.get(parts[0]) if not workspace: results[key] = {"error": 404} continue From b2692916f00fdfe8caf1c0cdf7bdb7963085965b Mon Sep 17 00:00:00 2001 From: Martin Varga Date: Fri, 29 May 2026 14:16:18 +0200 Subject: [PATCH 6/6] Chore: add CI for alembic migration tests --- .github/workflows/auto_tests.yml | 21 +- server/mergin/test_migrations/__init__.py | 0 .../mergin/test_migrations/test_migrations.py | 38 +++ server/mergin/test_migrations/utils.py | 241 ++++++++++++++++++ ...7f2185e2428_case_insensitive_unique_idx.py | 24 +- .../3daefa84ce67_add_deployment_info.py | 4 +- ...9c566e7_migrate_away_from_jsonb_columns.py | 2 +- .../dbd428cda965_migrate_to_jsonb.py | 37 ++- 8 files changed, 342 insertions(+), 25 deletions(-) create mode 100644 server/mergin/test_migrations/__init__.py create mode 100644 server/mergin/test_migrations/test_migrations.py create mode 100644 server/mergin/test_migrations/utils.py diff --git a/.github/workflows/auto_tests.yml b/.github/workflows/auto_tests.yml index 4d78bd65..0f7596e3 100644 --- a/.github/workflows/auto_tests.yml +++ b/.github/workflows/auto_tests.yml @@ -3,9 +3,18 @@ name: Auto Tests on: push jobs: - server_tests: + tests: runs-on: ubuntu-24.04 + strategy: + fail-fast: false + matrix: + include: + - suite: server + pytest_args: "-v --cov=mergin --cov-report=lcov mergin/tests" + - suite: migration + pytest_args: "-v mergin/test_migrations" + services: postgres: image: postgres:14 @@ -15,13 +24,18 @@ jobs: POSTGRES_USER: postgres ports: - 5435:5432 - # Set health checks to wait until postgres has started options: >- --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 + env: + DB_USER: postgres + DB_PASSWORD: postgres + DB_HOST: localhost + DB_PORT: 5435 + steps: - name: Check out repository uses: actions/checkout@v3 @@ -36,9 +50,10 @@ jobs: - name: Run tests run: | cd server - pipenv run pytest -v --cov=mergin --cov-report=lcov mergin/tests + pipenv run pytest ${{ matrix.pytest_args }} - name: Coveralls + if: matrix.suite == 'server' uses: coverallsapp/github-action@v2 with: base-path: server diff --git a/server/mergin/test_migrations/__init__.py b/server/mergin/test_migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/server/mergin/test_migrations/test_migrations.py b/server/mergin/test_migrations/test_migrations.py new file mode 100644 index 00000000..8bfa1154 --- /dev/null +++ b/server/mergin/test_migrations/test_migrations.py @@ -0,0 +1,38 @@ +# Copyright (C) Lutra Consulting Limited +# +# SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial + +from pathlib import Path + +from ..app import db +from .utils import ( + make_migration_app, + make_migration_engine, + ordered_revisions, + run_migration_lifecycle, +) + +MIGRATIONS_DIR = str(Path(__file__).parents[2] / "migrations") +# 1fcbea2a0f2c (drop_namespace_related_objects) has an intentional no-op downgrade +# because removing namespace tables is irreversible. We stop there. +DOWNGRADE_TARGET = "1fcbea2a0f2c" + +migration_engine = make_migration_engine("mergin_migration_test") +migration_app = make_migration_app( + model_modules=[ + "mergin.auth.models", + "mergin.stats.models", + "mergin.sync.models", + ] +) + + +def test_migration_lifecycle(migration_app, migration_engine): + """Exercise the full migration chain: empty DB → head (one step at a time) → schema check → partial downgrade.""" + run_migration_lifecycle( + migration_engine, + MIGRATIONS_DIR, + ordered_revisions(MIGRATIONS_DIR), + db.metadata, + DOWNGRADE_TARGET, + ) diff --git a/server/mergin/test_migrations/utils.py b/server/mergin/test_migrations/utils.py new file mode 100644 index 00000000..8f174a74 --- /dev/null +++ b/server/mergin/test_migrations/utils.py @@ -0,0 +1,241 @@ +# Copyright (C) Lutra Consulting Limited +# +# SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-MerginMaps-Commercial + +import pytest +from alembic.autogenerate import compare_metadata +from alembic.config import Config as AlembicConfig +from alembic.runtime.migration import MigrationContext +from alembic.script import ScriptDirectory +from flask_migrate import downgrade, upgrade +from pathlib import Path +from sqlalchemy import create_engine, make_url, text + +# create_simple_app and Configuration are imported lazily inside their respective +# factory functions to avoid triggering Configuration evaluation at module import +# time (before pytest-dotenv has loaded .test.env into os.environ). + +_STRUCTURAL_DIFF_TYPES = ( + "add_table", + "remove_table", + "add_column", + "remove_column", + "modify_nullable", + "add_index", + "remove_index", + "add_constraint", + "remove_constraint", +) + + +def assert_schema_consistent(engine, metadata, include_schemas=()): + """Assert ORM metadata matches the migrated schema — structural checks only. + + Column types and server defaults are excluded: alembic's reflection of + PostgreSQL-specific types (JSONB, UUID, ARRAY …) and default expressions + produces false positives without an explicit allowlist. + + include_schemas: additional PostgreSQL schemas beyond 'public' to verify. + Extra schemas are checked via direct inspection (table existence only) rather + than compare_metadata, because include_schemas=True in MigrationContext causes + public-schema tables to be reflected with an explicit "public" qualifier that + doesn't match ORM models with schema=None — producing spurious add_table diffs. + """ + from sqlalchemy import inspect as sa_inspect + + extra_schema_set = set(include_schemas) + + def _table_schema(d): + """Extract the schema from a diff entry, or None.""" + if len(d) < 2: + return None + obj = d[1] + # add_table / remove_table: obj is the Table + schema = getattr(obj, "schema", None) + if schema is not None: + return schema + # add_index / remove_index: obj is an Index whose .table has the schema + table = getattr(obj, "table", None) + return getattr(table, "schema", None) + + # Standard structural check for the public (default) schema. + # Diffs involving tables in extra schemas are excluded here — they are + # checked separately below via direct inspection to avoid the schema-name + # mismatch that include_schemas=True causes in compare_metadata. + with engine.connect() as conn: + ctx = MigrationContext.configure(conn) + diff = compare_metadata(ctx, metadata) + checked = [ + d + for d in diff + if d[0] in _STRUCTURAL_DIFF_TYPES and _table_schema(d) not in extra_schema_set + ] + + # Table-existence check for each additional schema + if include_schemas: + inspector = sa_inspect(engine) + for schema in include_schemas: + existing = set(inspector.get_table_names(schema=schema)) + expected = { + table.name + for table in metadata.tables.values() + if table.schema == schema + } + for name in sorted(expected - existing): + checked.append(("add_table", f"{schema}.{name}")) + for name in sorted(existing - expected): + checked.append(("remove_table", f"{schema}.{name}")) + + assert not checked, ( + "ORM models differ from the migrated schema — add a migration or update the filter:\n" + + "\n".join(str(d) for d in checked) + ) + + +def ordered_revisions(migrations_dir, head="head", version_locations=None): + """Return revision IDs in upgrade order (base → head). + + Works for single-head (CE), single branch label (enterprise@head), and + multi-head tuples (("enterprise@head", "service@head")) alembic setups. + Pass version_locations as a colon-separated string when the script directory + has more than one branch folder. + """ + cfg = AlembicConfig() + cfg.set_main_option("script_location", migrations_dir) + cfg.set_main_option( + "version_locations", + version_locations or str(Path(migrations_dir) / "community"), + ) + cfg.set_main_option("path_separator", "os") + script = ScriptDirectory.from_config(cfg) + upper = tuple(head) if isinstance(head, (list, tuple)) else head + return [ + rev.revision + for rev in reversed(list(script.iterate_revisions(upper=upper, lower="base"))) + ] + + +def make_migration_app(extra_config=None, model_modules=()): + """Return a module-scoped pytest fixture that provides a minimal Flask app context. + + The app context is all alembic's env.py needs: it reads SQLALCHEMY_DATABASE_URI + from current_app.config and db metadata from Flask-Migrate's extension. + + model_modules: sequence of dotted module paths to import when the fixture runs, + e.g. ["mergin.sync.models", "src.workspace.models"]. Importing here (inside the + fixture) rather than at test-module level avoids db.metadata cross-contamination + when multiple migration test files are collected in the same pytest session: models + are only added to db.metadata when the fixture first executes, not at collection time. + + extra_config: optional dict of additional config values to set on the app. + Use this when a migration reads from current_app.config for non-DB settings. + """ + + @pytest.fixture(scope="module") + def migration_app(migration_engine): + import importlib + + from ..app import create_simple_app + + for module_path in model_modules: + importlib.import_module(module_path) + + app = create_simple_app() + app.config["SQLALCHEMY_DATABASE_URI"] = migration_engine.url.render_as_string( + hide_password=False + ) + if extra_config: + app.config.update(extra_config) + ctx = app.app_context() + ctx.push() + try: + yield app + finally: + ctx.pop() + + return migration_app + + +def make_migration_engine(test_db_name, pre_migration_sql=()): + """Return a module-scoped pytest fixture that creates and tears down a migration test DB. + + pre_migration_sql: optional sequence of SQL statements executed once after the DB + is created but before any migration runs (e.g. CREATE SCHEMA, CREATE EXTENSION). + """ + + @pytest.fixture(scope="module") + def migration_engine(): + from ..config import Configuration + + base_url = make_url(Configuration.SQLALCHEMY_DATABASE_URI) + admin_url = base_url.set(database="postgres") + test_db_url = base_url.set(database=test_db_name) + + admin_engine = create_engine(admin_url, isolation_level="AUTOCOMMIT") + with admin_engine.connect() as conn: + conn.execute(text(f"DROP DATABASE IF EXISTS {test_db_name}")) + conn.execute(text(f"CREATE DATABASE {test_db_name}")) + admin_engine.dispose() + + engine = create_engine(test_db_url) + if pre_migration_sql: + with engine.connect() as conn: + for sql in pre_migration_sql: + conn.execute(text(sql)) + conn.commit() + + yield engine + engine.dispose() + + admin_engine = create_engine(admin_url, isolation_level="AUTOCOMMIT") + with admin_engine.connect() as conn: + conn.execute(text(f"DROP DATABASE IF EXISTS {test_db_name}")) + admin_engine.dispose() + + return migration_engine + + +def run_migration_lifecycle( + engine, migrations_dir, revisions, metadata, downgrade_targets, include_schemas=() +): + """Run the three-phase migration lifecycle used by all migration test suites. + + Phase 1 — upgrade one revision at a time, asserting alembic_version after each. + Works for both single-head and multi-head chains: the applied-versions + set is checked with `in` so interleaved branch revisions pass correctly. + Phase 2 — structural schema consistency check between ORM metadata and the migrated DB. + Phase 3 — downgrade to each target in downgrade_targets (str or list[str]), + asserting the final alembic_version set matches exactly. + """ + assert ( + revisions + ), "ordered_revisions returned an empty list — check migrations_dir and version_locations" + for rev in revisions: + upgrade(directory=migrations_dir, revision=rev) + with engine.connect() as conn: + applied = { + row[0] + for row in conn.execute( + text("SELECT version_num FROM alembic_version") + ).fetchall() + } + assert ( + rev in applied + ), f"Migration {rev} did not apply correctly: alembic_version is {applied!r}" + + assert_schema_consistent(engine, metadata, include_schemas) + + if isinstance(downgrade_targets, str): + downgrade_targets = [downgrade_targets] + for target in downgrade_targets: + downgrade(directory=migrations_dir, revision=target) + with engine.connect() as conn: + final = { + row[0] + for row in conn.execute( + text("SELECT version_num FROM alembic_version") + ).fetchall() + } + assert final == set( + downgrade_targets + ), f"Unexpected state after downgrade: {final!r}" diff --git a/server/migrations/community/07f2185e2428_case_insensitive_unique_idx.py b/server/migrations/community/07f2185e2428_case_insensitive_unique_idx.py index 8c966a9c..985c306a 100644 --- a/server/migrations/community/07f2185e2428_case_insensitive_unique_idx.py +++ b/server/migrations/community/07f2185e2428_case_insensitive_unique_idx.py @@ -20,18 +20,26 @@ def upgrade(): conn = op.get_bind() conn.execute( - "CREATE UNIQUE INDEX ix_user_username ON public.user (LOWER(username));" + sa.text( + "CREATE UNIQUE INDEX ix_user_username ON public.user (LOWER(username));" + ) ) - conn.execute("CREATE UNIQUE INDEX ix_user_email ON public.user (LOWER(email));") - conn.execute("ALTER TABLE public.user DROP CONSTRAINT uq_user_email;") - conn.execute("ALTER TABLE public.user DROP CONSTRAINT uq_user_username;") + conn.execute( + sa.text("CREATE UNIQUE INDEX ix_user_email ON public.user (LOWER(email));") + ) + conn.execute(sa.text("ALTER TABLE public.user DROP CONSTRAINT uq_user_email;")) + conn.execute(sa.text("ALTER TABLE public.user DROP CONSTRAINT uq_user_username;")) def downgrade(): conn = op.get_bind() - conn.execute("DROP INDEX IF EXISTS ix_user_username;") - conn.execute("DROP INDEX IF EXISTS ix_user_email;") - conn.execute("ALTER TABLE public.user ADD CONSTRAINT uq_user_email UNIQUE (email);") + conn.execute(sa.text("DROP INDEX IF EXISTS ix_user_username;")) + conn.execute(sa.text("DROP INDEX IF EXISTS ix_user_email;")) + conn.execute( + sa.text("ALTER TABLE public.user ADD CONSTRAINT uq_user_email UNIQUE (email);") + ) conn.execute( - "ALTER TABLE public.user ADD CONSTRAINT uq_user_username UNIQUE (username);" + sa.text( + "ALTER TABLE public.user ADD CONSTRAINT uq_user_username UNIQUE (username);" + ) ) diff --git a/server/migrations/community/3daefa84ce67_add_deployment_info.py b/server/migrations/community/3daefa84ce67_add_deployment_info.py index 6b7e3f64..4014ecc8 100644 --- a/server/migrations/community/3daefa84ce67_add_deployment_info.py +++ b/server/migrations/community/3daefa84ce67_add_deployment_info.py @@ -34,7 +34,9 @@ def upgrade(): uuid.UUID(os.getenv("SERVICE_ID")) if os.getenv("SERVICE_ID") else uuid.uuid4() ) conn = op.get_bind() - conn.execute(f"INSERT INTO mergin_info VALUES ('{key}')") + conn.execute( + sa.text("INSERT INTO mergin_info VALUES (:service_id)"), {"service_id": key} + ) def downgrade(): diff --git a/server/migrations/community/c13819c566e7_migrate_away_from_jsonb_columns.py b/server/migrations/community/c13819c566e7_migrate_away_from_jsonb_columns.py index 81bbddf3..55acc9d5 100644 --- a/server/migrations/community/c13819c566e7_migrate_away_from_jsonb_columns.py +++ b/server/migrations/community/c13819c566e7_migrate_away_from_jsonb_columns.py @@ -226,7 +226,7 @@ def data_downgrade(): WHERE pv.name = 1 ) UPDATE project_version pv - SET files = first_pushes.files + SET files = first_pushes.file FROM first_pushes WHERE first_pushes.version_id = pv.id; """ diff --git a/server/migrations/community/dbd428cda965_migrate_to_jsonb.py b/server/migrations/community/dbd428cda965_migrate_to_jsonb.py index d77b540f..62248d5a 100644 --- a/server/migrations/community/dbd428cda965_migrate_to_jsonb.py +++ b/server/migrations/community/dbd428cda965_migrate_to_jsonb.py @@ -10,6 +10,7 @@ """ from alembic import op +from sqlalchemy import text # revision identifiers, used by Alembic. revision = "dbd428cda965" @@ -21,28 +22,40 @@ def upgrade(): conn = op.get_bind() conn.execute( - "ALTER TABLE project_version ALTER COLUMN files SET DATA TYPE jsonb USING files::jsonb;" + text( + "ALTER TABLE project_version ALTER COLUMN files SET DATA TYPE jsonb USING files::jsonb;" + ) ) conn.execute( - "ALTER TABLE project_version ALTER COLUMN changes SET DATA TYPE jsonb USING changes::jsonb;" + text( + "ALTER TABLE project_version ALTER COLUMN changes SET DATA TYPE jsonb USING changes::jsonb;" + ) ) conn.execute( - "ALTER TABLE project ALTER COLUMN files SET DATA TYPE jsonb USING files::jsonb;" + text( + "ALTER TABLE project ALTER COLUMN files SET DATA TYPE jsonb USING files::jsonb;" + ) ) conn.execute( - "CREATE INDEX ix_project_version_files_gin ON project_version USING gin (files);" + text( + "CREATE INDEX ix_project_version_files_gin ON project_version USING gin (files);" + ) ) conn.execute( - "CREATE INDEX ix_project_version_changes_gin ON project_version USING gin (changes);" + text( + "CREATE INDEX ix_project_version_changes_gin ON project_version USING gin (changes);" + ) + ) + conn.execute( + text("CREATE INDEX ix_project_files_gin ON project USING gin (files);") ) - conn.execute("CREATE INDEX ix_project_files_gin ON project USING gin (files);") def downgrade(): conn = op.get_bind() - conn.execute("DROP INDEX IF EXISTS ix_project_version_files_gin;") - conn.execute("DROP INDEX IF EXISTS ix_project_version_changes_gin;") - conn.execute("DROP INDEX IF EXISTS ix_project_files_gin;") - conn.execute("ALTER TABLE project_version ALTER COLUMN files TYPE json;") - conn.execute("ALTER TABLE project_version ALTER COLUMN changes TYPE json;") - conn.execute("ALTER TABLE project ALTER COLUMN files TYPE json;") + conn.execute(text("DROP INDEX IF EXISTS ix_project_version_files_gin;")) + conn.execute(text("DROP INDEX IF EXISTS ix_project_version_changes_gin;")) + conn.execute(text("DROP INDEX IF EXISTS ix_project_files_gin;")) + conn.execute(text("ALTER TABLE project_version ALTER COLUMN files TYPE json;")) + conn.execute(text("ALTER TABLE project_version ALTER COLUMN changes TYPE json;")) + conn.execute(text("ALTER TABLE project ALTER COLUMN files TYPE json;"))