Skip to content

Commit 2be8ea5

Browse files
committed
feat: upgrade default backend to ONNX with YOLO11n/YOLO26n models
- Switch default backend from tflite (SSD MobileNet V1) to onnx (YOLO11n)
- Add YOLO11n (10.2 MB, ~30% faster than YOLOv8n) and YOLO26n (9.5 MB, ~52% faster, NMS-free) ONNX models
- Add postprocess_yolo26() for the YOLO26 end-to-end output format (1, 300, 6)
- Make tensorflow optional (INSTALL_TENSORFLOW=1 build arg) — saves ~1 GB of Docker image size
- Make the tflite backend import lazy so the app starts without tensorflow installed
- Fix /api/v1/backends endpoint: distinguish 'unavailable' (dependencies missing) from 'error' (model missing)
- Use backend-specific confidence-threshold defaults in the /detect endpoint
- Unpin the onnxruntime version to pick up the latest optimizations
1 parent fd04573 commit 2be8ea5

8 files changed

Lines changed: 119 additions & 26 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ ENV/
3434
# TFLite models and data
3535
backends/tflite/models/
3636

37+
# PyTorch weights (used only for export, not deployed)
38+
*.pt
39+
3740
# Logs
3841
*.log
3942

Dockerfile

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ RUN apt-get update \
1414
libglib2.0-0 \
1515
&& rm -rf /var/lib/apt/lists/*
1616

17-
# Install Python deps (keeps this image self-contained for Unraid)
17+
# Core Python deps (ONNX is now the default backend)
1818
COPY Pipfile Pipfile.lock ./
1919
RUN python -m pip install --upgrade pip \
2020
&& python -m pip install \
@@ -26,17 +26,21 @@ RUN python -m pip install --upgrade pip \
2626
pillow \
2727
exceptiongroup \
2828
numpy \
29-
tensorflow \
30-
onnxruntime==1.23.2 \
29+
onnxruntime \
3130
opencv-python \
3231
scipy \
3332
shapely
3433

34+
# Optional: install tensorflow for tflite backend support
35+
# Usage: docker build --build-arg INSTALL_TENSORFLOW=1 ...
36+
ARG INSTALL_TENSORFLOW=0
37+
RUN if [ "$INSTALL_TENSORFLOW" = "1" ]; then python -m pip install tensorflow; fi
38+
3539
COPY . .
3640

37-
# Bake the default TFLite model into the image (can be disabled at build time).
38-
# Usage: docker build --build-arg DOWNLOAD_DEFAULT_MODEL=0 ...
39-
ARG DOWNLOAD_DEFAULT_MODEL=1
41+
# Optionally bake the TFLite model (only useful if tensorflow is installed)
42+
# Usage: docker build --build-arg DOWNLOAD_DEFAULT_MODEL=1 ...
43+
ARG DOWNLOAD_DEFAULT_MODEL=0
4044
RUN if [ "$DOWNLOAD_DEFAULT_MODEL" = "1" ]; then python scripts/download_model.py; fi
4145

4246
EXPOSE 8000

api/v1/endpoints/detection.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from models.detection import DetectionResponse, DetectionResult, ImageResponse
1010
from models.zone import ZoneConfiguration
11-
from backends.factory import get_backend
11+
from backends.factory import get_backend, BACKEND_REGISTRY
1212
from utils.image import validate_image, preprocess_image, image_to_bytes
1313
from utils.zones import filter_detections_by_zones, apply_class_filter, apply_size_filter
1414
from config import settings
@@ -85,8 +85,14 @@ async def detect_objects(
8585
logger.error(f"Image validation/processing failed: {str(e)}")
8686
raise HTTPException(status_code=400, detail=f"Invalid image: {str(e)}")
8787

88-
# Set confidence threshold
89-
threshold = confidence_threshold or settings.TFLITE_CONFIDENCE_THRESHOLD
88+
# Set confidence threshold — use the backend-specific default
89+
if backend == "onnx":
90+
default_threshold = settings.ONNX_CONFIDENCE_THRESHOLD
91+
elif backend == "tflite":
92+
default_threshold = settings.TFLITE_CONFIDENCE_THRESHOLD
93+
else:
94+
default_threshold = 0.5
95+
threshold = confidence_threshold or default_threshold
9096
logger.debug(f"Using confidence threshold: {threshold}")
9197

9298
# Perform detection
@@ -184,18 +190,25 @@ async def list_backends():
184190
"""
185191
backends = {}
186192
for backend_name in settings.AVAILABLE_BACKENDS:
193+
if backend_name not in BACKEND_REGISTRY:
194+
backends[backend_name] = {
195+
"status": "unavailable",
196+
"error": f"Backend '{backend_name}' dependencies not installed"
197+
}
198+
continue
187199
try:
188200
detector = get_backend(backend_name)
189201
backends[backend_name] = {
190202
"status": "available",
191203
"model_info": detector.get_model_info()
192204
}
193205
except Exception as e:
206+
logger.warning(f"Backend {backend_name} failed to initialize: {e}")
194207
backends[backend_name] = {
195208
"status": "error",
196209
"error": str(e)
197210
}
198-
211+
199212
return {
200213
"default_backend": settings.DEFAULT_BACKEND,
201214
"backends": backends

backends/factory.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
from typing import Dict, Type
22

33
from backends.base import DetectionBackend
4-
from backends.tflite.backend import TFLiteBackend
54
from config import settings
65

7-
# Lazy imports for optional backends
6+
# Lazy imports for all optional backends
7+
try:
8+
from backends.tflite.backend import TFLiteBackend
9+
TFLITE_AVAILABLE = True
10+
except ImportError:
11+
TFLITE_AVAILABLE = False
12+
813
try:
914
from backends.onnx.backend import ONNXBackend
1015
ONNX_AVAILABLE = True
@@ -25,11 +30,11 @@
2530

2631

2732
# Registry of available backends
28-
BACKEND_REGISTRY: Dict[str, Type[DetectionBackend]] = {
29-
"tflite": TFLiteBackend,
30-
}
33+
BACKEND_REGISTRY: Dict[str, Type[DetectionBackend]] = {}
34+
35+
if TFLITE_AVAILABLE:
36+
BACKEND_REGISTRY["tflite"] = TFLiteBackend
3137

32-
# Add optional backends if available
3338
if ONNX_AVAILABLE:
3439
BACKEND_REGISTRY["onnx"] = ONNXBackend
3540

backends/onnx/backend.py

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ def __init__(self, model_path: str, labels_path: str, confidence_threshold: floa
1515
iou_threshold: float = 0.45, model_type: str = "yolov8"):
1616
"""
1717
Initialize the ONNX detection backend.
18-
18+
1919
Args:
2020
model_path: Path to the ONNX model file
2121
labels_path: Path to the labels file
2222
confidence_threshold: Default confidence threshold
2323
iou_threshold: IoU threshold for NMS
24-
model_type: Type of model (yolov8, yolov5, yolox, etc.)
24+
model_type: Type of model (yolov8, yolo11, yolo26, yolov5, yolox, etc.)
2525
"""
2626
self.model_path = model_path
2727
self.labels_path = labels_path
@@ -136,7 +136,10 @@ def postprocess_yolov8(self, outputs: List[np.ndarray], scale: float,
136136
pad: Tuple[int, int], confidence_threshold: float,
137137
orig_width: int, orig_height: int) -> List[DetectionResult]:
138138
"""
139-
Postprocess YOLOv8 outputs.
139+
Postprocess YOLOv8/YOLO11 outputs.
140+
141+
Both YOLOv8 and YOLO11 share the same output format:
142+
(1, 84, 8400) = (batch, 4_bbox + 80_classes, num_predictions)
140143
141144
Args:
142145
outputs: Model outputs
@@ -149,7 +152,7 @@ def postprocess_yolov8(self, outputs: List[np.ndarray], scale: float,
149152
Returns:
150153
List of DetectionResult objects
151154
"""
152-
# YOLOv8 output shape: (1, 84, 8400) or (1, num_classes+4, num_predictions)
155+
# YOLOv8/YOLO11 output shape: (1, 84, 8400) or (1, num_classes+4, num_predictions)
153156
# Format: [x_center, y_center, width, height, class_scores...]
154157
output = outputs[0]
155158

@@ -223,6 +226,69 @@ def postprocess_yolov8(self, outputs: List[np.ndarray], scale: float,
223226

224227
return results
225228

229+
def postprocess_yolo26(self, outputs: List[np.ndarray], scale: float,
230+
pad: Tuple[int, int], confidence_threshold: float,
231+
orig_width: int, orig_height: int) -> List[DetectionResult]:
232+
"""
233+
Postprocess YOLO26 (NMS-free, end-to-end) outputs.
234+
235+
YOLO26 output shape: (1, 300, 6) = (batch, max_detections, [x1, y1, x2, y2, conf, class_id])
236+
Coordinates are in input-image pixel space (640×640 with letterbox padding).
237+
NMS is already applied inside the model — no manual NMS needed.
238+
239+
Args:
240+
outputs: Model outputs
241+
scale: Scale factor used in preprocessing
242+
pad: Padding used in preprocessing (pad_w, pad_h)
243+
confidence_threshold: Confidence threshold
244+
orig_width: Original image width
245+
orig_height: Original image height
246+
247+
Returns:
248+
List of DetectionResult objects
249+
"""
250+
output = outputs[0] # Shape: (1, 300, 6)
251+
if len(output.shape) == 3:
252+
output = output[0] # Shape: (300, 6)
253+
254+
pad_w, pad_h = pad
255+
results = []
256+
257+
for detection in output:
258+
x1, y1, x2, y2, conf, class_id = detection
259+
260+
if conf < confidence_threshold:
261+
continue
262+
263+
# Remove padding and scale back to original image coordinates
264+
x_min = (x1 - pad_w) / scale
265+
y_min = (y1 - pad_h) / scale
266+
x_max = (x2 - pad_w) / scale
267+
y_max = (y2 - pad_h) / scale
268+
269+
# Normalize to [0, 1] using original image dimensions
270+
x_min_norm = max(0.0, min(1.0, x_min / orig_width))
271+
y_min_norm = max(0.0, min(1.0, y_min / orig_height))
272+
x_max_norm = max(0.0, min(1.0, x_max / orig_width))
273+
y_max_norm = max(0.0, min(1.0, y_max / orig_height))
274+
275+
cid = int(class_id)
276+
label = self.labels[cid] if cid < len(self.labels) else f"class_{cid}"
277+
278+
detection_result = DetectionResult(
279+
label=label,
280+
confidence=float(conf),
281+
bounding_box=BoundingBox(
282+
x_min=x_min_norm,
283+
y_min=y_min_norm,
284+
x_max=x_max_norm,
285+
y_max=y_max_norm
286+
)
287+
)
288+
results.append(detection_result)
289+
290+
return results
291+
226292
def _nms(self, boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> List[int]:
227293
"""
228294
Non-Maximum Suppression.
@@ -299,10 +365,12 @@ def detect(self, image: Image.Image, confidence_threshold: float = None) -> List
299365
outputs = self.session.run(self.output_names, {self.input_name: input_data})
300366

301367
# Postprocess based on model type
302-
if self.model_type in ["yolov8", "yolov5", "yolox"]:
368+
if self.model_type == "yolo26":
369+
results = self.postprocess_yolo26(outputs, scale, pad, confidence_threshold, orig_width, orig_height)
370+
elif self.model_type in ["yolov8", "yolo11", "yolov5", "yolox"]:
303371
results = self.postprocess_yolov8(outputs, scale, pad, confidence_threshold, orig_width, orig_height)
304372
else:
305-
# Default to YOLOv8 postprocessing
373+
# Default to YOLOv8/YOLO11 postprocessing (same format)
306374
results = self.postprocess_yolov8(outputs, scale, pad, confidence_threshold, orig_width, orig_height)
307375

308376
return results

backends/onnx/models/yolo11n.onnx

10.2 MB
Binary file not shown.

backends/onnx/models/yolo26n.onnx

9.48 MB
Binary file not shown.

config.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,20 @@ class Settings(BaseSettings):
1010
PROJECT_NAME: str = "Light Object Detection API"
1111

1212
# Backend settings
13-
DEFAULT_BACKEND: str = "tflite"
14-
AVAILABLE_BACKENDS: List[str] = ["tflite", "onnx", "opencv", "edgetpu"]
13+
DEFAULT_BACKEND: str = "onnx"
14+
AVAILABLE_BACKENDS: List[str] = ["onnx", "tflite", "opencv", "edgetpu"]
1515

1616
# TFLite settings
1717
TFLITE_MODEL_PATH: str = "backends/tflite/models/ssd_mobilenet_v1.tflite"
1818
TFLITE_LABELS_PATH: str = "backends/tflite/models/labelmap.txt"
1919
TFLITE_CONFIDENCE_THRESHOLD: float = 0.5
2020

2121
# ONNX settings
22-
ONNX_MODEL_PATH: str = "backends/onnx/models/yolov8n.onnx"
22+
ONNX_MODEL_PATH: str = "backends/onnx/models/yolo11n.onnx"
2323
ONNX_LABELS_PATH: str = "backends/onnx/models/coco.txt"
2424
ONNX_CONFIDENCE_THRESHOLD: float = 0.5
2525
ONNX_IOU_THRESHOLD: float = 0.45
26-
ONNX_MODEL_TYPE: str = "yolov8"
26+
ONNX_MODEL_TYPE: str = "yolo11"
2727

2828
# OpenCV DNN settings
2929
OPENCV_MODEL_PATH: str = "backends/opencv/models/yolov4-tiny.weights"

0 commit comments

Comments (0)