replicate · tempusfrangit · Feb 17, 2026 · Feb 17, 2026 · Feb 17, 2026
@@ -33,6 +33,7 @@ Development tasks are managed with [mise](https://mise.jdx.dev/). Run `mise task
 | `mise run build:coglet` | Build coglet wheel (dev) |
 | `mise run build:sdk` | Build SDK wheel |
 | `mise run install` | Build and symlink cog to /usr/local/bin |
+| `mise run docs:llm` | **IMPORTANT:** Regenerate `docs/llms.txt` after editing `docs/` or `README.md` |
 
 ### Task Naming Convention
 
@@ -238,6 +239,7 @@ For comprehensive architecture documentation, see [`architecture/`](./architectu
 
 ### Updating the docs
 - Documentation is in the `docs/` directory, written in Markdown and generated into HTML using `mkdocs`.
+- **IMPORTANT:** After editing any file in `docs/` or `README.md`, you MUST run `mise run docs:llm` to regenerate `docs/llms.txt`. This file is used by coding agents and should be kept in sync with the documentation.
 
 ## CI Tool Dependencies
 

@@ -73,7 +73,7 @@ For example:
 
     docker run -d -p 5000:5000 my-model python -m cog.server.http --threads=10
 
-## `--host`
+### `--host`
 
 By default, Cog serves to `0.0.0.0`.
 You can override this using the `--host` option.

@@ -2,7 +2,7 @@
 
 > [!TIP]
 > For information about how to run the HTTP server, 
-> see [our documentation to deploying models](deploy.md).
+> see [our documentation on deploying models](deploy.md).
 
 When you run a Docker image built by Cog, 
 it serves an HTTP API for making predictions.
@@ -115,7 +115,7 @@ Prefer: respond-async
 Endpoints for creating and canceling a prediction idempotently
 accept a `prediction_id` parameter in their path.
 The server can run only one prediction at a time.
-The client must ensure that running prediction is complete
+The client must ensure that the running prediction is complete
 before creating a new one with a different ID.
 
 Clients are responsible for providing unique prediction IDs.

@@ -4,12 +4,17 @@ Cog plays nicely with Jupyter notebooks.
 
 ## Install the jupyterlab Python package
 
-First, add `jupyterlab` to the `python_packages` array in your [`cog.yaml`](yaml.md) file:
+First, add `jupyterlab` to your `requirements.txt` file and reference it in [`cog.yaml`](yaml.md):
 
+`requirements.txt`:
+```text
+jupyterlab==3.3.4
+```
+
+`cog.yaml`:
 ```yaml
 build:
-  python_packages:
-    - "jupyterlab==3.3.4"
+  python_requirements: requirements.txt
 ```
 
 

@@ -61,7 +61,7 @@ Your Predictor class should define two methods: `setup()` and `predict()`.
 
 Prepare the model so multiple predictions run efficiently.
 
-Use this _optional_ method to include any expensive one-off operations in here like loading trained models, instantiate data transformations, etc.
+Use this _optional_ method to include expensive one-off operations like loading trained models, instantiating data transformations, etc.
 
 Many models use this method to download their weights (e.g. using [`pget`](https://github.com/replicate/pget)). This has some advantages:
 

@@ -57,7 +57,7 @@ class Trainer:
     def setup(self) -> None:
         self.base_model = ... # Load a big base model
 
-    def train(param: str) -> File:
+    def train(self, param: str) -> File:
         return self.base_model.train(param) # Train on top of a base model
 ```
 

@@ -30,7 +30,7 @@ Before beginning installation, make sure you have:
 
 ## 1. Install the GPU driver
 
-Per NVIDIA, the first order of business is to install the latest Game Ready drivers for you NVIDIA GPU.
+Per NVIDIA, the first order of business is to install the latest Game Ready drivers for your NVIDIA GPU.
 
 <https://www.nvidia.com/download/index.aspx>
 

@@ -131,7 +131,7 @@ func addUseCogBaseImageFlag(cmd *cobra.Command) {
 }
 
 func addBuildTimestampFlag(cmd *cobra.Command) {
-	cmd.Flags().Int64Var(&config.BuildSourceEpochTimestamp, "timestamp", -1, "Number of seconds sing Epoch to use for the build timestamp; this rewrites the timestamp of each layer. Useful for reproducibility. (`-1` to disable timestamp rewrites)")
+	cmd.Flags().Int64Var(&config.BuildSourceEpochTimestamp, "timestamp", -1, "Number of seconds since Epoch to use for the build timestamp; this rewrites the timestamp of each layer. Useful for reproducibility. (`-1` to disable timestamp rewrites)")
 	_ = cmd.Flags().MarkHidden("timestamp")
 }
 

@@ -252,7 +252,7 @@ func cmdPredict(cmd *cobra.Command, args []string) error {
 
 	timeout := time.Duration(setupTimeout) * time.Second
 	if err := predictor.Start(ctx, os.Stderr, timeout); err != nil {
-		// Only retry if we're using a GPU but but the user didn't explicitly select a GPU with --gpus
+		// Only retry if we're using a GPU but the user didn't explicitly select a GPU with --gpus
 		// If the user specified the wrong GPU, they are explicitly selecting a GPU and they'll want to hear about it
 		if gpus == "all" && errors.Is(err, docker.ErrMissingDeviceDriver) {
 			console.Info("Missing device driver, re-trying without GPU")

@@ -105,7 +105,7 @@ func run(cmd *cobra.Command, args []string) error {
 	console.Infof("Running '%s' in Docker with the current directory mounted as a volume...", strings.Join(args, " "))
 
 	err = docker.Run(ctx, dockerClient, runOptions)
-	// Only retry if we're using a GPU but but the user didn't explicitly select a GPU with --gpus
+	// Only retry if we're using a GPU but the user didn't explicitly select a GPU with --gpus
 	// If the user specified the wrong GPU, they are explicitly selecting a GPU and they'll want to hear about it
 	if runOptions.GPUs == "all" && err == docker.ErrMissingDeviceDriver {
 		console.Info("Missing device driver, re-trying without GPU")

@@ -97,7 +97,7 @@ func cmdServe(cmd *cobra.Command, arg []string) error {
 	console.Info("")
 
 	err = docker.Run(ctx, dockerClient, runOptions)
-	// Only retry if we're using a GPU but but the user didn't explicitly select a GPU with --gpus
+	// Only retry if we're using a GPU but the user didn't explicitly select a GPU with --gpus
 	// If the user specified the wrong GPU, they are explicitly selecting a GPU and they'll want to hear about it
 	if runOptions.GPUs == "all" && err == docker.ErrMissingDeviceDriver {
 		console.Info("Missing device driver, re-trying without GPU")

@@ -263,7 +263,7 @@ def _create_output_type(tpe: type) -> adt.OutputType:
     """Create an OutputType from a return type annotation."""
     if tpe is Any:
         print(
-            "Warning: use of Any as output type is error prone and highly-discouraged"
+            "Warning: use of Any as output type is error-prone and highly discouraged"
         )
         return adt.OutputType(kind=adt.OutputKind.SINGLE, type=_any_type)  # type: ignore[arg-type]
 

@@ -116,6 +116,6 @@ def fix_nullable_anyof(schema_node: Any) -> None:
         remove_title_next_to_ref(schema)
         fix_nullable_anyof(schema)
     except FileNotFoundError:
-        raise ConfigDoesNotExist("no cog.yaml found or present") from None
+        raise ConfigDoesNotExist("cog.yaml not found") from None
 
     print(json.dumps(schema, indent=2))
@@ -63,7 +63,7 @@ def __post_init__(self) -> None:
             self.webhook_events_filter = WebhookEvent.default_events()
 
     def dict(self, exclude_unset: bool = False) -> Dict[str, Any]:
-        """Convert to dictionary ."""
+        """Convert to dictionary."""
         result = {
             "input": self.input,
             "id": self.id,
@@ -102,7 +102,7 @@ class PredictionResponse:
     _fatal_exception: Optional[BaseException] = field(default=None, repr=False)
 
     def dict(self, exclude_unset: bool = False) -> Dict[str, Any]:
-        """Convert to dictionary ."""
+        """Convert to dictionary."""
         result = {
             "input": self.input,
             "output": self.output,