From 5b22d109d1de1e2b32aa8e3ed025d0c8352eeab9 Mon Sep 17 00:00:00 2001
From: RJ Ascani
Date: Fri, 23 Jan 2026 17:35:16 -0800
Subject: [PATCH] Fix broken URLs

---
 backends/openvino/quantizer/quantizer.py         | 2 +-
 backends/xnnpack/README.md                       | 4 ++--
 docs/source/archive/backends-cadence-legacy.md   | 2 +-
 docs/source/backends-cadence.md                  | 2 +-
 docs/source/compiler-delegate-and-partitioner.md | 7 -------
 docs/source/getting-started-architecture.md      | 6 +++---
 docs/source/intro-how-it-works.md                | 2 +-
 docs/source/ir-ops-set-definition.md             | 2 +-
 docs/source/using-executorch-export.md           | 2 +-
 examples/arm/README.md                           | 2 +-
 examples/arm/vgf_minimal_example.ipynb           | 2 +-
 examples/models/efficient_sam/README.md          | 4 ++--
 examples/models/llama/UTILS.md                   | 2 +-
 examples/raspberry_pi/pico2/README.md            | 2 +-
 examples/xnnpack/README.md                       | 2 +-
 extension/llm/export/partitioner_lib.py          | 4 ++--
 extension/llm/export/quantizer_lib.py            | 2 +-
 17 files changed, 21 insertions(+), 28 deletions(-)

diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py
index 5766013689b..9156c046100 100644
--- a/backends/openvino/quantizer/quantizer.py
+++ b/backends/openvino/quantizer/quantizer.py
@@ -295,7 +295,7 @@ def _get_unified_scales_root_quantizer_id(
         """
         Identifies the earliest quantizer node ID based on the corresponding
         `nncf_node.node_id` in the given NNCFGraph. This is required by the `_get_obs_or_fq_map` function.
-        Refer to: https://github.com/pytorch/pytorch/blob/main/torch/ao/quantization/pt2e/prepare.py#L291
+        Refer to: https://github.com/pytorch/ao/blob/main/torchao/quantization/pt2e/prepare.py#L291

         :param nncf_graph: The NNCFGraph instance.
         :param quantizer_ids: The list of quantizer IDs to evaluate.
diff --git a/backends/xnnpack/README.md b/backends/xnnpack/README.md
index 7c6a7ccbc33..36e323778df 100644
--- a/backends/xnnpack/README.md
+++ b/backends/xnnpack/README.md
@@ -133,5 +133,5 @@ create an issue on [github](https://www.github.com/pytorch/executorch/issues).

 ## See Also
 For more information about the XNNPACK Backend, please check out the following resources:
-- [XNNPACK Backend](https://pytorch.org/executorch/main/backends-xnnpack)
-- [XNNPACK Backend Internals](https://pytorch.org/executorch/main/backends/xnnpack/backend-delegates-xnnpack-reference)
+- [XNNPACK Backend](https://pytorch.org/executorch/main/backends/xnnpack/xnnpack-overview.html)
+- [XNNPACK Backend Internals](https://pytorch.org/executorch/main/backends/xnnpack/xnnpack-overview.html)
diff --git a/docs/source/archive/backends-cadence-legacy.md b/docs/source/archive/backends-cadence-legacy.md
index 21f60477c63..e1a21f280cf 100644
--- a/docs/source/archive/backends-cadence-legacy.md
+++ b/docs/source/archive/backends-cadence-legacy.md
@@ -137,7 +137,7 @@ python3 -m examples.cadence.operators.quantized_<op>_op

 ***Small Model: RNNT predictor***:

-The torchaudio [RNNT-emformer](https://pytorch.org/audio/stable/tutorials/online_asr_tutorial.html) model is an Automatic Speech Recognition (ASR) model, comprised of three different submodels: an encoder, a predictor and a joiner.
+The torchaudio [RNNT-emformer](https://github.com/pytorch/audio/tree/main/examples/asr/emformer_rnnt) model is an Automatic Speech Recognition (ASR) model, comprised of three different submodels: an encoder, a predictor and a joiner.
 The [predictor](https://github.com/pytorch/executorch/blob/main/examples/cadence/models/rnnt_predictor.py) is a sequence of basic ops (embedding, ReLU, linear, layer norm) and can be exported using:

 ```bash
diff --git a/docs/source/backends-cadence.md b/docs/source/backends-cadence.md
index 7fbf00c9f5f..ba2e5742015 100644
--- a/docs/source/backends-cadence.md
+++ b/docs/source/backends-cadence.md
@@ -197,7 +197,7 @@ In all cases the generated file is called `CadenceDemoModel.pte`.

 ***Speech/Audio Models***:

-The torchaudio [RNNT-emformer](https://pytorch.org/audio/stable/tutorials/online_asr_tutorial.html) model is an Automatic Speech Recognition (ASR) model, comprised of three different submodels:
+The torchaudio [RNNT-emformer](https://github.com/pytorch/audio/tree/main/examples/asr/emformer_rnnt) model is an Automatic Speech Recognition (ASR) model, comprised of three different submodels:

 - **RNNT Predictor**: Sequence of basic ops (embedding, ReLU, linear, layer norm)
 ```bash
diff --git a/docs/source/compiler-delegate-and-partitioner.md b/docs/source/compiler-delegate-and-partitioner.md
index c0449e7366b..9c0a3d536d0 100644
--- a/docs/source/compiler-delegate-and-partitioner.md
+++ b/docs/source/compiler-delegate-and-partitioner.md
@@ -187,13 +187,6 @@ exported_program_backend_1 = to_backend(exported_program, backend_1_parititioner())
 exported_program_backend_1_and_2 = to_backend(exported_program_backend_1, backend_2_parititioner())
 ```

-A more concrete example be found
-[here](https://github.com/pytorch/executorch/blob/main/exir/backend/test/demos/test_xnnpack_qnnpack.py).
-In this example,
-qnnpack is one backend and xnnpack is another backend. We haven't open-sourced
-these two backends delegates yet, and this example won't run out of box. It can
-be used as a reference to see how it can be done.
-
 This option is easy to try because usually all backends will implement their
 own partitioner. However this option may get different results if we change the
 order of to_backend call. If we want to have a better control on the nodes, like
diff --git a/docs/source/getting-started-architecture.md b/docs/source/getting-started-architecture.md
index 617d521b802..f7b131241e2 100644
--- a/docs/source/getting-started-architecture.md
+++ b/docs/source/getting-started-architecture.md
@@ -22,7 +22,7 @@ leverages PyTorch 2 compiler and export functionality
 [AOTAutograd](https://pytorch.org/functorch/stable/notebooks/aot_autograd_optimizations.html),
 [Quantization](https://pytorch.org/docs/main/quantization.html),
 [dynamic shapes](https://pytorch.org/get-started/pytorch-2.0/#pytorch-2x-faster-more-pythonic-and-as-dynamic-as-ever),
-[control flow](https://pytorch.org/docs/main/export.html#data-shape-dependent-control-flow),
+[control flow](https://pytorch.org/docs/main/user_guide/torch_compiler/export.html#data-shape-dependent-control-flow),
 etc.) to prepare a PyTorch program for execution on devices.

 Program preparation is often simply called AOT (ahead-of-time) because export, transformations and compilations to the program are performed before it is eventually run with the ExecuTorch runtime, written in C++. To have a lightweight runtime and small overhead in execution, we push work as much as possible to AOT.
@@ -33,14 +33,14 @@ Starting from the program source code, below are the steps you would go through

 * Like all PyTorch use cases, ExecuTorch starts from model authoring, where standard `nn.Module` eager mode PyTorch programs are created.
 * Export-specific helpers are used to represent advanced features like [control
-  flow](https://pytorch.org/docs/main/export.html#data-shape-dependent-control-flow)
+  flow](https://pytorch.org/docs/main/user_guide/torch_compiler/export.html#data-shape-dependent-control-flow)
   (for example, helper functions to trace both branches of if-else) and [dynamic
   shapes](https://pytorch.org/get-started/pytorch-2.0/#pytorch-2x-faster-more-pythonic-and-as-dynamic-as-ever)
   (for example, data dependent dynamic shape constraint).

 ### Export

-To deploy the program to the device, engineers need to have a graph representation for compiling a model to run on various backends. With [`torch.export()`](https://pytorch.org/docs/main/export.html), an [EXIR](ir-exir.md) (export intermediate representation) is generated with ATen dialect. All AOT compilations are based on this EXIR, but can have multiple dialects along the lowering path as detailed below.
+To deploy the program to the device, engineers need to have a graph representation for compiling a model to run on various backends. With [`torch.export()`](https://pytorch.org/docs/main/user_guide/torch_compiler/export.html), an [EXIR](ir-exir.md) (export intermediate representation) is generated with ATen dialect. All AOT compilations are based on this EXIR, but can have multiple dialects along the lowering path as detailed below.

 * _[ATen Dialect](ir-exir.md#aten-dialect)_. PyTorch Edge is based on PyTorch’s Tensor library ATen, which has clear contracts for efficient execution. ATen Dialect is a graph represented by ATen nodes which are fully ATen compliant. Custom operators are allowed, but must be registered with the dispatcher. It’s flatten with no module hierarchy (submodules in a bigger module), but the source code and module hierarchy are preserved in the metadata. This representation is also autograd safe.
 * Optionally, _quantization_, either QAT (quantization-aware training) or PTQ (post training quantization) can be applied to the whole ATen graph before converting to Core ATen. Quantization helps with reducing the model size, which is important for edge devices.
diff --git a/docs/source/intro-how-it-works.md b/docs/source/intro-how-it-works.md
index 3ced602fed4..be9b1e6d374 100644
--- a/docs/source/intro-how-it-works.md
+++ b/docs/source/intro-how-it-works.md
@@ -15,7 +15,7 @@ This figure illustrates the three-step process of exporting a PyTorch program, c

 ExecuTorch provides the following benefits to engineers who need to deploy machine learning models to an edge device:

-* **Export that is robust and powerful.** Export uses [`torch.export()`](https://pytorch.org/docs/main/export.html), which uses the same technology used in PyTorch 2.x to capture PyTorch programs for fast execution. While eager mode is flexible and allows experimentation in Python, it may not work well if Python isn't available or cannot deliver efficient execution. The _Export Intermediate Representation (Export IR)_ that export flow generates can describe a wide range of dynamism in PyTorch models, including control flow and dynamic shapes, which makes it a powerful tool for fully capturing existing PyTorch models with little effort.
+* **Export that is robust and powerful.** Export uses [`torch.export()`](https://pytorch.org/docs/main/user_guide/torch_compiler/export.html), which uses the same technology used in PyTorch 2.x to capture PyTorch programs for fast execution. While eager mode is flexible and allows experimentation in Python, it may not work well if Python isn't available or cannot deliver efficient execution. The _Export Intermediate Representation (Export IR)_ that export flow generates can describe a wide range of dynamism in PyTorch models, including control flow and dynamic shapes, which makes it a powerful tool for fully capturing existing PyTorch models with little effort.
 * **Operator standardization.** During the graph export process, the nodes in the graph represent operators such as addition, multiplication, or convolution. These operators are part of a small standardized list called the [Core ATen Op set](https://pytorch.org/docs/main/torch.compiler_ir.html#core-aten-ir). Most PyTorch programs can be decomposed into a graph using this small set of operators during export. Small list of standardized operators reduces the surface, needed to be covered, by third-party operator libraries as well as accelerator backends, in order to run models exported for ExecuTorch. ExecuTorch runtime ships with one such library, called portable operator library, that implements core ATen opset.
 * **Standardization for compiler interfaces (aka delegates) and the OSS ecosystem.** In addition to the _Operator standardization_ above, ExecuTorch has a [standardized interface](compiler-delegate-and-partitioner.md) for delegation to compilers. This allows third-party vendors and compilers to implement interfaces and API entry points for compilation and execution of (either partial or full) graphs targeting their specialized hardware. This provides greater flexibility in terms of hardware support and performance optimization, as well as easier integration with the PyTorch open source ecosystem for on-device AI.
 * **First-party Developer Tools** Due to the above standardization efforts, it was possible to build unified first-party [developer tools](devtools-overview.md) for ExecuTorch, where developers can export, compile, and deploy to a wide range of target devices—such as iOS, Android, and microcontrollers—using the same APIs, streamlining the process and increasing productivity. Additionally, ExecuTorch provides profiling and debugging functionality to easily inspect intermediate states, which are core parts of most developer workflows.
diff --git a/docs/source/ir-ops-set-definition.md b/docs/source/ir-ops-set-definition.md
index e9050520f95..ec9561c2d5c 100644
--- a/docs/source/ir-ops-set-definition.md
+++ b/docs/source/ir-ops-set-definition.md
@@ -1,6 +1,6 @@
 # Definition of the Core ATen Operator Set

-This page provides the description and background of the Core ATen Operator Set (opset). This page is recommended reading for those developing a new kernel library or delegate for ExecuTorch. It is also recommended that one is familiar with [`torch.export`](https://pytorch.org/docs/main/export.html) as a prerequisite; in particular, the concepts of torch FX graphs, operator decomposition, and functionalization.
+This page provides the description and background of the Core ATen Operator Set (opset). This page is recommended reading for those developing a new kernel library or delegate for ExecuTorch. It is also recommended that one is familiar with [`torch.export`](https://pytorch.org/docs/main/user_guide/torch_compiler/export.html) as a prerequisite; in particular, the concepts of torch FX graphs, operator decomposition, and functionalization.
 
 The list of operators that have been identified as a Core ATen operator can be found on the [IRs page of the PyTorch documentation website](https://pytorch.org/docs/main/torch.compiler_ir.html).

diff --git a/docs/source/using-executorch-export.md b/docs/source/using-executorch-export.md
index ae73cb5aeac..0ca8e3f48da 100644
--- a/docs/source/using-executorch-export.md
+++ b/docs/source/using-executorch-export.md
@@ -280,7 +280,7 @@ decode_ep = torch.export.export(DecodeWrapper(model), ...)

 ## Next Steps

-The PyTorch and ExecuTorch export and lowering APIs provide a high level of customizability to meet the needs of diverse hardware and models. See [torch.export](https://pytorch.org/docs/main/export.html) and [Export API Reference](export-to-executorch-api-reference.rst) for more information.
+The PyTorch and ExecuTorch export and lowering APIs provide a high level of customizability to meet the needs of diverse hardware and models. See [torch.export](https://pytorch.org/docs/main/user_guide/torch_compiler/export.html) and [Export API Reference](export-to-executorch-api-reference.rst) for more information.

 For advanced use cases, see the following:
 - [Quantization Overview](quantization-overview.md) for information on quantizing models to reduce inference time and memory footprint.
diff --git a/examples/arm/README.md b/examples/arm/README.md
index cae763e9657..797b66ab1ed 100644
--- a/examples/arm/README.md
+++ b/examples/arm/README.md
@@ -148,6 +148,6 @@ $ ./examples/arm/run.sh --model_name=mv2 --target=ethos-u55-128 --no_delegate

 ### Online Tutorial

-We also have a [tutorial](https://pytorch.org/executorch/stable/backends-arm-ethos-u) explaining the steps performed in these
+We also have a [tutorial](https://pytorch.org/executorch/stable/backends/arm-ethos-u/arm-ethos-u-overview.html) explaining the steps performed in these
 scripts, expected results, possible problems and more. It is a step-by-step
 guide you can follow to better understand this delegate.
diff --git a/examples/arm/vgf_minimal_example.ipynb b/examples/arm/vgf_minimal_example.ipynb
index b6f5f60dbd1..6c5f64ebc49 100644
--- a/examples/arm/vgf_minimal_example.ipynb
+++ b/examples/arm/vgf_minimal_example.ipynb
@@ -23,7 +23,7 @@
     "\n",
     "Before you begin:\n",
     "1. (In a clean virtual environment with a compatible Python version) Install executorch using `./install_executorch.sh`\n",
-    "2. Install MLSDK and Tosa using `examples/arm/setup.sh --disable-ethos-u-deps --enable-mlsdk-deps` (For further guidance, refer to https://docs.pytorch.org/executorch/main/tutorial-arm.html)\n",
+    "2. Install MLSDK and Tosa using `examples/arm/setup.sh --disable-ethos-u-deps --enable-mlsdk-deps` (For further guidance, refer to https://pytorch.org/executorch/main/backends/arm-ethos-u/arm-ethos-u-overview.html)\n",
     "3. Export vulkan environment variables and add MLSDK components to PATH and LD_LIBRARY_PATH using `examples/arm/arm-scratch/setup_path.sh`\n",
     "\n",
     "With all commands executed from the base `executorch` folder.\n",
diff --git a/examples/models/efficient_sam/README.md b/examples/models/efficient_sam/README.md
index 1f89a3ec5b3..120a68a434f 100644
--- a/examples/models/efficient_sam/README.md
+++ b/examples/models/efficient_sam/README.md
@@ -12,7 +12,7 @@ Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup#

 ### Exporting to Core ML

-Make sure to install the [required dependencies](https://pytorch.org/executorch/main/backends-coreml#development-requirements) for Core ML export.
+Make sure to install the [required dependencies](https://pytorch.org/executorch/main/backends/coreml/coreml-overview.html#development-requirements) for Core ML export.

 To export the model to Core ML, run the following command:

@@ -32,7 +32,7 @@ python -m examples.xnnpack.aot_compiler -m efficient_sam

 # Performance

-Tests were conducted on an Apple M1 Pro chip using the instructions for building and running Executorch with [Core ML](https://pytorch.org/executorch/main/backends-coreml#runtime-integration) and [XNNPACK](https://pytorch.org/executorch/main/tutorial-xnnpack-delegate-lowering#running-the-xnnpack-model-with-cmake) backends.
+Tests were conducted on an Apple M1 Pro chip using the instructions for building and running Executorch with [Core ML](https://pytorch.org/executorch/main/backends/coreml/coreml-overview.html#runtime-integration) and [XNNPACK](https://pytorch.org/executorch/main/tutorial-xnnpack-delegate-lowering#running-the-xnnpack-model-with-cmake) backends.

 | Backend Configuration | Average Inference Time (seconds) |
 | ---------------------- | -------------------------------- |
diff --git a/examples/models/llama/UTILS.md b/examples/models/llama/UTILS.md
index 25bd7f77080..9fe7a8a6895 100644
--- a/examples/models/llama/UTILS.md
+++ b/examples/models/llama/UTILS.md
@@ -25,7 +25,7 @@ From `executorch` root:
 ## Smaller model delegated to other backends

 Currently we supported lowering the stories model to other backends, including, CoreML, MPS and QNN. Please refer to the instruction
-for each backend ([CoreML](https://pytorch.org/executorch/main/backends-coreml), [MPS](https://pytorch.org/executorch/main/backends-mps), [QNN](https://pytorch.org/executorch/main/backends-qualcomm)) before trying to lower them. After the backend library is installed, the script to export a lowered model is
+for each backend ([CoreML](https://pytorch.org/executorch/main/backends/coreml/coreml-overview.html), [MPS](https://pytorch.org/executorch/main/backends/mps/mps-overview.html), [QNN](https://pytorch.org/executorch/main/backends-qualcomm)) before trying to lower them. After the backend library is installed, the script to export a lowered model is

 - Lower to CoreML: `python -m extension.llm.export.export_llm model.use_kv_cache=True model.enable_dynamic_shape=False backend.coreml.enabled=True base.checkpoint=stories110M.pt base.params=params.json`
 - MPS: `python -m extension.llm.export.export_llm model.use_kv_cache=True model.enable_dynamic_shape=False backend.mps.enabled=True base.checkpoint=stories110M.pt base.params=params.json`
diff --git a/examples/raspberry_pi/pico2/README.md b/examples/raspberry_pi/pico2/README.md
index 439b4ded806..40ad72a2faa 100644
--- a/examples/raspberry_pi/pico2/README.md
+++ b/examples/raspberry_pi/pico2/README.md
@@ -45,7 +45,7 @@ For more details , refer to the following guides:

 ## (Prerequisites) Prepare the Environment for Arm
 Setup executorch development environment. Also see instructions for setting up the environment for Arm.
-Make sure you have the toolchain configured correctly. Refer to this [setup](https://docs.pytorch.org/executorch/main/backends-arm-ethos-u.html#development-requirements) for more details.
+Make sure you have the toolchain configured correctly. Refer to this [setup](https://pytorch.org/executorch/main/backends/arm-ethos-u/arm-ethos-u-overview.html#development-requirements) for more details.
 
 ```bash
 which arm-none-eabi-gcc
diff --git a/examples/xnnpack/README.md b/examples/xnnpack/README.md
index ad09bb90d37..f00fd476a50 100644
--- a/examples/xnnpack/README.md
+++ b/examples/xnnpack/README.md
@@ -1,7 +1,7 @@
 # XNNPACK Backend
 [XNNPACK](https://github.com/google/XNNPACK) is a library of optimized neural network operators for ARM and x86 CPU platforms. Our delegate lowers models to run using these highly optimized CPU operators. You can try out lowering and running some example models in the demo. Please refer to the following docs for information on the XNNPACK Delegate
-- [XNNPACK Backend Delegate Overview](https://pytorch.org/executorch/main/backends-xnnpack)
+- [XNNPACK Backend Delegate Overview](https://pytorch.org/executorch/main/backends/xnnpack/xnnpack-overview.html)
 - [XNNPACK Delegate Export Tutorial](https://pytorch.org/executorch/main/tutorial-xnnpack-delegate-lowering)

diff --git a/extension/llm/export/partitioner_lib.py b/extension/llm/export/partitioner_lib.py
index 03ac2bd91e4..31df8e00155 100644
--- a/extension/llm/export/partitioner_lib.py
+++ b/extension/llm/export/partitioner_lib.py
@@ -61,7 +61,7 @@ def get_mps_partitioner(use_kv_cache: bool = False):
         )
     except ImportError:
         raise ImportError(
-            "Please install the MPS backend follwing https://pytorch.org/executorch/main/backends-mps"
+            "Please install the MPS backend following https://pytorch.org/executorch/main/backends/mps/mps-overview.html"
         )

     compile_specs = [CompileSpec("use_fp16", bytes([True]))]
@@ -98,7 +98,7 @@ def get_coreml_partitioner(
         )
     except ImportError:
         raise ImportError(
-            "Please install the CoreML backend follwing https://pytorch.org/executorch/main/backends-coreml"
+            "Please install the CoreML backend following https://pytorch.org/executorch/main/backends/coreml/coreml-overview.html"
             + "; for buck users, please add example dependancies: //executorch/backends/apple/coreml:backend, and etc"
         )

diff --git a/extension/llm/export/quantizer_lib.py b/extension/llm/export/quantizer_lib.py
index 592a6666dfa..977c925a25b 100644
--- a/extension/llm/export/quantizer_lib.py
+++ b/extension/llm/export/quantizer_lib.py
@@ -267,7 +267,7 @@ def get_coreml_quantizer(pt2e_quantize: str):
         from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer
     except ImportError:
         raise ImportError(
-            "Please install the CoreML backend follwing https://pytorch.org/executorch/main/backends-coreml"
+            "Please install the CoreML backend following https://pytorch.org/executorch/main/backends/coreml/coreml-overview.html"
         )

     if pt2e_quantize == "coreml_8a_c8w":