From c93bad8563ec5b87dae8762e581fe436875e40f8 Mon Sep 17 00:00:00 2001 From: Xin Xie Date: Tue, 26 May 2026 07:22:02 -0700 Subject: [PATCH] Add cache-local qubit remapping prototype --- apps/qsim_base.cc | 34 ++++-- lib/BUILD | 16 +++ lib/fuser.h | 53 +++++++++ lib/qubit_remap.h | 276 ++++++++++++++++++++++++++++++++++++++++++++++ lib/run_qsim.h | 147 +++++++++++++++++++----- 5 files changed, 488 insertions(+), 38 deletions(-) create mode 100644 lib/qubit_remap.h diff --git a/apps/qsim_base.cc b/apps/qsim_base.cc index a5a7a0b1b..0d53f0d55 100644 --- a/apps/qsim_base.cc +++ b/apps/qsim_base.cc @@ -24,13 +24,14 @@ #include "../lib/fuser_mqubit.h" #include "../lib/io_file.h" #include "../lib/operation.h" +#include "../lib/qubit_remap.h" #include "../lib/run_qsim.h" #include "../lib/simmux.h" #include "../lib/util_cpu.h" constexpr char usage[] = "usage:\n ./qsim_base -c circuit -d maxtime " "-s seed -t threads -f max_fused_size " - "-v verbosity -z\n"; + "-v verbosity -r -z\n"; struct Options { std::string circuit_file; @@ -39,6 +40,7 @@ struct Options { unsigned num_threads = 1; unsigned max_fused_size = 2; unsigned verbosity = 0; + bool cache_local_remap = false; bool denormals_are_zeros = false; }; @@ -47,7 +49,7 @@ Options GetOptions(int argc, char* argv[]) { int k; - while ((k = getopt(argc, argv, "c:d:s:t:f:v:z")) != -1) { + while ((k = getopt(argc, argv, "c:d:s:t:f:v:rz")) != -1) { switch (k) { case 'c': opt.circuit_file = optarg; @@ -67,6 +69,9 @@ Options GetOptions(int argc, char* argv[]) { case 'v': opt.verbosity = std::atoi(optarg); break; + case 'r': + opt.cache_local_remap = true; + break; case 'z': opt.denormals_are_zeros = true; break; @@ -91,7 +96,8 @@ bool ValidateOptions(const Options& opt) { template void PrintAmplitudes( - unsigned num_qubits, const StateSpace& state_space, const State& state) { + unsigned num_qubits, const StateSpace& state_space, const State& state, + const qsim::qubit_remap::QubitMap& logical_to_physical = {}) { static constexpr char const* bits[8] = { "000", "001", "010", "011", "100", "101", "110", "111", }; @@ -100,7 +106,9 @@ void PrintAmplitudes( unsigned s = 3 - std::min(unsigned{3}, num_qubits); for (uint64_t i = 0; i < size; ++i) { - auto a = state_space.GetAmpl(state, i); + uint64_t physical_i = + qsim::qubit_remap::LogicalToPhysicalIndex(i, logical_to_physical); + auto a = state_space.GetAmpl(state, physical_i); qsim::IO::messagef("%s:%16.8g%16.8g%16.8g\n", bits[i] + s, std::real(a), std::imag(a), std::norm(a)); } @@ -141,13 +149,13 @@ int main(int argc, char* argv[]) { unsigned num_threads; }; - using Simulator = Factory::Simulator; - using StateSpace = Simulator::StateSpace; + using StateSpace = Factory::StateSpace; using State = StateSpace::State; using Fuser = MultiQubitGateFuser; using Runner = QSimRunner; - StateSpace state_space = Factory(opt.num_threads).CreateStateSpace(); + Factory factory(opt.num_threads); + StateSpace state_space = factory.CreateStateSpace(); State state = state_space.Create(circuit.num_qubits); if (state_space.IsNull(state)) { @@ -155,15 +163,23 @@ int main(int argc, char* argv[]) { return 1; } + qubit_remap::QubitMap logical_to_physical; + state_space.SetStateZero(state); Runner::Parameter param; param.max_fused_size = opt.max_fused_size; param.seed = opt.seed; param.verbosity = opt.verbosity; + param.cache_local_remap = opt.cache_local_remap; + + auto simulator = factory.CreateSimulator(); + bool ok = Runner::Run(param, circuit, state_space, simulator, state, + logical_to_physical); - if (Runner::Run(param, Factory(opt.num_threads), circuit, state)) { - PrintAmplitudes(circuit.num_qubits, state_space, state); + if (ok) { + PrintAmplitudes(circuit.num_qubits, state_space, state, + logical_to_physical); } return 0; diff --git a/lib/BUILD b/lib/BUILD index ddc2ed2bc..587a5b7f8 100644 --- a/lib/BUILD +++ b/lib/BUILD @@ -50,6 +50,7 @@ cc_library( "operation_base.h", "parfor.h", "qtrajectory.h", + "qubit_remap.h", "run_qsim.h", "run_qsimh.h", "seqfor.h", @@ -127,6 +128,7 @@ cuda_library( "operation_base.h", "parfor.h", "qtrajectory.h", + "qubit_remap.h", "run_qsim.h", "run_qsimh.h", "seqfor.h", @@ -274,6 +276,7 @@ cc_library( "operation.h", "operation_base.h", "parfor.h", + "qubit_remap.h", "run_qsim.h", "seqfor.h", "simmux.h", @@ -515,6 +518,18 @@ cc_library( ], ) +cc_library( + name = "qubit_remap", + hdrs = ["qubit_remap.h"], + deps = [ + ":circuit", + ":fuser", + ":gate", + ":matrix", + ":operation", + ], +) + cc_library( name = "fuser_basic", hdrs = ["fuser_basic.h"], @@ -560,6 +575,7 @@ cc_library( ":gate", ":gate_appl", ":operation_base", + ":qubit_remap", ":util", ], ) diff --git a/lib/fuser.h b/lib/fuser.h index 96272c3a5..46e970751 100644 --- a/lib/fuser.h +++ b/lib/fuser.h @@ -15,6 +15,7 @@ #ifndef FUSER_H_ #define FUSER_H_ +#include #include #include @@ -178,6 +179,58 @@ inline void CalculateFusedMatrix(FusedGate& gate) { } } +/** + * Multiplies component gate matrices for a range of fused gates. + * @param gbeg, gend The iterator range [gbeg, gend) of fused gates. + */ +template +inline void CalculateFusedMatrices(Iterator gbeg, Iterator gend) { + for (auto g = gbeg; g != gend; ++g) { + if (!g->ParentIsDecomposed()) { + CalculateFusedMatrix(*g); + } + } +} + +/** + * Multiplies component gate matrices for a vector of fused gates. + * @param gates The vector of fused gates. + */ +template +inline void CalculateFusedMatrices(std::vector& gates) { + CalculateFusedMatrices(gates.begin(), gates.end()); +} + +/** + * Rebuilds fused-gate qubit lists and matrices from component gates. + * @param fused_gates The vector of fused gates to rebuild. + */ +template +inline void RebuildFusedGates(std::vector& fused_gates) { + for (auto& op : fused_gates) { + auto* fused_gate = OpGetAlternative>( + op); + if (fused_gate == nullptr) { + continue; + } + + auto& qubits = fused_gate->qubits; + qubits.clear(); + + for (const auto& gate : fused_gate->gates) { + const auto& gate_qubits = OpQubits(gate); + qubits.insert(qubits.end(), gate_qubits.begin(), gate_qubits.end()); + } + + std::sort(qubits.begin(), qubits.end()); + qubits.erase(std::unique(qubits.begin(), qubits.end()), qubits.end()); + + if (!fused_gate->ParentIsDecomposed()) { + CalculateFusedMatrix(*fused_gate); + } + } +} + } // namespace qsim #endif // FUSER_H_ diff --git a/lib/qubit_remap.h b/lib/qubit_remap.h new file mode 100644 index 000000000..52faf8751 --- /dev/null +++ b/lib/qubit_remap.h @@ -0,0 +1,276 @@ +// Copyright 2026 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef QUBIT_REMAP_H_ +#define QUBIT_REMAP_H_ + +#include +#include +#include +#include + +#include "circuit.h" +#include "fuser.h" +#include "gate.h" +#include "matrix.h" +#include "operation.h" + +namespace qsim { +namespace qubit_remap { + +namespace detail { + +using QubitMap = std::vector; + +inline QubitMap MakeIdentityQubitMap(unsigned num_qubits) { + QubitMap map(num_qubits); + std::iota(map.begin(), map.end(), 0); + return map; +} + +inline const Qubits& EmptyQubits() { + static const Qubits qubits; + return qubits; +} + +template +inline const Qubits& GetControlQubits(const Operation& op) { + using FP = OpFpType; + if (const auto* gate = OpGetAlternative>(op)) { + return gate->controlled_by; + } + + return EmptyQubits(); +} + +template +inline void AddGateQubitScores( + const Operation& op, std::vector& scores) { + const auto& qubits = OpQubits(op); + uint64_t target_weight = qubits.size(); + + for (auto q : qubits) { + scores[q] += target_weight; + } + + for (auto q : GetControlQubits(op)) { + scores[q] += 1; + } +} + +template +inline QubitMap MakeCacheLocalQubitMap( + unsigned num_qubits, const std::vector& ops) { + auto logical_order = MakeIdentityQubitMap(num_qubits); + std::vector scores(num_qubits, 0); + + for (const auto& op : ops) { + AddGateQubitScores(op, scores); + } + + std::stable_sort(logical_order.begin(), logical_order.end(), + [&scores](auto a, auto b) { + return scores[a] > scores[b]; + }); + + QubitMap logical_to_physical(num_qubits); + for (unsigned physical = 0; physical < num_qubits; ++physical) { + logical_to_physical[logical_order[physical]] = physical; + } + + return logical_to_physical; +} + +inline QubitMap InvertQubitMap(const QubitMap& qubit_map) { + QubitMap inverse(qubit_map.size()); + + for (unsigned i = 0; i < qubit_map.size(); ++i) { + inverse[qubit_map[i]] = i; + } + + return inverse; +} + +inline unsigned GetLowestSetBit(uint64_t mask) { + unsigned qubit = 0; + while ((mask & uint64_t{1}) == 0) { + mask >>= 1; + ++qubit; + } + + return qubit; +} + +inline uint64_t RemapIndex(uint64_t index, const QubitMap& qubit_map) { + if (qubit_map.empty()) { + return index; + } + + uint64_t remapped_index = 0; + + while (index != 0) { + unsigned qubit = GetLowestSetBit(index); + if (qubit < qubit_map.size()) { + remapped_index |= uint64_t{1} << qubit_map[qubit]; + } else { + remapped_index |= uint64_t{1} << qubit; + } + index &= index - 1; + } + + return remapped_index; +} + +template +inline void RemapMeasurementResult( + const QubitMap& logical_to_physical, MeasurementResult& result) { + if (logical_to_physical.empty() || !result.valid) { + return; + } + + auto physical_to_logical = InvertQubitMap(logical_to_physical); + result.mask = RemapIndex(result.mask, physical_to_logical); + result.bits = RemapIndex(result.bits, physical_to_logical); +} + +template +inline void RemapTargetQubits(const QubitMap& logical_to_physical, Gate& gate) { + for (auto& q : gate.qubits) { + q = logical_to_physical[q]; + } + + auto perm = NormalToGateOrderPermutation(gate.qubits); + if (!perm.empty()) { + if (!gate.matrix.empty()) { + MatrixShuffle(perm, gate.qubits.size(), gate.matrix); + } + + gate.swapped = true; + std::sort(gate.qubits.begin(), gate.qubits.end()); + } +} + +template +inline void RemapControlQubits(const QubitMap& logical_to_physical, Gate& gate) { + struct Control { + unsigned qubit; + bool value; + }; + + std::vector controls; + controls.reserve(gate.controlled_by.size()); + + for (unsigned i = 0; i < gate.controlled_by.size(); ++i) { + controls.push_back({ + logical_to_physical[gate.controlled_by[i]], + static_cast((gate.cmask >> i) & 1)}); + } + + std::sort(controls.begin(), controls.end(), + [](const Control& a, const Control& b) { + return a.qubit < b.qubit; + }); + + gate.controlled_by.clear(); + gate.cmask = 0; + + for (unsigned i = 0; i < controls.size(); ++i) { + gate.controlled_by.push_back(controls[i].qubit); + gate.cmask |= uint64_t{controls[i].value} << i; + } +} + +template +inline void RemapGate(const QubitMap& logical_to_physical, Gate& gate) { + RemapTargetQubits(logical_to_physical, gate); +} + +template +inline void RemapGate( + const QubitMap& logical_to_physical, ControlledGate& gate) { + RemapTargetQubits(logical_to_physical, gate); + RemapControlQubits(logical_to_physical, gate); +} + +inline void RemapQubits( + const QubitMap& logical_to_physical, Qubits& qubits) { + for (auto& q : qubits) { + q = logical_to_physical[q]; + } +} + +template +inline void RemapOperation( + const QubitMap& logical_to_physical, Operation& op) { + using FP = OpFpType; + + if (auto* gate = OpGetAlternative>(op)) { + RemapGate(logical_to_physical, *gate); + } else if (auto* gate = OpGetAlternative>(op)) { + RemapGate(logical_to_physical, *gate); + } else { + RemapQubits(logical_to_physical, OpBaseOperation(op).qubits); + } +} + +template +inline void RemapCircuit( + const QubitMap& logical_to_physical, Circuit& circuit) { + for (auto& op : circuit.ops) { + RemapOperation(logical_to_physical, op); + } +} + +template +inline void RemapCopiedMeasurements( + const QubitMap& logical_to_physical, std::vector& fused_ops) { + for (auto& op : fused_ops) { + if (auto* measurement = OpGetAlternative(op)) { + RemapQubits(logical_to_physical, measurement->qubits); + } + } +} + +} // namespace detail + +using QubitMap = detail::QubitMap; + +inline uint64_t LogicalToPhysicalIndex( + uint64_t logical_index, const QubitMap& logical_to_physical) { + return detail::RemapIndex(logical_index, logical_to_physical); +} + +template +inline QubitMap RemapCircuit(Circuit& circuit) { + auto logical_to_physical = + detail::MakeCacheLocalQubitMap(circuit.num_qubits, circuit.ops); + detail::RemapCircuit(logical_to_physical, circuit); + return logical_to_physical; +} + +template +inline QubitMap RemapCircuit(std::vector& fused_ops, + Circuit& circuit) { + auto logical_to_physical = + detail::MakeCacheLocalQubitMap(circuit.num_qubits, fused_ops); + detail::RemapCircuit(logical_to_physical, circuit); + detail::RemapCopiedMeasurements(logical_to_physical, fused_ops); + RebuildFusedGates(fused_ops); + return logical_to_physical; +} + +} // namespace qubit_remap +} // namespace qsim + +#endif // QUBIT_REMAP_H_ diff --git a/lib/run_qsim.h b/lib/run_qsim.h index 1c3b8a970..f5cabe78e 100644 --- a/lib/run_qsim.h +++ b/lib/run_qsim.h @@ -17,12 +17,14 @@ #include #include +#include #include #include "circuit.h" #include "gate.h" #include "gate_appl.h" #include "operation_base.h" +#include "qubit_remap.h" #include "util.h" namespace qsim { @@ -39,6 +41,12 @@ struct QSimRunner final { using State = typename StateSpace::State; using MeasurementResult = typename StateSpace::MeasurementResult; + template + struct IsCircuit : std::false_type {}; + + template + struct IsCircuit> : std::true_type {}; + /** * User-specified parameters for gate fusion and simulation. */ @@ -47,6 +55,10 @@ struct QSimRunner final { * Random number generator seed to apply measurement gates. */ uint64_t seed; + /** + * Remap logical qubits to improve state-vector cache locality. + */ + bool cache_local_remap = false; }; /** @@ -159,6 +171,49 @@ struct QSimRunner final { return true; } + template + static bool RunFusedGates( + const Parameter& param, const FusedOps& fused_ops, + const StateSpace& state_space, const Simulator& simulator, State& state, + std::vector& measure_results) { + double t0 = 0.0; + double t1 = 0.0; + + RGen rgen(param.seed); + measure_results.reserve(measure_results.size() + fused_ops.size()); + + if (param.verbosity > 0) { + t0 = GetTime(); + } + + // Apply fused operations. + for (std::size_t i = 0; i < fused_ops.size(); ++i) { + if (param.verbosity > 3) { + t1 = GetTime(); + } + + if (!ApplyGate(state_space, simulator, fused_ops[i], rgen, state, + measure_results)) { + IO::errorf("measurement failed.\n"); + return false; + } + + if (param.verbosity > 3) { + state_space.DeviceSync(); + double t2 = GetTime(); + IO::messagef("gate %lu done in %g seconds.\n", i, t2 - t1); + } + } + + if (param.verbosity > 0) { + state_space.DeviceSync(); + double t2 = GetTime(); + IO::messagef("simu time is %g seconds.\n", t2 - t0); + } + + return true; + } + /** * Runs the given circuit and make the final state available to the caller, * recording the result of any intermediate measurements in the circuit. @@ -222,11 +277,43 @@ struct QSimRunner final { * the run, ordered by time and qubit index. * @return True if the simulation completed successfully; false otherwise. */ + template + static bool Run(const Parameter& param, Circuit& circuit, + const StateSpace& state_space, const Simulator& simulator, + State& state, + std::vector& measure_results) { + if (param.cache_local_remap) { + IO::errorf( + "cache-local remap requires a logical_to_physical output map.\n"); + return false; + } + + qubit_remap::QubitMap discarded_map; + + return Run(param, circuit, state_space, simulator, state, + discarded_map, measure_results); + } + template static bool Run(const Parameter& param, const Circuit& circuit, const StateSpace& state_space, const Simulator& simulator, State& state, std::vector& measure_results) { + auto circuit_copy = circuit; + + return Run(param, circuit_copy, state_space, simulator, state, + measure_results); + } + + template + static bool Run(const Parameter& param, Circuit& circuit, + const StateSpace& state_space, const Simulator& simulator, + State& state, + qubit_remap::QubitMap& logical_to_physical, + std::vector& measure_results) { + using CircuitType = std::remove_cv_t; + const auto& ops = Operations::get(circuit); + double t0 = 0.0; double t1 = 0.0; @@ -234,57 +321,48 @@ struct QSimRunner final { t0 = GetTime(); } - RGen rgen(param.seed); - if (param.verbosity > 1) { t1 = GetTime(); IO::messagef("init time is %g seconds.\n", t1 - t0); t0 = GetTime(); } - const auto& ops = Operations::get(circuit); - auto fused_ops = Fuser::FuseGates(param, state.num_qubits(), ops); + auto fused_ops = Fuser::FuseGates( + param, state.num_qubits(), ops, !param.cache_local_remap); if (fused_ops.size() == 0 && ops.size() > 0) { return false; } - measure_results.reserve(fused_ops.size()); + if (param.cache_local_remap) { + if constexpr (IsCircuit::value) { + logical_to_physical = + qubit_remap::RemapCircuit(fused_ops, circuit); + } else { + IO::errorf("cache-local remap requires a qsim::Circuit.\n"); + return false; + } + } if (param.verbosity > 1) { t1 = GetTime(); IO::messagef("fuse time is %g seconds.\n", t1 - t0); } - if (param.verbosity > 0) { - t0 = GetTime(); + std::size_t previous_measurement_count = measure_results.size(); + if (!RunFusedGates(param, fused_ops, state_space, simulator, state, + measure_results)) { + return false; } - // Apply fused operations. - for (std::size_t i = 0; i < fused_ops.size(); ++i) { - if (param.verbosity > 3) { - t1 = GetTime(); - } - - if (!ApplyGate(state_space, simulator, fused_ops[i], rgen, state, - measure_results)) { - IO::errorf("measurement failed.\n"); - return false; - } - - if (param.verbosity > 3) { - state_space.DeviceSync(); - double t2 = GetTime(); - IO::messagef("gate %lu done in %g seconds.\n", i, t2 - t1); + if (param.cache_local_remap) { + for (std::size_t i = previous_measurement_count; + i < measure_results.size(); ++i) { + qubit_remap::detail::RemapMeasurementResult( + logical_to_physical, measure_results[i]); } } - if (param.verbosity > 0) { - state_space.DeviceSync(); - double t2 = GetTime(); - IO::messagef("simu time is %g seconds.\n", t2 - t0); - } - return true; } @@ -310,6 +388,17 @@ struct QSimRunner final { return Run( param, circuit, state_space, simulator, state, discarded_results); } + + template + static bool Run(const Parameter& param, Circuit& circuit, + const StateSpace& state_space, const Simulator& simulator, + State& state, + qubit_remap::QubitMap& logical_to_physical) { + std::vector discarded_results; + + return Run(param, circuit, state_space, simulator, state, + logical_to_physical, discarded_results); + } }; } // namespace qsim