diff --git a/neo/rawio/spikegadgetsrawio.py b/neo/rawio/spikegadgetsrawio.py index 365d784e3..e4c43d1ca 100644 --- a/neo/rawio/spikegadgetsrawio.py +++ b/neo/rawio/spikegadgetsrawio.py @@ -8,14 +8,87 @@ https://bitbucket.org/mkarlsso/trodes/wiki/Configuration Note : - * this file format have multiple version. news version include the gain for scaling. - The actual implementation does not contain this feature because we don't have - files to test this. So now the gain is "hardcoded" to 1. and so units are - not handled correctly. - -The ".rec" file format contains: - * a first text part with information in an XML structure - * a second part for the binary buffer + * this file format has multiple versions. When the SpikeConfiguration entries in the + XML header carry a `spikeScalingToUv` attribute (newer versions), the reader uses + it to populate per-channel `gain` and sets `units="uV"`. When the attribute is + absent (older files), the reader falls back to `gain=1` with empty `units` and + emits a warning that the data has no physical units. + +The ".rec" file format contains two parts: + + +--------------------------------------------+ + | XML configuration | + | GlobalConfiguration | + | HardwareConfiguration | + | SpikeConfiguration | + | ... | + | terminated by "" | + +--------------------------------------------+ + | | + | Binary section | + | A stream of fixed-size packets, one | + | per sample tick. packet_size is | + | constant for a given file, computed | + | at parse time from the XML. | + | | + | packet 0 | + | packet 1 | + | packet 2 | + | ... | + +--------------------------------------------+ + +Each packet has the following structure (packet_size bytes total): + + +------+-----------+-----------+ ... +-----------+--------+----- ... -----+ + | 0x55 | device A | device B | | device K | tstamp | ephys region | + | 1 B | numBytes | numBytes | | numBytes | 4 B | 2*N_ephy B | + +------+-----------+-----------+ ... +-----------+--------+----- ... 
-----+ + ^ ^ ^ ^ + | | | | + packet one block per hardware device uint32 one int16 per + marker with a numBytes attribute in sample ephys channel; + HardwareConfiguration (e.g. tick layout of this + Controller_DIO, Multiplexed, region depends + SysClock, ECU) on the device, + see below. + +The ephys region is laid out differently depending on the acquisition hardware, +which is declared by SpikeConfiguration.device: + + * Intan recordings (`device="intan"` or absent on legacy files): + chip-interleaved order. The SpikeGadgets MCU (main control unit) clocks all + attached Intan chips in parallel over SPI (serial peripheral interface), so + samples are written in the sequence + [chip 0 ch 0, chip 1 ch 0, ..., chip N-1 ch 0, + chip 0 ch 1, chip 1 ch 1, ..., chip N-1 ch 1, ...]. + + Example with chanPerChip=32 and 4 chips, every packet's ephys region: + + slot 0 1 2 3 | 4 5 6 7 | ... | 124 125 126 127 + hwChan 0 32 64 96 | 1 33 65 97 | ... | 31 63 95 127 + + Formula: hwChan at slot i = (i % n_chips) * chanPerChip + (i // n_chips). + The XML's hwChan attributes are *not* in slot order; they are + listed in user-defined SpikeNTrode (tetrode) groupings, so the reader cannot + use XML document position as a proxy and must reproduce the chip-interleaved + sequence directly (see `_intan_hwchans_in_binary_order`). + + * Neuropixels recordings (`device="neuropixels1"` or `"neuropixels2"`): + hwChan ascending order. The SpikeGadgets MCU (main control unit) firmware + emits Neuropixels samples in hwChan ascending order: byte pair i of each + packet holds the sample from the electrode whose hwChan = i. The XML's + elements may be listed in any order Trodes wrote them + (typically not hwChan-ascending), but that order does not constrain the + binary; the binary always follows hwChan ordering. + + Example, every packet's ephys region: + + slot 0 1 2 3 4 5 ... 380 381 382 383 + hwChan 0 1 2 3 4 5 ... 
380 381 382 383 + + The reader walks the XML to recover per-trode metadata (gain, anatomical + coordinates) for each hwChan, but the column-to-byte-position mapping is + the identity: column i reads byte pair i, with hwChan i's data. Author: Samuel Garcia """ @@ -81,31 +154,49 @@ def __init__(self, filename="", selected_streams=None): def _source_name(self): return self.filename - def _produce_ephys_channel_ids(self, n_total_channels, n_channels_per_chip, missing_hw_chans): - """Compute the channel ID labels for subset of spikegadgets recordings - The ephys channels in the .rec file are stored in the following order: - hwChan ID of channel 0 of first chip, hwChan ID of channel 0 of second chip, ..., hwChan ID of channel 0 of Nth chip, - hwChan ID of channel 1 of first chip, hwChan ID of channel 1 of second chip, ..., hwChan ID of channel 1 of Nth chip, - ... - So if there are 32 channels per chip and 128 channels (4 chips), then the channel IDs are: - 0, 32, 64, 96, 1, 33, 65, 97, ..., 128 - See also: https://github.com/NeuralEnsemble/python-neo/issues/1215 - - This doesn't work for all types of spikegadgets - see: https://github.com/NeuralEnsemble/python-neo/issues/1517 - - If there are any missing hardware channels, they must be specified in missing_hw_chans. - See: https://github.com/NeuralEnsemble/python-neo/issues/1592 + def _intan_hwchans_in_binary_order(self, sconf, num_ephy_channels, num_ephy_channels_xml): + """Return the hwChan-at-each-byte-position sequence for an Intan recording. + + The SpikeGadgets MCU (main control unit) multiplexes Intan chips in chip-interleaved + order: local-channel outer, chip inner. Every chip contributes its local channel 0 + first, then every chip contributes its local channel 1, and so on. So byte pair i of + each packet holds the sample for the hwChan returned at index i. + + Example: chanPerChip=32 and 4 chips (128 channels total). Returns + [0, 32, 64, 96, 1, 33, 65, 97, ..., 31, 63, 95, 127]. 
+ + If the channel count does not divide evenly into chanPerChip we fall back to XML + document order; in that case the chip-interleaved bridging assumption does not + apply and labels reflect the XML's hwChan attributes directly. + + See also: + - https://github.com/NeuralEnsemble/python-neo/issues/1215 (origin of this logic) + - https://github.com/NeuralEnsemble/python-neo/issues/1517 (doesn't fit all setups) + - https://github.com/NeuralEnsemble/python-neo/issues/1592 (missing channels) """ - ephys_channel_ids_list = [] - for local_hw_channel in range(n_channels_per_chip): - n_chips = int(n_total_channels / n_channels_per_chip) + intan_chans_per_chip = int(sconf.attrib.get("chanPerChip", 32)) # RHD2132 default for legacy files + hw_chans_in_xml = [int(schan.attrib["hwChan"]) for trode in sconf for schan in trode] + + channels_fit_chip_layout = ( + intan_chans_per_chip > 0 + and num_ephy_channels % intan_chans_per_chip == 0 + ) + if not channels_fit_chip_layout: + return hw_chans_in_xml + + # Reproduce the chip-interleaved hwChan sequence (local-channel outer, chip inner) + # so that hwchans_in_binary_order[i] is the hwChan whose data lives at byte pair i. + # Any hwChans absent from the SpikeConfiguration are skipped. 
+ missing_hw_chans = set(range(num_ephy_channels)) - set(hw_chans_in_xml) + n_chips = num_ephy_channels_xml // intan_chans_per_chip + hwchans_in_binary_order = [] + for local_hw_channel in range(intan_chans_per_chip): for chip in range(n_chips): - global_hw_chan = local_hw_channel + chip * n_channels_per_chip - if global_hw_chan in missing_hw_chans: + hw_chan = local_hw_channel + chip * intan_chans_per_chip + if hw_chan in missing_hw_chans: continue - ephys_channel_ids_list.append(local_hw_channel + chip * n_channels_per_chip) - return ephys_channel_ids_list + hwchans_in_binary_order.append(hw_chan) + return hwchans_in_binary_order def _parse_header(self): # parse file until "" @@ -122,7 +213,7 @@ def _parse_header(self): # explore xml header root = ElementTree.fromstring(header_txt) - gconf = sr = root.find("GlobalConfiguration") + gconf = root.find("GlobalConfiguration") hconf = root.find("HardwareConfiguration") sconf = root.find("SpikeConfiguration") @@ -140,12 +231,6 @@ def _parse_header(self): "than the number of channels in the hardware configuration" ) - # as spikegadgets change we should follow this - try: - num_chan_per_chip = int(sconf.attrib["chanPerChip"]) - except KeyError: - num_chan_per_chip = 32 # default value for Intan chips - # explore sub stream and count packet size # first bytes is 0x55 packet_size = 1 @@ -214,61 +299,73 @@ def _parse_header(self): if num_ephy_channels > 0: stream_id = "trodes" - stream_name = stream_id - buffer_id = "" - signal_streams.append((stream_name, stream_id, buffer_id)) + signal_streams.append((stream_id, stream_id, "")) self._mask_channels_bytes[stream_id] = [] - # we can only produce these channels for a subset of spikegadgets setup. 
If this criteria isn't - # true then we should just use the raw_channel_ids and let the end user sort everything out - if num_ephy_channels % num_chan_per_chip == 0: - all_hw_chans = [int(schan.attrib["hwChan"]) for trode in sconf for schan in trode] - missing_hw_chans = set(range(num_ephy_channels)) - set(all_hw_chans) - channel_ids = self._produce_ephys_channel_ids( - num_ephy_channels_xml, num_chan_per_chip, missing_hw_chans - ) - raw_channel_ids = False - else: - raw_channel_ids = True - - chan_ind = 0 self.is_scaleable = all("spikeScalingToUv" in trode.attrib for trode in sconf) if not self.is_scaleable: self.logger.warning( "Unable to read channel gain scaling (to uV) from .rec header. Data has no physical units!" ) - for trode in sconf: - if "spikeScalingToUv" in trode.attrib: - gain = float(trode.attrib["spikeScalingToUv"]) + # Compute the hwChan-at-each-byte-position sequence based on the hardware kind + # declared in SpikeConfiguration.device. + # - Intan recordings are chip-interleaved by the MCU (main control unit), so we + # reproduce that ordering from the chip layout. + # - Neuropixels recordings are emitted in hwChan ascending order: byte pair i + # holds the sample from the electrode whose hwChan = i. + spike_device = sconf.attrib.get("device") + if spike_device in (None, "intan"): + hwchans_in_binary_order = self._intan_hwchans_in_binary_order( + sconf, num_ephy_channels, num_ephy_channels_xml + ) + elif spike_device in ("neuropixels1", "neuropixels2"): + xml_hwchans = {int(schan.attrib["hwChan"]) for trode in sconf for schan in trode} + expected = set(range(num_ephy_channels)) + if xml_hwchans != expected: + raise NeoReadWriteError( + "SpikeGadgets Neuropixels: hwChan values in SpikeConfiguration do not " + f"cover [0, {num_ephy_channels}). 
The firmware emits Neuropixels samples " + "in hwChan ascending order over a contiguous range; this file has " + f"missing or out-of-range hwChans (missing: {sorted(expected - xml_hwchans)[:5]}..., " + f"extra: {sorted(xml_hwchans - expected)[:5]}...)." + ) + hwchans_in_binary_order = list(range(num_ephy_channels)) + else: + raise NeoReadWriteError( + f"SpikeGadgets: unsupported SpikeConfiguration.device {spike_device!r}. " + "Add a dedicated branch for this hardware in SpikeGadgetsRawIO._parse_header." + ) + + # Walk binary order, using hwChan as the explicit bridge to per-trode metadata. + trode_by_hwchan = {int(schan.attrib["hwChan"]): trode for trode in sconf for schan in trode} + schan_by_hwchan = {int(schan.attrib["hwChan"]): schan for trode in sconf for schan in trode} + for binary_index, hw_chan in enumerate(hwchans_in_binary_order): + parent_trode = trode_by_hwchan[hw_chan] + schan = schan_by_hwchan[hw_chan] + if "spikeScalingToUv" in parent_trode.attrib: + gain = float(parent_trode.attrib["spikeScalingToUv"]) units = "uV" else: - gain = 1 # revert to hardcoded gain + gain = 1.0 units = "" - for schan in trode: - # Here we use raw ids if necessary for parsing (for some neuropixel recordings) - # otherwise we default back to the raw hwChan IDs - if raw_channel_ids: - name = "trode" + trode.attrib["id"] + "chan" + schan.attrib["hwChan"] - chan_id = schan.attrib["hwChan"] - else: - chan_id = str(channel_ids[chan_ind]) - name = "hwChan" + chan_id - - offset = 0.0 - buffer_id = "" - signal_channels.append( - (name, chan_id, self._sampling_rate, "int16", units, gain, offset, stream_id, buffer_id) - ) - - chan_mask = np.zeros(packet_size, dtype="bool") - num_bytes = packet_size - 2 * num_ephy_channels + 2 * chan_ind - chan_mask[num_bytes] = True - chan_mask[num_bytes + 1] = True - self._mask_channels_bytes[stream_id].append(chan_mask) + if spike_device in ("neuropixels1", "neuropixels2"): + chan_id = f"hwChan{hw_chan}" + sorting_group = 
schan.attrib.get("spikeSortingGroup", "0") + name = f"probe{sorting_group}_chan{hw_chan}" + else: + chan_id = str(hw_chan) + name = f"trode{parent_trode.attrib['id']}chan{hw_chan}" + signal_channels.append( + (name, chan_id, self._sampling_rate, "int16", units, gain, 0.0, stream_id, "") + ) - chan_ind += 1 + num_bytes = packet_size - 2 * num_ephy_channels + 2 * binary_index + chan_mask = np.zeros(packet_size, dtype="bool") + chan_mask[num_bytes] = True + chan_mask[num_bytes + 1] = True + self._mask_channels_bytes[stream_id].append(chan_mask) # make mask as array (used in _get_analogsignal_chunk(...)) self._mask_streams = {} diff --git a/neo/test/rawiotest/test_spikegadgetsrawio.py b/neo/test/rawiotest/test_spikegadgetsrawio.py index 2c20ecbf1..c30a5738f 100644 --- a/neo/test/rawiotest/test_spikegadgetsrawio.py +++ b/neo/test/rawiotest/test_spikegadgetsrawio.py @@ -17,6 +17,7 @@ class TestSpikeGadgetsRawIO( "spikegadgets/20210225_em8_minirec2_ac.rec", "spikegadgets/W122_06_09_2019_1_fromSD.rec", "spikegadgets/SpikeGadgets_test_data_2xNpix1.0_20240318_173658.rec", + "spikegadgets/neuropixels2_4shank/20260122_134412_merged_cropped_1min_NP2.rec", ] def test_parse_header_missing_channels(self): @@ -53,6 +54,35 @@ def test_parse_header_missing_channels(self): # fmt: on ) + def test_neuropixels_uses_hwchan_ids(self): + # Regression test for Neuropixels channel id and name semantics. + # ids are f"hwChan{i}" and names are f"probe{spikeSortingGroup}_chan{i}", + # where i is the channel index in the trodes stream (which equals the hwChan + # the firmware writes at that byte position, since the SpikeGadgets MCU emits + # Neuropixels samples in hwChan ascending order). 
+ file_path = Path( + self.get_local_path("spikegadgets/SpikeGadgets_test_data_2xNpix1.0_20240318_173658.rec") + ) + reader = SpikeGadgetsRawIO(filename=file_path) + reader.parse_header() + + trodes_mask = reader.header["signal_channels"]["stream_id"] == "trodes" + trodes_ids = list(reader.header["signal_channels"]["id"][trodes_mask]) + trodes_names = list(reader.header["signal_channels"]["name"][trodes_mask]) + + self.assertEqual(trodes_ids[:4], ["hwChan0", "hwChan1", "hwChan2", "hwChan3"]) + self.assertEqual(trodes_ids[-4:], ["hwChan764", "hwChan765", "hwChan766", "hwChan767"]) + self.assertEqual(len(trodes_ids), 768) + + # Names embed the SpikeChannel spikeSortingGroup attribute. The two-probe NP1 + # workspace sets spikeSortingGroup=0 for one probe and =1 for the other; the + # boundary is interleaved across channel indices (not at i=384) because the + # two probes' hwChans interleave in chip blocks. + self.assertEqual(trodes_names[0], "probe0_chan0") + self.assertEqual(trodes_names[767], "probe1_chan767") + groups = {n.split("_")[0] for n in trodes_names} + self.assertEqual(groups, {"probe0", "probe1"}) + def test_opening_gibberish_file(self): """Test that parsing a file without </Configuration> raises ValueError instead of infinite loop.""" # Create a temporary file with gibberish content that doesn't have the required </Configuration> tag