Source code for brainsets.utils.dandi_utils

# Public names of this module (what `from ... import *` exports). The same
# list object is reused below as the autosummary section of `__api_ref__`.
__all__ = [
    "extract_subject_from_nwb",
    "extract_spikes_from_nwbfile",
    "download_file",
    "get_nwb_asset_list",
]

# Drives the generated API reference; see docs/source/api_reference.py.
__api_ref__ = {
    "description": None,
    "sections": [{"autosummary": __all__}],
}


from typing import Literal
from pathlib import Path
import numpy as np
import pandas as pd
from pynwb import NWBFile

from temporaldata import ArrayDict, IrregularTimeSeries

from brainsets.descriptions import SubjectDescription

try:
    import dandi

    DANDI_AVAILABLE = True
except ImportError:
    DANDI_AVAILABLE = False


def _check_dandi_available(func_name: str) -> None:
    """Guard for functions that need the optional ``dandi`` dependency.

    Args:
        func_name: Name of the calling function; it is placed at the start of
            the error message.

    Raises:
        ImportError: If the module-level ``DANDI_AVAILABLE`` flag is falsy.
    """
    if DANDI_AVAILABLE:
        return

    message = (
        f"{func_name} requires the dandi library which is not installed. "
        "Install it with `pip install dandi`"
    )
    raise ImportError(message)


def extract_subject_from_nwb(nwbfile: NWBFile):
    r"""Build a :obj:`SubjectDescription <brainsets.descriptions.SubjectDescription>`
    from the subject metadata of an NWBFile

    Only ``id``, ``species``, and ``sex`` are filled in. The subject id is
    lower-cased. A species string containing ``"NCBITaxon"`` is rewritten to
    ``NCBITaxon_<suffix>``, where ``<suffix>`` is the text after its last
    underscore; any other species string is passed through unchanged.

    Args:
        nwbfile: An open NWB file handle

    Returns:
        A :obj:`SubjectDescription <brainsets.descriptions.SubjectDescription>`
    """
    # Metadata that DANDI requires on `subject`:
    #   - subject_id: a subject identifier must be provided.
    #   - species: either a latin binomial or NCBI taxonomic identifier.
    #   - sex: must be "M", "F", "O" (other), or "U" (unknown).
    #   - date_of_birth or age: this does not appear to be enforced, so it is
    #     skipped here.
    subject = nwbfile.subject

    species = subject.species
    if "NCBITaxon" in species:
        # Keep only the part after the last underscore and re-attach the
        # canonical prefix.
        taxon_suffix = species.rpartition("_")[2]
        species = "NCBITaxon_" + taxon_suffix

    subject_id = subject.subject_id.lower()
    return SubjectDescription(id=subject_id, species=species, sex=subject.sex)
def extract_spikes_from_nwbfile(
    nwbfile: NWBFile,
    recording_tech: Literal["UTAH_ARRAY_THRESHOLD_CROSSINGS", "UTAH_ARRAY_SPIKES"],
):
    r"""Extract spikes and unit metadata from an NWBFile

    Every entry of ``nwbfile.units.spike_times_index`` is treated as one unit.
    Each unit gets a string id derived from the electrodes table, and its
    spike times are tagged with the unit's position in that index.

    Args:
        nwbfile: An open NWB file handle
        recording_tech: One of ``"UTAH_ARRAY_THRESHOLD_CROSSINGS"`` or
            ``"UTAH_ARRAY_SPIKES"``

    Returns:
        A tuple ``(spikes, units)``. ``spikes`` is an
        :obj:`IrregularTimeSeries` holding the concatenated ``timestamps`` and
        the matching ``unit_index``, sorted before being returned. ``units``
        is an :obj:`ArrayDict` built from one row per unit with the fields
        ``id``, ``unit_number`` and ``count``.

    Raises:
        ValueError: If ``recording_tech`` is not one of the two supported
            values (checked while labelling each unit).
    """
    spike_trains = nwbfile.units.spike_times_index[:]
    electrodes = nwbfile.units.electrodes.table

    per_unit_times = []  # one entry per unit, including empty ones
    per_unit_labels = []  # one entry per unit that has at least one spike
    unit_rows = []  # per-unit metadata records

    for i, spiketimes in enumerate(spike_trains):
        # Build the unit label.
        if recording_tech == "UTAH_ARRAY_THRESHOLD_CROSSINGS":
            # Unit i is labelled with row i of the electrodes table.
            group_name = electrodes["group_name"][i]
            unit_id = f"group_{group_name}/elec{i}/multiunit_{0}"
        elif recording_tech == "UTAH_ARRAY_SPIKES":
            # The electrode is looked up from the unit's own table row.
            electrode_id = nwbfile.units[i].electrodes.item().item()
            group_name = electrodes["group_name"][electrode_id]
            unit_id = f"group_{group_name}/elec{electrode_id}/unit_{i}"
        else:
            raise ValueError(f"Recording tech {recording_tech} not supported")

        n_spikes = len(spiketimes)

        # Collect the spike times and, for non-empty units, their labels.
        per_unit_times.append(spiketimes)
        if n_spikes:
            per_unit_labels.append([i] * n_spikes)

        unit_rows.append({"id": unit_id, "unit_number": i, "count": n_spikes})

    # list of dicts -> dataframe -> ArrayDict
    unit_table = ArrayDict.from_dataframe(
        pd.DataFrame(unit_rows),
        unsigned_to_long=True,
    )

    # Flatten the per-unit lists into single arrays and wrap them.
    spikes = IrregularTimeSeries(
        timestamps=np.concatenate(per_unit_times),
        unit_index=np.concatenate(per_unit_labels),
        domain="auto",
    )

    # Units were appended one after another, so sort the series.
    spikes.sort()

    return spikes, unit_table
def download_file(
    path: str | Path,
    url: str,
    raw_dir: str | Path,
    overwrite: bool = False,
) -> Path:
    r"""Download a file from DANDI

    The asset is downloaded into the directory ``raw_dir / path.parent``,
    which is created if needed. The returned path is ``raw_dir / path``; it is
    computed from the arguments, not reported back by the download call.

    Args:
        path: path of the downloaded file within :obj:`raw_dir`
        url: URL of the DANDI asset
        raw_dir: root directory where the file will be downloaded
        overwrite: Will overwrite existing file if :obj:`True` (default
            :obj:`False`). When false, ``DownloadExisting.REFRESH`` is passed
            to dandi instead of ``DownloadExisting.OVERWRITE``.

    Returns:
        ``raw_dir / path`` as a :obj:`Path`.

    Raises:
        ImportError: If the ``dandi`` library is not installed.
    """
    _check_dandi_available("download_file")
    from dandi.download import DownloadExisting, download

    raw_dir = Path(raw_dir)
    relative_target = Path(path)

    # Create the destination folder, including any missing parents.
    target_dir = raw_dir / relative_target.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    if overwrite:
        existing_policy = DownloadExisting.OVERWRITE
    else:
        existing_policy = DownloadExisting.REFRESH

    download(url, target_dir, existing=existing_policy)

    return raw_dir / relative_target
def get_nwb_asset_list(dandiset_id: str) -> list:
    r"""Get a list of all remote NWB assets in the given dandiset

    An asset counts as NWB when its ``path`` ends with ``".nwb"``.

    Args:
        dandiset_id: The dandiset ID (e.g. 'DANDI:000688/draft')

    Returns:
        A list of all remote NWB assets (``dandi.dandiapi.RemoteBlobAsset``)
        within this dandiset

    Raises:
        ImportError: If the ``dandi`` library is not installed.
    """
    _check_dandi_available("get_nwb_asset_list")
    from dandi.dandiarchive import parse_dandi_url

    # Only the assets are used; the client and dandiset handles are ignored.
    # The filtering runs while the navigation context is still open.
    with parse_dandi_url(dandiset_id).navigate() as (_client, _dandiset, assets):
        nwb_assets = [asset for asset in assets if asset.path.endswith(".nwb")]

    return nwb_assets