Skip to content

Commit

Permalink
[ENH] Support new format for Neurosynth and NeuroQuery data (#535)
Browse files Browse the repository at this point in the history
* Initial work on new fetcher.

* Get fetcher working.

* Get conversion working.

* Fix test_fetch_neurosynth.

* Work on conversion tests.

* Fix other test.

Now I just need the test files.

* Change entity order in new standard.

* Fix path.

* Update example, add test data, and fix tests.

* Add fetch_neuroquery to API.

* Clean things up a bit.

* Drop ids txt file and add metadata tsv.gz file.

* Update test files.

* Make some metadata optional.

* Add NeuroQuery stuff.

* Generalize the download example.

Ref #550.

* Pin NeuroQuery to commit instead of branch.
  • Loading branch information
tsalo authored Aug 10, 2021
1 parent 3423662 commit 9e7f14b
Show file tree
Hide file tree
Showing 14 changed files with 756 additions and 151 deletions.
1 change: 1 addition & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ For more information about functional characterization analysis, see :ref:`Meta-
:toctree: generated/
:template: function.rst

extract.fetch_neuroquery
extract.fetch_neurosynth
extract.download_nidm_pain
extract.download_mallet
Expand Down
84 changes: 66 additions & 18 deletions examples/01_datasets/download_neurosynth.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,47 +4,95 @@
.. _datasets2:
=============================================
Download and convert the Neurosynth database
=============================================
================================================
Download the Neurosynth or NeuroQuery databases
================================================
Download and convert the Neurosynth database (with abstracts) for analysis with
NiMARE.
.. note::
This will likely change as we work to shift database querying to a remote
database, rather than handling it locally with NiMARE.
.. warning::
In August 2021, the Neurosynth database was reorganized according to a new file format.
As such, the ``fetch_neurosynth`` function for NiMARE versions before 0.0.10 will not work
with its default parameters.
In order to download the Neurosynth database in its older format using NiMARE <= 0.0.9,
do the following::
nimare.extract.fetch_neurosynth(
url=(
"https://github.com/neurosynth/neurosynth-data/blob/"
"e8f27c4a9a44dbfbc0750366166ad2ba34ac72d6/current_data.tar.gz?raw=true"
),
)
"""
###############################################################################
# Start with the necessary imports
# --------------------------------
import os

from neurosynth.base.dataset import download
from pprint import pprint

import nimare

###############################################################################
# Download Neurosynth
# --------------------------------
# -------------------
# Neurosynth's data files are stored at https://github.com/neurosynth/neurosynth-data.
out_dir = os.path.abspath("../example_data/")
if not os.path.isdir(out_dir):
os.mkdir(out_dir)
os.makedirs(out_dir, exist_ok=True)

if not os.path.isfile(os.path.join(out_dir, "database.txt")):
download(out_dir, unpack=True)
files = nimare.extract.fetch_neurosynth(
path=out_dir,
version="7",
overwrite=False,
source="abstract",
vocab="terms",
)
pprint(files)
neurosynth_db = files[0]

###############################################################################
# Convert Neurosynth database to NiMARE dataset file
# --------------------------------------------------
dset = nimare.io.convert_neurosynth_to_dataset(
os.path.join(out_dir, "database.txt"), os.path.join(out_dir, "features.txt")
neurosynth_dset = nimare.io.convert_neurosynth_to_dataset(
database_file=neurosynth_db["database"],
annotations_files=neurosynth_db["features"],
)
dset.save(os.path.join(out_dir, "neurosynth_dataset.pkl.gz"))
neurosynth_dset.save(os.path.join(out_dir, "neurosynth_dataset.pkl.gz"))
print(neurosynth_dset)

###############################################################################
# Add article abstracts to dataset
# --------------------------------
dset = nimare.extract.download_abstracts(dset, "example@example.edu")
dset.save(os.path.join(out_dir, "neurosynth_nimare_with_abstracts.pkl.gz"))
# This is only possible because Neurosynth uses PMIDs as study IDs.
#
# Make sure you replace the example email address with your own.
neurosynth_dset = nimare.extract.download_abstracts(neurosynth_dset, "example@example.edu")
neurosynth_dset.save(os.path.join(out_dir, "neurosynth_dataset_with_abstracts.pkl.gz"))

###############################################################################
# Do the same with NeuroQuery
# ---------------------------
# NeuroQuery's data files are stored at https://github.com/neuroquery/neuroquery_data.
files = nimare.extract.fetch_neuroquery(
path=out_dir,
version="1",
overwrite=False,
source="combined",
vocab="neuroquery7547",
type="tfidf",
)
pprint(files)
neuroquery_db = files[0]

# Note that the conversion function says "neurosynth".
# This is just for backwards compatibility.
neuroquery_dset = nimare.io.convert_neurosynth_to_dataset(
database_file=neuroquery_db["database"],
annotations_files=neuroquery_db["features"],
)
neuroquery_dset.save(os.path.join(out_dir, "neuroquery_dataset.pkl.gz"))
print(neuroquery_dset)

# NeuroQuery also uses PMIDs as study IDs.
neuroquery_dset = nimare.extract.download_abstracts(neuroquery_dset, "example@example.edu")
neuroquery_dset.save(os.path.join(out_dir, "neuroquery_dataset_with_abstracts.pkl.gz"))
12 changes: 2 additions & 10 deletions nimare/extract/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
"""Dataset and trained model downloading functions."""
import warnings

from . import utils
from .extract import (
download_abstracts,
download_cognitive_atlas,
download_mallet,
download_nidm_pain,
download_peaks2maps_model,
fetch_neuroquery,
fetch_neurosynth,
)

Expand All @@ -17,14 +16,7 @@
"download_cognitive_atlas",
"download_abstracts",
"download_peaks2maps_model",
"fetch_neuroquery",
"fetch_neurosynth",
"utils",
]

warnings.simplefilter("default")

warnings.warn(
"{} is an experimental module under active development; use it at your "
"own risk.".format(__name__),
ImportWarning,
)
Loading

0 comments on commit 9e7f14b

Please sign in to comment.