Explore Borealis Dataverse API¶

Interactive exploration of the Dataverse REST API against the ncszo-gnssa test sub-dataverse on borealisdata.ca.

Token is loaded from a .env file in the project root (or any parent directory). Create .env with:

DATAVERSE_API_TOKEN=your-token-here

In [7]:

Copied!





from dotenv import load_dotenv
import os
import json
import httpx

load_dotenv()  # reads .env from cwd or any parent directory

HOST = "borealisdata.ca"
ALIAS = "ncszo-gnssa"
BASE_URL = f"https://{HOST}/api"

TOKEN = os.environ.get("DATAVERSE_API_TOKEN", "")
HEADERS = {"X-Dataverse-key": TOKEN} if TOKEN else {}

print(f"Host: {HOST}")
print(f"Token set: {bool(TOKEN)}")
from dotenv import load_dotenv
import os
import json
import httpx

load_dotenv()  # reads .env from cwd or any parent directory

HOST = "borealisdata.ca"
ALIAS = "ncszo-gnssa"
BASE_URL = f"https://{HOST}/api"

TOKEN = os.environ.get("DATAVERSE_API_TOKEN", "")
HEADERS = {"X-Dataverse-key": TOKEN} if TOKEN else {}

print(f"Host: {HOST}")
print(f"Token set: {bool(TOKEN)}")

Host: borealisdata.ca
Token set: True

1. List sub-dataverse contents¶

GET /api/dataverses/{alias}/contents

In [8]:

Copied!





resp = httpx.get(f"{BASE_URL}/dataverses/{ALIAS}/contents", headers=HEADERS)
resp.raise_for_status()
contents = resp.json()
print(json.dumps(contents, indent=2)[:3000])
resp = httpx.get(f"{BASE_URL}/dataverses/{ALIAS}/contents", headers=HEADERS)
resp.raise_for_status()
contents = resp.json()
print(json.dumps(contents, indent=2)[:3000])

{
  "status": "OK",
  "data": [
    {
      "id": 908158,
      "identifier": "SP3/7HF3IC",
      "persistentUrl": "https://doi.org/10.5683/SP3/7HF3IC",
      "protocol": "doi",
      "authority": "10.5683",
      "separator": "/",
      "publisher": "Borealis",
      "storageIdentifier": "s3://10.5683/SP3/7HF3IC",
      "datasetType": "dataset",
      "type": "dataset"
    },
    {
      "id": 1103315,
      "identifier": "SP3/ATPQI7",
      "persistentUrl": "https://doi.org/10.5683/SP3/ATPQI7",
      "protocol": "doi",
      "authority": "10.5683",
      "separator": "/",
      "publisher": "Borealis",
      "storageIdentifier": "s3://10.5683/SP3/ATPQI7",
      "datasetType": "dataset",
      "type": "dataset"
    }
  ]
}

In [9]:

Copied!

# Summarise what's in there
for item in contents.get("data", []):
    print(item.get("type"), "|", item.get("title", item.get("name", "")), "|", item.get("globalId", ""))
# Summarise what's in there
for item in contents.get("data", []):
    print(item.get("type"), "|", item.get("title", item.get("name", "")), "|", item.get("globalId", ""))

dataset |  | 
dataset |  |

2. Inspect a dataset's file list¶

GET /api/datasets/:persistentId/?persistentId={PID}

Edit PID below with a DOI from the listing above.

In [10]:

Copied!





# Replace with an actual PID from the listing above
PID = "doi:10.5683/SP3/7HF3IC"

resp = httpx.get(
    f"{BASE_URL}/datasets/:persistentId/",
    params={"persistentId": PID},
    headers=HEADERS,
)
resp.raise_for_status()
dataset = resp.json()
print(json.dumps(dataset, indent=2)[:3000])
# Replace with an actual PID from the listing above
PID = "doi:10.5683/SP3/7HF3IC"

resp = httpx.get(
    f"{BASE_URL}/datasets/:persistentId/",
    params={"persistentId": PID},
    headers=HEADERS,
)
resp.raise_for_status()
dataset = resp.json()
print(json.dumps(dataset, indent=2)[:3000])

{
  "status": "OK",
  "data": {
    "id": 908158,
    "identifier": "SP3/7HF3IC",
    "persistentUrl": "https://doi.org/10.5683/SP3/7HF3IC",
    "protocol": "doi",
    "authority": "10.5683",
    "separator": "/",
    "publisher": "Borealis",
    "storageIdentifier": "s3://10.5683/SP3/7HF3IC",
    "datasetType": "dataset",
    "latestVersion": {
      "id": 49924,
      "datasetId": 908158,
      "datasetPersistentId": "doi:10.5683/SP3/7HF3IC",
      "datasetType": "dataset",
      "storageIdentifier": "s3://10.5683/SP3/7HF3IC",
      "internalVersionNumber": 495,
      "versionState": "DRAFT",
      "latestVersionPublishingState": "DRAFT",
      "deaccessionLink": "",
      "lastUpdateTime": "2026-03-17T21:21:21Z",
      "createTime": "2025-04-23T18:50:15Z",
      "license": {
        "name": "CC0 1.0",
        "uri": "http://creativecommons.org/publicdomain/zero/1.0",
        "iconUri": "https://licensebuttons.net/p/zero/1.0/88x31.png",
        "rightsIdentifier": "CC0-1.0",
        "rightsIdentifierScheme": "SPDX",
        "schemeUri": "https://spdx.org/licenses/",
        "languageCode": "en"
      },
      "fileAccessRequest": true,
      "metadataBlocks": {
        "citation": {
          "displayName": "Citation Metadata",
          "name": "citation",
          "fields": [
            {
              "typeName": "title",
              "multiple": false,
              "typeClass": "primitive",
              "value": "Test set"
            },
            {
              "typeName": "author",
              "multiple": true,
              "typeClass": "compound",
              "value": [
                {
                  "authorName": {
                    "typeName": "authorName",
                    "multiple": false,
                    "typeClass": "primitive",
                    "value": "Heesemann, Martin"
                  },
                  "authorAffiliation": {
                    "typeName": "authorAffiliation",
                    "multiple": false,
                    "typeClass": "primitive",
                    "value": "University of Victoria"
                  }
                },
                {
                  "authorName": {
                    "typeName": "authorName",
                    "multiple": false,
                    "typeClass": "primitive",
                    "value": "Hutchinson, Jesse"
                  },
                  "authorAffiliation": {
                    "typeName": "authorAffiliation",
                    "multiple": false,
                    "typeClass": "primitive",
                    "value": "University of Victoria"
                  }
                }
              ]
            },
            {
              "typeName": "datasetContact",
              "multiple": true,
              "typeClass": "compound",
              "value": [
                {
                  "datasetContactName": {
                    "typeName": "datasetContactName",
                    "multiple": f

In [11]:

Copied!





# Inspect file list structure
files = dataset["data"]["latestVersion"]["files"]
print(f"Total files: {len(files)}")
print()
for f in files[:10]:
    df = f["dataFile"]
    print(
        f"  id={df['id']}",
        f"filename={df['filename']}",
        f"size={df.get('filesize', '?')}",
        f"dir='{f.get('directoryLabel', '')}'" ,
        f"type={df.get('contentType', '')}",
    )
# Inspect file list structure
files = dataset["data"]["latestVersion"]["files"]
print(f"Total files: {len(files)}")
print()
for f in files[:10]:
    df = f["dataFile"]
    print(
        f"  id={df['id']}",
        f"filename={df['filename']}",
        f"size={df.get('filesize', '?')}",
        f"dir='{f.get('directoryLabel', '')}'" ,
        f"type={df.get('contentType', '')}",
    )

Total files: 465

  id=1103599 filename=0 size=1394708 dir='dual_heading.zarr/heading_std' type=application/octet-stream
  id=1103361 filename=0 size=75189 dir='dual_heading.zarr/nsol' type=application/octet-stream
  id=1103633 filename=0 size=46655 dir='dual_heading.zarr/nsat' type=application/octet-stream
  id=1103429 filename=0 size=19572 dir='dual_heading.zarr/datetime' type=application/octet-stream
  id=1103463 filename=0 size=68656 dir='dual_heading.zarr/nmulti' type=application/octet-stream
  id=1103292 filename=0 size=1642733 dir='dual_heading.zarr/pitch' type=application/octet-stream
  id=1103327 filename=0 size=13239 dir='dual_heading.zarr/dts' type=application/octet-stream
  id=1103531 filename=0 size=75189 dir='dual_heading.zarr/nelmask' type=application/octet-stream
  id=1103497 filename=0 size=1715106 dir='dual_heading.zarr/heading' type=application/octet-stream
  id=1103395 filename=0 size=1399253 dir='dual_heading.zarr/pitch_std' type=application/octet-stream

3. Test byte-range access¶

GET /api/access/datafile/{id} with Range: bytes=0-99

In [ ]:

Copied!





# Pick a file id from the listing above
FILE_ID = files[0]["dataFile"]["id"]
print(f"Testing range request on file id={FILE_ID}")

resp = httpx.get(
    f"{BASE_URL}/access/datafile/{FILE_ID}",
    headers={**HEADERS, "Range": "bytes=0-99"},
    follow_redirects=True, # Important: follow redirects to get the actual file - 
)
print(f"Status: {resp.status_code}")
print(f"Content-Range: {resp.headers.get('content-range', 'not returned')}")
print(f"Bytes received: {len(resp.content)}")
print(f"First 100 bytes: {resp.content[:100]}")
# Pick a file id from the listing above
FILE_ID = files[0]["dataFile"]["id"]
print(f"Testing range request on file id={FILE_ID}")

resp = httpx.get(
    f"{BASE_URL}/access/datafile/{FILE_ID}",
    headers={**HEADERS, "Range": "bytes=0-99"},
    follow_redirects=True, # Important: follow redirects to get the actual file - 
)
print(f"Status: {resp.status_code}")
print(f"Content-Range: {resp.headers.get('content-range', 'not returned')}")
print(f"Bytes received: {len(resp.content)}")
print(f"First 100 bytes: {resp.content[:100]}")

Testing range request on file id=1103599
Status: 206
Content-Range: bytes 0-99/1394708
Bytes received: 100
First 100 bytes: b'\x02\x01!\x01f\xcd \x00\x00\x00\x02\x00\x14H\x15\x00\xcaN\x01\x00\x191\x04\x00|\xd7\x02\x00T\x00\x00\x009\x01\x07\x00\xcbM\x08\x00\x0c\xa2\t\x00\xb0\xb9\x05\x00q\xe5\n\x00v\xa0\x0e\x00\xee \x0c\x00na\r\x00\xa3!\x11\x00\xa3\xdd\x0f\x002^\x12\x00D\x9c\x13\x00\x01\xd6\x14\x00rN\x01\x00\xf4\x049520\x0b\x00\x00\x000.'

4. Observed JSON shapes¶

Document key fields here after running the cells above:

Field	Path in JSON	Notes
File ID	`data.latestVersion.files[].dataFile.id`	integer
Filename	`data.latestVersion.files[].dataFile.filename`
File size	`data.latestVersion.files[].dataFile.filesize`	bytes
Directory	`data.latestVersion.files[].directoryLabel`	may be absent or empty
MIME type	`data.latestVersion.files[].dataFile.contentType`
MD5	`data.latestVersion.files[].dataFile.md5`	may be absent