Explore Borealis Dataverse API¶
Interactive exploration of the Dataverse REST API against the ncszo-gnssa test sub-dataverse on borealisdata.ca.
Token is loaded from a .env file in the project root (or any parent directory).
Create .env with:
DATAVERSE_API_TOKEN=your-token-here
In [7]:
Copied!
from dotenv import load_dotenv
import os
import json
import httpx
load_dotenv() # reads .env from cwd or any parent directory
HOST = "borealisdata.ca"
ALIAS = "ncszo-gnssa"
BASE_URL = f"https://{HOST}/api"
TOKEN = os.environ.get("DATAVERSE_API_TOKEN", "")
HEADERS = {"X-Dataverse-key": TOKEN} if TOKEN else {}
print(f"Host: {HOST}")
print(f"Token set: {bool(TOKEN)}")
from dotenv import load_dotenv
import os
import json
import httpx
load_dotenv() # reads .env from cwd or any parent directory
HOST = "borealisdata.ca"
ALIAS = "ncszo-gnssa"
BASE_URL = f"https://{HOST}/api"
TOKEN = os.environ.get("DATAVERSE_API_TOKEN", "")
HEADERS = {"X-Dataverse-key": TOKEN} if TOKEN else {}
print(f"Host: {HOST}")
print(f"Token set: {bool(TOKEN)}")
Host: borealisdata.ca Token set: True
1. List sub-dataverse contents¶
GET /api/dataverses/{alias}/contents
In [8]:
Copied!
resp = httpx.get(f"{BASE_URL}/dataverses/{ALIAS}/contents", headers=HEADERS)
resp.raise_for_status()
contents = resp.json()
print(json.dumps(contents, indent=2)[:3000])
resp = httpx.get(f"{BASE_URL}/dataverses/{ALIAS}/contents", headers=HEADERS)
resp.raise_for_status()
contents = resp.json()
print(json.dumps(contents, indent=2)[:3000])
{
"status": "OK",
"data": [
{
"id": 908158,
"identifier": "SP3/7HF3IC",
"persistentUrl": "https://doi.org/10.5683/SP3/7HF3IC",
"protocol": "doi",
"authority": "10.5683",
"separator": "/",
"publisher": "Borealis",
"storageIdentifier": "s3://10.5683/SP3/7HF3IC",
"datasetType": "dataset",
"type": "dataset"
},
{
"id": 1103315,
"identifier": "SP3/ATPQI7",
"persistentUrl": "https://doi.org/10.5683/SP3/ATPQI7",
"protocol": "doi",
"authority": "10.5683",
"separator": "/",
"publisher": "Borealis",
"storageIdentifier": "s3://10.5683/SP3/ATPQI7",
"datasetType": "dataset",
"type": "dataset"
}
]
}
In [9]:
Copied!
# Summarise what's in there
for item in contents.get("data", []):
print(item.get("type"), "|", item.get("title", item.get("name", "")), "|", item.get("globalId", ""))
# Summarise what's in there
for item in contents.get("data", []):
print(item.get("type"), "|", item.get("title", item.get("name", "")), "|", item.get("globalId", ""))
dataset | | dataset | |
2. Inspect a dataset's file list¶
GET /api/datasets/:persistentId/?persistentId={PID}
Edit PID below with a DOI from the listing above.
In [10]:
Copied!
# Replace with an actual PID from the listing above
PID = "doi:10.5683/SP3/7HF3IC"
resp = httpx.get(
f"{BASE_URL}/datasets/:persistentId/",
params={"persistentId": PID},
headers=HEADERS,
)
resp.raise_for_status()
dataset = resp.json()
print(json.dumps(dataset, indent=2)[:3000])
# Replace with an actual PID from the listing above
PID = "doi:10.5683/SP3/7HF3IC"
resp = httpx.get(
f"{BASE_URL}/datasets/:persistentId/",
params={"persistentId": PID},
headers=HEADERS,
)
resp.raise_for_status()
dataset = resp.json()
print(json.dumps(dataset, indent=2)[:3000])
{
"status": "OK",
"data": {
"id": 908158,
"identifier": "SP3/7HF3IC",
"persistentUrl": "https://doi.org/10.5683/SP3/7HF3IC",
"protocol": "doi",
"authority": "10.5683",
"separator": "/",
"publisher": "Borealis",
"storageIdentifier": "s3://10.5683/SP3/7HF3IC",
"datasetType": "dataset",
"latestVersion": {
"id": 49924,
"datasetId": 908158,
"datasetPersistentId": "doi:10.5683/SP3/7HF3IC",
"datasetType": "dataset",
"storageIdentifier": "s3://10.5683/SP3/7HF3IC",
"internalVersionNumber": 495,
"versionState": "DRAFT",
"latestVersionPublishingState": "DRAFT",
"deaccessionLink": "",
"lastUpdateTime": "2026-03-17T21:21:21Z",
"createTime": "2025-04-23T18:50:15Z",
"license": {
"name": "CC0 1.0",
"uri": "http://creativecommons.org/publicdomain/zero/1.0",
"iconUri": "https://licensebuttons.net/p/zero/1.0/88x31.png",
"rightsIdentifier": "CC0-1.0",
"rightsIdentifierScheme": "SPDX",
"schemeUri": "https://spdx.org/licenses/",
"languageCode": "en"
},
"fileAccessRequest": true,
"metadataBlocks": {
"citation": {
"displayName": "Citation Metadata",
"name": "citation",
"fields": [
{
"typeName": "title",
"multiple": false,
"typeClass": "primitive",
"value": "Test set"
},
{
"typeName": "author",
"multiple": true,
"typeClass": "compound",
"value": [
{
"authorName": {
"typeName": "authorName",
"multiple": false,
"typeClass": "primitive",
"value": "Heesemann, Martin"
},
"authorAffiliation": {
"typeName": "authorAffiliation",
"multiple": false,
"typeClass": "primitive",
"value": "University of Victoria"
}
},
{
"authorName": {
"typeName": "authorName",
"multiple": false,
"typeClass": "primitive",
"value": "Hutchinson, Jesse"
},
"authorAffiliation": {
"typeName": "authorAffiliation",
"multiple": false,
"typeClass": "primitive",
"value": "University of Victoria"
}
}
]
},
{
"typeName": "datasetContact",
"multiple": true,
"typeClass": "compound",
"value": [
{
"datasetContactName": {
"typeName": "datasetContactName",
"multiple": f
In [11]:
Copied!
# Inspect file list structure
files = dataset["data"]["latestVersion"]["files"]
print(f"Total files: {len(files)}")
print()
for f in files[:10]:
df = f["dataFile"]
print(
f" id={df['id']}",
f"filename={df['filename']}",
f"size={df.get('filesize', '?')}",
f"dir='{f.get('directoryLabel', '')}'" ,
f"type={df.get('contentType', '')}",
)
# Inspect file list structure
files = dataset["data"]["latestVersion"]["files"]
print(f"Total files: {len(files)}")
print()
for f in files[:10]:
df = f["dataFile"]
print(
f" id={df['id']}",
f"filename={df['filename']}",
f"size={df.get('filesize', '?')}",
f"dir='{f.get('directoryLabel', '')}'" ,
f"type={df.get('contentType', '')}",
)
Total files: 465 id=1103599 filename=0 size=1394708 dir='dual_heading.zarr/heading_std' type=application/octet-stream id=1103361 filename=0 size=75189 dir='dual_heading.zarr/nsol' type=application/octet-stream id=1103633 filename=0 size=46655 dir='dual_heading.zarr/nsat' type=application/octet-stream id=1103429 filename=0 size=19572 dir='dual_heading.zarr/datetime' type=application/octet-stream id=1103463 filename=0 size=68656 dir='dual_heading.zarr/nmulti' type=application/octet-stream id=1103292 filename=0 size=1642733 dir='dual_heading.zarr/pitch' type=application/octet-stream id=1103327 filename=0 size=13239 dir='dual_heading.zarr/dts' type=application/octet-stream id=1103531 filename=0 size=75189 dir='dual_heading.zarr/nelmask' type=application/octet-stream id=1103497 filename=0 size=1715106 dir='dual_heading.zarr/heading' type=application/octet-stream id=1103395 filename=0 size=1399253 dir='dual_heading.zarr/pitch_std' type=application/octet-stream
3. Test byte-range access¶
GET /api/access/datafile/{id} with Range: bytes=0-99
In [ ]:
Copied!
# Pick a file id from the listing above
FILE_ID = files[0]["dataFile"]["id"]
print(f"Testing range request on file id={FILE_ID}")
resp = httpx.get(
f"{BASE_URL}/access/datafile/{FILE_ID}",
headers={**HEADERS, "Range": "bytes=0-99"},
follow_redirects=True, # Important: follow redirects to get the actual file -
)
print(f"Status: {resp.status_code}")
print(f"Content-Range: {resp.headers.get('content-range', 'not returned')}")
print(f"Bytes received: {len(resp.content)}")
print(f"First 100 bytes: {resp.content[:100]}")
# Pick a file id from the listing above
FILE_ID = files[0]["dataFile"]["id"]
print(f"Testing range request on file id={FILE_ID}")
resp = httpx.get(
f"{BASE_URL}/access/datafile/{FILE_ID}",
headers={**HEADERS, "Range": "bytes=0-99"},
follow_redirects=True, # Important: follow redirects to get the actual file -
)
print(f"Status: {resp.status_code}")
print(f"Content-Range: {resp.headers.get('content-range', 'not returned')}")
print(f"Bytes received: {len(resp.content)}")
print(f"First 100 bytes: {resp.content[:100]}")
Testing range request on file id=1103599 Status: 206 Content-Range: bytes 0-99/1394708 Bytes received: 100 First 100 bytes: b'\x02\x01!\x01f\xcd \x00\x00\x00\x02\x00\x14H\x15\x00\xcaN\x01\x00\x191\x04\x00|\xd7\x02\x00T\x00\x00\x009\x01\x07\x00\xcbM\x08\x00\x0c\xa2\t\x00\xb0\xb9\x05\x00q\xe5\n\x00v\xa0\x0e\x00\xee \x0c\x00na\r\x00\xa3!\x11\x00\xa3\xdd\x0f\x002^\x12\x00D\x9c\x13\x00\x01\xd6\x14\x00rN\x01\x00\xf4\x049520\x0b\x00\x00\x000.'
4. Observed JSON shapes¶
Document key fields here after running the cells above:
| Field | Path in JSON | Notes |
|---|---|---|
| File ID | data.latestVersion.files[].dataFile.id |
integer |
| Filename | data.latestVersion.files[].dataFile.filename |
|
| File size | data.latestVersion.files[].dataFile.filesize |
bytes |
| Directory | data.latestVersion.files[].directoryLabel |
may be absent or empty |
| MIME type | data.latestVersion.files[].dataFile.contentType |
|
| MD5 | data.latestVersion.files[].dataFile.md5 |
may be absent |