{
"@id": "https://stars.renci.org/var/translator/data/diseases/2026_01_02/1.0/normalization_2025sep1/",
"@type": "sc:Dataset",
"name": "diseases",
"description": "A knowledge graph built for the NCATS Biomedical Data Translator project using Translator-Ingests, Biolink Model, and Node Normalizer.",
"license": "MIT",
"url": "https://stars.renci.org/var/translator/data/diseases/2026_01_02/1.0/normalization_2025sep1/",
"version": "diseases_2026_01_02_1.0_2025sep1_4.3.6",
"dateCreated": "2026_01_03",
"biolinkVersion": "4.3.6",
"babelVersion": "2025sep1",
"distribution": [
{
"@id": "diseases.tar.xz",
"@type": "cr:FileObject",
"contentUrl": "diseases.tar.xz",
"encodingFormat": "application/x-xz",
"description": "Compressed tar archive containing the KGX files: nodes.jsonl and edges.jsonl"
}
],
"isBasedOn": [
{
"id": "diseases",
"name": "Jensen Lab DISEASES Database Reference Ingest Guide",
"description": "The DISEASES database is a web resource that integrates knowledge on gene-disease associations. It generates de novo associations through automated text mining, and aggregates associations from external sources of manually curated knowledge and GWAS-based study results. The associations are assigned a confidence score to facilitate comparisons across data types and sources.",
"license": {
"license_name": "CC BY 4.0"
},
"url": [
"https://diseases.jensenlab.org/Downloads"
],
"version": "2026_01_02"
}
],
"schema": {
"nodes": [
{
"category": [
"biolink:Disease"
],
"count": 5563,
"id_prefixes": {
"MONDO": 5462,
"DOID": 101
},
"attributes": {
"name": 5563,
"equivalent_identifiers": 5563,
"information_content": 5490
}
},
{
"category": [
"biolink:Protein"
],
"count": 9145,
"id_prefixes": {
"UniProtKB": 9108,
"ENSEMBL": 37
},
"attributes": {
"name": 9145,
"equivalent_identifiers": 9145,
"information_content": 8974
}
},
{
"category": [
"biolink:Protein",
"biolink:Gene"
],
"count": 7318,
"id_prefixes": {
"NCBIGene": 7318
},
"attributes": {
"name": 7318,
"equivalent_identifiers": 7318,
"information_content": 6696
}
}
],
"nodes_summary": {
"total_count": 22026,
"id_prefixes": {
"UniProtKB": 9108,
"NCBIGene": 7318,
"MONDO": 5462,
"DOID": 101,
"ENSEMBL": 37
},
"attributes": {
"name": 22026,
"equivalent_identifiers": 22026,
"information_content": 21160
}
},
"edges": [
{
"subject_category": [
"biolink:Protein",
"biolink:Gene"
],
"predicate": "biolink:occurs_together_in_literature_with",
"object_category": [
"biolink:Disease"
],
"count": 115316,
"primary_knowledge_sources": {
"infores:diseases": 115316
},
"qualifiers": {},
"attributes": {
"id": 115316,
"category": 115316,
"knowledge_level": 115316,
"agent_type": 115316,
"has_confidence_score": 115316,
"z_score": 115316,
"original_subject": 115316,
"original_object": 115316
},
"subject_id_prefixes": {
"NCBIGene": 115316
},
"object_id_prefixes": {
"MONDO": 114145,
"DOID": 1171
}
},
{
"subject_category": [
"biolink:Protein"
],
"predicate": "biolink:occurs_together_in_literature_with",
"object_category": [
"biolink:Disease"
],
"count": 151599,
"primary_knowledge_sources": {
"infores:diseases": 151599
},
"qualifiers": {},
"attributes": {
"id": 151599,
"category": 151599,
"knowledge_level": 151599,
"agent_type": 151599,
"has_confidence_score": 151599,
"z_score": 151599,
"original_subject": 151599,
"original_object": 151599
},
"subject_id_prefixes": {
"UniProtKB": 151132,
"ENSEMBL": 467
},
"object_id_prefixes": {
"MONDO": 149906,
"DOID": 1693
}
},
{
"subject_category": [
"biolink:Protein"
],
"predicate": "biolink:associated_with",
"object_category": [
"biolink:Disease"
],
"count": 2346,
"primary_knowledge_sources": {
"infores:medlineplus": 2285,
"infores:amyco": 61
},
"qualifiers": {},
"attributes": {
"id": 2346,
"category": 2346,
"knowledge_level": 2346,
"agent_type": 2346,
"has_confidence_score": 2346,
"original_subject": 2346,
"original_object": 2346
},
"subject_id_prefixes": {
"UniProtKB": 2342,
"ENSEMBL": 4
},
"object_id_prefixes": {
"MONDO": 2320,
"DOID": 26
}
},
{
"subject_category": [
"biolink:Protein",
"biolink:Gene"
],
"predicate": "biolink:associated_with",
"object_category": [
"biolink:Disease"
],
"count": 1581,
"primary_knowledge_sources": {
"infores:medlineplus": 1461,
"infores:amyco": 120
},
"qualifiers": {},
"attributes": {
"id": 1581,
"category": 1581,
"knowledge_level": 1581,
"agent_type": 1581,
"has_confidence_score": 1581,
"original_subject": 1581,
"original_object": 1581
},
"subject_id_prefixes": {
"NCBIGene": 1581
},
"object_id_prefixes": {
"MONDO": 1569,
"DOID": 12
}
}
],
"edges_summary": {
"total_count": 270842,
"predicates": {
"biolink:occurs_together_in_literature_with": 266915,
"biolink:associated_with": 3927
},
"primary_knowledge_sources": {
"infores:diseases": 266915,
"infores:medlineplus": 3746,
"infores:amyco": 181
},
"predicates_by_knowledge_source": {
"infores:diseases": {
"biolink:occurs_together_in_literature_with": 266915
},
"infores:medlineplus": {
"biolink:associated_with": 3746
},
"infores:amyco": {
"biolink:associated_with": 181
}
},
"qualifiers": {},
"attributes": {
"id": 270842,
"category": 270842,
"knowledge_level": 270842,
"agent_type": 270842,
"has_confidence_score": 270842,
"original_subject": 270842,
"original_object": 270842,
"z_score": 266915
}
}
}
}