{
"@id": "https://stars.renci.org/var/translator/releases/gene2phenotype/2026_01_03/",
"@type": "sc:Dataset",
"name": "gene2phenotype",
"description": "A knowledge graph built for the NCATS Biomedical Data Translator project using Translator-Ingests, Biolink Model, and Node Normalizer.",
"license": "MIT",
"url": "https://stars.renci.org/var/translator/releases/gene2phenotype/2026_01_03/",
"version": "gene2phenotype_2026_01_03_1.0_2025sep1_4.3.6",
"dateCreated": "2026_01_03",
"biolinkVersion": "4.3.6",
"babelVersion": "2025sep1",
"distribution": [
{
"@id": "gene2phenotype.tar.xz",
"@type": "cr:FileObject",
"contentUrl": "gene2phenotype.tar.xz",
"encodingFormat": "application/x-xz",
"description": "Compressed tar archive containing the KGX files: nodes.jsonl and edges.jsonl"
}
],
"isBasedOn": [
{
"id": "gene2phenotype",
"name": "EBI Gene2Phenotype Reference Ingest Guide",
"description": "EBI's Gene2Phenotype dataset contains high-quality gene-disease associations curated by UK disease domain experts and consultant clinical geneticists. It integrates data on genes, their variants, and related disorders. It is constructed by experts reviewing published literature, and it is primarily an inclusion list to allow targeted filtering of genome-wide data for diagnostic purposes. Each entry associates a gene with a disease, including a confidence level, allelic requirement and molecular mechanism.",
"license": {
"terms_of_use_description": "Unsure, but likely uses EMBL-EBI terms of use (linked in website footer). Currently don't see a formal standard license. Various resource webpages (About -> The G2P Project, Downloads, Publications) say that all data is 'freely available' and to please cite the date accessed/data version and Thorman et al 2019",
"terms_of_use_url": "https://www.ebi.ac.uk/about/terms-of-use/#general"
},
"url": [
"Latest data is provided at https://www.ebi.ac.uk/gene2phenotype/download (downloads created on-the-fly)",
"Archived static releases provided on the FTP site at https://ftp.ebi.ac.uk/pub/databases/gene2phenotype/G2P_data_downloads/"
],
"version": "2026_01_03"
}
],
"schema": {
"nodes": [
{
"category": [
"biolink:Disease"
],
"count": 2832,
"id_prefixes": {
"MONDO": 2784,
"OMIM": 38,
"DOID": 10
},
"attributes": {
"name": 2832,
"equivalent_identifiers": 2832,
"information_content": 2811
}
},
{
"category": [
"biolink:Gene",
"biolink:Protein"
],
"count": 2563,
"id_prefixes": {
"NCBIGene": 2563
},
"attributes": {
"name": 2563,
"equivalent_identifiers": 2563,
"information_content": 2563
}
},
{
"category": [
"biolink:Gene"
],
"count": 21,
"id_prefixes": {
"NCBIGene": 21
},
"attributes": {
"name": 21,
"equivalent_identifiers": 21,
"information_content": 19
}
}
],
"nodes_summary": {
"total_count": 5416,
"id_prefixes": {
"MONDO": 2784,
"NCBIGene": 2584,
"OMIM": 38,
"DOID": 10
},
"attributes": {
"name": 5416,
"equivalent_identifiers": 5416,
"information_content": 5393
}
},
"edges": [
{
"subject_category": [
"biolink:Gene",
"biolink:Protein"
],
"predicate": "biolink:associated_with",
"object_category": [
"biolink:Disease"
],
"count": 2988,
"primary_knowledge_sources": {
"infores:gene2phenotype": 2988
},
"qualifiers": {
"qualified_predicate": 2988,
"subject_form_or_variant_qualifier": 2988
},
"attributes": {
"id": 2988,
"category": 2988,
"knowledge_level": 2988,
"agent_type": 2988,
"update_date": 2988,
"allelic_requirement": 2988,
"original_subject": 2988,
"original_object": 2988,
"publications": 2913
},
"subject_id_prefixes": {
"NCBIGene": 2988
},
"object_id_prefixes": {
"MONDO": 2938,
"OMIM": 39,
"DOID": 11
}
},
{
"subject_category": [
"biolink:Gene"
],
"predicate": "biolink:associated_with",
"object_category": [
"biolink:Disease"
],
"count": 26,
"primary_knowledge_sources": {
"infores:gene2phenotype": 26
},
"qualifiers": {
"qualified_predicate": 26,
"subject_form_or_variant_qualifier": 26
},
"attributes": {
"id": 26,
"category": 26,
"knowledge_level": 26,
"agent_type": 26,
"update_date": 26,
"allelic_requirement": 26,
"original_subject": 26,
"original_object": 26,
"publications": 23
},
"subject_id_prefixes": {
"NCBIGene": 26
},
"object_id_prefixes": {
"MONDO": 26
}
}
],
"edges_summary": {
"total_count": 3014,
"predicates": {
"biolink:associated_with": 3014
},
"primary_knowledge_sources": {
"infores:gene2phenotype": 3014
},
"predicates_by_knowledge_source": {
"infores:gene2phenotype": {
"biolink:associated_with": 3014
}
},
"qualifiers": {
"qualified_predicate": 3014,
"subject_form_or_variant_qualifier": 3014
},
"attributes": {
"id": 3014,
"category": 3014,
"knowledge_level": 3014,
"agent_type": 3014,
"update_date": 3014,
"allelic_requirement": 3014,
"original_subject": 3014,
"original_object": 3014,
"publications": 2936
}
}
}
}