KGX STORAGE

graph-metadata.json
Back to Folder Download File
Size: 7.1 KB Modified: 2026-01-06 08:02:33
JSON Content
{
  "@id": "https://stars.renci.org/var/translator/data/diseases/2026_01_02/1.0/normalization_2025sep1/",
  "@type": "sc:Dataset",
  "name": "diseases",
  "description": "A knowledge graph built for the NCATS Biomedical Data Translator project using Translator-Ingests, Biolink Model, and Node Normalizer.",
  "license": "MIT",
  "url": "https://stars.renci.org/var/translator/data/diseases/2026_01_02/1.0/normalization_2025sep1/",
  "version": "diseases_2026_01_02_1.0_2025sep1_4.3.6",
  "dateCreated": "2026-01-03",
  "biolinkVersion": "4.3.6",
  "babelVersion": "2025sep1",
  "distribution": [
    {
      "@id": "diseases.tar.xz",
      "@type": "cr:FileObject",
      "contentUrl": "diseases.tar.xz",
      "encodingFormat": "application/x-xz",
      "description": "Compressed tar archive containing the KGX files: nodes.jsonl and edges.jsonl"
    }
  ],
  "isBasedOn": [
    {
      "id": "diseases",
      "name": "Jensen Lab DISEASES Database Reference Ingest Guide",
      "description": "The DISEASES database is a web resource that integrates knowledge on gene-disease associations. It generates de novo associations through automated text mining, and aggregates associations from external sources of manually curated knowledge and GWAS-based study results. The associations are assigned a confidence score to facilitate comparisons across data types and sources.",
      "license": {
        "license_name": "CC BY 4.0"
      },
      "url": [
        "https://diseases.jensenlab.org/Downloads"
      ],
      "version": "2026_01_02"
    }
  ],
  "schema": {
    "nodes": [
      {
        "category": [
          "biolink:Disease"
        ],
        "count": 5563,
        "id_prefixes": {
          "MONDO": 5462,
          "DOID": 101
        },
        "attributes": {
          "name": 5563,
          "equivalent_identifiers": 5563,
          "information_content": 5490
        }
      },
      {
        "category": [
          "biolink:Protein"
        ],
        "count": 9145,
        "id_prefixes": {
          "UniProtKB": 9108,
          "ENSEMBL": 37
        },
        "attributes": {
          "name": 9145,
          "equivalent_identifiers": 9145,
          "information_content": 8974
        }
      },
      {
        "category": [
          "biolink:Protein",
          "biolink:Gene"
        ],
        "count": 7318,
        "id_prefixes": {
          "NCBIGene": 7318
        },
        "attributes": {
          "name": 7318,
          "equivalent_identifiers": 7318,
          "information_content": 6696
        }
      }
    ],
    "nodes_summary": {
      "total_count": 22026,
      "id_prefixes": {
        "UniProtKB": 9108,
        "NCBIGene": 7318,
        "MONDO": 5462,
        "DOID": 101,
        "ENSEMBL": 37
      },
      "attributes": {
        "name": 22026,
        "equivalent_identifiers": 22026,
        "information_content": 21160
      }
    },
    "edges": [
      {
        "subject_category": [
          "biolink:Protein",
          "biolink:Gene"
        ],
        "predicate": "biolink:occurs_together_in_literature_with",
        "object_category": [
          "biolink:Disease"
        ],
        "count": 115316,
        "primary_knowledge_sources": {
          "infores:diseases": 115316
        },
        "qualifiers": {},
        "attributes": {
          "id": 115316,
          "category": 115316,
          "knowledge_level": 115316,
          "agent_type": 115316,
          "has_confidence_score": 115316,
          "z_score": 115316,
          "original_subject": 115316,
          "original_object": 115316
        },
        "subject_id_prefixes": {
          "NCBIGene": 115316
        },
        "object_id_prefixes": {
          "MONDO": 114145,
          "DOID": 1171
        }
      },
      {
        "subject_category": [
          "biolink:Protein"
        ],
        "predicate": "biolink:occurs_together_in_literature_with",
        "object_category": [
          "biolink:Disease"
        ],
        "count": 151599,
        "primary_knowledge_sources": {
          "infores:diseases": 151599
        },
        "qualifiers": {},
        "attributes": {
          "id": 151599,
          "category": 151599,
          "knowledge_level": 151599,
          "agent_type": 151599,
          "has_confidence_score": 151599,
          "z_score": 151599,
          "original_subject": 151599,
          "original_object": 151599
        },
        "subject_id_prefixes": {
          "UniProtKB": 151132,
          "ENSEMBL": 467
        },
        "object_id_prefixes": {
          "MONDO": 149906,
          "DOID": 1693
        }
      },
      {
        "subject_category": [
          "biolink:Protein"
        ],
        "predicate": "biolink:associated_with",
        "object_category": [
          "biolink:Disease"
        ],
        "count": 2346,
        "primary_knowledge_sources": {
          "infores:medlineplus": 2285,
          "infores:amyco": 61
        },
        "qualifiers": {},
        "attributes": {
          "id": 2346,
          "category": 2346,
          "knowledge_level": 2346,
          "agent_type": 2346,
          "has_confidence_score": 2346,
          "original_subject": 2346,
          "original_object": 2346
        },
        "subject_id_prefixes": {
          "UniProtKB": 2342,
          "ENSEMBL": 4
        },
        "object_id_prefixes": {
          "MONDO": 2320,
          "DOID": 26
        }
      },
      {
        "subject_category": [
          "biolink:Protein",
          "biolink:Gene"
        ],
        "predicate": "biolink:associated_with",
        "object_category": [
          "biolink:Disease"
        ],
        "count": 1581,
        "primary_knowledge_sources": {
          "infores:medlineplus": 1461,
          "infores:amyco": 120
        },
        "qualifiers": {},
        "attributes": {
          "id": 1581,
          "category": 1581,
          "knowledge_level": 1581,
          "agent_type": 1581,
          "has_confidence_score": 1581,
          "original_subject": 1581,
          "original_object": 1581
        },
        "subject_id_prefixes": {
          "NCBIGene": 1581
        },
        "object_id_prefixes": {
          "MONDO": 1569,
          "DOID": 12
        }
      }
    ],
    "edges_summary": {
      "total_count": 270842,
      "predicates": {
        "biolink:occurs_together_in_literature_with": 266915,
        "biolink:associated_with": 3927
      },
      "primary_knowledge_sources": {
        "infores:diseases": 266915,
        "infores:medlineplus": 3746,
        "infores:amyco": 181
      },
      "predicates_by_knowledge_source": {
        "infores:diseases": {
          "biolink:occurs_together_in_literature_with": 266915
        },
        "infores:medlineplus": {
          "biolink:associated_with": 3746
        },
        "infores:amyco": {
          "biolink:associated_with": 181
        }
      },
      "qualifiers": {},
      "attributes": {
        "id": 270842,
        "category": 270842,
        "knowledge_level": 270842,
        "agent_type": 270842,
        "has_confidence_score": 270842,
        "original_subject": 270842,
        "original_object": 270842,
        "z_score": 266915
      }
    }
  }
}