UniCarbKB SPARQL

Summary

The examples below can be run against the latest version of UniCarbKB 2.0, which uses the GlycoCoO 1.3.2 ontology.

For more information and example RDF files, refer to the GlycoCoO GitHub site.

Prefixes

Endpoint

http://sparql.unicarbkb.org (http://130.56.249.35:40935/unicarbkb/query)

License

Please note that this data is licensed under CC 4.0. All users are required to cite UniCarbKB (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3964942) and to acknowledge funding from ARDC-NeCTAR and the NIH Glycoscience Common Fund project GlyGen. Data provenance must be acknowledged and cross-referencing is recommended. The developers acknowledge GlycoSuiteDB, EUROCarbDB, GlycoBase (now GlycoStore) and all biocurators involved in these projects.

Saccharide Id, Publication Details

# For every referenced glycoconjugate, list the PubMed id of the publication it
# was reported in, together with each saccharide attached to its protein part
# and that saccharide's UniCarbKB id.
# The gco:, glycan:, sio: and owl: prefixes must be declared before the query.
SELECT DISTINCT ?pmid ?id ?sacc
WHERE {
  # Glycoconjugate -> protein part and publication.
  ?Glyco a gco:Referenced_glycoconjugate ;
         gco:has_protein_part ?p ;
         glycan:published_in ?published .
  ?published glycan:has_pmid ?pmid .

  # Protein -> saccharide set -> set item -> saccharide.
  ?p a gco:Referenced_protein ;
     gco:has_saccharide_set ?set .
  ?set a gco:Saccharide_Set ;
       sio:is-component-part-of ?comp .
  ?comp a gco:Saccharide_Set_Item ;
        owl:sameAs ?sacc .
  ?sacc a glycan:Saccharide ;
        glycan:has_unicarbkb_id ?id .

  # The OPTIONAL lookups of glycan:has_protein_description, rdfs:seeAlso and
  # rdfs:comment on ?p were dropped: none of their variables is projected, and
  # with SELECT DISTINCT an unprojected OPTIONAL cannot change the result set.
}

Get all proteins

# Return up to 25 distinct (protein, referenced protein) pairs that are linked
# by gco:has_protein. There is no ORDER BY, so which 25 pairs are returned is
# left to the endpoint.
SELECT DISTINCT ?Protein ?ReferencedProtein
WHERE {
  ?ReferencedProtein gco:has_protein ?Protein .
}
LIMIT 25

Get Reference Compound with Publication, Species filtered by a Protein Accession

# Referenced glycoconjugates whose protein part matches accession P07911,
# with the PubMed id of the reporting publication and the species name of the
# biological source.
SELECT DISTINCT ?ReferencedCompound ?Pmid ?Protein ?Species
WHERE {
  # The glycoconjugate is bound to ?ReferencedCompound so that the projected
  # column is actually populated (it was previously bound to an unprojected
  # variable, leaving ?ReferencedCompound empty in every row).
  ?ReferencedCompound a gco:Referenced_glycoconjugate ;
                      gco:has_protein_part ?Protein ;
                      glycan:published_in ?published ;
                      glycan:is_from_source ?Source .
  ?published glycan:has_pmid ?Pmid .

  # ?ProteinAcc must be bound for the FILTER below to succeed; it is obtained
  # through gco:has_protein, the same way the other P07911 queries in this
  # document bind it. An unbound variable makes str() raise an error, which
  # FILTER treats as false and therefore discards every solution.
  ?Protein a gco:Referenced_protein ;
           gco:has_protein ?ProteinAcc ;
           gco:has_saccharide_set ?set .

  ?Source glycan:has_taxon ?Taxon .
  ?Taxon uniprot:scientificName ?Species .

  # Substring match on the string form of the protein node.
  FILTER regex(str(?ProteinAcc), "P07911")
}

Glycoprotein and Glycosylation Site

# Glycosylation sites of the protein matching accession P07911: the region,
# the referenced protein, the FALDO position node and its numeric position.
SELECT DISTINCT ?Region ?Protein ?pos ?Faldo
WHERE {
  # Bind ?ProteinAcc via gco:has_protein so the FILTER has a value to test.
  # Without this triple the variable is unbound, str() errors, and the FILTER
  # rejects every solution, so the query would always return no rows.
  ?Protein gco:has_protein ?ProteinAcc ;
           gco:glycosylated_at ?Region .
  ?Region faldo:ExactPosition ?Faldo .
  ?Faldo faldo:position ?pos .

  # Substring match on the string form of the protein node.
  FILTER regex(str(?ProteinAcc), "P07911")
}

Get UniCarbKB glycan IDs for structures at a location for a specific protein.

# For the protein matching accession P07911, list each glycosylation position
# together with the saccharides in the protein's saccharide set and their
# dcterms:identifier values. Only the first 10 rows are returned; there is no
# ORDER BY, so the choice of rows is left to the endpoint.
SELECT DISTINCT ?ProteinAcc ?Position ?Saccharide ?Id
WHERE {
  ?Glyco a gco:Referenced_glycoconjugate ;
         gco:has_protein_part ?Protein .

  # Everything hanging directly off the referenced protein.
  ?Protein gco:has_protein ?ProteinAcc ;
           gco:glycosylated_at ?Region ;
           gco:has_saccharide_set ?Set .

  # Site position.
  ?Region faldo:ExactPosition ?Faldo .
  ?Faldo faldo:position ?Position .

  # Saccharide set -> set item -> saccharide -> identifier.
  ?Set sio:is-component-part-of ?SetItem .
  ?SetItem owl:sameAs ?Saccharide .
  ?Saccharide dcterms:identifier ?Id .

  FILTER regex(str(?ProteinAcc), "P07911")
}
LIMIT 10

Glycan Structures Attached to a Specific Site for P07911

List of structures attached to P07911 at position 76

# Saccharides (and their dcterms:identifier values) recorded for the protein
# matching accession P07911 at glycosylation position 76.
SELECT DISTINCT ?ProteinAcc ?Position ?Saccharide ?Id
WHERE {
  ?Glyco a gco:Referenced_glycoconjugate ;
         gco:has_protein_part ?Protein .
  ?Protein gco:has_protein ?ProteinAcc ;
           gco:has_saccharide_set ?Set ;
           gco:glycosylated_at ?Region .

  # Saccharide set -> set item -> saccharide -> identifier.
  ?Set sio:is-component-part-of ?SetItem .
  ?SetItem owl:sameAs ?Saccharide .
  ?Saccharide dcterms:identifier ?Id .

  # Site position.
  ?Region faldo:ExactPosition ?Faldo .
  ?Faldo faldo:position ?Position .

  # Exact comparison on the lexical form of the position. An unanchored
  # regex on "76" would also accept 176, 760, 1076 and so on.
  FILTER (str(?Position) = "76")

  # Substring match on the string form of the protein node.
  FILTER regex(str(?ProteinAcc), "P07911")
}

Haptoglobin - UniProt, UniCarbKB Id, GlyTouCan, Position, Amino Acid and Curation Notes

 # Haptoglobin (identifier matching P00738): per glycosylation site, the protein
 # identifier, position, saccharide identifier, GlyTouCan id (when present),
 # amino-acid node and its type, and the PubMed id of the reporting publication.
 # The result column is spelled ?identifer (sic); it is kept so that existing
 # consumers of the result set continue to find the same column name.
 SELECT DISTINCT ?identifer ?Position ?Id ?Toucan ?AminoAcid ?TypeAminoAcid ?Pmid
 WHERE {
   ?Glyco a gco:Referenced_glycoconjugate ;
          gco:has_protein_part ?Protein ;
          glycan:published_in ?published ;
          glycan:is_from_source ?Source .
   ?published glycan:has_pmid ?Pmid .

   # Referenced protein -> glycoprotein record carrying the identifier.
   ?Protein a gco:Referenced_protein ;
            gco:has_protein ?ProteinInfo ;
            gco:glycosylated_at ?Region .
   ?ProteinInfo a gco:Glycoprotein ;
                rdfs:seeAlso ?uniprot2 ;
                dcterms:identifier ?identifer .

   # Site: position, amino acid, and the saccharide set attached to the region.
   # The region's type triple is stated once (it was previously repeated).
   ?Region a faldo:region ;
           faldo:ExactPosition ?Faldo ;
           gco:has_saccharide_set ?Set .
   ?Faldo faldo:position ?Position ;
          gco:has_amino_acid ?AminoAcid .
   ?AminoAcid gco:amino_acid ?TypeAminoAcid .

   # Saccharide set -> set item -> saccharide -> identifier.
   ?Set sio:is-component-part-of ?SetItem .
   ?SetItem owl:sameAs ?Saccharide .
   ?Saccharide dcterms:identifier ?Id .

   # Not every saccharide has a GlyTouCan id; keep the row either way.
   OPTIONAL { ?Saccharide glycan:has_glytoucan_id ?Toucan . }

   FILTER regex(str(?identifer), "P00738")
 }
 # No GROUP BY: it listed exactly the projected variables with no aggregate,
 # which SELECT DISTINCT already covers.

Haptoglobin - Tissue


 # Haptoglobin (identifier matching P00738): labels of resources linked from the
 # tissue of each referenced glycoconjugate, together with the glycoconjugate's
 # source.
 SELECT DISTINCT ?Label ?Source
 WHERE {
   ?Glyco a gco:Referenced_glycoconjugate ;
          gco:has_protein_part ?Protein ;
          glycan:published_in ?published ;
          glycan:is_from_source ?Source ;
          glycan:has_tissue ?tissue .

   # Follow any predicate from the tissue node and take the rdfs:label of
   # whatever it points to.
   ?tissue ?x ?related .
   ?related rdfs:label ?Label .

   # Not projected, but required: only glycoconjugates whose publication has a
   # PubMed id are kept.
   ?published glycan:has_pmid ?Pmid .

   ?Protein a gco:Referenced_protein ;
            gco:has_protein ?ProteinInfo .
   ?ProteinInfo a gco:Glycoprotein ;
                rdfs:seeAlso ?uniprot2 ;
                dcterms:identifier ?identifer .

   FILTER regex(str(?identifer), "P00738")
 }
 # No GROUP BY: it listed exactly the projected variables with no aggregate,
 # which SELECT DISTINCT already covers.

Use of CAR Identifiers

A selection of UniProt / UniCarbKB cross-references have been assigned unique CAR IDs.

# List subjects that have an annotation carrying a CAR id, with the region each
# CAR id is glycosylated at and the position of that region.
# NOTE(review): up:Annotation is used here as a predicate; confirm against the
# dataset that this is the intended property name.
SELECT DISTINCT ?P ?CAR ?Region ?Position
WHERE {
  # Subject -> annotation -> CAR id, written as a sequence path because the
  # intermediate annotation node is not projected.
  ?P up:Annotation / gco:has_car_id ?CAR .
  ?CAR gco:glycosylated_at ?Region .
  # Region -> exact-position node -> position value.
  ?Region faldo:ExactPosition / faldo:position ?Position .
}

Last updated

Was this helpful?