UniCarbKB SPARQL
Summary
The examples below can be run against the latest version of UniCarbKB 2.0, which uses the GlycoCoO 1.3.2 ontology.
UniCarbKB data releases (CSV and SPARQL) are available from our GitLab repo
Prefixes
Endpoint
http://sparql.unicarbkb.org (http://130.56.249.35:40935/unicarbkb/query)
License
Please note that this data is CC 4.0 licensed. All users are required to cite UniCarbKB (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3964942) and to acknowledge funding from ARDC-NeCTAR and the NIH Glycoscience Common Fund project GlyGen. Data provenance must be acknowledged and cross-referencing is recommended. The developers acknowledge GlycoSuiteDB, EUROCarbDB, GlycoBase (now GlycoStore) and all biocurators involved in these projects.
Saccharide Id, Publication Details
# Saccharide UniCarbKB ids together with the PubMed id of the publication
# in which the enclosing referenced glycoconjugate was reported.
SELECT DISTINCT ?pmid ?id ?sacc
WHERE {
  # Glycoconjugate -> its protein part and the publication it appears in.
  ?Glyco a gco:Referenced_glycoconjugate ;
         gco:has_protein_part ?p ;
         glycan:published_in ?published .
  ?published glycan:has_pmid ?pmid .

  # Protein part -> saccharide set -> set item -> saccharide.
  ?p a gco:Referenced_protein ;
     gco:has_saccharide_set ?set .
  ?set a gco:Saccharide_Set ;
       sio:is-component-part-of ?comp .
  ?comp a gco:Saccharide_Set_Item ;
        owl:sameAs ?sacc .
  ?sacc a glycan:Saccharide ;
        glycan:has_unicarbkb_id ?id .

  # The former OPTIONAL lookups (protein description, rdfs:seeAlso,
  # rdfs:comment) were dropped: none of their variables is projected, and
  # under SELECT DISTINCT an OPTIONAL on unprojected variables cannot change
  # the result set.
}
Get all proteins
# Pairs of (protein, referenced protein) linked through gco:has_protein.
# Only the first 25 distinct pairs are returned.
SELECT DISTINCT ?Protein ?ReferencedProtein
WHERE {
  ?ReferencedProtein gco:has_protein ?Protein .
}
LIMIT 25
Get Reference Compound with Publication, Species filtered by a Protein Accession
# Referenced glycoconjugates for one protein accession, with the PubMed id of
# the reporting publication and the scientific name of the source species.
SELECT DISTINCT ?ReferencedCompound ?Pmid ?Protein ?Species
WHERE {
  # The glycoconjugate node is what gets reported as ?ReferencedCompound;
  # previously that projected variable was never bound.
  ?ReferencedCompound a gco:Referenced_glycoconjugate ;
      gco:has_protein_part ?Protein ;
      glycan:published_in ?published ;
      glycan:is_from_source ?Source .
  ?published glycan:has_pmid ?Pmid .

  # ?ProteinAcc must be bound before it can be filtered on: a FILTER over an
  # unbound variable raises an error and rejects every solution.
  ?Protein a gco:Referenced_protein ;
      gco:has_protein ?ProteinAcc ;
      gco:has_saccharide_set ?set .

  ?Source glycan:has_taxon ?Taxon .
  ?Taxon uniprot:scientificName ?Species .

  FILTER regex(str(?ProteinAcc), "P07911")
}
Glycoprotein and Glycosylation Site
# Glycosylation regions and their exact positions for one protein accession.
SELECT DISTINCT ?Region ?Protein ?pos ?Faldo
WHERE {
  # Bind the accession that the FILTER below tests; without this pattern
  # ?ProteinAcc is unbound, the FILTER errors, and no rows are returned.
  ?Protein gco:has_protein ?ProteinAcc ;
           gco:glycosylated_at ?Region .
  ?Region faldo:ExactPosition ?Faldo .
  ?Faldo faldo:position ?pos .
  FILTER regex(str(?ProteinAcc), "P07911")
}
Get UniCarbKB glycan IDs for the structures at each glycosylation position of a specific protein.
# Saccharide identifiers and glycosylation positions for protein parts whose
# accession matches P07911 (first 10 distinct rows).
SELECT DISTINCT ?ProteinAcc ?Position ?Saccharide ?Id
WHERE {
  ?Glyco a gco:Referenced_glycoconjugate ;
         gco:has_protein_part ?Protein .

  # Protein part: accession, glycosylated region and attached saccharide set.
  ?Protein gco:has_protein ?ProteinAcc ;
           gco:glycosylated_at ?Region ;
           gco:has_saccharide_set ?Set .

  # Saccharide set -> set item -> saccharide -> identifier.
  ?Set sio:is-component-part-of ?SetItem .
  ?SetItem owl:sameAs ?Saccharide .
  ?Saccharide dcterms:identifier ?Id .

  # Region -> exact position.
  ?Region faldo:ExactPosition ?Faldo .
  ?Faldo faldo:position ?Position .

  FILTER regex(str(?ProteinAcc), "P07911")
}
LIMIT 10
Glycan Structures Attached to a Specific Site for P07911
List of structures attached to P07911 at position 76
# Glycan structures recorded for protein P07911 at sequence position 76.
SELECT DISTINCT ?ProteinAcc ?Position ?Saccharide ?Id
WHERE {
  ?Glyco a gco:Referenced_glycoconjugate ;
         gco:has_protein_part ?Protein .
  ?Protein gco:has_protein ?ProteinAcc ;
           gco:has_saccharide_set ?Set ;
           gco:glycosylated_at ?Region .
  ?Set sio:is-component-part-of ?SetItem .
  ?SetItem owl:sameAs ?Saccharide .
  ?Saccharide dcterms:identifier ?Id .
  ?Region faldo:ExactPosition ?Faldo .
  ?Faldo faldo:position ?Position .

  # Exact comparison on the position: the previous unanchored regex on "76"
  # would also accept positions such as 176, 276 or 760.
  FILTER (str(?Position) = "76")
  FILTER regex(str(?ProteinAcc), "P07911")
}
Haptoglobin - UniProt, UniCarbKB Id, GlyTouCan, Position, Amino Acid and Curation Notes
# Haptoglobin (identifier matching P00738): glycosylation position, saccharide
# identifier, optional GlyTouCan id, amino acid and PubMed id.
SELECT DISTINCT ?identifer ?Position ?Id ?Toucan ?AminoAcid ?TypeAminoAcid ?Pmid
WHERE {
  ?Glyco a gco:Referenced_glycoconjugate ;
         gco:has_protein_part ?Protein ;
         glycan:published_in ?published ;
         glycan:is_from_source ?Source .
  ?published glycan:has_pmid ?Pmid .

  # Protein part -> glycoprotein record carrying the identifier filtered on.
  ?Protein a gco:Referenced_protein ;
           gco:has_protein ?ProteinInfo ;
           gco:glycosylated_at ?Region .
  ?ProteinInfo a gco:Glycoprotein ;
               rdfs:seeAlso ?uniprot2 ;
               dcterms:identifier ?identifer .

  # Here the saccharide set hangs off the region, not the protein.
  ?Region a faldo:region ;
          faldo:ExactPosition ?Faldo ;
          gco:has_saccharide_set ?Set .
  ?Faldo faldo:position ?Position .

  ?Set sio:is-component-part-of ?SetItem .
  ?SetItem owl:sameAs ?Saccharide .
  ?Saccharide dcterms:identifier ?Id .
  # GlyTouCan id is optional: rows without one are still returned.
  OPTIONAL { ?Saccharide glycan:has_glytoucan_id ?Toucan . }

  ?Faldo gco:has_amino_acid ?AminoAcid .
  ?AminoAcid gco:amino_acid ?TypeAminoAcid .

  FILTER regex(str(?identifer), "P00738")
}
GROUP BY ?identifer ?Position ?Id ?Toucan ?AminoAcid ?TypeAminoAcid ?Pmid
Haptoglobin - Tissue
# Haptoglobin (identifier matching P00738): labels of resources linked to the
# recorded tissue, together with the source node.
SELECT DISTINCT ?Label ?Source
WHERE {
  ?Glyco a gco:Referenced_glycoconjugate ;
         gco:has_protein_part ?Protein ;
         glycan:published_in ?published ;
         glycan:is_from_source ?Source ;
         glycan:has_tissue ?tissue .

  # Any predicate from the tissue node to a resource that carries a label.
  ?tissue ?x ?related .
  ?related rdfs:label ?Label .

  ?published glycan:has_pmid ?Pmid .

  ?Protein a gco:Referenced_protein ;
           gco:has_protein ?ProteinInfo .
  ?ProteinInfo a gco:Glycoprotein ;
               rdfs:seeAlso ?uniprot2 ;
               dcterms:identifier ?identifer .

  FILTER regex(str(?identifer), "P00738")
}
GROUP BY ?Label ?Source
Use of CAR Identifiers
A selection of UniProt / UniCarbKB cross-references have been assigned unique CAR ids.
# CAR ids reachable from an annotation, with the region each CAR id is
# glycosylated at and that region's exact position.
SELECT DISTINCT ?P ?CAR ?Region ?Position
WHERE {
  # Subject -> annotation -> CAR id.
  ?P up:Annotation ?Anno .
  ?Anno gco:has_car_id ?CAR .

  # CAR id -> region -> exact position.
  ?CAR gco:glycosylated_at ?Region .
  ?Region faldo:ExactPosition ?Exact .
  ?Exact faldo:position ?Position .
}
Last updated
Was this helpful?