This page is also available in: 日本語

This page provides some examples of SQL queries that can be executed from the PDBj Mine interface at http://pdbj.org/mine.
You can use these samples at SQL Search of PDBj Mine or PDBj Mine:REST API.
The result are returned in web pages or a test file such as XML, CSV, and TSV, respectively. (See also PDBj Mine: search result.)

[Search for all PDB IDs:]

SELECT pdbid FROM brief_summary

[Search for all count (PDB IDs):]

SELECT count(pdbid) FROM brief_summary

[Search for all PDB IDs (1g*):]

SELECT pdbid FROM brief_summary where pdbid like '1g%'

[Search for citation_author category (1gof):]

SELECT * FROM citation_author where pdbid='1gof'

[Search for citation_author ('Ito, N.'):]

SELECT pdbid
FROM brief_summary b 
WHERE '{"Ito, N."}' <@ citation_author

[Search for citation_author ('Ito, N.' and 'Phillips, S.E.V.'):]

SELECT pdbid
FROM brief_summary b 
WHERE '{"Ito, N.","Phillips, S.E.V."}' <@ citation_author

[Search for citation_author ['Ito, N.'] (primary):]

SELECT pdbid
FROM brief_summary b 
WHERE '{"Ito, N."}' <@ citation_author_pri

[Search for journal year ['2001']:]

SELECT pdbid
FROM brief_summary b 
WHERE '{2001}' && citation_year

[Search for journal volume ['555']:]

SELECT pdbid
FROM brief_summary b 
WHERE '{"555"}' && citation_volume

[Search for exprimental method [SOLUTION NMR]:]

SELECT pdbid
FROM brief_summary b
WHERE 6 = ANY(exptl_method_ids)

Note on experimental methods

In the brief_summary table, experimental method (exptl_method column) is defined as an enumeration of keywords defined as follows:

  1. X-RAY DIFFRACTION
  2. NEUTRON DIFFRACTION
  3. FIBER DIFFRACTION
  4. ELECTRON CRYSTALLOGRAPHY
  5. ELECTRON MICROSCOPY
  6. SOLUTION NMR
  7. SOLID-STATE NMR
  8. SOLUTION SCATTERING
  9. POWDER DIFFRACTION
  10. INFRARED SPECTROSCOPY
  11. EPR
  12. THEORETICAL MODEL
  13. Others

[Search for journal year[1991] volume[266] exp_method [X-RAY DIFFRACTION]:]

SELECT pdbid
FROM brief_summary b 
WHERE '{1991}' && citation_year AND '{"266"}' && citation_volume 
AND '{1}' && exptl_method_ids

[Search for exptl_crystal/density_percent_sol (1iqq):]

select density_percent_sol from exptl_crystal where pdbid='1iqq'

[Search for citation/pdbx_database_id_DOI (1iqq):]

select "pdbx_database_id_DOI" from citation where pdbid = '1iqq'

[Search the PubMed ID of 1iqq (citation/pdbx_database_id_PubMed):]

select "pdbx_database_id_PubMed" from citation where pdbid = '1iqq'

[Result]

 pdbx_database_id_PubMed: 
11577107

[Search the PubMed ID of 1ao8 and 1ai9 (citation/pdbx_database_id_PubMed):]

select pdbid, "pdbx_database_id_PubMed" from citation where pdbid='1ai9' or pdbid='1ao8'

[Result]

 1ao8 
 pdbx_database_id_PubMed: 
9514736 1ai9 
 pdbx_database_id_PubMed: 
9374515

[Search for citation/pdbx_database_id_DOI (1i*):]

select pdbid, "pdbx_database_id_DOI" from citation where pdbid like '1i%'

[Search for all entries with release date [after 2000-01-01]:]

SELECT pdbid
FROM brief_summary 
WHERE release_date >= '2000-01-01'

[Search for all entries with release date [after 2003-01-01] and deposition date [after 2001-01-01] and PDB ID (2k*) :]

SELECT pdbid
FROM brief_summary b 
WHERE release_date >= '2003-01-01' 
AND deposition_date >= '2001-01-01' 
AND b.pdbid LIKE '2k%'

[Search for all entries with chain type [polypeptide(D)]:]

SELECT pdbid FROM brief_summary WHERE '{1}' <@ chain_type_ids

Note on chain types

The chain types of polymers in the brief_summary table are defined as an enumeration of specific keywords. The following chain types are currently available:

  1. polypeptide(D)
  2. polypeptide(L)
  3. polydeoxyribonucleotide
  4. polyribonucleotide
  5. polysaccharide(D)
  6. polysaccharide(L)
  7. polydeoxyribonucleotide/polyribonucleotide hybrid
  8. cyclic-pseudo-peptide
  9. other

[Search for all entries with chain type [polypeptide(D),polypeptide(L)]:]

SELECT pdbid FROM brief_summary WHERE '{1,2}' <@ chain_type_ids

[Search for all entries with chain type [no polydeoxyribonucleotide]:]

SELECT pdbid FROM brief_summary WHERE NOT ('{3}' && chain_type_ids)

[Search for all entries with chain type [no polydeoxyribonucleotide, polyribonucleotide]:]

SELECT pdbid
FROM brief_summary 
WHERE NOT ('{3,4}' && chain_type_ids)

[Search for all entries with chain type [all no except polysaccharide(D)]:]

SELECT pdbid
FROM brief_summary 
WHERE ('{5}' <@ chain_type_ids) AND NOT ('{1,2,3,4,6,7,8,9}' && chain_type_ids)

[Search for exptl/method, unit axes, unit angles, exptl_crystal/density_percent_sol, exptl_crystal/density_percent_sol (1iqq, 1ioo):]

SELECT exptl.pdbid, exptl.method, cell.length_a, cell.length_b, cell.length_c, exptl_crystal.density_percent_sol 
from exptl
join cell on exptl.pdbid = cell.pdbid
join exptl_crystal on exptl.pdbid = exptl_crystal.pdbid where exptl.pdbid = '1iqq' or exptl.pdbid = '1ioo'

[Search for entity names with EC number 1.1.1.1:]

select pdbid, pdbx_description from entity where pdbx_ec='1.1.1.1'

[Search for all entries with resolution less than or equal to 2.0 angstroms:]

select pdbid, ls_d_res_high as "resolution" from refine where ls_d_res_high <= 2.0

[List PDB ID and total polymer molecular weights:]

select pdbid, sum(pdbx_number_of_molecules*formula_weight) as weight 
from entity
where type = 'polymer'
group by pdbid
order by weight desc

[Search for all entries with pdbx_descriptor, pdbx_keywords, unit axes, unit angles, number of polymer molecules, and polymer weight:]

select cell.pdbid, struct.pdbx_descriptor, struct_keywords.pdbx_keywords, 
cell.length_a, cell.length_b, cell.length_c,
cell.angle_alpha, cell.angle_beta, cell.angle_gamma,
sum(entity.pdbx_number_of_molecules) as number_of_molecules,
sum(entity.pdbx_number_of_molecules * entity.formula_weight) as weight
from cell
join struct on cell.pdbid = struct.pdbid
join struct_keywords on cell.pdbid = struct_keywords.pdbid
join entity on cell.pdbid = entity.pdbid
where entity.type = 'polymer'
group by cell.pdbid, struct.pdbx_descriptor, struct_keywords.pdbx_keywords, cell.length_a, cell.length_b, cell.length_c, cell.angle_alpha, cell.angle_beta, cell.angle_gamma

NOTE: The mmCIF category views

As can be seen in the above example, writing queries that involve many mmCIF category can be tedious. For convenience, views ( mmCIF category views ) that group mmCIF items in each category into one row are defined. Using these views, a query that is semantically equivalent to the one above can be written as

select cell.pdbid, struct.pdbx_descriptor, struct_keywords.pdbx_keywords,
cell.length_a, cell.length_b, cell.length_c,
cell.angle_alpha, cell.angle_beta, cell.angle_gamma,
sum(entity.pdbx_number_of_molecules) as number_of_molecules,
sum(entity.pdbx_number_of_molecules * entity.formula_weight) as weight
from cell
join struct on cell.pdbid = struct.pdbid
join struct_keywords on cell.pdbid = struct_keywords.pdbid
join entity on cell.pdbid = entity.pdbid
where entity.type = 'polymer'
group by cell.pdbid, struct.pdbx_descriptor, struct_keywords.pdbx_keywords, cell.length_a, cell.length_b, cell.length_c, cell.angle_alpha, cell.angle_beta, cell.angle_gamma
order by cell.pdbid

Here, struct, struct_keywords, cell, entity are the mmCIF category views. The view names and item (column) names are defined as in the mmCIF dictionary as much as possible. For some item names, you may need to enclose them by double quotes because they involve characters that are not allowed as SQL (literal) identifiers.

There are a few caveats to note:

  1. Querying with the mmCIF category views can be slow sometimes, especially if a category contains many items, and hence the view involves many (left outer) joins.
  2. Since many of the mmCIF items lack actual data, the value of those items are set to NULL in the mmCIF category views.

[Retrieve biological unit information]

select pdbx_struct_assembly.pdbid, pdbx_struct_assembly.details, pdbx_struct_assembly.oligomeric_count, pdbx_struct_assembly.oligomeric_details,
pdbx_struct_assembly_gen.assembly_id, pdbx_struct_assembly_gen.asym_id_list, pdbx_struct_assembly_gen.oper_expression
from pdbx_struct_assembly
join pdbx_struct_assembly_gen on pdbx_struct_assembly.pdbid = pdbx_struct_assembly_gen.pdbid and pdbx_struct_assembly.id = pdbx_struct_assembly_gen.assembly_id
where pdbx_struct_assembly.details is not null

NOTE

Here we use views for mmCIF categories pdbx_struct_assembly and pdbx_struct_assembly_gen which groups mmCIF items in each category element into one row.


[Retrieve biological unit information for PDB entry 1nov]

select pdbx_struct_assembly.pdbid, pdbx_struct_assembly.details, pdbx_struct_assembly.oligomeric_count, pdbx_struct_assembly.oligomeric_details,
pdbx_struct_assembly_gen.assembly_id, pdbx_struct_assembly_gen.asym_id_list, pdbx_struct_assembly_gen.oper_expression
from pdbx_struct_assembly
join pdbx_struct_assembly_gen on pdbx_struct_assembly.pdbid = pdbx_struct_assembly_gen.pdbid and pdbx_struct_assembly.id = pdbx_struct_assembly_gen.assembly_id
where pdbx_struct_assembly.pdbid='1nov'

[Result]

 1nov 
 details: 
complete icosahedral assembly oligomeric_count: 
360 oligomeric_details: 
360-MERIC assembly_id: 
1 asym_id_list: 
A,B,C,D,E,F oper_expression: 
(1-60) 1nov 
 details: 
icosahedral asymmetric unit assembly_id: 
2 asym_id_list: 
A,B,C,D,E,F oper_expression: 
1 1nov 
 details: 
icosahedral pentamer assembly_id: 
3 asym_id_list: 
A,B,C,D,E,F oper_expression: 
(1-5) 1nov 
 details: 
icosahedral 23 hexamer assembly_id: 
4 asym_id_list: 
A,B,C,D,E,F oper_expression: 
(1,2,6,10,23,24) 1nov 
 details: 
icosahedral asymmetric unit, std point frame assembly_id: 
PAU asym_id_list: 
A,B,C,D,E,F oper_expression: 
P

[Counting the total number of residues in entries]

with slen(pdbid, entity_id, len) as
(select pdbid, entity_id, count(*) 
 from entity_poly_seq 
 group by pdbid, entity_id)
select entity.pdbid, sum(entity.pdbx_number_of_molecules*s.len)
from entity
join slen s on s.pdbid = entity.pdbid and s.entity_id = entity.id
group by entity.pdbid

[Map between PDB entity and UniProt accession]

Using category view

select distinct pdbid, entity_id, db_code, pdbx_db_accession from struct_ref where db_name = 'UNP' and entity_id is NOT NULL

2013-12-04 (last edited: yesterday)2014-10-29
Copyright © 2013-2014 Protein Data Bank Japan
  • Everything
  • PDBID/Keywords
  • Author
  • Chemie
  • Sequence
  • ?
104537
entries available on 2014-10-29
00:00 UTC / 09:00 JST