{"doi":"10.1093/nar/gkab1061","title":"AlphaFold Protein Structure Database: massively expanding the structural coverage of protein-sequence space with high-accuracy models","abstract":"The AlphaFold Protein Structure Database (AlphaFold DB, https://alphafold.ebi.ac.uk) is an openly accessible, extensive database of high-accuracy protein-structure predictions. Powered by AlphaFold v2.0 of DeepMind, it has enabled an unprecedented expansion of the structural coverage of the known protein-sequence space. AlphaFold DB provides programmatic access to and interactive visualization of predicted atomic coordinates, per-residue and pairwise model-confidence estimates and predicted aligned errors. The initial release of AlphaFold DB contains over 360,000 predicted structures across 21 model-organism proteomes, which will soon be expanded to cover most of the (over 100 million) representative sequences from the UniRef90 data set.","journal":"Nucleic Acids Research","year":2021,"id":3056,"datarank":14.864306230120592,"base_score":8.958668737047434,"endowment":8.958668737047434,"self_citation_contribution":1.3438003105571152,"citation_network_contribution":13.520505919563476,"self_endowment_contribution":1.3438003105571152,"citer_contribution":13.520505919563476,"corpus_percentile":87.0626525630594,"corpus_rank":160,"citation_count":8161,"citer_count":191,"citers_with_citation_signal":191,"citers_with_endowment":191,"datacite_reuse_total":0,"is_dataset":true,"is_dataset_confidence":0.9499,"is_oa":true,"file_count":0,"downloads":0,"has_version_chain":false,"published_date":"2021-11-17","fair_score":58.125,"fair_percentile":91.84256816182938,"algorithm_id":"datarank_citation_only_1hop_v6","ranking_scope":"data_only","authors":[{"id":33974,"name":"Stephen Anyango","orcid":"0000-0003-4838-443X","position":1,"is_corresponding":false},{"id":33975,"name":"Mandar Deshpande","orcid":"0000-0002-9043-7665","position":2,"is_corresponding":false},{"id":33976,"name":"Sreenath Nair","orcid":"0000-0002-6861-7627","position":3,"is_corresponding":false},{"id":33977,"name":"Cindy Natassia","orcid":null,"position":4,"is_corresponding":false},{"id":33978,"name":"Galabina Yordanova","orcid":"0000-0002-0329-6922","position":5,"is_corresponding":false},{"id":33979,"name":"David Yuan","orcid":null,"position":6,"is_corresponding":false},{"id":33980,"name":"Oana Stroe","orcid":"0000-0001-9653-0644","position":7,"is_corresponding":false},{"id":33981,"name":"Gemma Wood","orcid":null,"position":8,"is_corresponding":false},{"id":33982,"name":"Agata Laydon","orcid":"0000-0001-6499-8517","position":9,"is_corresponding":false},{"id":32501,"name":"Augustin Žídek","orcid":"0000-0002-0748-9684","position":10,"is_corresponding":false},{"id":32496,"name":"Tim Green","orcid":"0000-0002-3227-1505","position":11,"is_corresponding":false},{"id":32499,"name":"Kathryn Tunyasuvunakool","orcid":"0000-0002-8594-1074","position":12,"is_corresponding":false},{"id":32513,"name":"Stig Petersen","orcid":"0000-0002-5043-2325","position":13,"is_corresponding":false},{"id":32493,"name":"John M. Jumper","orcid":"0000-0001-6169-6580","position":14,"is_corresponding":false},{"id":32515,"name":"Ellen Clancy","orcid":"0000-0003-4425-3985","position":15,"is_corresponding":false},{"id":33983,"name":"Richard Green","orcid":"0000-0001-8851-7204","position":16,"is_corresponding":false},{"id":33984,"name":"Ankur Vora","orcid":"0000-0002-8439-2501","position":17,"is_corresponding":false},{"id":33985,"name":"Mira Lutfi","orcid":null,"position":18,"is_corresponding":false},{"id":32497,"name":"Michael Figurnov","orcid":"0000-0003-1386-8741","position":19,"is_corresponding":false},{"id":32507,"name":"Andrew Cowie","orcid":"0000-0002-4491-1434","position":20,"is_corresponding":false},{"id":33986,"name":"Nicole Hobbs","orcid":"0000-0002-7221-3617","position":21,"is_corresponding":false},{"id":32525,"name":"Pushmeet Kohli","orcid":"0000-0002-7466-7997","position":22,"is_corresponding":false},{"id":18085,"name":"Kerstin Lindblad‐Toh","orcid":"0000-0001-8338-0253","position":24,"is_corresponding":false},{"id":32526,"name":"Demis Hassabis","orcid":"0000-0003-2812-9917","position":25,"is_corresponding":false},{"id":33988,"name":"Sameer Velankar","orcid":"0000-0002-8439-5964","position":26,"is_corresponding":false},{"id":33989,"name":"David Yu Yuan","orcid":"0000-0003-1075-1628","position":27,"is_corresponding":false},{"id":33990,"name":"Gerard J. Kleywegt","orcid":"0000-0002-4670-0331","position":28,"is_corresponding":false},{"id":33973,"name":"Mihaly Varadi","orcid":"0000-0002-3687-0839","position":0,"is_corresponding":true}],"reference_count":21,"raw_metadata":{"citation_network_status":"fetched"},"created_at":"2026-03-01T18:20:47.508186Z","pmid":"34791371","pmcid":"PMC8728224","fwci":null,"citation_percentile":null,"influential_citations":0,"oa_status":null,"license":null,"views":0,"total_file_size_bytes":0,"version_count":0,"fair_f":65.0,"fair_a":80.0,"fair_i":37.5,"fair_r":50.0,"fair_zscore":1.1681,"fair_rationale":{"fair_score":58.12,"has_llm":true,"dimensions":{"F":{"name":"Findable","score":65.0,"criteria":[{"key":"f_has_doi","label":"Has a persistent DOI","kind":"deterministic","weight":1.0,"fraction":1.0,"signal":"DOI present","rationale":null},{"key":"f_repository_presence","label":"Indexed in repositories / literature DBs","kind":"deterministic","weight":1.0,"fraction":1.0,"signal":"datacite=0, pmcid=True, pmid=True","rationale":null},{"key":"f_persistent_ids","label":"Resolvable scholarly identifiers (OpenAlex)","kind":"deterministic","weight":0.5,"fraction":0.0,"signal":"no OpenAlex id","rationale":null},{"key":"f_metadata_richness","label":"Rich, machine-readable metadata","kind":"llm","weight":1.0,"fraction":0.5,"signal":null,"rationale":"The paper describes metadata stored in JSON for search via Apache Solr, but does not detail machine-readable metadata standards (e.g., schema.org, DCAT) or structured metadata beyond basic search fields."}]},"A":{"name":"Accessible","score":80.0,"criteria":[{"key":"a_open_access","label":"Open Access / files deposited","kind":"deterministic","weight":1.5,"fraction":1.0,"signal":"Open Access","rationale":null},{"key":"a_retrievable","label":"Free full text retrievable","kind":"deterministic","weight":1.0,"fraction":0.0,"signal":"0 OA location(s)","rationale":null},{"key":"a_access_protocol","label":"Clear data/code access protocol","kind":"llm","weight":1.0,"fraction":1.0,"signal":null,"rationale":"The paper clearly specifies multiple access mechanisms: FTP, a public API, and interactive web pages, with explicit URLs and example endpoints, ensuring open access without barriers."}]},"I":{"name":"Interoperable","score":37.5,"criteria":[{"key":"i_linked_data","label":"Linked datasets / DataCite relations","kind":"deterministic","weight":1.0,"fraction":0.0,"signal":"linked_datasets=0, datacite=0","rationale":null},{"key":"i_standard_ids","label":"References data via standard accessions","kind":"deterministic","weight":1.0,"fraction":0.0,"signal":"accessions=0, trials=0","rationale":null},{"key":"i_standards","label":"Standard formats, vocabularies & identifiers","kind":"llm","weight":1.0,"fraction":0.75,"signal":null,"rationale":"The data uses standard formats (mmCIF, PDB, JSON) and identifiers (UniProt accessions), but lacks explicit use of controlled vocabularies or ontologies for annotation beyond basic formats."}]},"R":{"name":"Reusable","score":50.0,"criteria":[{"key":"r_license","label":"Clear, open reuse license","kind":"deterministic","weight":1.5,"fraction":0.0,"signal":"no license","rationale":null},{"key":"r_downloads","label":"Demonstrated reuse (downloads)","kind":"deterministic","weight":0.5,"fraction":0.0,"signal":"downloads=0","rationale":null},{"key":"r_version","label":"Versioned / maintained","kind":"deterministic","weight":0.5,"fraction":0.0,"signal":"no version chain","rationale":null},{"key":"r_dataset","label":"Classified as a data resource","kind":"deterministic","weight":0.5,"fraction":1.0,"signal":"is_dataset","rationale":null},{"key":"r_reusability","label":"Data-availability statement, license & reproducibility","kind":"llm","weight":2.0,"fraction":0.833,"signal":null,"rationale":"The paper includes an open license (CC BY 4.0), provides data availability with detailed access instructions, and offers reproducibility through versioned data, but does not include a formal data citation or detailed methods for reproducing predictions."}]}},"suggestions":["Add structured metadata using schema.org or DataCite properties for automated discovery.","Include formal data availability statement with persistent identifiers (e.g., DOIs) for each release or dataset.","Provide a machine-readable metadata file (e.g., JSON-LD) describing terms of use, provenance, and access on the main page.","Use controlled vocabularies (e.g., EDAM) for describing data types and operations to enhance interoperability.","Document the full pipeline for reproducing structure predictions to improve reproducibility and reusability."],"model":"deepseek/deepseek-v4-flash","agent_version":"fair_agent_v2","fulltext_source":"epmc_xml"},"fair_model":"deepseek/deepseek-v4-flash","fair_agent_version":"fair_agent_v2","fair_fulltext_source":"epmc_xml","fair_has_llm":true,"fair_computed_at":"2026-06-18T00:26:57.307308Z","clinical_trials":[],"software_tools":[],"db_accessions":[],"linked_datasets":[],"topics":[]}