{"doi":"10.1128/aem.03006-05","title":"Greengenes, a Chimera-Checked 16S rRNA Gene Database and Workbench Compatible with ARB","abstract":"A 16S rRNA gene database (http://greengenes.lbl.gov) addresses limitations of public repositories by providing chimera screening, standard alignment, and taxonomic classification using multiple published taxonomies. It was found that there is incongruent taxonomic nomenclature among curators even at the phylum level. Putative chimeras were identified in 3% of environmental sequences and in 0.2% of records derived from isolates. Environmental sequences were classified into 100 phylum-level lineages in the Archaea and Bacteria.","journal":"Applied and Environmental Microbiology","year":2006,"id":10580,"datarank":24.05367240315057,"base_score":9.303557453687317,"endowment":9.303557453687317,"self_citation_contribution":1.3955336180530977,"citation_network_contribution":22.658138785097474,"self_endowment_contribution":1.3955336180530977,"citer_contribution":22.658138785097474,"corpus_percentile":97.64035801464605,"corpus_rank":30,"citation_count":11229,"citer_count":185,"citers_with_citation_signal":185,"citers_with_endowment":185,"datacite_reuse_total":0,"is_dataset":true,"is_dataset_confidence":0.9089,"is_oa":true,"file_count":0,"downloads":0,"has_version_chain":false,"published_date":"2006-07-01","fair_score":35.0833,"fair_percentile":17.21635883905013,"algorithm_id":"datarank_citation_only_1hop_v6","ranking_scope":"data_only","authors":[{"id":87677,"name":"N. Larsen","orcid":"0000-0003-2874-3984","position":2,"is_corresponding":false},{"id":87678,"name":"M. Rojas","orcid":null,"position":3,"is_corresponding":false},{"id":87679,"name":"E. L. Brodie","orcid":null,"position":4,"is_corresponding":false},{"id":87681,"name":"T. Huber","orcid":null,"position":6,"is_corresponding":false},{"id":87682,"name":"D. Dalevi","orcid":null,"position":7,"is_corresponding":false},{"id":29036,"name":"Lucia Alvarado","orcid":null,"position":9,"is_corresponding":false},{"id":19848,"name":"Todd Z. DeSantis","orcid":"0000-0002-1786-1013","position":10,"is_corresponding":false},{"id":19795,"name":"Philip Hugenholtz","orcid":"0000-0001-5386-7925","position":11,"is_corresponding":false},{"id":87685,"name":"Mark Rojas","orcid":null,"position":12,"is_corresponding":false},{"id":87686,"name":"Eoin Brodie","orcid":"0000-0002-8453-8435","position":13,"is_corresponding":false},{"id":15647,"name":"Keith Keller","orcid":"0000-0002-1546-7522","position":14,"is_corresponding":false},{"id":87687,"name":"Thomas Huber","orcid":"0000-0002-3680-8699","position":15,"is_corresponding":false},{"id":87688,"name":"Daniel Dalevi","orcid":null,"position":16,"is_corresponding":false},{"id":57178,"name":"Pengwei Hu","orcid":"0000-0001-5974-7932","position":17,"is_corresponding":false},{"id":29037,"name":"Gary L. Andersen","orcid":"0000-0002-1618-9827","position":18,"is_corresponding":false},{"id":29065,"name":"Catherine Davis","orcid":"0000-0003-3156-9163","position":0,"is_corresponding":true}],"reference_count":30,"raw_metadata":{"citation_network_status":"fetched"},"created_at":"2026-03-01T18:20:47.508186Z","pmid":"16820507","pmcid":"PMC1489311","fwci":null,"citation_percentile":null,"influential_citations":0,"oa_status":null,"license":null,"views":0,"total_file_size_bytes":0,"version_count":0,"fair_f":64.0,"fair_a":58.0,"fair_i":5.0,"fair_r":13.3333,"fair_zscore":-0.9161,"fair_rationale":{"fair_score":35.08,"has_llm":true,"dimensions":{"F":{"name":"Findable","score":64.0,"criteria":[{"key":"f_has_doi","label":"Has a persistent DOI","kind":"deterministic","weight":1.0,"fraction":1.0,"signal":"DOI present","rationale":null},{"key":"f_repository_presence","label":"Indexed in repositories / literature DBs","kind":"deterministic","weight":1.0,"fraction":1.0,"signal":"datacite=0, pmcid=True, pmid=True","rationale":null},{"key":"f_persistent_ids","label":"Resolvable scholarly identifiers (OpenAlex)","kind":"deterministic","weight":0.5,"fraction":0.0,"signal":"no OpenAlex id","rationale":null},{"key":"f_metadata_richness","label":"Rich, machine-readable metadata","kind":"llm","weight":1.0,"fraction":0.0,"signal":null,"rationale":"No mention of machine-readable metadata, metadata schema, or identifiers."}]},"A":{"name":"Accessible","score":58.0,"criteria":[{"key":"a_open_access","label":"Open Access / files deposited","kind":"deterministic","weight":1.5,"fraction":1.0,"signal":"Open Access","rationale":null},{"key":"a_retrievable","label":"Free full text retrievable","kind":"deterministic","weight":1.0,"fraction":0.0,"signal":"0 OA location(s)","rationale":null},{"key":"a_access_protocol","label":"Clear data/code access protocol","kind":"llm","weight":1.0,"fraction":0.5,"signal":null,"rationale":"Provides a URL for the database but lacks details on download methods, authentication, or code access."}]},"I":{"name":"Interoperable","score":5.0,"criteria":[{"key":"i_linked_data","label":"Linked datasets / DataCite relations","kind":"deterministic","weight":1.0,"fraction":0.0,"signal":"linked_datasets=0, datacite=0","rationale":null},{"key":"i_standard_ids","label":"References data via standard accessions","kind":"deterministic","weight":1.0,"fraction":0.0,"signal":"accessions=0, trials=0","rationale":null},{"key":"i_standards","label":"Standard formats, vocabularies & identifiers","kind":"llm","weight":1.0,"fraction":0.25,"signal":null,"rationale":"Mentions 'standard alignment' and 'multiple published taxonomies' but does not specify exact formats, vocabularies, or identifiers."}]},"R":{"name":"Reusable","score":13.33,"criteria":[{"key":"r_license","label":"Clear, open reuse license","kind":"deterministic","weight":1.5,"fraction":0.0,"signal":"no license","rationale":null},{"key":"r_downloads","label":"Demonstrated reuse (downloads)","kind":"deterministic","weight":0.5,"fraction":0.0,"signal":"downloads=0","rationale":null},{"key":"r_version","label":"Versioned / maintained","kind":"deterministic","weight":0.5,"fraction":0.0,"signal":"no version chain","rationale":null},{"key":"r_dataset","label":"Classified as a data resource","kind":"deterministic","weight":0.5,"fraction":1.0,"signal":"is_dataset","rationale":null},{"key":"r_reusability","label":"Data-availability statement, license & reproducibility","kind":"llm","weight":2.0,"fraction":0.0,"signal":null,"rationale":"No data-availability statement, license, or reproducibility information provided."}]}},"suggestions":["Include a formal metadata schema (e.g., Dublin Core, DataCite) and provide machine-readable metadata (e.g., XML, JSON-LD).","Specify clear data access protocols such as direct download links, API endpoints, or FTP access, and state any authentication requirements.","Define standard file formats (e.g., FASTA, GenBank) and controlled vocabularies (e.g., NCBI taxonomy) used in the database.","Add a data-availability statement with a persistent identifier (e.g., DOI) and a reuse license (e.g., Creative Commons).","Provide versioning information and documentation to support reproducibility of analyses."],"model":"deepseek/deepseek-v4-flash","agent_version":"fair_agent_v2","fulltext_source":"abstract_only"},"fair_model":"deepseek/deepseek-v4-flash","fair_agent_version":"fair_agent_v2","fair_fulltext_source":"abstract_only","fair_has_llm":true,"fair_computed_at":"2026-06-18T00:25:32.095007Z","clinical_trials":[],"software_tools":[],"db_accessions":[],"linked_datasets":[],"topics":[]}