{"doi":"10.1093/nar/gky955","title":"GENCODE reference annotation for the human and mouse genomes","abstract":"The accurate identification and description of the genes in the human and mouse genomes is a fundamental requirement for high quality analysis of data informing both genome biology and clinical genomics. Over the last 15 years, the GENCODE consortium has been producing reference quality gene annotations to provide this foundational resource. The GENCODE consortium includes both experimental and computational biology groups who work together to improve and extend the GENCODE gene annotation. Specifically, we generate primary data, create bioinformatics tools and provide analysis to support the work of expert manual gene annotators and automated gene annotation pipelines. In addition, manual and computational annotation workflows use any and all publicly available data and analysis, along with the research literature to identify and characterise gene loci to the highest standard. GENCODE gene annotations are accessible via the Ensembl and UCSC Genome Browsers, the Ensembl FTP site, Ensembl Biomart, Ensembl Perl and REST APIs as well as https://www.gencodegenes.org.","journal":"Nucleic Acids Research","year":2018,"id":7515,"datarank":12.503513440162008,"base_score":8.120291313968561,"endowment":8.120291313968561,"self_citation_contribution":1.2180436970952844,"citation_network_contribution":11.285469743066724,"self_endowment_contribution":1.2180436970952844,"citer_contribution":11.285469743066724,"corpus_percentile":83.72660699755899,"corpus_rank":201,"citation_count":3485,"citer_count":177,"citers_with_citation_signal":177,"citers_with_endowment":177,"datacite_reuse_total":0,"is_dataset":true,"is_dataset_confidence":0.9436,"is_oa":true,"file_count":0,"downloads":0,"has_version_chain":false,"published_date":"2018-10-24","fair_score":66.4583,"fair_percentile":96.3060686015831,"algorithm_id":"datarank_citation_only_1hop_v6","ranking_scope":"data_only","authors":[{"id":16842,"name":"Mark Diekhans","orcid":"0000-0002-0430-0989","position":1,"is_corresponding":false},{"id":42975,"name":"Anne-Maud Ferreira","orcid":"0000-0002-4749-746X","position":2,"is_corresponding":false},{"id":11760,"name":"Todd A. Johnson","orcid":"0000-0003-4607-2782","position":3,"is_corresponding":false},{"id":292,"name":"Irwin Jungreis","orcid":"0000-0002-3197-5367","position":4,"is_corresponding":false},{"id":24601,"name":"Jane E Loveland","orcid":"0000-0002-7669-2934","position":5,"is_corresponding":false},{"id":24517,"name":"Shinichi Morishita","orcid":"0000-0002-6201-8885","position":6,"is_corresponding":false},{"id":43162,"name":"Cristina Sisu","orcid":"0000-0001-9371-0797","position":7,"is_corresponding":false},{"id":43209,"name":"James Wright","orcid":null,"position":8,"is_corresponding":false},{"id":42914,"name":"Joel Armstrong","orcid":"0000-0003-2077-4671","position":9,"is_corresponding":false},{"id":18893,"name":"If Barnes","orcid":"0000-0001-9303-4610","position":10,"is_corresponding":false},{"id":42926,"name":"Andrew Berry","orcid":"0000-0001-5096-7701","position":11,"is_corresponding":false},{"id":18895,"name":"Alexandra Bignell","orcid":"0000-0002-5926-7020","position":12,"is_corresponding":false},{"id":59116,"name":"Silvia Carbonell Sala","orcid":null,"position":13,"is_corresponding":false},{"id":11743,"name":"Jacqueline Chrast","orcid":null,"position":14,"is_corresponding":false},{"id":35062,"name":" Fiona Cunningham","orcid":"0000-0002-7445-2419","position":15,"is_corresponding":false},{"id":59117,"name":"Tomás Di Domenico","orcid":"0000-0003-2887-815X","position":16,"is_corresponding":false},{"id":42963,"name":"Sarah Donaldson","orcid":"0000-0002-2576-3173","position":17,"is_corresponding":false},{"id":49022,"name":"Ian T. Fiddes","orcid":"0000-0002-1580-7443","position":18,"is_corresponding":false},{"id":30915,"name":"Carlos García Girón","orcid":"0000-0002-0935-7271","position":19,"is_corresponding":false},{"id":59119,"name":"Tiago Grego","orcid":"0000-0002-7946-7062","position":21,"is_corresponding":false},{"id":36424,"name":"Manoj Hariharan","orcid":"0000-0002-1006-5372","position":22,"is_corresponding":false},{"id":30880,"name":"Thibaut Hourlier","orcid":"0000-0003-4894-7773","position":23,"is_corresponding":false},{"id":18874,"name":"Toby Hunt","orcid":"0000-0001-8377-0841","position":24,"is_corresponding":false},{"id":59121,"name":"Osagie G Izuogu","orcid":null,"position":25,"is_corresponding":false},{"id":11723,"name":"Julien Lagarde","orcid":"0000-0002-0290-7445","position":26,"is_corresponding":false},{"id":24505,"name":"Fergal J. Martin","orcid":"0000-0002-1672-050X","position":27,"is_corresponding":false},{"id":98907,"name":"Laura Martinez-Gomez","orcid":null,"position":28,"is_corresponding":false},{"id":59123,"name":"Shamika Mohanan","orcid":"0000-0001-8126-2747","position":29,"is_corresponding":false},{"id":59124,"name":"Paul Muir","orcid":"0000-0002-3645-6822","position":30,"is_corresponding":false},{"id":13855,"name":"Fabio C. P. Navarro","orcid":null,"position":31,"is_corresponding":false},{"id":59125,"name":"Anne Parker","orcid":null,"position":32,"is_corresponding":false},{"id":37964,"name":"Baikang Pei","orcid":null,"position":33,"is_corresponding":false},{"id":59126,"name":"Fernando Pozo","orcid":null,"position":34,"is_corresponding":false},{"id":29376,"name":"Magali Ruffier","orcid":"0000-0002-8386-1580","position":35,"is_corresponding":false},{"id":59128,"name":"Bianca M. Schmitt","orcid":"0000-0003-4341-2972","position":36,"is_corresponding":false},{"id":59129,"name":"Eloise Stapleton","orcid":null,"position":37,"is_corresponding":false},{"id":43173,"name":"Balaji Sundararaman","orcid":"0000-0001-8559-1660","position":38,"is_corresponding":false},{"id":59130,"name":"Irina Sycheva","orcid":"0000-0002-1931-4531","position":39,"is_corresponding":false},{"id":43182,"name":"Barbara Uszczynska-Ratajczak","orcid":null,"position":40,"is_corresponding":false},{"id":43212,"name":"Jie Xu","orcid":"0000-0002-1503-1104","position":41,"is_corresponding":false},{"id":59133,"name":"Andrew Yates","orcid":"0000-0002-8886-4772","position":42,"is_corresponding":false},{"id":32862,"name":"Vyacheslav Amstislavskiy","orcid":"0000-0002-1384-7599","position":43,"is_corresponding":false},{"id":12202,"name":"Yan Zhang","orcid":"0000-0002-9173-7029","position":44,"is_corresponding":false},{"id":37972,"name":"Bronwen Aken","orcid":"0000-0002-3032-4095","position":45,"is_corresponding":false},{"id":17805,"name":"Jyoti Sharma Choudhary","orcid":"0000-0003-0881-5477","position":46,"is_corresponding":false},{"id":42990,"name":"Grigorios Georgolopoulos","orcid":"0000-0002-9906-4797","position":47,"is_corresponding":false},{"id":59323,"name":"Simon G. Gregory","orcid":"0000-0002-7805-1743","position":48,"is_corresponding":false},{"id":14693,"name":"Sharon L. R. Kardia","orcid":"0000-0002-9853-3379","position":50,"is_corresponding":false},{"id":30899,"name":"Nathan D. Olson","orcid":"0000-0003-2585-3037","position":51,"is_corresponding":false},{"id":11786,"name":"Alexandre Reymond","orcid":"0000-0003-1030-8327","position":52,"is_corresponding":false},{"id":13168,"name":"Liis Uusküla-Reimand","orcid":"0000-0002-5322-5524","position":53,"is_corresponding":false},{"id":20075,"name":"David B. Jaffe","orcid":"0000-0001-8739-568X","position":54,"is_corresponding":false},{"id":43288,"name":"James C. Wright","orcid":"0000-0001-6950-4328","position":55,"is_corresponding":false},{"id":59135,"name":"Sílvia Carbonell Sala","orcid":"0000-0001-7956-6215","position":56,"is_corresponding":false},{"id":37971,"name":"Jose Manuel Gonzalez","orcid":"0000-0001-5569-0705","position":57,"is_corresponding":false},{"id":43255,"name":"Matthew P. Hardy","orcid":"0000-0001-6420-1715","position":58,"is_corresponding":false},{"id":43032,"name":"Osagie Izuogu","orcid":"0000-0003-3116-2558","position":59,"is_corresponding":false},{"id":59136,"name":"Laura Martínez Gómez","orcid":"0000-0001-9843-1332","position":60,"is_corresponding":false},{"id":14412,"name":"Fábio C. P. Navarro","orcid":"0000-0002-5640-9070","position":61,"is_corresponding":false},{"id":59137,"name":"Fernando Campo del Pozo","orcid":"0000-0001-7688-6045","position":62,"is_corresponding":false},{"id":43283,"name":"Barbara Uszczyńska-Ratajczak","orcid":"0000-0003-0150-3841","position":63,"is_corresponding":false},{"id":43213,"name":"Jinrui Xu","orcid":"0000-0003-1944-2821","position":64,"is_corresponding":false},{"id":2131,"name":"Tim Hubbard","orcid":"0000-0002-1767-9318","position":65,"is_corresponding":false},{"id":29021,"name":"Michael G. FitzGerald","orcid":"0000-0002-0488-0530","position":0,"is_corresponding":true}],"reference_count":48,"raw_metadata":{"citation_network_status":"fetched"},"created_at":"2026-03-01T18:20:47.508186Z","pmid":"30357393","pmcid":"PMC6323946","fwci":null,"citation_percentile":null,"influential_citations":0,"oa_status":"gold","license":"other-oa","views":0,"total_file_size_bytes":0,"version_count":0,"fair_f":77.5,"fair_a":80.0,"fair_i":50.0,"fair_r":58.3333,"fair_zscore":1.9219,"fair_rationale":{"fair_score":66.46,"has_llm":true,"dimensions":{"F":{"name":"Findable","score":77.5,"criteria":[{"key":"f_has_doi","label":"Has a persistent DOI","kind":"deterministic","weight":1.0,"fraction":1.0,"signal":"DOI present","rationale":null},{"key":"f_repository_presence","label":"Indexed in repositories / literature DBs","kind":"deterministic","weight":1.0,"fraction":1.0,"signal":"datacite=0, pmcid=True, pmid=True","rationale":null},{"key":"f_persistent_ids","label":"Resolvable scholarly identifiers (OpenAlex)","kind":"deterministic","weight":0.5,"fraction":0.0,"signal":"no OpenAlex id","rationale":null},{"key":"f_metadata_richness","label":"Rich, machine-readable metadata","kind":"llm","weight":1.0,"fraction":0.75,"signal":null,"rationale":"The paper describes rich metadata for gene annotation (e.g., biotypes, TSL, APPRIS) but does not explicitly provide machine-readable metadata or structured schemas beyond standard GFF3/GTF formats."}]},"A":{"name":"Accessible","score":80.0,"criteria":[{"key":"a_open_access","label":"Open Access / files deposited","kind":"deterministic","weight":1.5,"fraction":1.0,"signal":"Open Access","rationale":null},{"key":"a_retrievable","label":"Free full text retrievable","kind":"deterministic","weight":1.0,"fraction":0.0,"signal":"0 OA location(s)","rationale":null},{"key":"a_access_protocol","label":"Clear data/code access protocol","kind":"llm","weight":1.0,"fraction":1.0,"signal":null,"rationale":"The paper clearly specifies multiple access protocols including FTP, Ensembl Biomart, REST API, and web browsers, all without login barriers."}]},"I":{"name":"Interoperable","score":50.0,"criteria":[{"key":"i_linked_data","label":"Linked datasets / DataCite relations","kind":"deterministic","weight":1.0,"fraction":0.0,"signal":"linked_datasets=0, datacite=0","rationale":null},{"key":"i_standard_ids","label":"References data via standard accessions","kind":"deterministic","weight":1.0,"fraction":0.0,"signal":"accessions=0, trials=0","rationale":null},{"key":"i_standards","label":"Standard formats, vocabularies & identifiers","kind":"llm","weight":1.0,"fraction":1.0,"signal":null,"rationale":"The data uses standard formats (GFF3, GTF), standard identifiers (Ensembl, miRBase, UniProt), and controlled vocabularies (biotypes), but does not mention formal ontology IRIs or schema.org markup."}]},"R":{"name":"Reusable","score":58.33,"criteria":[{"key":"r_license","label":"Clear, open reuse license","kind":"deterministic","weight":1.5,"fraction":0.0,"signal":"no license","rationale":null},{"key":"r_downloads","label":"Demonstrated reuse (downloads)","kind":"deterministic","weight":0.5,"fraction":0.0,"signal":"downloads=0","rationale":null},{"key":"r_version","label":"Versioned / maintained","kind":"deterministic","weight":0.5,"fraction":0.0,"signal":"no version chain","rationale":null},{"key":"r_dataset","label":"Classified as a data resource","kind":"deterministic","weight":0.5,"fraction":1.0,"signal":"is_dataset","rationale":null},{"key":"r_reusability","label":"Data-availability statement, license & reproducibility","kind":"llm","weight":2.0,"fraction":1.0,"signal":null,"rationale":"A clear data-availability statement (https://www.gencodegenes.org), explicit Creative Commons license (CC BY 4.0), versioned releases, and documentation supporting reproducibility are all provided."}]}},"suggestions":["Provide structured machine-readable metadata as JSON-LD or RDF for the gene set, including PID for each annotation release.","Add a formal data citation with a persistent identifier and versioning for each GENCODE release.","Publish the annotation schema as a standardized ontology (e.g., OWL) to improve interoperability with other resources."],"model":"deepseek/deepseek-v4-flash","agent_version":"fair_agent_v2","fulltext_source":"epmc_xml"},"fair_model":"deepseek/deepseek-v4-flash","fair_agent_version":"fair_agent_v2","fair_fulltext_source":"epmc_xml","fair_has_llm":true,"fair_computed_at":"2026-06-18T00:29:25.720371Z","clinical_trials":[],"software_tools":[],"db_accessions":[],"linked_datasets":[],"topics":[]}