{"doi":"10.1016/j.febslet.2004.12.046","title":"Comparison of the current RefSeq, Ensembl and EST databases for counting genes and gene discovery","abstract":"<jats:p>Large amounts of refined sequence material in the form of predicted, curated and annotated genes and expressed sequences tags (ESTs) have recently been added to the NCBI databases. We matched the transcript‐sequences of RefSeq, Ensembl and dbEST in an attempt to provide an updated overview of how many unique human genes can be found. The results indicate that there are about 25 000 unique genes in the union of RefSeq and Ensembl with 12–18% and 8–13% of the genes in each set unique to the other set, respectively. About 20% of all genes had splice variants. There are a considerable number of ESTs (2 200 000) that do not match the identified genes and we used an in‐house pipeline to identify 22 novel genes from Genscan predictions that have considerable EST coverage. The study provides an insight into the current status of human gene catalogues and shows that considerable refinement of methods and datasets is needed to come to a conclusive gene count.</jats:p>","journal":"FEBS Letters","year":2005,"id":30982,"datarank":1.9164465415577312,"base_score":3.6888794541139363,"endowment":3.6888794541139363,"self_citation_contribution":0.5533319181170905,"citation_network_contribution":1.3631146234406406,"self_endowment_contribution":0.5533319181170905,"citer_contribution":1.3631146234406406,"corpus_percentile":65.1,"corpus_rank":487,"citation_count":39,"citer_count":31,"citers_with_citation_signal":22,"citers_with_endowment":22,"datacite_reuse_total":0,"is_dataset":true,"is_dataset_confidence":null,"is_oa":false,"file_count":0,"downloads":0,"has_version_chain":false,"published_date":null,"fair_score":null,"fair_percentile":null,"algorithm_id":"datarank_citation_only_1hop_v6","ranking_scope":"data_only","authors":[{"id":167066,"name":"Christian G. Murray","orcid":null,"position":1,"is_corresponding":false},{"id":167067,"name":"Tobias Hill","orcid":null,"position":2,"is_corresponding":false},{"id":167068,"name":"Robert Fredriksson","orcid":null,"position":3,"is_corresponding":false},{"id":51892,"name":"Helgi B. Schiöth","orcid":"0000-0001-7112-0921","position":4,"is_corresponding":false},{"id":167065,"name":"Thomas P. Larsson","orcid":null,"position":0,"is_corresponding":false}],"reference_count":0,"raw_metadata":{"has_enrichment":true,"base_score":3.6888794541139363,"endowment":3.6888794541139363,"datacite_reuse_total":0,"file_count":0,"downloads":0,"views":0,"has_version_chain":false,"is_dataset":false,"is_oa":false,"pmid":"15670830","pmcid":null,"openalex_id":"https://openalex.org/W2039941094","authors":[],"funders":[],"total_grants":0,"fwci":2.0489,"citation_percentile":0.85818647,"influential_citations":0,"citation_trend":[{"year":2012,"count":1},{"year":2013,"count":3},{"year":2017,"count":2},{"year":2018,"count":1},{"year":2020,"count":1},{"year":2022,"count":1},{"year":2023,"count":2}],"oa_status":"closed","license":"http://onlinelibrary.wiley.com/termsAndConditions#vor","oa_locations":[{"url":"https://api.wiley.com/onlinelibrary/tdm/v1/articles/10.1016%2Fj.febslet.2004.12.046","host_type":"publisher"},{"url":"https://febs.onlinelibrary.wiley.com/doi/pdf/10.1016/j.febslet.2004.12.046","host_type":"publisher"},{"url":"https://doi.org/10.1016/j.febslet.2004.12.046","host_type":"journal"},{"url":"https://pubmed.ncbi.nlm.nih.gov/15670830","host_type":"repository"}],"fields_of_study":["Genomics and Phylogenetic Studies","Bioinformatics and Genomic Networks","Machine Learning in Bioinformatics","Medicine","Biology","Computer Science","Alternative Splicing","Expressed Sequence Tags","Genome, Human","Humans","RNA, Messenger"],"mesh_terms":["Humans","RNA, Messenger","Genome, Human","Alternative Splicing","Expressed Sequence Tags"],"keywords":["Ensembl","RefSeq","Gene","Expressed sequence tag","Gene nomenclature","Computational biology","Biology","Human genome","Pipeline (software)","Database","Genetics","Bioinformatics","Genome","Computer science","Genomics"],"sdg_mappings":[],"linked_datasets":[],"clinical_trials":[],"software_tools":[],"database_accessions":[],"source":"live","citation_network_status":"fetched"},"created_at":"2026-06-09T06:09:29.136208Z","pmid":null,"pmcid":null,"fwci":null,"citation_percentile":null,"influential_citations":0,"oa_status":null,"license":null,"views":0,"total_file_size_bytes":0,"version_count":0,"fair_f":null,"fair_a":null,"fair_i":null,"fair_r":null,"fair_zscore":null,"fair_rationale":null,"fair_model":null,"fair_agent_version":null,"fair_fulltext_source":null,"fair_has_llm":null,"fair_computed_at":null,"clinical_trials":[],"software_tools":[],"db_accessions":[],"linked_datasets":[],"topics":[]}