{"doi":"10.1093/nar/gkh107","title":"CleanEx: a database of heterogeneous gene expression data based on a consistent gene nomenclature","abstract":null,"journal":"Nucleic Acids Research","year":2004,"id":17032,"datarank":3.154640327000399,"base_score":3.5553480614894135,"endowment":3.5553480614894135,"self_citation_contribution":0.5333022092234121,"citation_network_contribution":2.621338117776987,"self_endowment_contribution":0.5333022092234121,"citer_contribution":2.621338117776987,"corpus_percentile":68.8,"corpus_rank":408,"citation_count":34,"citer_count":29,"citers_with_citation_signal":27,"citers_with_endowment":27,"datacite_reuse_total":0,"is_dataset":true,"is_dataset_confidence":null,"is_oa":false,"file_count":0,"downloads":0,"has_version_chain":false,"published_date":null,"fair_score":46.875,"fair_percentile":44.3051890941073,"algorithm_id":"datarank_citation_only_1hop_v6","ranking_scope":"data_only","authors":[{"id":123323,"name":"V. Praz","orcid":null,"position":0,"is_corresponding":false}],"reference_count":0,"raw_metadata":{"has_enrichment":true,"base_score":3.5553480614894135,"endowment":3.5553480614894135,"datacite_reuse_total":0,"file_count":0,"downloads":0,"views":0,"has_version_chain":false,"is_dataset":false,"is_oa":false,"pmid":"14681477","pmcid":"PMC308841","openalex_id":"https://openalex.org/W2160073543","authors":[],"funders":[],"total_grants":0,"fwci":0.9151,"citation_percentile":0.72978183,"influential_citations":0,"citation_trend":[{"year":2012,"count":1},{"year":2013,"count":2},{"year":2014,"count":1},{"year":2015,"count":2},{"year":2016,"count":6},{"year":2017,"count":1},{"year":2024,"count":1}],"oa_status":"green","license":"other-oa","oa_locations":[{"url":"https://serval.unil.ch/notice/serval:BIB_D6D0A7867F9C","host_type":"repository"},{"url":"https://europepmc.org/articles/pmc308841?pdf=render","host_type":"GREEN"},{"url":"https://serval.unil.ch/notice/serval:BIB_D6D0A7867F9C","host_type":"repository"},{"url":"http://academic.oup.com/nar/article-pdf/32/suppl_1/D542/7621941/gkh107.pdf","host_type":"publisher"},{"url":"https://doi.org/10.1093/nar/gkh107","host_type":"journal"},{"url":"https://pubmed.ncbi.nlm.nih.gov/14681477","host_type":"repository"},{"url":"https://iris.unil.ch/handle/iris/144004","host_type":"repository"},{"url":"http://europepmc.org/pmc/articles/PMC308841","host_type":"repository"},{"url":"http://infoscience.epfl.ch/record/114901","host_type":"repository"},{"url":"https://www.ncbi.nlm.nih.gov/pmc/articles/308841","host_type":"repository"},{"url":"https://iris.unil.ch/bitstreams/6bca4457-48c3-47e8-80ff-c3b5a112a1a4/download","host_type":"repository"}],"fields_of_study":["Gene expression and cancer classification","Bioinformatics and Genomic Networks","Genomics and Phylogenetic Studies","Medicine","Computer Science","Biology","Animals","Databases, Genetic","Gene Expression Profiling","Genes","Genomics","Humans","Information Storage and Retrieval","Internet","Quality Control","Research Design","Terminology as Topic","User-Computer Interface"],"mesh_terms":["Animals","Genes","Humans","Terminology as Topic","Quality Control","Research Design","User-Computer Interface","Information Storage and Retrieval","Internet","Gene Expression Profiling","Genomics","Databases, Genetic"],"keywords":["RefSeq","UniGene","Gene nomenclature","Biology","Gene","Computational biology","Population","Genome","GenBank","String (physics)","Genetics","Database","Computer science","Nomenclature","Expressed sequence tag"],"sdg_mappings":[{"sdg_number":0,"sdg_label":"Industry, innovation and infrastructure"}],"linked_datasets":[],"clinical_trials":[],"software_tools":[],"database_accessions":[{"name":"refseq"}],"source":"live","citation_network_status":"fetched"},"created_at":"2026-06-02T16:07:56.269979Z","pmid":"14681477","pmcid":"PMC308841","fwci":null,"citation_percentile":null,"influential_citations":0,"oa_status":"green","license":"other-oa","views":0,"total_file_size_bytes":0,"version_count":0,"fair_f":65.0,"fair_a":72.5,"fair_i":25.0,"fair_r":25.0,"fair_zscore":0.1505,"fair_rationale":{"fair_score":46.88,"has_llm":true,"dimensions":{"F":{"name":"Findable","score":65.0,"criteria":[{"key":"f_has_doi","label":"Has a persistent DOI","kind":"deterministic","weight":1.0,"fraction":1.0,"signal":"DOI present","rationale":null},{"key":"f_repository_presence","label":"Indexed in repositories / literature DBs","kind":"deterministic","weight":1.0,"fraction":1.0,"signal":"datacite=0, pmcid=True, pmid=True","rationale":null},{"key":"f_persistent_ids","label":"Resolvable scholarly identifiers (OpenAlex)","kind":"deterministic","weight":0.5,"fraction":0.0,"signal":"no OpenAlex id","rationale":null},{"key":"f_metadata_richness","label":"Rich, machine-readable metadata","kind":"llm","weight":1.0,"fraction":0.5,"signal":null,"rationale":"The paper describes web-based access and some flat-file structures but does not mention machine-readable metadata (e.g., structured metadata schemas, XML/RDF, or JSON-LD)."}]},"A":{"name":"Accessible","score":72.5,"criteria":[{"key":"a_open_access","label":"Open Access / files deposited","kind":"deterministic","weight":1.5,"fraction":0.5,"signal":"files/OA location present but not flagged OA","rationale":null},{"key":"a_retrievable","label":"Free full text retrievable","kind":"deterministic","weight":1.0,"fraction":1.0,"signal":"11 OA location(s)","rationale":null},{"key":"a_access_protocol","label":"Clear data/code access protocol","kind":"llm","weight":1.0,"fraction":0.75,"signal":null,"rationale":"The paper provides clear URLs for web access and a download link for some files via FTP, but does not state that the FTP protocol is machine-actionable with authentication conditions or a license."}]},"I":{"name":"Interoperable","score":25.0,"criteria":[{"key":"i_linked_data","label":"Linked datasets / DataCite relations","kind":"deterministic","weight":1.0,"fraction":0.0,"signal":"linked_datasets=0, datacite=0","rationale":null},{"key":"i_standard_ids","label":"References data via standard accessions","kind":"deterministic","weight":1.0,"fraction":0.0,"signal":"accessions=0, trials=0","rationale":null},{"key":"i_standards","label":"Standard formats, vocabularies & identifiers","kind":"llm","weight":1.0,"fraction":0.5,"signal":null,"rationale":"The database uses gene nomenclature standards and some unique identifiers, but the expression data formats are described as 'weakly standardized' and multiple ad-hoc formats are used without reference to community-accepted ontologies or data exchange standards."}]},"R":{"name":"Reusable","score":25.0,"criteria":[{"key":"r_license","label":"Clear, open reuse license","kind":"deterministic","weight":1.5,"fraction":0.0,"signal":"no license","rationale":null},{"key":"r_downloads","label":"Demonstrated reuse (downloads)","kind":"deterministic","weight":0.5,"fraction":0.0,"signal":"downloads=0","rationale":null},{"key":"r_version","label":"Versioned / maintained","kind":"deterministic","weight":0.5,"fraction":0.0,"signal":"no version chain","rationale":null},{"key":"r_dataset","label":"Classified as a data resource","kind":"deterministic","weight":0.5,"fraction":1.0,"signal":"is_dataset","rationale":null},{"key":"r_reusability","label":"Data-availability statement, license & reproducibility","kind":"llm","weight":2.0,"fraction":0.333,"signal":null,"rationale":"No explicit data-availability statement, license, or reproducibility details are provided; the paper only mentions that cleanex_exp is not redistributed via FTP due to copyright concerns, which limits reuse."}]}},"suggestions":["Add structured machine-readable metadata (e.g., DCAT or schema.org) to the data files and web pages to improve findability.","Provide a clear data license (e.g., Creative Commons) for the expression data and clarify reuse permissions.","Use community-accepted data formats (e.g., MAGE-TAB, ISA-Tab) and ontologies (e.g., GO, EFO) to enhance interoperability.","Document the data ingestion and quality-control pipelines in a reproducible manner, including versioned code and workflow descriptions.","Offer programmatic access via an API (e.g., REST or SPARQL) to improve machine accessibility."],"model":"deepseek/deepseek-v4-flash","agent_version":"fair_agent_v2","fulltext_source":"unpaywall_pdf"},"fair_model":"deepseek/deepseek-v4-flash","fair_agent_version":"fair_agent_v2","fair_fulltext_source":"unpaywall_pdf","fair_has_llm":true,"fair_computed_at":"2026-06-18T00:44:25.010067Z","clinical_trials":[],"software_tools":[],"db_accessions":[],"linked_datasets":[],"topics":[]}