{"doi":"10.1038/sdata.2017.125","title":"Precision annotation of digital samples in NCBI’s gene expression omnibus","abstract":"The Gene Expression Omnibus (GEO) contains more than two million digital samples from functional genomics experiments amassed over almost two decades. However, individual sample meta-data remains poorly described by unstructured free text attributes preventing its largescale reanalysis. We introduce the Search Tag Analyze Resource for GEO as a web application (http://STARGEO.org) to curate better annotations of sample phenotypes uniformly across different studies, and to use these sample annotations to define robust genomic signatures of disease pathology by meta-analysis. In this paper, we target a small group of biomedical graduate students to show rapid crowd-curation of precise sample annotations across all phenotypes, and we demonstrate the biological validity of these crowd-curated annotations for breast cancer. STARGEO.org makes GEO data findable, accessible, interoperable and reusable (i.e., FAIR) to ultimately facilitate knowledge discovery. Our work demonstrates the utility of crowd-curation and interpretation of open 'big data' under FAIR principles as a first step towards realizing an ideal paradigm of precision medicine.","journal":"Scientific Data","year":2017,"id":2524,"datarank":1.8951774245640918,"base_score":3.912023005428146,"endowment":3.912023005428146,"self_citation_contribution":0.586803450814222,"citation_network_contribution":1.3083739737498699,"self_endowment_contribution":0.586803450814222,"citer_contribution":1.3083739737498699,"corpus_percentile":64.03580146460537,"corpus_rank":443,"citation_count":53,"citer_count":38,"citers_with_citation_signal":33,"citers_with_endowment":33,"datacite_reuse_total":0,"is_dataset":true,"is_dataset_confidence":0.9364,"is_oa":true,"file_count":0,"downloads":0,"has_version_chain":false,"published_date":"2017-09-19","fair_score":65.4167,"fair_percentile":96.2,"algorithm_id":"datarank_citation_only_1hop_v6","ranking_scope":"data_only","authors":[{"id":30067,"name":"James Pan","orcid":"0000-0002-7144-726X","position":1,"is_corresponding":false},{"id":30068,"name":"Osama El-Sayed","orcid":"0000-0002-6665-0562","position":2,"is_corresponding":false},{"id":30069,"name":"Jihad Aljabban","orcid":"0000-0003-4999-9372","position":3,"is_corresponding":false},{"id":30070,"name":"Imad Aljabban","orcid":"0000-0003-1231-1159","position":4,"is_corresponding":false},{"id":30071,"name":"Tej D. Azad","orcid":"0000-0001-7823-4294","position":5,"is_corresponding":false},{"id":30072,"name":"Mohamad O. Hadied","orcid":null,"position":6,"is_corresponding":false},{"id":30073,"name":"Shuaib Raza","orcid":null,"position":7,"is_corresponding":false},{"id":30074,"name":"Benjamin Abhishek Rayikanti","orcid":null,"position":8,"is_corresponding":false},{"id":11850,"name":"BIN CHEN","orcid":"0000-0001-8858-874X","position":9,"is_corresponding":false},{"id":15913,"name":"Hyojung Paik","orcid":null,"position":10,"is_corresponding":false},{"id":3381,"name":"Dvir Aran","orcid":"0000-0001-6334-5039","position":11,"is_corresponding":false},{"id":30075,"name":"Jordan Spatz","orcid":"0000-0002-6732-7353","position":12,"is_corresponding":false},{"id":1208,"name":"Daniel S. Himmelstein","orcid":"0000-0002-3012-7446","position":13,"is_corresponding":false},{"id":6496,"name":"Maryam Panahiazar","orcid":"0000-0003-2089-3937","position":14,"is_corresponding":false},{"id":3336,"name":"Sanchita Bhattacharya","orcid":"0000-0002-3056-0733","position":15,"is_corresponding":false},{"id":2824,"name":"Marina Sirota","orcid":"0000-0002-7246-6083","position":16,"is_corresponding":false},{"id":113,"name":"Mark A. Musen","orcid":"0000-0003-3325-793X","position":17,"is_corresponding":false},{"id":51,"name":"Atul Janardhan Butte","orcid":"0000-0002-7433-2740","position":18,"is_corresponding":false},{"id":30076,"name":"Mohamad Omar Hadied","orcid":"0000-0001-6440-2023","position":19,"is_corresponding":false},{"id":30077,"name":"S. Mahmood Raza","orcid":null,"position":20,"is_corresponding":false},{"id":15919,"name":"Harikrishna Paik","orcid":"0000-0002-3994-0695","position":21,"is_corresponding":false},{"id":15916,"name":"Dexter Hadley","orcid":"0000-0003-0990-4674","position":0,"is_corresponding":true}],"reference_count":53,"raw_metadata":{"citation_network_status":"fetched"},"created_at":"2026-03-01T18:20:47.508186Z","pmid":"28925997","pmcid":"PMC5604135","fwci":null,"citation_percentile":null,"influential_citations":0,"oa_status":"gold","license":"other-oa","views":0,"total_file_size_bytes":0,"version_count":0,"fair_f":100.0,"fair_a":70.0,"fair_i":50.0,"fair_r":41.6667,"fair_zscore":0.9797,"fair_rationale":{"fair_score":65.42,"has_llm":false,"dimensions":{"F":{"name":"Findable","score":100.0,"criteria":[{"key":"f_has_doi","label":"Has a persistent DOI","kind":"deterministic","weight":1.0,"fraction":1.0,"signal":"DOI present","rationale":null},{"key":"f_repository_presence","label":"Indexed in repositories / literature DBs","kind":"deterministic","weight":1.0,"fraction":1.0,"signal":"datacite=0, pmcid=True, pmid=True","rationale":null},{"key":"f_persistent_ids","label":"Resolvable scholarly identifiers (OpenAlex)","kind":"deterministic","weight":0.5,"fraction":1.0,"signal":"OpenAlex id present","rationale":null}]},"A":{"name":"Accessible","score":70.0,"criteria":[{"key":"a_open_access","label":"Open Access / files deposited","kind":"deterministic","weight":1.5,"fraction":0.5,"signal":"files/OA location present but not flagged OA","rationale":null},{"key":"a_retrievable","label":"Free full text retrievable","kind":"deterministic","weight":1.0,"fraction":1.0,"signal":"16 OA location(s)","rationale":null}]},"I":{"name":"Interoperable","score":50.0,"criteria":[{"key":"i_linked_data","label":"Linked datasets / DataCite relations","kind":"deterministic","weight":1.0,"fraction":0.0,"signal":"linked_datasets=0, datacite=0","rationale":null},{"key":"i_standard_ids","label":"References data via standard accessions","kind":"deterministic","weight":1.0,"fraction":1.0,"signal":"accessions=1, trials=0","rationale":null}]},"R":{"name":"Reusable","score":41.67,"criteria":[{"key":"r_license","label":"Clear, open reuse license","kind":"deterministic","weight":1.5,"fraction":0.5,"signal":"license present (other-oa)","rationale":null},{"key":"r_downloads","label":"Demonstrated reuse (downloads)","kind":"deterministic","weight":0.5,"fraction":0.0,"signal":"downloads=0","rationale":null},{"key":"r_version","label":"Versioned / maintained","kind":"deterministic","weight":0.5,"fraction":0.0,"signal":"no version chain","rationale":null},{"key":"r_dataset","label":"Classified as a data resource","kind":"deterministic","weight":0.5,"fraction":1.0,"signal":"is_dataset","rationale":null}]}},"suggestions":["Link the underlying datasets via DOIs / DataCite relations.","Maintain explicit versioning for the dataset.","Make the paper/data Open Access or deposit the files in an open repository.","Attach a clear, open reuse license (e.g. CC-BY or CC0)."],"model":null,"agent_version":"fair_agent_v2","fulltext_source":"epmc_xml"},"fair_model":null,"fair_agent_version":"fair_agent_v2","fair_fulltext_source":"epmc_xml","fair_has_llm":false,"fair_computed_at":"2026-06-20T11:12:22.848348Z","clinical_trials":[],"software_tools":[],"db_accessions":[],"linked_datasets":[],"topics":[]}