data-as-a-science.bib


% NOTE: the citation key keeps its historical "dietz" spelling so existing \cite commands still resolve; the author's surname is Diez.
@book{dietz_openintro_2015,
	edition = {Third},
	title = {{OpenIntro} {Statistics}},
	url = {https://www.openintro.org/},
	publisher = {OpenIntro.org},
	author = {Diez, David M. and Barr, Christopher D. and Çetinkaya-Rundel, Mine},
	year = {2015}
}

@book{adhikari_computational_2015,
  author = {Adhikari, Ani and DeNero, John},
  title = {Computational and {Inferential} {Thinking}: {The} {Foundations} of {Data} {Science}},
  year = {2015},
  publisher = {Gitbooks.io},
  url = {https://www.inferentialthinking.com/},
  note = {Backup Publisher: UC Berkeley},
}

@book{tufte_beautiful_2006,
	title = {Beautiful {Evidence}},
	url = {https://www.edwardtufte.com/tufte/books_be},
	publisher = {Graphics Press},
	author = {Tufte, Edward},
	year = {2006}
}

@book{baggini_ethics_2007,
  author = {Baggini, Julian and Fosl, Peter},
  title = {The {Ethics} {Toolkit}},
  year = {2007},
  publisher = {Blackwell Publishing},
  url = {http://www.blackwellpublishing.com/},
}

@book{dicker_principles_2012,
  author = {Dicker, Richard and Coronado, Fátima and Koo, Denise and Parrish, Roy Gibson},
  title = {Principles of {Epidemiology} in {Public} {Health} {Practice}: {An} {Introduction} to {Applied} {Epidemiology} and {Biostatistics}},
  edition = {Third},
  year = {2012},
  publisher = {U.S. Department of Health and Human Services},
  url = {https://www.cdc.gov/ophss/csels/dsepd/ss1978/},
  note = {Backup Publisher: Centers for Disease Control and Prevention},
}

@book{mckinney_python_2013,
  author = {McKinney, Wes},
  title = {Python for {Data} {Analysis}},
  edition = {First},
  year = {2013},
  publisher = {O'Reilly},
  url = {http://wesmckinney.com/},
}

@book{bowles_economy_2017,
  author = {Bowles, Samuel and Carlin, Wendy and Stevens, Margaret},
  title = {The {Economy}},
  year = {2017},
  publisher = {The CORE Project},
  url = {http://www.core-econ.org/the-economy/},
}

@book{edmonds_would_2014,
	title = {Would {You} {Kill} the {Fat} {Man}? {The} {Trolley} {Problem} and {What} {Your} {Answer} {Tells} {Us} about {Right} and {Wrong}},
	publisher = {Princeton University Press},
	author = {Edmonds, David},
	year = {2014}
}

@article{katz_tuskegee_2006,
  author = {Katz, Ralph and Kegeles, Steven and Kressin, Nancy and Green, Lee and Wang, Min Qi and James, Sherman and Russell, Stefanie Luise and Claudio, Cristina},
  title = {The {Tuskegee} {Legacy} {Project}: {Willingness} of {Minorities} to {Participate} in {Biomedical} {Research}},
  journal = {Journal of Health Care for the Poor and Underserved},
  volume = {17},
  number = {4},
  pages = {698--715},
  month = nov,
  year = {2006},
  doi = {10.1353/hpu.2006.0126},
  url = {http://muse.jhu.edu/article/206217},
}

@article{taylor_offshore_2017,
  author = {Taylor, Marisa},
  title = {Offshore {Human} {Testing} {Of} {Herpes} {Vaccine} {Stokes} {Debate} {Over} {U}.{S}. {Safety} {Rules}},
  journal = {Kaiser Health News},
  month = aug,
  year = {2017},
  url = {https://khn.org/news/offshore-rush-for-herpes-vaccine-roils-debate-over-u-s-safety-rules/},
}

@book{folse_fundamental_2005,
  author = {Folse, Henry},
  title = {Some {Fundamental} {Concepts} in {Ethics}},
  year = {2005},
  publisher = {Department of Philosophy, College of Arts and Sciences, Loyola University},
  url = {http://people.loyno.edu/~folse/ethics.html},
}

@article{budoff_effect_2020,
  author = {Budoff, Matthew J. and Bhatt, Deepak L. and Kinninger, April and Lakshmanan, Suvasini and Muhlestein, Joseph B. and Le, Viet T. and May, Heidi T. and Shaikh, Kashif and Shekar, Chandana and Roy, Sion K. and Tayek, John and Nelson, John R.},
  title = {Effect of icosapent ethyl on progression of coronary atherosclerosis in patients with elevated triglycerides on statin therapy: final results of the {EVAPORATE} trial},
  shorttitle = {Effect of icosapent ethyl on progression of coronary atherosclerosis in patients with elevated triglycerides on statin therapy},
  journal = {European Heart Journal},
  month = aug,
  year = {2020},
  doi = {10.1093/eurheartj/ehaa652},
  url = {https://academic.oup.com/eurheartj/advance-article/doi/10.1093/eurheartj/ehaa652/5898836},
  urldate = {2020-08-31},
  language = {en},
  abstract = {AbstractAims.  Despite the effects of statins in reducing cardiovascular events and slowing progression of coronary atherosclerosis, significant cardiovascular},
}

@article{hell_quantitative_2017,
  author = {Hell, Michaela M. and Motwani, Manish and Otaki, Yuka and Cadet, Sebastien and Gransar, Heidi and Miranda-Peats, Romalisa and Valk, Jacob and Slomka, Piotr J. and Cheng, Victor Y. and Rozanski, Alan and Tamarappoo, Balaji K. and Hayes, Sean and Achenbach, Stephan and Berman, Daniel S. and Dey, Damini},
  title = {Quantitative global plaque characteristics from coronary computed tomography angiography for the prediction of future cardiac mortality during long-term follow-up},
  journal = {European Heart Journal - Cardiovascular Imaging},
  volume = {18},
  number = {12},
  pages = {1331--1339},
  month = jul,
  year = {2017},
  issn = {2047-2404},
  doi = {10.1093/ehjci/jex183},
  url = {https://academic.oup.com/ehjcimaging/article/18/12/1331/4056171},
  urldate = {2020-08-31},
  language = {en},
  abstract = {AbstractAims.  Adverse plaque characteristics determined by coronary computed tomography angiography (CTA) have been associated with future cardiac events. Our},
  note = {Publisher: Oxford Academic},
}

@article{maslin_cutaneous_2018,
	title = {Cutaneous larva migrans with pulmonary involvement},
	volume = {2018},
	copyright = {© BMJ Publishing Group Ltd (unless otherwise stated in the text of the article) 2018. All rights reserved. No commercial use is permitted unless otherwise expressly granted.},
	issn = {1757-790X},
	url = {https://casereports.bmj.com/content/2018/bcr-2017-223508},
	doi = {10.1136/bcr-2017-223508},
	abstract = {Douglas Maslin, Marc Wallace. Cutaneous larva migrans with …},
	language = {en},
	urldate = {2020-08-31},
	journal = {BMJ Case Reports},
	author = {Maslin, Douglas and Wallace, Marc},
	month = feb,
	year = {2018},
	pmid = {29330280},
	note = {Publisher: BMJ Publishing Group
Section: Images in…},
	pages = {bcr-2017-223508}
}

@article{gelinas_when_2016,
  author = {Gelinas, Luke and Wertheimer, Alan and Miller, Franklin G.},
  title = {When and {Why} {Is} {Research} without {Consent} {Permissible}?},
  journal = {Hastings Center Report},
  volume = {46},
  number = {2},
  pages = {35--43},
  year = {2016},
  issn = {1552-146X},
  doi = {10.1002/hast.548},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/hast.548},
  urldate = {2020-09-01},
  language = {en},
  copyright = {Published 2016. This article is a U.S. Government work and is in the public domain in the USA.},
  abstract = {The view that research with competent adults requires valid consent to be ethical perhaps finds its clearest expression in the Nuremberg Code, whose famous first principle asserts that “the voluntary consent of the human subject is absolutely essential.” In a similar vein, the United Nations International Covenant on Civil and Political Rights states that “no one shall be subjected without his free consent to medical or scientific experimentation.” Yet although some formulations of the consent principle allow no exceptions, others hold that informed consent is not always strictly necessary for ethical research. The U.S. federal regulations known as the “Common Rule,” which govern research with human subjects, lists several conditions for waiving consent. However, neither guidance documents on the ethics of clinical research nor the literature in bioethics contains a general justification of research without consent. The purpose of this paper is to advance a justificatory framework that will explain why research without consent is permissible in paradigmatic cases and that can be useful in analyzing cases about which there is (or is likely to be) disagreement. We argue that research without consent can be justified on two grounds: if it stands to infringe no right of the participants and obtaining consent is impracticable, or if the gravity of the rights infringement is minor and outweighed by the expected social value of the research and obtaining consent is impracticable.},
  note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/hast.548},
}

@article{shaw_facebooks_2015,
	title = {Facebook’s flawed emotion experiment: {Antisocial} research on social network users},
	copyright = {© The Author(s) 2015},
	shorttitle = {Facebook’s flawed emotion experiment},
	url = {https://journals.sagepub.com/doi/10.1177/1747016115579535},
	doi = {10.1177/1747016115579535},
	abstract = {In June 2014, a paper reporting the results of a study into ‘emotional contagion’ on Facebook was published. This research has already attracted a great deal of...},
	language = {en},
	urldate = {2020-09-01},
	journal = {Research Ethics},
	author = {Shaw, David},
	month = may,
	year = {2015},
	note = {Publisher: SAGE Publications}
}

@article{kramer_experimental_2014,
  author = {Kramer, Adam D. I. and Guillory, Jamie E. and Hancock, Jeffrey T.},
  title = {Experimental evidence of massive-scale emotional contagion through social networks},
  journal = {Proceedings of the National Academy of Sciences},
  volume = {111},
  number = {24},
  pages = {8788--8790},
  month = jun,
  year = {2014},
  issn = {0027-8424, 1091-6490},
  doi = {10.1073/pnas.1320040111},
  pmid = {24889601},
  url = {https://www.pnas.org/content/111/24/8788},
  urldate = {2020-09-01},
  language = {en},
  copyright = {©  . Freely available online through the PNAS open access option.},
  keywords = {big data, computer-mediated communication, social media},
  abstract = {Emotional states can be transferred to others via emotional contagion, leading people to experience the same emotions without their awareness. Emotional contagion is well established in laboratory experiments, with people transferring positive and negative emotions to others. Data from a large real-world social network, collected over a 20-y period suggests that longer-lasting moods (e.g., depression, happiness) can be transferred through networks [Fowler JH, Christakis NA (2008) BMJ 337:a2338], although the results are controversial. In an experiment with people who use Facebook, we test whether emotional contagion occurs outside of in-person interaction between individuals by reducing the amount of emotional content in the News Feed. When positive expressions were reduced, people produced fewer positive posts and more negative posts; when negative expressions were reduced, the opposite pattern occurred. These results indicate that emotions expressed by others on Facebook influence our own emotions, constituting experimental evidence for massive-scale contagion via social networks. This work also suggests that, in contrast to prevailing assumptions, in-person interaction and nonverbal cues are not strictly necessary for emotional contagion, and that the observation of others’ positive experiences constitutes a positive experience for people.},
  note = {Publisher: National Academy of Sciences
Section: Social Sciences},
}

@misc{noauthor_dcmi_nodate,
	title = {{DCMI}: {Dublin} {Core}™ {Metadata} {Element} {Set}, {Version} 1.1: {Reference} {Description}},
	url = {https://www.dublincore.org/specifications/dublin-core/dces/},
	urldate = {2020-09-02}
}

@misc{noauthor_datacite_nodate,
  title = {{DataCite} {Schema}},
  journal = {DataCite Schema},
  type = {website},
  url = {https://schema.datacite.org/},
  urldate = {2020-09-02},
  language = {en},
  copyright = {CC-BY},
  abstract = {The DataCite Schema server.},
}

@techreport{chait_technical_2014,
	address = {Washington, D.C.},
	title = {Technical assessment of open data platforms for national statistical organisations},
	url = {https://openknowledge.worldbank.org/handle/10986/21111},
	abstract = {The term "open data" is generally understood to be data that are made available to the public free of charge, without registration or restrictive licenses, for any purpose whatsoever (including commercial purposes), in electronic, machine-readable formats that ensure data are easy to find, download and use. National Statistics Offices (NSOs) have the potential to play a pivotal role in the implementation of open data initiatives. As producers and curators of data, the objective of making high quality data more accessible and usable is consistent with their guiding principles. NSOs indicate, in research conducted in support of this report, that one of the difficulties they encounter is that the technology they use to publish - or electronically distribute - data for public use is not compatible with open formats. They also indicate that common software packages used for open data portals do not accommodate the data formats and metadata they produce. Two key concerns related to data dissemination products are addresses: (1) Can such products designed primarily for NSOs satisfy requirements for an open data initiative?; and (2) Can such products designed primarily for open data satisfy the requirements of NSOs? Furthermore, data reuse, both by data experts and the public at large, is key to creating new opportunities and benefits from government data. 
The following recommendations are made to improve the overall utility of data publication platforms to NSOs and the open data community: improve technical documentation; ensure public Application Programming Interfaces (APIs) and endpoints are interoperable; presentation of metadata and Uniform Resource Identifiers (URIs) must conform to W3C standards; natural language search and metadata faceting should be standard; structural metadata and hypercube support are core NSO requirements; dashboards and visualisations are necessary for user engagement; and develop data engagement tools for improving data-quality and reuse.},
	language = {en},
	urldate = {2020-09-02},
	institution = {World Bank Group},
	author = {Chait, Gavin},
	month = dec,
	year = {2014}
}

@book{downey_think_2014,
  author = {Downey, Allen B.},
  title = {Think {Stats} 2 - {Exploratory} {Data} {Analysis} in {Python}},
  shorttitle = {Think {Stats}},
  edition = {2.1.0},
  year = {2014},
  publisher = {Green Tea Press},
  address = {Needham, Massachusetts},
  url = {https://greenteapress.com/wp/think-stats-2e/},
  urldate = {2020-09-02},
  language = {en-US},
  copyright = {Creative Commons NonCommercial ShareAlike 4.0 International},
}

@book{vu_introductory_2020,
  author = {Vu, Julie and Harrington, David},
  title = {Introductory {Statistics} for the {Life} and {Biomedical} {Sciences}},
  edition = {First},
  month = jul,
  year = {2020},
  publisher = {OpenIntro.org},
  url = {https://www.openintro.org/book/biostat/},
  copyright = {Creative Commons NonCommercial ShareAlike 4.0 International},
}

@article{dahmen_synsys_2019,
  author = {Dahmen, Jessamyn and Cook, Diane},
  title = {{SynSys}: {A} {Synthetic} {Data} {Generation} {System} for {Healthcare} {Applications}},
  shorttitle = {{SynSys}},
  journal = {Sensors (Basel, Switzerland)},
  volume = {19},
  number = {5},
  month = mar,
  year = {2019},
  issn = {1424-8220},
  doi = {10.3390/s19051181},
  pmid = {30857130},
  pmcid = {PMC6427177},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6427177/},
  urldate = {2020-09-02},
  abstract = {Creation of realistic synthetic behavior-based sensor data is an important aspect of testing machine learning techniques for healthcare applications. Many of the existing approaches for generating synthetic data are often limited in terms of complexity and realism. We introduce SynSys, a machine learning-based synthetic data generation method, to improve upon these limitations. We use this method to generate synthetic time series data that is composed of nested sequences using hidden Markov models and regression models which are initially trained on real datasets. We test our synthetic data generation technique on a real annotated smart home dataset. We use time series distance measures as a baseline to determine how realistic the generated data is compared to real data and demonstrate that SynSys produces more realistic data in terms of distance compared to random data generation, data from another home, and data from another time period. Finally, we apply our synthetic data generation technique to the problem of generating data when only a small amount of ground truth data is available. Using semi-supervised learning we demonstrate that SynSys is able to improve activity recognition accuracy compared to using the small amount of real data alone.},
}

@article{crane_peer_2018,
  author = {Crane, Harry and Martin, Ryan},
  title = {In peer review we (don't) trust: {How} peer review's filtering poses a systemic risk to science},
  shorttitle = {In peer review we (don't) trust},
  journal = {Researchers.One},
  month = sep,
  year = {2018},
  url = {https://www.researchers.one/article/2018-09-17},
  urldate = {2020-09-04},
  language = {en},
  abstract = {This article describes how the filtering role played by peer review may actually be harmful rather than helpful to the quality of the scientific literature. We argue that, instead of trying to filter out the low-quality research, as is done by traditional journals, a better strategy is to let everything through but with an acknowledgment of the uncertain quality of what is published, as is done on the RESEARCHERS.ONE platform.  We refer to this as},
}

@article{brembs_reliable_2019,
  author = {Brembs, Björn},
  title = {Reliable novelty: {New} should not trump true},
  shorttitle = {Reliable novelty},
  journal = {PLoS Biology},
  volume = {17},
  number = {2},
  month = feb,
  year = {2019},
  issn = {1544-9173},
  doi = {10.1371/journal.pbio.3000117},
  pmid = {30753184},
  pmcid = {PMC6372144},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6372144/},
  urldate = {2020-09-04},
  abstract = {Although a case can be made for rewarding scientists for risky, novel science rather than for incremental, reliable science, novelty without reliability ceases to be science. The currently available evidence suggests that the most prestigious journals are no better at detecting unreliable science than other journals. In fact, some of the most convincing studies show a negative correlation, with the most prestigious journals publishing the least reliable science. With the credibility of science increasingly under siege, how much longer can we afford to reward novelty at the expense of reliability? Here, I argue for replacing the legacy journals with a modern information infrastructure that is governed by scholars. This infrastructure would allow renewed focus on scientific reliability, with improved sort, filter, and discovery functionalities, at massive cost savings. If these savings were invested in additional infrastructure for research data and scientific code and/or software, scientific reliability would receive additional support, and funding woes—for, e.g., biological databases—would be a concern of the past., This Perspective article asserts that the most prestigious journals publish the least reliable science, and asks how long we can afford to reward scientists for publishing there.},
}

@article{stern_proposal_2019,
  author = {Stern, Bodo M. and O’Shea, Erin K.},
  title = {A proposal for the future of scientific publishing in the life sciences},
  journal = {PLoS Biology},
  volume = {17},
  number = {2},
  month = feb,
  year = {2019},
  issn = {1544-9173},
  doi = {10.1371/journal.pbio.3000116},
  pmid = {30753179},
  pmcid = {PMC6372143},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6372143/},
  urldate = {2020-09-04},
  abstract = {Science advances through rich, scholarly discussion. More than ever before, digital tools allow us to take that dialogue online. To chart a new future for open publishing, we must consider alternatives to the core features of the legacy print publishing system, such as an access paywall and editorial selection before publication. Although journals have their strengths, the traditional approach of selecting articles before publication (“curate first, publish second”) forces a focus on “getting into the right journals,” which can delay dissemination of scientific work, create opportunity costs for pushing science forward, and promote undesirable behaviors among scientists and the institutions that evaluate them. We believe that a “publish first, curate second” approach with the following features would be a strong alternative: authors decide when and what to publish; peer review reports are published, either anonymously or with attribution; and curation occurs after publication, incorporating community feedback and expert judgment to select articles for target audiences and to evaluate whether scientific work has stood the test of time. These proposed changes could optimize publishing practices for the digital age, emphasizing transparency, peer-mediated improvement, and post-publication appraisal of scientific articles., This Perspective article proposes new practices for scientific publishing that align better with today's digital environment than do legacy practices.},
}

@article{packer_cardiovascular_2020,
	title = {Cardiovascular and {Renal} {Outcomes} with {Empagliflozin} in {Heart} {Failure}},
	issn = {0028-4793},
	url = {https://doi.org/10.1056/NEJMoa2022190},
	doi = {10.1056/NEJMoa2022190},
	urldate = {2020-09-04},
	journal = {New England Journal of Medicine},
	author = {Packer, Milton and Anker, Stefan D. and Butler, Javed and Filippatos, Gerasimos and Pocock, Stuart J. and Carson, Peter and Januzzi, James and Verma, Subodh and Tsutsui, Hiroyuki and Brueckmann, Martina and Jamal, Waheed and Kimura, Karen and Schnee, Janet and Zeller, Cordula and Cotton, Daniel and Bocchi, Edimar and Böhm, Michael and Choi, Dong-Ju and Chopra, Vijay and Chuquiure, Eduardo and Giannetti, Nadia and Janssens, Stefan and Zhang, Jian and Gonzalez Juanatey, Jose R. and Kaul, Sanjay and Brunner-La Rocca, Hans-Peter and Merkely, Bela and Nicholls, Stephen J. and Perrone, Sergio and Pina, Ileana and Ponikowski, Piotr and Sattar, Naveed and Senni, Michele and Seronde, Marie-France and Spinar, Jindrich and Squire, Iain and Taddei, Stefano and Wanner, Christoph and Zannad, Faiez},
	month = aug,
	year = {2020},
	note = {Publisher: Massachusetts Medical Society
\_eprint: https://doi.org/10.1056/NEJMoa2022190}
}

@article{student_probable_1908,
	title = {The {Probable} {Error} of a {Mean}},
	volume = {6},
	issn = {0006-3444},
	url = {https://www.jstor.org/stable/2331554?origin=crossref},
	doi = {10.2307/2331554},
	number = {1},
	urldate = {2020-09-04},
	journal = {Biometrika},
	author = {{Student}},
	month = mar,
	year = {1908},
	pages = {1--25}
}

% NOTE: the citation key keeps its historical "jabbou" spelling so existing \cite commands still resolve; the first author's surname is Jabbour.
@article{jabbou_effect_2015,
	title = {Effect of {Study} {Design} on the {Reported} {Effect} of {Cardiac} {Resynchronization} {Therapy} ({CRT}) on {Quantitative} {Physiological} {Measures}: {Stratified} {Meta}-{Analysis} in {Narrow}-{QRS} {Heart} {Failure} and {Implications} for {Planning} {Future} {Studies}},
	volume = {4},
	shorttitle = {Effect of {Study} {Design} on the {Reported} {Effect} of {Cardiac} {Resynchronization} {Therapy} ({CRT}) on {Quantitative} {Physiological} {Measures}},
	url = {https://www.ahajournals.org/doi/10.1161/JAHA.114.000896},
	doi = {10.1161/JAHA.114.000896},
	abstract = {BackgroundBiventricular pacing (CRT) shows clear benefits in heart failure with wide QRS, but results in narrow QRS have appeared conflicting. We tested the hypothesis that study design might have influenced findings.Method and ResultsWe identified all reports of CRT‐P/D therapy in subjects with narrow QRS reporting effects on continuous physiological variables. Twelve studies (2074 patients) met these criteria. Studies were stratified by presence of bias‐resistance steps: the presence of a randomized control arm over a single arm, and blinded outcome measurement. Change in each endpoint was quantified using a standardized effect size (Cohen's d). We conducted separate meta‐analyses for each variable in turn, stratified by trial quality. In non‐randomized, non‐blinded studies, the majority of variables (10 of 12, 83\%) showed significant improvement, ranging from a standardized mean effect size of +1.57 (95\%CI +0.43 to +2.7) for ejection fraction to +2.87 (+1.78 to +3.95) for NYHA class. In the randomized, non‐blinded study, only 3 out of 6 variables (50\%) showed improvement. For the randomized blinded studies, 0 out of 9 variables (0\%) showed benefit, ranging from −0.04 (−0.31 to +0.22) for ejection fraction to −0.1 (−0.73 to +0.53) for 6‐minute walk test.ConclusionsDifferences in degrees of resistance to bias, rather than choice of endpoint, explain the variation between studies of CRT in narrow‐QRS heart failure addressing physiological variables. When bias‐resistance features are implemented, it becomes clear that these patients do not improve in any tested physiological variable. Guidance from studies without careful planning to resist bias may be far less useful than commonly perceived.},
	number = {1},
	urldate = {2020-09-05},
	journal = {Journal of the American Heart Association},
	author = {Jabbour, Richard J. and Shun-Shin, Matthew J. and Finegold, Judith A. and Afzal, Sohaib S. M. and Cook, Christopher and Nijjer, Sukhjinder S. and Whinnett, Zachary I. and Manisty, Charlotte H. and Brugada, Josep and Francis, Darrel P.},
	month = may,
	year = {2015},
	note = {Publisher: American Heart Association},
	pages = {e000896}
}

@article{vickers_selecting_2006,
  author = {Vickers, Andrew J. and Kramer, Barry S. and Baker, Stuart G.},
  title = {Selecting patients for randomized trials: a systematic approach based on risk group},
  shorttitle = {Selecting patients for randomized trials},
  journal = {Trials},
  volume = {7},
  number = {1},
  pages = {30},
  month = oct,
  year = {2006},
  issn = {1745-6215},
  doi = {10.1186/1745-6215-7-30},
  url = {https://doi.org/10.1186/1745-6215-7-30},
  urldate = {2020-09-05},
  abstract = {A key aspect of randomized trial design is the choice of risk group. Some trials include patients from the entire at-risk population, others accrue only patients deemed to be at increased risk. We present a simple statistical approach for choosing between these approaches. The method is easily adapted to determine which of several competing definitions of high risk is optimal.},
}

@misc{noauthor_nih_nodate,
  title = {{NIH} {Clinical} {Center}: {Ethics} in {Clinical} {Research}},
  url = {https://www.cc.nih.gov/recruit/ethics.html},
  urldate = {2020-09-05},
}

@article{emanuel_what_2000,
  author = {Emanuel, Ezekiel J. and Wendler, David and Grady, Christine},
  title = {What {Makes} {Clinical} {Research} {Ethical}?},
  journal = {JAMA},
  volume = {283},
  number = {20},
  pages = {2701--2711},
  month = may,
  year = {2000},
  issn = {0098-7484},
  doi = {10.1001/jama.283.20.2701},
  url = {https://jamanetwork.com/journals/jama/fullarticle/192740},
  urldate = {2020-09-05},
  language = {en},
  abstract = {Many believe that informed consent makes clinical research ethical. However, informed consent is neither necessary nor sufficient for ethical clinical research. Drawing on the basic philosophies underlying major codes, declarations, and other documents relevant to research with human subjects, we propose 7 requirements that systematically elucidate a coherent framework for evaluating the ethics of clinical research studies: (1) value—enhancements of health or knowledge must be derived from the research; (2) scientific validity—the research must be methodologically rigorous; (3) fair subject selection—scientific objectives, not vulnerability or privilege, and the potential for and distribution of risks and benefits, should determine communities selected as study sites and the inclusion criteria for individual subjects; (4) favorable risk-benefit ratio—within the context of standard clinical practice and the research protocol, risks must be minimized, potential benefits enhanced, and the potential benefits to individuals and knowledge gained for society must outweigh the risks; (5) independent review—unaffiliated individuals must review the research and approve, amend, or terminate it; (6) informed consent—individuals should be informed about the research and provide their voluntary consent; and (7) respect for enrolled subjects—subjects should have their privacy protected, the opportunity to withdraw, and their well-being monitored. Fulfilling all 7 requirements is necessary and sufficient to make clinical research ethical. These requirements are universal, although they must be adapted to the health, economic, cultural, and technological conditions in which clinical research is conducted.},
  note = {Publisher: American Medical Association},
}

@article{rafferty_assessing_2013,
  author = {Rafferty, Elizabeth A. and Park, Jeong Mi and Philpotts, Liane E. and Poplack, Steven P. and Sumkin, Jules H. and Halpern, Elkan F. and Niklason, Loren T.},
  title = {Assessing {Radiologist} {Performance} {Using} {Combined} {Digital} {Mammography} and {Breast} {Tomosynthesis} {Compared} with {Digital} {Mammography} {Alone}: {Results} of a {Multicenter}, {Multireader} {Trial}},
  shorttitle = {Assessing {Radiologist} {Performance} {Using} {Combined} {Digital} {Mammography} and {Breast} {Tomosynthesis} {Compared} with {Digital} {Mammography} {Alone}},
  journal = {Radiology},
  volume = {266},
  number = {1},
  pages = {104--113},
  month = jan,
  year = {2013},
  issn = {0033-8419},
  doi = {10.1148/radiol.12120674},
  pmid = {23169790},
  pmcid = {PMC5410947},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5410947/},
  urldate = {2020-09-18},
  abstract = {The addition of tomosynthesis to digital mammography offers the dual benefit of improved diagnostic accuracy and significant reduction in false-positive recall rate, thereby avoiding unnecessary additional testing and decreasing attendant anxiety, inconvenience, and cost for women.},
}

@article{greco_axillary_2001,
  author = {Greco, Marco and Crippa, Flavio and Agresti, Roberto and Seregni, Ettore and Gerali, Alberto and Giovanazzi, Riccardo and Micheli, Andrea and Asero, Salvatore and Ferraris, Cristina and Gennaro, Massimiliano and Bombardieri, Emilio and Cascinelli, Natale},
  title = {Axillary {Lymph} {Node} {Staging} in {Breast} {Cancer} by 2-{Fluoro}-2-deoxy-d-glucose–{Positron} {Emission} {Tomography}: {Clinical} {Evaluation} and {Alternative} {Management}},
  shorttitle = {Axillary {Lymph} {Node} {Staging} in {Breast} {Cancer} by 2-{Fluoro}-2-deoxy-d-glucose–{Positron} {Emission} {Tomography}},
  journal = {JNCI: Journal of the National Cancer Institute},
  volume = {93},
  number = {8},
  pages = {630--635},
  month = apr,
  year = {2001},
  issn = {0027-8874},
  doi = {10.1093/jnci/93.8.630},
  url = {https://academic.oup.com/jnci/article/93/8/630/2906548},
  urldate = {2020-09-18},
  language = {en},
  abstract = {Abstract.  Background: Surgical removal of axillary lymph node and histologic examination for metastases are used to determine whether adjuvant treatment is nec},
  note = {Publisher: Oxford Academic},
}

@article{jurca_analysis_2019,
  author    = {Jurca, Ales and Žabkar, Jure and Džeroski, Sašo},
  title     = {Analysis of 1.2 million foot scans from {North} {America}, {Europe} and {Asia}},
  journal   = {Scientific Reports},
  volume    = {9},
  number    = {1},
  pages     = {19155},
  month     = dec,
  year      = {2019},
  publisher = {Nature Publishing Group},
  issn      = {2045-2322},
  doi       = {10.1038/s41598-019-55432-z},
  url       = {https://www.nature.com/articles/s41598-019-55432-z},
  urldate   = {2020-09-18},
  language  = {en},
  copyright = {2019 The Author(s)},
  abstract  = {For decades, footwear brands have developed products using outdated methods and measurements, working with limited insight into the foot shapes and dimensions of their target customers. The integration of 3D scanning technology into footwear retail stores has made it possible for this research to analyze a database containing a large number of male and female 3D foot scans collected across North America, Europe, and Asia. Foot scans were classified into length classes with 5mm length increments; mean width, instep height, and heel width were calculated for each length class. This study confirms the existence of many statistically significant differences in mean foot measurements amongst the regions and between the sexes, and a large dispersion of foot measurements within each group of customers. Therefore, shoes should be developed separately for each group, region, and sex, and at least 3 shoe widths per length class are required to provide a proper fit for 90\% of customers. Beyond this, our analysis asserts that a shoe designed for a single group will fit a different segment of the population in another group, and that existing last grading tables should be updated to reflect the foot dimensions of current consumers.},
}

@misc{wu_automated_2016,
  author        = {Wu, Xiaolin and Zhang, Xi},
  title         = {Automated {Inference} on {Criminality} using {Face} {Images}},
  howpublished  = {arXiv preprint},
  month         = nov,
  year          = {2016},
  eprint        = {1611.04135},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  version       = {1},
  url           = {http://arxiv.org/abs/1611.04135},
  urldate       = {2020-09-21},
  keywords      = {Computer Science - Computer Vision and Pattern Recognition},
  abstract      = {We study, for the first time, automated inference on criminality based solely on still face images. Via supervised machine learning, we build four classifiers (logistic regression, KNN, SVM, CNN) using facial images of 1856 real persons controlled for race, gender, age and facial expressions, nearly half of whom were convicted criminals, for discriminating between criminals and non-criminals. All four classifiers perform consistently well and produce evidence for the validity of automated face-induced inference on criminality, despite the historical controversy surrounding the topic. Also, we find some discriminating structural features for predicting criminality, such as lip curvature, eye inner corner distance, and the so-called nose-mouth angle. Above all, the most important discovery of this research is that criminal and non-criminal face images populate two quite distinctive manifolds. The variation among criminal faces is significantly greater than that of the non-criminal faces. The two manifolds consisting of criminal and non-criminal faces appear to be concentric, with the non-criminal manifold lying in the kernel with a smaller span, exhibiting a law of normality for faces of non-criminals. In other words, the faces of general law-biding public have a greater degree of resemblance compared with the faces of criminals, or criminals have a higher degree of dissimilarity in facial appearance than normal people.},
}

@book{pishro-nik_introduction_2014,
  author     = {Pishro-Nik, Hossein},
  title      = {Introduction to {Probability}, {Statistics}, and {Random} {Processes}},
  shorttitle = {Probability},
  publisher  = {Kappa Research LLC},
  year       = {2014},
  url        = {https://www.probabilitycourse.com/},
}

@article{anwar_francisella_2009,
  author   = {Anwar, Nadia and Hunt, Ela},
  title    = {Francisella tularensis novicida proteomic and transcriptomic data integration and annotation based on semantic web technologies},
  journal  = {BMC Bioinformatics},
  volume   = {10},
  number   = {10},
  pages    = {S3},
  month    = oct,
  year     = {2009},
  issn     = {1471-2105},
  doi      = {10.1186/1471-2105-10-S10-S3},
  url      = {https://doi.org/10.1186/1471-2105-10-S10-S3},
  urldate  = {2020-09-24},
  abstract = {This paper summarises the lessons and experiences gained from a case study of the application of semantic web technologies to the integration of data from the bacterial species Francisella tularensis novicida (Fn). Fn data sources are disparate and heterogeneous, as multiple laboratories across the world, using multiple technologies, perform experiments to understand the mechanism of virulence. It is hard to integrate these data sources in a flexible manner that allows new experimental data to be added and compared when required.},
}

@article{maxim_screening_2014,
  author     = {Maxim, L. Daniel and Niebo, Ron and Utell, Mark J.},
  title      = {Screening tests: a review with examples},
  shorttitle = {Screening tests},
  journal    = {Inhalation Toxicology},
  volume     = {26},
  number     = {13},
  pages      = {811--828},
  month      = nov,
  year       = {2014},
  issn       = {0895-8378},
  doi        = {10.3109/08958378.2014.955932},
  pmid       = {25264934},
  pmcid      = {PMC4389712},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4389712/},
  urldate    = {2020-09-26},
  abstract   = {Screening tests are widely used in medicine to assess the likelihood that members of a defined population have a particular disease. This article presents an overview of such tests including the definitions of key technical (sensitivity and specificity) and population characteristics necessary to assess the benefits and limitations of such tests. Several examples are used to illustrate calculations, including the characteristics of low dose computed tomography as a lung cancer screen, choice of an optimal PSA cutoff and selection of the population to undergo mammography. The importance of careful consideration of the consequences of both false positives and negatives is highlighted. Receiver operating characteristic curves are explained as is the need to carefully select the population group to be tested.},
}

@article{versi_gold_1992,
  author  = {Versi, E.},
  title   = {``{Gold} standard'' is an appropriate term},
  journal = {BMJ: British Medical Journal},
  volume  = {305},
  number  = {6846},
  pages   = {187},
  month   = jul,
  year    = {1992},
  issn    = {0959-8138},
  pmid    = {1515860},
  pmcid   = {PMC1883235},
  url     = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1883235/},
  urldate = {2020-09-26},
}

@article{lam_imaging-based_2014,
  author     = {Lam, Diana L. and Pandharipande, Pari V. and Lee, Janie M. and Lehman, Constance D. and Lee, Christoph I.},
  title      = {Imaging-{Based} {Screening}: {Understanding} the {Controversies}},
  shorttitle = {Imaging-{Based} {Screening}},
  journal    = {American Journal of Roentgenology},
  volume     = {203},
  number     = {5},
  pages      = {952--956},
  month      = nov,
  year       = {2014},
  issn       = {0361-803X},
  doi        = {10.2214/AJR.14.13049},
  pmid       = {25341132},
  pmcid      = {PMC4220242},
  url        = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4220242/},
  urldate    = {2020-09-26},
  abstract   = {Objective
The goals of this article are to provide an overview of controversial aspects of imaging-based screening and to elucidate potential risks that may offset anticipated benefits.

Conclusion
Current controversial topics associated with imaging-based screening include false-positive results, incidental findings, overdiagnosis, radiation risks, and costs. Alongside the benefits of screening, radiologists should be prepared to discuss these additional diagnostic consequences with providers and patients to better guide shared decision making regarding imaging-based screening.},
}

@article{alberg_use_2004,
  author   = {Alberg, Anthony J and Park, Ji Wan and Hager, Brant W and Brock, Malcolm V and Diener-West, Marie},
  title    = {The {Use} of “{Overall} {Accuracy}” to {Evaluate} the {Validity} of {Screening} or {Diagnostic} {Tests}},
  journal  = {Journal of General Internal Medicine},
  volume   = {19},
  number   = {5 Pt 1},
  pages    = {460--465},
  month    = may,
  year     = {2004},
  issn     = {0884-8734},
  doi      = {10.1111/j.1525-1497.2004.30091.x},
  pmid     = {15109345},
  pmcid    = {PMC1492250},
  url      = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1492250/},
  urldate  = {2020-09-26},
  abstract = {OBJECTIVE
Evaluations of screening or diagnostic tests sometimes incorporate measures of overall accuracy, diagnostic accuracy, or test efficiency. These terms refer to a single summary measurement calculated from 2 × 2 contingency tables that is the overall probability that a patient will be correctly classified by a screening or diagnostic test. We assessed the value of overall accuracy in studies of test validity, a topic that has not received adequate emphasis in the clinical literature.

DESIGN
Guided by previous reports, we summarize the issues concerning the use of overall accuracy. To document its use in contemporary studies, a search was performed for test evaluation studies published in the clinical literature from 2000 to 2002 in which overall accuracy derived from a 2 × 2 contingency table was reported.

MEASUREMENTS AND MAIN RESULTS
Overall accuracy is the weighted average of a test's sensitivity and specificity, where sensitivity is weighted by prevalence and specificity is weighted by the complement of prevalence. Overall accuracy becomes particularly problematic as a measure of validity as 1) the difference between sensitivity and specificity increases and/or 2) the prevalence deviates away from 50\%. Both situations lead to an increasing deviation between overall accuracy and either sensitivity or specificity. A summary of results from published studies (N=25) illustrated that the prevalence-dependent nature of overall accuracy has potentially negative consequences that can lead to a distorted impression of the validity of a screening or diagnostic test.

CONCLUSIONS
Despite the intuitive appeal of overall accuracy as a single measure of test validity, its dependence on prevalence renders it inferior to the careful and balanced consideration of sensitivity and specificity.},
}

@article{safra_tracking_2020,
  author    = {Safra, Lou and Chevallier, Coralie and Grèzes, Julie and Baumard, Nicolas},
  title     = {Tracking historical changes in trustworthiness using machine learning analyses of facial cues in paintings},
  journal   = {Nature Communications},
  volume    = {11},
  number    = {1},
  pages     = {4728},
  month     = sep,
  year      = {2020},
  publisher = {Nature Publishing Group},
  issn      = {2041-1723},
  doi       = {10.1038/s41467-020-18566-7},
  url       = {https://www.nature.com/articles/s41467-020-18566-7},
  urldate   = {2020-09-28},
  language  = {en},
  copyright = {2020 The Author(s)},
  abstract  = {Social trust is linked to a host of positive societal outcomes, including improved economic performance, lower crime rates and more inclusive institutions. Yet, the origins of trust remain elusive, partly because social trust is difficult to document in time. Building on recent advances in social cognition, we design an algorithm to automatically generate trustworthiness evaluations for the facial action units (smile, eye brows, etc.) of European portraits in large historical databases. Our results show that trustworthiness in portraits increased over the period 1500–2000 paralleling the decline of interpersonal violence and the rise of democratic values observed in Western Europe. Further analyses suggest that this rise of trustworthiness displays is associated with increased living standards.},
}

@article{oaten_disease_2011,
  author   = {Oaten, Megan and Stevenson, Richard J. and Case, Trevor I.},
  title    = {Disease avoidance as a functional basis for stigmatization},
  journal  = {Philosophical Transactions of the Royal Society B: Biological Sciences},
  volume   = {366},
  number   = {1583},
  pages    = {3433--3452},
  month    = dec,
  year     = {2011},
  issn     = {0962-8436},
  doi      = {10.1098/rstb.2011.0095},
  pmid     = {22042920},
  pmcid    = {PMC3189356},
  url      = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3189356/},
  urldate  = {2020-10-06},
  abstract = {Stigmatization is characterized by chronic social and physical avoidance of a person(s) by other people. Infectious disease may produce an apparently similar form of isolation—disease avoidance—but on symptom remission this often abates. We propose that many forms of stigmatization reflect the activation of this disease-avoidance system, which is prone to respond to visible signs and labels that connote disease, irrespective of their accuracy. A model of this system is presented, which includes an emotional component, whereby visible disease cues directly activate disgust and contamination, motivating avoidance, and a cognitive component, whereby disease labels bring to mind disease cues, indirectly activating disgust and contamination. The unique predictions of this model are then examined, notably that people who are stigmatized evoke disgust and are contaminating. That animals too show avoidance of diseased conspecifics, and that disease-related stigma targets are avoided in most cultures, also supports this evolutionary account. The more general implications of this approach are then examined, notably how it can be used to good (e.g. improving hygiene) or bad (e.g. racial vilification) ends, by yoking particular labels with cues that connote disease and disgust. This broadening of the model allows for stigmatization of groups with little apparent connection to disease.},
}

@article{walonoski_synthea_2018,
  author     = {Walonoski, Jason and Kramer, Mark and Nichols, Joseph and Quina, Andre and Moesel, Chris and Hall, Dylan and Duffett, Carlton and Dube, Kudakwashe and Gallagher, Thomas and McLachlan, Scott},
  title      = {Synthea: {An} approach, method, and software mechanism for generating synthetic patients and the synthetic electronic health care record},
  shorttitle = {Synthea},
  journal    = {Journal of the American Medical Informatics Association},
  volume     = {25},
  number     = {3},
  pages      = {230--238},
  month      = mar,
  year       = {2018},
  publisher  = {Oxford Academic},
  issn       = {1067-5027},
  doi        = {10.1093/jamia/ocx079},
  url        = {https://academic.oup.com/jamia/article/25/3/230/4098271},
  urldate    = {2020-10-10},
  language   = {en},
  abstract   = {AbstractObjective.  Our objective is to create a source of synthetic electronic health records that is readily available; suited to industrial, innovation, rese},
}

@misc{schunemann_grade_2013,
  editor   = {Schünemann, Holger and Brożek, Jan and Guyatt, Gordon and Oxman, Andrew},
  title    = {{GRADE} handbook},
  month    = oct,
  year     = {2013},
  url      = {https://gdt.gradepro.org/app/handbook/handbook.html},
  urldate  = {2020-10-26},
  abstract = {The GRADE handbook describes the process of rating the quality of the best available evidence and developing health care recommendations following the approach proposed by the Grading of Recommendations, Assessment, Development and Evaluation (GRADE) Working Group.},
}

@article{imai_experimental_2013,
  author    = {Imai, Kosuke and Tingley, Dustin and Yamamoto, Teppei},
  title     = {Experimental designs for identifying causal mechanisms},
  journal   = {Journal of the Royal Statistical Society. Series A (Statistics in Society)},
  volume    = {176},
  number    = {1},
  pages     = {5--32},
  year      = {2013},
  publisher = {Wiley},
  issn      = {0964-1998},
  url       = {https://www.jstor.org/stable/23355175},
  urldate   = {2020-10-27},
  abstract  = {Experimentation is a powerful methodology that enables scientists to establish causal claims empirically. However, one important criticism is that experiments merely provide a black box view of causality and fail to identify causal mechanisms. Specifically, critics argue that, although experiments can identify average causal effects, they cannot explain the process through which such effects come about. If true, this represents a serious limitation of experimentation, especially for social and medical science research that strives to identify causal mechanisms. We consider several experimental designs that help to identify average natural indirect effects. Some of these designs require the perfect manipulation of an intermediate variable, whereas others can be used even when only imperfect manipulation is possible. We use recent social science experiments to illustrate the key ideas that underlie each of the designs proposed.},
}

@book{hume_treatise_1740,
  author  = {Hume, David},
  title   = {A {Treatise} of {Human} {Nature}},
  year    = {1740},
  url     = {https://en.wikisource.org/wiki/Page:Treatise_of_Human_Nature_(1888).djvu/109},
  urldate = {2020-10-27},
}

@book{popper_logic_1959,
  author    = {Popper, Karl},
  title     = {The {Logic} of {Scientific} {Discovery}},
  year      = {1959},
  url       = {http://archive.org/details/PopperLogicScientificDiscovery},
  urldate   = {2020-10-27},
  language  = {eng},
  copyright = {http://creativecommons.org/publicdomain/zero/1.0/},
  keywords  = {History of science},
  abstract  = {The Logic of Scientific Discovery   is a 1934 book by Karl Popper. Popper rewrote his book in English and republished it in 1959. It argues that science should adopt a methodology based on falsifiability, because no number of experiments can ever prove a theory, but a single experiment can contradict one. Popper holds that empirical theories are characterized by falsifiability.},
}

@article{bellary_basics_2014,
  author   = {Bellary, Shantala and Krishnankutty, Binny and Latha, M. S.},
  title    = {Basics of case report form designing in clinical research},
  journal  = {Perspectives in Clinical Research},
  volume   = {5},
  number   = {4},
  pages    = {159--166},
  year     = {2014},
  issn     = {2229-3485},
  doi      = {10.4103/2229-3485.140555},
  pmid     = {25276625},
  pmcid    = {PMC4170533},
  url      = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4170533/},
  urldate  = {2020-10-28},
  abstract = {Case report form (CRF) is a specialized document in clinical research. It should be study protocol driven, robust in content and have material to collect the study specific data. Though paper CRFs are still used largely, use of electronic CRFs (eCRFS) are gaining popularity due to the advantages they offer such as improved data quality, online discrepancy management and faster database lock etc. Main objectives behind CRF development are preserving and maintaining quality and integrity of data. CRF design should be standardized to address the needs of all users such as investigator, site coordinator, study monitor, data entry personnel, medical coder and statistician. Data should be organized in a format that facilitates and simplifies data analysis. Collection of large amount of data will result in wasted resources in collecting and processing it and in many circumstances, will not be utilized for analysis. Apart from that, standard guidelines should be followed while designing the CRF. CRF completion manual should be provided to the site personnel to promote accurate data entry by them. These measures will result in reduced query generations and improved data integrity. It is recommended to establish and maintain a library of templates of standard CRF modules as they are time saving and cost-effective. This article is an attempt to describe the methods of CRF designing in clinical research and discusses the challenges encountered in this process.},
}

@book{leavy_research_2017,
  author     = {Leavy, Patricia},
  title      = {Research {Design}: {Quantitative}, {Qualitative}, {Mixed} {Methods}, {Arts}-{Based}, and {Community}-{Based} {Participatory} {Research} {Approaches}},
  shorttitle = {Research {Design}},
  publisher  = {Guilford Press},
  month      = apr,
  year       = {2017},
  isbn       = {978-1-4625-1438-0},
  url        = {https://www.guilford.com/books/Research-Design/Patricia-Leavy/9781462514380},
  urldate    = {2020-10-29},
  language   = {en-US},
  abstract   = {This user-friendly book provides a step-by-step guide to using the five major approaches to research design: quantitative, qualitative, mixed methods, arts-based, and community-based participatory research. Chapters on each approach follow a unique format—they present a template for a research proposal and explain in detail how to conceptualize and fill in every section.},
}

@book{creswell_research_2017,
  author     = {Creswell, John W. and Creswell, J. David},
  title      = {Research {Design}: {Qualitative}, {Quantitative}, and {Mixed} {Methods} {Approaches}},
  shorttitle = {Research {Design}},
  publisher  = {Sage},
  month      = dec,
  year       = {2017},
  isbn       = {978-1-5443-3842-2},
  url        = {https://us.sagepub.com/en-us/nam/research-design/book255675},
  urldate    = {2020-10-29},
  language   = {en},
  abstract   = {Qualitative, Quantitative, and Mixed Methods Approaches},
}

@article{schwarzer_prevalence_2001,
  author     = {Schwarzer, U. and Sommer, F. and Klotz, T. and Braun, M. and Reifenrath, B. and Engelmann, U.},
  title      = {The prevalence of {Peyronie}'s disease: results of a large survey},
  shorttitle = {The prevalence of {Peyronie}'s disease},
  journal    = {BJU International},
  volume     = {88},
  number     = {7},
  pages      = {727--730},
  month      = jul,
  year       = {2001},
  issn       = {1464-410X},
  doi        = {10.1046/j.1464-4096.2001.02436.x},
  url        = {https://bjui-journals.onlinelibrary.wiley.com/doi/abs/10.1046/j.1464-4096.2001.02436.x},
  urldate    = {2020-10-30},
  language   = {en},
  keywords   = {penile induration, Peyronie's disease, prevalence, sexual dysfunction},
  abstract   = {Objectives To determine the prevalence of Peyronie's disease, a localized connective tissue disorder of the penile tunica albuginea, the symptoms of which include palpable plaque, painful erections and curvature of the penis, in a large sample of men in Germany. Subjects and methods A standardized questionnaire was sent to 8000 male inhabitants (age range 30–80 years) of the greater Cologne area (≈ 1.5 million inhabitants). Three questions about the self-diagnosis of Peyronie's disease were previously assessed for validity on 158 healthy men and 24 patients with confirmed Peyronie's disease. To optimize the response rate, the questionnaire was mailed three times to all the men. Results The response rate after the third mailing was 55.4\% (4432 men); 142 men (3.2\%, mean age 57.4 years, sd 13.4) reported the new appearance of a palpable plaque which, from the previous validation, was the most sensitive question and the main symptom of the disease. In men aged 30–39 years only 1.5\% reported localized penile induration, compared with 3.0\% in those 40–49 and 50–59 years, 4.0\% in those 60–69 years and 6.5\% of those {\textgreater} 70 years old. Newly occurring angulation was reported by 119 of the 142 men (84\%) and painful erection by 66 (46.5\%). The combination of the three symptoms (plaque, deviation and painful erection) was reported by 46 of the 4432 respondents (1.04\%), i.e. 32\% of the 142 men with penile induration; 58 of the 142 men (41\%) reported erectile dysfunction. Conclusions This is the first large cross-sectional, community-based study to examine the prevalence of Peyronie's disease. Using previously validated questions the prevalence of Peyronie's disease in the sample was 3.2\%; this is much higher than indicated in previous reports. A comparably high prevalence is reported for diabetes and urolithiasis, suggesting that this ‘rare’ disease is more widespread than previously thought.},
}

@article{savian_studies_2020,
  author     = {Savian, Francesco and Ginaldi, Fabrizio and Musetti, Rita and Sandrin, Nicola and Tarquini, Giulia and Pagliari, Laura and Firrao, Giuseppe and Martini, Marta and Ermacora, Paolo},
  title      = {Studies on the aetiology of kiwifruit decline: interaction between soil-borne pathogens and waterlogging},
  shorttitle = {Studies on the aetiology of kiwifruit decline},
  journal    = {Plant and Soil},
  volume     = {456},
  number     = {1},
  pages      = {113--128},
  month      = nov,
  year       = {2020},
  issn       = {1573-5036},
  doi        = {10.1007/s11104-020-04671-5},
  url        = {https://doi.org/10.1007/s11104-020-04671-5},
  urldate    = {2020-10-30},
  language   = {en},
  abstract   = {In 2012, Italian kiwifruit orchards were hit by a serious root disease of unknown aetiology (kiwifruit decline, KD) that still causes extensive damage to the sector. While waterlogging was soon observed to be associated with its outbreak, the putative role of soil microbiota remains unknown. This work investigates the role of these two factors in the onset of the disease.},
}

@article{bardi_early_2020,
  author     = {Bardi, Laura},
  title      = {Early {Kiwifruit} {Decline}: {A} {Soil}-{Borne} {Disease} {Syndrome} or a {Climate} {Change} {Effect} on {Plant}–{Soil} {Relations}?},
  shorttitle = {Early {Kiwifruit} {Decline}},
  journal    = {Frontiers in Agronomy},
  volume     = {2},
  year       = {2020},
  publisher  = {Frontiers},
  issn       = {2673-3218},
  doi        = {10.3389/fagro.2020.00003},
  url        = {https://www.frontiersin.org/articles/10.3389/fagro.2020.00003/full},
  urldate    = {2020-10-30},
  language   = {en},
  keywords   = {abiotic stress, Climate Change, Kiwifruit, Physiological disorder, roots, Soil},
  abstract   = {Kiwifruit early decline is a physiological disorder whose appearance was reported for the first time in New Zealand, following a cyclone that caused a heavy and prolonged flooding of kiwifruit orchards. Following studies on kiwifruit vine physiology and anatomy demonstrated that this plant has a significant water demand, but is also extremely sensible to roots waterlogging and soil anoxic conditions. Pathogenic microorganisms were sometimes identified in soil and root samples of declining plants, but they were not considered the primary cause of kiwifruit decline, and their presence in roots was considered a consequence of waterlogging and plant weakening. Agronomic practices have been developed and adopted to deliver water in amounts adequate to plant needs, but avoiding excess and stagnation in soil, and to improve soil aeration. However, in recent years early decline is spreading worldwide and is affecting even orchards in which waterlogging is prevented or is only occasionally caused by intense local rainfall. A global overview of the knowledge on botanical, physiological and ecological traits of kiwifruit, along with the examination of phenomena concomitant to early decline appearance, can help to identify the causes and the possible actions to prevent its occurrence. Some assumptions and possible solution attempts are proposed.},
}

% NOTE(review): this entry duplicates imai_experimental_2013 (same work; only urldate differs).
% It is kept so documents that already cite imai_experimental_2013-1 still resolve;
% prefer imai_experimental_2013 for new citations.
@article{imai_experimental_2013-1,
  author    = {Imai, Kosuke and Tingley, Dustin and Yamamoto, Teppei},
  title     = {Experimental designs for identifying causal mechanisms},
  journal   = {Journal of the Royal Statistical Society. Series A (Statistics in Society)},
  volume    = {176},
  number    = {1},
  pages     = {5--32},
  year      = {2013},
  publisher = {Wiley},
  issn      = {0964-1998},
  url       = {https://www.jstor.org/stable/23355175},
  urldate   = {2020-10-30},
  abstract  = {Experimentation is a powerful methodology that enables scientists to establish causal claims empirically. However, one important criticism is that experiments merely provide a black box view of causality and fail to identify causal mechanisms. Specifically, critics argue that, although experiments can identify average causal effects, they cannot explain the process through which such effects come about. If true, this represents a serious limitation of experimentation, especially for social and medical science research that strives to identify causal mechanisms. We consider several experimental designs that help to identify average natural indirect effects. Some of these designs require the perfect manipulation of an intermediate variable, whereas others can be used even when only imperfect manipulation is possible. We use recent social science experiments to illustrate the key ideas that underlie each of the designs proposed.},
}

@article{balasooriya_possible_2019,
  author     = {Balasooriya, Shyamalie and Munasinghe, Harshaka and Herath, A. T. and Diyabalanage, Saranga and Ileperuma, O. A. and Manthrithilake, Herath and Daniel, Christoph and Amann, Kerstin and Zwiener, Christian and Barth, Johannes A. C. and Chandrajith, Rohana},
  title      = {Possible links between groundwater geochemistry and chronic kidney disease of unknown etiology ({CKDu}): an investigation from the {Ginnoruwa} region in {Sri} {Lanka}},
  shorttitle = {Possible links between groundwater geochemistry and chronic kidney disease of unknown etiology ({CKDu})},
  journal    = {Exposure and Health},
  month      = dec,
  year       = {2019},
  issn       = {2451-9685},
  doi        = {10.1007/s12403-019-00340-w},
  url        = {https://doi.org/10.1007/s12403-019-00340-w},
  urldate    = {2020-11-01},
  language   = {en},
  abstract   = {Since at least two decades, Chronic Kidney Disease of Uncertain Etiology (CKDu) has become an increasingly discussed health issue in Sri Lanka and as well as in other tropical regions. Areas that are particularly affected with the disease are mostly located in the dry zone of Sri Lanka. The disease is more prominent among communities that consume groundwater as their main source of drinking water. Hydrogeochemical investigations were carried out in the Ginnoruwa area, a known hotspot of CKDu. It revealed possible links between drinking water chemistry and the spreading of the disease. This work compares hydrogeochemical data of drinking water sources of wells whose consumers are affected by CKDu and other nearby wells whose consumers were not affected by the disease. A total of 63 groundwater samples were collected from selected wells. About one-third of these samples (i.e., 19) were collected from wells used by CKDu patients. Significantly higher values of pH, total hardness, electrical conductivity, Ca2+, Mg2+, F−, Cl−, PO43−, and SO42− were found in wells that were used by CKDu patients. Mean contents of Na+, Ca2+, and Mg2+ in CKDu affected wells were 33.8 mg/L, 30.1 mg/L, and 14.9 mg/L, respectively, compared to 23.1 mg/L, 26.7 mg/L, and 9.65 mg/L in non-CKDu wells. Differences in major ion geochemistry in groundwaters are possibly governed by variable time periods of water storage in fractured hard rock aquifers in this region. Hydrogeochemical parameters were statistically compared by a Mann–Whitney U test and indicated significant differences in total dissolved solids (TDS) (p = 0.016), SO42− (p = 0.005), PO43− (p = 0.030), F− (p = 0.048), Na+ (p = 0.008), and Mg2+(p = 0.008) between non-CKDu and CKDu wells at p = 0.050 level. Other suspected solutes such as nephrotoxic trace elements including As, Cd, and Pb were similar in both types of wells. They were also lower than the accepted guideline limits of the World Health Organization (WHO). 
Results of this study suggest that fluoride in drinking water in combination with water hardness may be one of the responsible factors for kidney damage and progression of the disease. This may be particularly the case when elevated amounts of Mg2+ are present in hard groundwater.},
}

@book{chait_data_2020,
  author    = {Chait, Gavin},
  title     = {Data as a {Science}},
  publisher = {Whythawk},
  year      = {2020},
  url       = {https://github.com/whythawk/data-as-a-science},
  urldate   = {2020-11-02},
  copyright = {Creative Commons Attribution-ShareAlike 4.0 International and the GNU Affero General Public License},
  abstract  = {A data scientist is a researcher who answers a research question using data, and can lead the development of the research process. They may design the methods to acquire primary or secondary sources of data that inform the research process, monitor and ensure ethical responsibilities, curate the research data and results, or communicate the process and results to stakeholders. Coding is incidental to that process, and it is possible to be a data scientist without programming at all. The course is based on the Sloyd model of technical training. Each lesson is discrete, building on the previous lesson, and provides a functional and holistic understanding of the scientific method as it applies to data. It is not about learning an algorithm and applying it to abstract, arbitrary data. The course has the objective of training complete data scientists, you will learn how research works and apply tools to a specific case-study.},
}