book.bib


@article{piolat_version_2011,
	title = {La version française du dictionnaire pour le {LIWC} : modalités de construction et exemples d’utilisation},
	volume = {56},
	issn = {0033-2984},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0033298411000355},
	doi = {10.1016/j.psfr.2011.07.002},
	shorttitle = {La version française du dictionnaire pour le {LIWC}},
	pages = {145--159},
	number = {3},
	journaltitle = {Psychologie Française},
	shortjournal = {Psychologie Française},
	author = {Piolat, A. and Booth, R. J. and Chung, C. K. and Davids, M. and Pennebaker, J. W.},
	urldate = {2020-07-01},
	date = {2011-09},
	langid = {french},
}

% The conference city is stored in venue; biblatex reserves location for the publisher's place.
@inproceedings{denecke_using_2008,
	venue = {Cancun, Mexico},
	title = {Using {SentiWordNet} for multilingual sentiment analysis},
	isbn = {978-1-4244-2161-9 978-1-4244-2162-6},
	url = {http://ieeexplore.ieee.org/document/4498370/},
	doi = {10.1109/ICDEW.2008.4498370},
	eventtitle = {2008 {IEEE} 24th International Conference on Data Engineering Workshop ({ICDE} Workshop 2008)},
	pages = {507--512},
	booktitle = {2008 {IEEE} 24th International Conference on Data Engineering Workshop},
	publisher = {{IEEE}},
	author = {Denecke, Kerstin},
	urldate = {2020-07-02},
	date = {2008-04},
}

@book{boullier_opinion_2012,
	title = {Opinion mining et sentiment analysis : méthodes et outils},
	isbn = {978-2-8218-1887-3 978-2-8218-1227-7 978-2-8218-1226-0},
	location = {Marseille},
	publisher = {{OpenEdition} Press},
	author = {Boullier, Dominique and Lohard, Audrey},
	date = {2012},
	langid = {french},
	note = {{OCLC}: 1096948624},
}

% Page range lives in pages (no "pp." prefix); the conference city lives in venue.
@inproceedings{baccianella_sentiwordnet_2010,
	venue = {Valletta, Malta},
	title = {{SentiWordNet} 3.0: An Enhanced Lexical Resource for Sentiment Analysis and Opinion Mining},
	pages = {2200--2204},
	eventtitle = {{LREC} 2010},
	booktitle = {Proceedings of the 7th Conference on Language Resources and Evaluation},
	author = {Baccianella, Stefano and Esuli, Andrea and Sebastiani, Fabrizio},
	date = {2010},
}

@article{chan_sentiment_2017,
	title = {Sentiment analysis in financial texts},
	volume = {94},
	issn = {0167-9236},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0167923616301828},
	doi = {10.1016/j.dss.2016.10.006},
	pages = {53--64},
	journaltitle = {Decision Support Systems},
	shortjournal = {Decision Support Systems},
	author = {Chan, Samuel W. K. and Chong, Mickey W. C.},
	urldate = {2020-07-12},
	date = {2017-02},
	langid = {english},
}

% The article is written in French, so langid is french; the abstract field holds both the French and English abstracts.
@article{duval_analyse_2016,
	title = {L'analyse automatisée du ton médiatique : construction et utilisation de la version française du {Lexicoder} {Sentiment} {Dictionary}},
	volume = {49},
	issn = {0008-4239, 1744-9324},
	url = {https://www.cambridge.org/core/product/identifier/S000842391600055X/type/journal_article},
	doi = {10.1017/S000842391600055X},
	shorttitle = {L'analyse automatisée du ton médiatique},
	abstract = {Résumé Cet article introduit un nouveau dictionnaire permettant l'analyse automatisée du ton des médias francophones, que nous avons appelé Lexicoder Sentiment Dictionnaire Français ( {LSDFr} ) en référence au lexique anglophone de Young et Soroka (2012), Lexicoder Sentiment Dictionary ( {LSD} ) à partir duquel le {LSDFr} a été construit. Une fois construit, nous comparons le {LSDFr} au seul autre dictionnaire francophone existant de ce genre, Linguistic Inquiry and Word Count ( {LIWC} ). Nous testons ensuite la validité interne du {LSDFr} en le comparant avec un corpus de textes codés manuellement. Nous testons enfin la validité externe du {LSDFr} en mesurant jusqu'où le ton médiatique, calculé à l'aide de notre dictionnaire, prédit les intentions de vote des Québécois lors des quatre dernières campagnes électorales. En développant cet outil, notre objectif est de permettre à d'autres chercheurs d'effectuer des analyses médiatiques dans un corpus de textes comparables en français. , Abstract This article introduces a new dictionary for the automated analysis of the tone of French media. We named it the French Lexicoder Sentiment Dictionary ( {LSDFr} ) in reference to the English lexicon developed by Young and Soroka (2012), the Lexicoder Sentiment Dictionary ( {LSD} ), from which the {LSDFr} was built. We compare the {LSDFr} to the only other French sentiment lexicon, Linguistic Inquiry and Word Count ( {LIWC} ). First, we detail the construction of the dictionary. We then test the internal validity of the {LSDFr} comparing it with a corpus of manually coded texts. Finally, we test the external validity of {LSDFr} by measuring how the media tone, calculated using our dictionary, predicts voting intentions in the last four Quebec elections. Our goal is to enable other researchers to conduct media analyses with a comparable corpus of texts in French.},
	pages = {197--220},
	number = {2},
	journaltitle = {Canadian Journal of Political Science},
	author = {Duval, Dominic and Pétry, François},
	urldate = {2019-05-24},
	date = {2016-06},
	langid = {french},
}

@article{fruchterman_graph_1991,
	title = {Graph drawing by force-directed placement},
	volume = {21},
	issn = {0038-0644, 1097-024X},
	url = {http://doi.wiley.com/10.1002/spe.4380211102},
	doi = {10.1002/spe.4380211102},
	pages = {1129--1164},
	number = {11},
	journaltitle = {Software: Practice and Experience},
	author = {Fruchterman, Thomas M. J. and Reingold, Edward M.},
	urldate = {2019-08-11},
	date = {1991-11},
	langid = {english},
}

@article{arnold_tidy_2017,
	title = {A Tidy Data Model for Natural Language Processing using {cleanNLP}},
	volume = {9},
	issn = {2073-4859},
	url = {https://journal.r-project.org/archive/2017/RJ-2017-035/index.html},
	doi = {10.32614/RJ-2017-035},
	abstract = {Recent advances in natural language processing have produced libraries that extract low-level features from a collection of raw texts. These features, known as annotations, are usually stored internally in hierarchical, tree-based data structures. This paper proposes a data model to represent annotations as a collection of normalized relational data tables optimized for exploratory data analysis and predictive modeling. The R package {cleanNLP}, which calls one of two state of the art {NLP} libraries ({CoreNLP} or {spaCy}), is presented as an implementation of this data model. It takes raw text as an input and returns a list of normalized tables. Specific annotations provided include tokenization, part of speech tagging, named entity recognition, sentiment analysis, dependency parsing, coreference resolution, and word embeddings. The package currently supports input text in English, German, French, and Spanish.},
	pages = {248--267},
	number = {2},
	journaltitle = {The R Journal},
	author = {Arnold, Taylor},
	urldate = {2019-08-11},
	date = {2017},
	langid = {english},
}

% The name is given in "von Last, First" form so the particle "van der" is parsed as such rather than as part of a literal name.
@article{van_der_maaten_laurens_visualizing_2008,
	title = {Visualizing Data using t-{SNE}},
	volume = {9},
	pages = {2579--2605},
	journaltitle = {Journal of Machine Learning Research},
	author = {van der Maaten, Laurens and Hinton, Geoffrey},
	date = {2008},
}

@article{shirdastian_using_2019,
	title = {Using big data analytics to study brand authenticity sentiments: The case of Starbucks on Twitter},
	volume = {48},
	issn = {0268-4012},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0268401217302657},
	doi = {10.1016/j.ijinfomgt.2017.09.007},
	shorttitle = {Using big data analytics to study brand authenticity sentiments},
	pages = {291--307},
	journaltitle = {International Journal of Information Management},
	author = {Shirdastian, Hamid and Laroche, Michel and Richard, Marie-Odile},
	urldate = {2019-10-08},
	date = {2019-10},
	langid = {english},
}

@article{plutchik_psychoevolutionary_1982,
	author       = {Plutchik, Robert},
	title        = {A psychoevolutionary theory of emotions},
	journaltitle = {Social Science Information},
	date         = {1982-07},
	volume       = {21},
	number       = {4},
	pages        = {529--553},
	issn         = {0539-0184, 1461-7412},
	doi          = {10.1177/053901882021004003},
	url          = {http://journals.sagepub.com/doi/10.1177/053901882021004003},
	urldate      = {2019-01-18},
	langid       = {english},
}

% The full journal name goes in journaltitle; the abbreviation is kept in shortjournal.
@article{blei_latent_2003,
	title = {Latent Dirichlet Allocation},
	volume = {3},
	issn = {1532-4435},
	url = {http://dl.acm.org/citation.cfm?id=944919.944937},
	pages = {993--1022},
	journaltitle = {Journal of Machine Learning Research},
	shortjournal = {J. Mach. Learn. Res.},
	author = {Blei, David M. and Ng, Andrew Y. and Jordan, Michael I.},
	date = {2003-03},
}

@article{gunter_sentiment_2014,
	author       = {Gunter, Barrie and Koteyko, Nelya and Atanasova, Dimitrinka},
	title        = {Sentiment Analysis: A Market-Relevant and Reliable Measure of Public Feeling?},
	shorttitle   = {Sentiment Analysis},
	journaltitle = {International Journal of Market Research},
	date         = {2014-03},
	volume       = {56},
	number       = {2},
	pages        = {231--247},
	issn         = {1470-7853, 2515-2173},
	doi          = {10.2501/IJMR-2014-014},
	url          = {http://journals.sagepub.com/doi/10.2501/IJMR-2014-014},
	urldate      = {2019-10-20},
	langid       = {english},
	abstract     = {This paper critically examines emergent research with sentiment analysis tools to assess their current status and relevance to applied opinion and behaviour measurement. The rapid spread of online news and online chatter in blogs, micro-blogs and social media sites has created a potentially rich source of public opinion. Waves of public feeling are vented spontaneously on a wide range of issues on a minute-by-minute basis in the online world. These online discourses are continually being refreshed, and businesses and advertisers, governments and policy makers have woken up to the fact that this universe of self-perpetuating human sentiment could represent a valuable resource to guide political and business decisions. The massive size of this repository of emotional content renders manual analysis of it feasible only for tiny portions of its totality, and even then can be labour intensive. Computer scientists have however produced software tools that can apply linguistic rules to provide electronic readings of meanings and emotions. These tools are now being utilised by applied social science and market researchers to yield sentiment profiles from online discourses created within specific platforms that purport to represent reliable substitutes for more traditional, offline measures of public opinion. This paper considers what these tools have demonstrated so far and where caution in their application is still called for.},
}

@article{tausczik_psychological_2010,
	author       = {Tausczik, Yla R. and Pennebaker, James W.},
	title        = {The Psychological Meaning of Words: {LIWC} and Computerized Text Analysis Methods},
	shorttitle   = {The Psychological Meaning of Words},
	journaltitle = {Journal of Language and Social Psychology},
	date         = {2010-03},
	volume       = {29},
	number       = {1},
	pages        = {24--54},
	issn         = {0261-927X, 1552-6526},
	doi          = {10.1177/0261927X09351676},
	url          = {http://journals.sagepub.com/doi/10.1177/0261927X09351676},
	urldate      = {2019-11-07},
	langid       = {english},
}

% booktitle carries the full workshop proceedings title; volume 718 is the number within the CEUR series.
@inproceedings{nielsen_new_2011,
	title = {A New {ANEW}: Evaluation of a Word List for Sentiment Analysis in Microblogs},
	volume = {718},
	url = {http://dblp.uni-trier.de/db/conf/msm/msm2011.html#Nielsen11},
	series = {{CEUR} Workshop Proceedings},
	pages = {93--98},
	booktitle = {Proceedings of the {ESWC2011} Workshop on 'Making Sense of Microposts': Big things come in small packages},
	publisher = {{CEUR}-{WS}.org},
	author = {Nielsen, Finn Årup},
	editor = {Rowe, Matthew and Stankovic, Milan and Dadzie, Aba-Sah and Hardey, Mariann},
	date = {2011},
	keywords = {dblp},
}

@inproceedings{ding_holistic_2008,
	author    = {Ding, Xiaowen and Liu, Bing and Yu, Philip S.},
	title     = {A Holistic Lexicon-based Approach to Opinion Mining},
	booktitle = {Proceedings of the 2008 International Conference on Web Search and Data Mining},
	series    = {{WSDM} '08},
	date      = {2008},
	pages     = {231--240},
	publisher = {{ACM}},
	location  = {New York, {NY}, {USA}},
	isbn      = {978-1-59593-927-2},
	doi       = {10.1145/1341531.1341561},
	url       = {http://doi.acm.org/10.1145/1341531.1341561},
	keywords  = {context dependent opinions, opinion mining, sentiment analysis},
}

@article{puschmann_turning_2018,
	title = {Turning Words Into Consumer Preferences: How Sentiment Analysis Is Framed in Research and the News Media},
	volume = {4},
	issn = {2056-3051},
	url = {http://journals.sagepub.com/doi/10.1177/2056305118797724},
	doi = {10.1177/2056305118797724},
	shorttitle = {Turning Words Into Consumer Preferences},
	pages = {205630511879772},
	number = {3},
	journaltitle = {Social Media + Society},
	author = {Puschmann, Cornelius and Powell, Alison},
	urldate = {2019-01-15},
	date = {2018-07},
	langid = {english},
}

% A Zenodo data release, hence the dataset entry type; the abstract is plain text with the HTML markup residue removed.
@dataset{banda_large-scale_2020,
	title = {A large-scale {COVID}-19 Twitter chatter dataset for open scientific research - an international collaboration},
	rights = {Open Access},
	url = {https://zenodo.org/record/3757272},
	abstract = {Due to the relevance of the {COVID}-19 global pandemic, we are releasing our dataset of tweets acquired from the Twitter Stream related to {COVID}-19 chatter. Since our first release we have received additional data from our new collaborators, allowing this resource to grow to its current size. Dedicated data gathering started from March 11th yielding over 4 million tweets a day. We have added additional data provided by our new collaborators from January 27th to March 27th, to provide extra longitudinal coverage. The data collected from the stream captures all languages, but the higher prevalence are: English, Spanish, and French. We release all tweets and retweets on the full\_dataset.tsv file (205,409,413 unique tweets), and a cleaned version with no retweets on the full\_dataset-clean.tsv file (44,726,568 unique tweets). There are several practical reasons for us to leave the retweets, tracing important tweets and their dissemination is one of them. For {NLP} tasks we provide the top 1000 frequent terms in frequent\_terms.csv, the top 1000 bigrams in frequent\_bigrams.csv, and the top 1000 trigrams in frequent\_trigrams.csv. Some general statistics per day are included for both datasets in the statistics-full\_dataset.tsv and statistics-full\_dataset-clean.tsv files. More details can be found (and will be updated faster at: https://github.com/thepanacealab/covid19\_twitter) and our pre-print about the dataset (https://arxiv.org/abs/2004.03688) As always, the tweets distributed here are only tweet identifiers (with date and time added) due to the terms and conditions of Twitter to re-distribute Twitter data {ONLY} for research purposes. The need to be hydrated to be used.},
	publisher = {Zenodo},
	author = {Banda, Juan M. and Tekumalla, Ramya and Wang, Guanyu and Yu, Jingyuan and Liu, Tuo and Ding, Yuning and Chowell, Gerardo},
	urldate = {2020-04-25},
	date = {2020-04},
	langid = {english},
	doi = {10.5281/zenodo.3757272},
	keywords = {covid-19, covid19, nlp, social media, twitter},
}

@article{pearce_no_2003,
	title = {Disaster Management and Community Planning, and Public Participation: How to Achieve Sustainable Hazard Mitigation},
	volume = {28},
	issn = {0921-030X},
	url = {http://link.springer.com/10.1023/A:1022917721797},
	doi = {10.1023/A:1022917721797},
	pages = {211--228},
	number = {2},
	journaltitle = {Natural Hazards},
	author = {Pearce, Laurie},
	urldate = {2020-08-22},
	date = {2003},
}

% The publisher's "Review Article" label is kept in titleaddon rather than inside the title itself.
@article{cambria_jumping_2014,
	title = {Jumping {NLP} Curves: A Review of Natural Language Processing Research},
	titleaddon = {Review Article},
	volume = {9},
	issn = {1556-603X},
	url = {http://ieeexplore.ieee.org/document/6786458/},
	doi = {10.1109/MCI.2014.2307227},
	shorttitle = {Jumping {NLP} Curves},
	pages = {48--57},
	number = {2},
	journaltitle = {{IEEE} Computational Intelligence Magazine},
	shortjournal = {{IEEE} Comput. Intell. Mag.},
	author = {Cambria, Erik and White, Bebo},
	urldate = {2020-08-23},
	date = {2014-05},
	file = {Cambria et White - 2014 - Jumping NLP Curves A Review of Natural Language P.pdf:C\:\\Users\\33623\\Zotero\\storage\\ZGN9TBJU\\Cambria et White - 2014 - Jumping NLP Curves A Review of Natural Language P.pdf:application/pdf},
}

@article{anastasopoulos_computational_2017,
	author       = {Anastasopoulos, Lefteris Jason and Moldogaziev, Tima T. and Scott, Tyler},
	title        = {Computational Text Analysis for Public Management Research},
	journaltitle = {{SSRN} Electronic Journal},
	shortjournal = {{SSRN} Journal},
	date         = {2017},
	issn         = {1556-5068},
	doi          = {10.2139/ssrn.3269520},
	url          = {https://www.ssrn.com/abstract=3269520},
	urldate      = {2020-08-23},
	langid       = {english},
}

% The issue number is a bare integer in number; the style supplies any "n°" prefix.
@article{bourdieu_opinion_1973,
	title = {L'opinion publique n'existe pas},
	pages = {1292--1309},
	number = {318},
	journaltitle = {Les Temps modernes},
	author = {Bourdieu, Pierre},
	date = {1973-01},
}

@article{humphreys_automated_2018,
	title = {Automated Text Analysis for Consumer Research},
	volume = {44},
	issn = {0093-5301, 1537-5277},
	url = {https://academic.oup.com/jcr/article/44/6/1274/4283031},
	doi = {10.1093/jcr/ucx104},
	abstract = {The amount of digital text available for analysis by consumer researchers has risen dramatically. Consumer discussions on the internet, product reviews, and digital archives of news articles and press releases are just a few potential sources for insights about consumer attitudes, interaction, and culture. Drawing from linguistic theory and methods, this article presents an overview of automated text analysis, providing integration of linguistic theory with constructs commonly used in consumer research, guidance for choosing amongst methods, and advice for resolving sampling and statistical issues unique to text analysis. We argue that although automated text analysis cannot be used to study all phenomena, it is a useful tool for examining patterns in text that neither researchers nor consumers can detect unaided. Text analysis can be used to examine psychological and sociological constructs in consumer-produced digital text by enabling discovery or by providing ecological validity.},
	pages = {1274--1306},
	number = {6},
	journaltitle = {Journal of Consumer Research},
	author = {Humphreys, Ashlee and Wang, Rebecca Jen-Hui},
	editor = {Fischer, Eileen and Price, Linda},
	urldate = {2020-11-15},
	date = {2018-04-01},
	langid = {english},
	file = {Humphreys et Wang - 2018 - Automated Text Analysis for Consumer Research.pdf:C\:\\Users\\33623\\Zotero\\storage\\GRJZQ84D\\Humphreys et Wang - 2018 - Automated Text Analysis for Consumer Research.pdf:application/pdf},
}

@article{lock_quantitative_2015,
	title = {Quantitative content analysis as a method for business ethics research},
	volume = {24},
	issn = {0962-8770},
	url = {http://doi.wiley.com/10.1111/beer.12095},
	doi = {10.1111/beer.12095},
	pages = {S24--S40},
	journaltitle = {Business Ethics: A European Review},
	shortjournal = {Bus Ethics Eur Rev},
	author = {Lock, Irina and Seele, Peter},
	urldate = {2020-11-15},
	date = {2015-07},
	langid = {english},
	file = {Lock et Seele - 2015 - Quantitative content analysis as a method for busi.pdf:C\:\\Users\\33623\\Zotero\\storage\\G33JEG6F\\Lock et Seele - 2015 - Quantitative content analysis as a method for busi.pdf:application/pdf},
}

@article{coleman_computer_1975,
	title = {A computer readability formula designed for machine scoring},
	volume = {60},
	issn = {0021-9010},
	url = {http://content.apa.org/journals/apl/60/2/283},
	doi = {10.1037/h0076540},
	pages = {283--284},
	number = {2},
	journaltitle = {Journal of Applied Psychology},
	author = {Coleman, Meri and Liau, T. L.},
	urldate = {2019-01-16},
	date = {1975},
	langid = {english},
}

% The conference site is stored in venue; the editor name uses "von Last, First" form so the particle "van" is parsed correctly.
% NOTE(review): "Twelth" in booktitle is left as found - confirm against the published proceedings title before correcting.
@inproceedings{canini_online_2009,
	venue = {Hilton Clearwater Beach Resort, Clearwater Beach, Florida {USA}},
	title = {Online Inference of Topics with Latent Dirichlet Allocation},
	volume = {5},
	url = {http://proceedings.mlr.press/v5/canini09a.html},
	series = {Proceedings of Machine Learning Research},
	abstract = {Inference algorithms for topic models are typically designed to be run over an entire collection of documents after they have been observed. However, in many applications of these models, the collection grows over time, making it infeasible to run batch algorithms repeatedly. This problem can be addressed by using online algorithms, which update estimates of the topics as each document is observed. We introduce two related Rao-Blackwellized online inference algorithms for the latent Dirichlet allocation ({LDA}) model – incremental Gibbs samplers and particle filters – and compare their runtime and performance to that of existing algorithms.},
	pages = {65--72},
	booktitle = {Proceedings of the Twelth International Conference on Artificial Intelligence and Statistics},
	publisher = {{PMLR}},
	author = {Canini, Kevin and Shi, Lei and Griffiths, Thomas},
	editor = {van Dyk, David and Welling, Max},
	date = {2009-04},
}

% arXiv preprint: the identifier lives in the eprint fields instead of a journal title.
% NOTE(review): eprintclass cs.CL is taken from the "Computation and Language" keyword - confirm.
@online{suster_investigation_2015,
	title = {An investigation into language complexity of World-of-Warcraft game-external texts},
	url = {http://arxiv.org/abs/1502.02655},
	abstract = {We present a language complexity analysis of World of Warcraft ({WoW}) community texts, which we compare to texts from a general corpus of web English. Results from several complexity types are presented, including lexical diversity, density, readability and syntactic complexity. The language of {WoW} texts is found to be comparable to the general corpus on some complexity measures, yet more specialized on other measures. Our findings can be used by educators willing to include game-related activities into school curricula.},
	eprinttype = {arxiv},
	eprint = {1502.02655},
	eprintclass = {cs.CL},
	author = {Šuster, Simon},
	urldate = {2019-01-21},
	date = {2015-02},
	keywords = {Computer Science - Computation and Language},
}

% Workshop paper: the proceedings title belongs in booktitle and the conference city in venue.
@inproceedings{sievert_ldavis_2014,
	venue = {Baltimore, Maryland, {USA}},
	title = {{LDAvis}: A method for visualizing and interpreting topics},
	doi = {10.3115/v1/W14-3110},
	pages = {63--70},
	booktitle = {Proceedings of the Workshop on Interactive Language Learning, Visualization, and Interfaces},
	publisher = {Association for Computational Linguistics},
	author = {Sievert, Carson and Shirley, Kenneth},
	date = {2014-06},
}

@article{liu_towards_2017,
	title = {Towards better analysis of machine learning models: A visual analytics perspective},
	volume = {1},
	issn = {2468-502X},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S2468502X17300086},
	doi = {10.1016/j.visinf.2017.01.006},
	shorttitle = {Towards better analysis of machine learning models},
	pages = {48--56},
	number = {1},
	journaltitle = {Visual Informatics},
	author = {Liu, Shixia and Wang, Xiting and Liu, Mengchen and Zhu, Jun},
	urldate = {2018-12-22},
	date = {2017-03},
	langid = {english},
}

@article{tibshirani_regression_2011,
	title = {Regression shrinkage and selection via the lasso: a retrospective},
	volume = {73},
	issn = {1369-7412},
	url = {http://doi.wiley.com/10.1111/j.1467-9868.2011.00771.x},
	doi = {10.1111/j.1467-9868.2011.00771.x},
	shorttitle = {Regression shrinkage and selection via the lasso},
	pages = {273--282},
	number = {3},
	journaltitle = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
	author = {Tibshirani, Robert},
	urldate = {2018-12-22},
	date = {2011-06},
	langid = {english},
}

@article{pekar_discovery_2008,
	title = {Discovery of subjective evaluations of product features in hotel reviews},
	volume = {14},
	issn = {1356-7667, 1479-1870},
	url = {http://journals.sagepub.com/doi/10.1177/1356766707087522},
	doi = {10.1177/1356766707087522},
	pages = {145--155},
	number = {2},
	journaltitle = {Journal of Vacation Marketing},
	author = {Pekar, Viktor and Ou, Shiyan},
	urldate = {2019-01-15},
	date = {2008-04},
	langid = {english},
}

@article{hug_loi_2004,
	author       = {Hug, Marc},
	title        = {La loi de Menzerath appliquée à un ensemble de textes},
	journaltitle = {Lexicometrica},
	date         = {2004},
}

% Technical report rather than a journal article, hence the report entry type with type and institution.
% NOTE(review): report number, institution and second author were added from memory - confirm against the DTIC record.
@report{senter_automated_1967,
	title = {Automated Readability Index},
	type = {techreport},
	number = {AMRL-TR-66-220},
	institution = {Aerospace Medical Research Laboratories},
	location = {Wright-Patterson Air Force Base, {OH}},
	author = {Senter, R. J. and Smith, E. A.},
	date = {1967-11},
}

@article{tweedie_how_1998,
	author       = {Tweedie, Fiona J. and Baayen, R. Harald},
	title        = {How Variable May a Constant be? Measures of Lexical Richness in Perspective},
	journaltitle = {Computers and the Humanities},
	date         = {1998},
	volume       = {32},
	pages        = {323--352},
}

@report{bennani_les_2019,
	author      = {Bennani, Hamza and Gandré, Pauline and Monnery, Benjamin},
	title       = {Les déterminants locaux de la participation numérique au Grand débat national: une analyse économétrique},
	type        = {{EconomiX} Working Papers},
	number      = {2019-7},
	institution = {University of Paris Nanterre, {EconomiX}},
	date        = {2019},
	url         = {https://EconPapers.repec.org/RePEc:drm:wpaper:2019-7},
	abstract    = {This paper analyses the local determinants of the electronic participation to the "Grand débat". First, we highlight the spatial heterogeneity of the participants using their zip code. Second, we use an econometric approach to assess the local determinants of the general participation and the participation on each of the four topics of the "Grand débat". The results show that the median standard of living and the education level are the main determinants of the general participation, whereas some specific variables explain the participation of each of the four topics.},
	keywords    = {electronic participation, Grand débat, local determinants},
}

% Author is given as "Last, First" so the entry sorts under Serca; the title is split into title and subtitle so the style supplies the separator.
@book{isabelle_serca_les_2010,
	location = {Paris},
	title = {Les coutures apparentes de la {Recherche}},
	subtitle = {{Proust} et la ponctuation},
	series = {Recherches proustiennes},
	publisher = {Honoré Champion},
	author = {Serca, Isabelle},
	date = {2010},
	langid = {french},
}

@article{canut_sociolinguistique_2000,
	author       = {Canut, Cécile},
	title        = {De la sociolinguistique à la sociologie du langage : de l'usage des frontières},
	shorttitle   = {De la sociolinguistique à la sociologie du langage},
	journaltitle = {Langage et société},
	date         = {2000},
	volume       = {91},
	number       = {1},
	pages        = {89},
	issn         = {0181-4095, 2101-0382},
	doi          = {10.3917/ls.091.0089},
	url          = {http://www.cairn.info/revue-langage-et-societe-2000-1-page-89.htm},
	urldate      = {2019-07-14},
	langid       = {french},
}

@article{abdaoui_feel_2017,
	author       = {Abdaoui, Amine and Azé, Jérôme and Bringay, Sandra and Poncelet, Pascal},
	title        = {{FEEL}: a French Expanded Emotion Lexicon},
	shorttitle   = {{FEEL}},
	journaltitle = {Language Resources and Evaluation},
	date         = {2017-09},
	volume       = {51},
	number       = {3},
	pages        = {833--855},
	issn         = {1574-020X, 1574-0218},
	doi          = {10.1007/s10579-016-9364-5},
	url          = {http://link.springer.com/10.1007/s10579-016-9364-5},
	urldate      = {2019-07-14},
	langid       = {english},
}

% Chapter in a collective volume, hence incollection with booktitle; the former volume value 1952-59 came from the reprint's title and is not a volume number.
% NOTE(review): publisher and location were added from memory - confirm.
@incollection{firth_synopsis_1957,
	title = {A synopsis of linguistic theory 1930--55},
	abstract = {Reprinted in: Palmer, F. R. (ed.) (1968). Selected Papers of J. R. Firth 1952-59, pages 168-205. Longmans, London.},
	pages = {1--32},
	booktitle = {Studies in Linguistic Analysis},
	booktitleaddon = {Special volume of the Philological Society},
	location = {Oxford},
	publisher = {Blackwell},
	author = {Firth, J. R.},
	date = {1957},
	keywords = {classic, linguistics, meaning, relatedness, semantic},
}

@book{chomsky_aspects_1969,
	title = {Aspects of the Theory of Syntax},
	isbn = {978-0-262-26050-3},
	url = {https://books.google.fr/books?id=u0ksbFqagU8C},
	location = {Cambridge, {MA}},
	publisher = {{MIT} Press},
	author = {Chomsky, Noam},
	date = {1969},
}

% NOTE(review): the original pages value 6 looks like a page count, so it is stored in pagetotal; the venue and page range are still missing - confirm.
@article{grishman_message_1997,
	title = {Message Understanding Conference-6: A Brief History},
	abstract = {We have recently completed the sixth in a series of "Message Understanding Conferences" which are designed to promote and evaluate research in information extraction. {MUC}-6 introduced several innovations over prior {MUCs}, most notably in the range of different tasks for which evaluations were conducted. We describe some of the motivations for the new format and briefly discuss some of the results of the evaluations.},
	pagetotal = {6},
	author = {Grishman, Ralph and Sundheim, Beth},
	date = {1997},
	langid = {english},
}

@article{verdelhan-bourgade_lucien_2020,
	title = {Lucien Tesnière, professeur de linguistique à Montpellier de 1937 à 1954. L’aventure d’une grammaire},
	volume = {51},
	number = {4562},
	journaltitle = {Bulletin de l'Académie des sciences et lettres de Montpellier},
	author = {Verdelhan-Bourgade, M.},
	date = {2020-12-14},
}

@article{flesch_new_1948,
	title = {A new readability yardstick},
	volume = {32},
	issn = {1939-1854, 0021-9010},
	url = {http://doi.apa.org/getdoi.cfm?doi=10.1037/h0057532},
	doi = {10.1037/h0057532},
	pages = {221--233},
	number = {3},
	journaltitle = {Journal of Applied Psychology},
	shortjournal = {Journal of Applied Psychology},
	author = {Flesch, Rudolf},
	urldate = {2021-07-08},
	date = {1948},
	langid = {english},
}

@article{thompson_programming_1968,
	author       = {Thompson, Ken},
	title        = {Programming Techniques: Regular expression search algorithm},
	shorttitle   = {Programming Techniques},
	journaltitle = {Communications of the {ACM}},
	shortjournal = {Commun. {ACM}},
	date         = {1968-06},
	volume       = {11},
	number       = {6},
	pages        = {419--422},
	issn         = {0001-0782, 1557-7317},
	doi          = {10.1145/363347.363387},
	url          = {https://dl.acm.org/doi/10.1145/363347.363387},
	urldate      = {2021-07-08},
	langid       = {english},
	abstract     = {A method for locating specific character strings embedded in character text is described and an implementation of this method in the form of a compiler is discussed. The compiler accepts a regular expression as source language and produces an {IBM} 7094 program as object language. The object program then accepts the text to be searched as input and produces a signal every time an embedded string in the text matches the given regular expression. Examples, problems, and solutions are also presented.},
}

% author is given alongside bookauthor so the chapter is labelled and sorted under Jakobson rather than by title.
@inbook{jakobson_linguistics_1981,
	title = {Linguistics and Poetics},
	isbn = {978-90-279-3178-8},
	url = {https://www.degruyter.com/document/doi/10.1515/9783110802122.18/html},
	pages = {18--51},
	booktitle = {Poetry of Grammar and Grammar of Poetry},
	publisher = {De Gruyter Mouton},
	author = {Jakobson, Roman},
	bookauthor = {Jakobson, Roman},
	urldate = {2021-07-29},
	date = {1981-12-31},
	doi = {10.1515/9783110802122.18},
}

% NOTE(review): no journal is recorded and pages 5 may be a page count rather than a page number - confirm against the source.
@article{benzecri_analyse_2006,
	title = {L'analyse de données : Histoire, Bilan, Projets et Perspectives},
	pages = {5},
	author = {Benzécri, Jean-Paul},
	date = {2006},
}

% NOTE(review): institution, report number and the expanded given name were added from memory - confirm against the OSTI record.
@report{fodor_survey_2002,
	title = {A Survey of Dimension Reduction Techniques},
	url = {http://www.osti.gov/servlets/purl/15002155-mumfPN/native/},
	type = {techreport},
	number = {UCRL-ID-148494},
	institution = {Lawrence Livermore National Laboratory},
	author = {Fodor, Imola K.},
	date = {2002-05-09},
	doi = {10.2172/15002155},
}

@article{roberts_model_2016,
	author       = {Roberts, Margaret E. and Stewart, Brandon M. and Airoldi, Edoardo M.},
	title        = {A Model of Text for Experimentation in the Social Sciences},
	journaltitle = {Journal of the American Statistical Association},
	date         = {2016-07-02},
	volume       = {111},
	number       = {515},
	pages        = {988--1003},
	doi          = {10.1080/01621459.2016.1141684},
	url          = {https://www.tandfonline.com/doi/full/10.1080/01621459.2016.1141684},
}

% Self-published course material identified only by a ResearchGate DOI/URL.
% Typed as an online resource: an article entry without a journal prints a
% dangling "In:" under the standard biblatex styles.
@online{balech_nlp_2019,
	title = {{NLP} text mining V4.0 - une introduction - cours programme doctoral},
	url = {http://rgdoi.net/10.13140/RG.2.2.34248.06405},
	doi = {10.13140/RG.2.2.34248.06405},
	abstract = {The purpose of this chapter is to introduce natural language processing techniques and textual analysis, such as the developments of data mining and linguistics define it, automating it by taking advantage of the distributional properties of language. Largely automated, natural language processing techniques sequence a series of operations from the constitution of the corpus to its annotation, resulting in representation and qualification models. These methods are now widely available through the r and python language libraries. They make it possible to exploit the large corpus that digitisation makes it possible to build: consumer comments, news bases, activity reports, interview reports. The purpose of this text is essentially technical, however without giving any operating method. It indicates generic methods that can be used via r and their context of use. This is a short manual of modern textual analysis. For business research.},
	author = {Balech, Sophie and Benavent, Christophe},
	urldate = {2021-08-14},
	date = {2019},
	langid = {english},
}

% No journal is recorded for this PDF import, so it is typed as unpublished
% rather than as an article (which would print a dangling "In:").
% The bare "21" was the PDF page count and is stored as pagetotal.
% NOTE(review): presumably a conference paper (JADT 2016?) - if confirmed, retype
% as inproceedings and add booktitle and the real page range.
@unpublished{beaudouin_retour_2016,
	title = {Retour aux origines de la statistique textuelle: Benzécri et l'école française d'analyse des données},
	abstract = {In this article, we have attempted to trace the history of the statistical analysis of textual data, focusing on the influence of Benzécri’s work and school, and to make explicit their theoretical positions, clearly opposed to {AI} and to Chomskyan linguistics. After a presentation of the intellectual project, as an inductive approach to language based on the exploration of corpora, we present the principles of correspondence analysis, which is the main method developed in the Data Analysis School, used for corpus analysis but also for many other types of datasets. Then, we will focus on textual data analysis. Based on the fact that software programmes have played a major role in the use of these statistical techniques, we shall examine a selection of these, display their specificities and their underlying theoretical bases.},
	pagetotal = {21},
	author = {Beaudouin, Valérie},
	date = {2016},
	langid = {french},
	file = {Beaudouin - 2016 - Retour aux origines de la statistique textuelle B.pdf:C\:\\Users\\33623\\Zotero\\storage\\SPN698W9\\Beaudouin - 2016 - Retour aux origines de la statistique textuelle B.pdf:application/pdf},
}

% ISSN written in the standard hyphenated NNNN-NNNN form, matching the other
% entries of this file that already hyphenate it.
@article{chen_nonnegative_1984,
	title = {The nonnegative rank factorizations of nonnegative matrices},
	volume = {62},
	issn = {0024-3795},
	url = {https://linkinghub.elsevier.com/retrieve/pii/002437958490096X},
	doi = {10.1016/0024-3795(84)90096-X},
	pages = {207--217},
	journaltitle = {Linear Algebra and its Applications},
	author = {Chen, Ji-Cheng},
	urldate = {2021-08-03},
	date = {1984-11},
	langid = {english},
}

% A 232-page monograph with no journal: this is a doctoral thesis, not an
% article. The page count is stored as pagetotal instead of a page range.
% NOTE(review): institution and year are not in the original export - TODO
% confirm against the thesis record before relying on them.
@thesis{limem_methodes_nodate,
	title = {Méthodes informées de factorisation matricielle non-négative. Application à l'identification de sources de particules industrielles.},
	type = {phdthesis},
	institution = {Université du Littoral Côte d'Opale},
	pagetotal = {232},
	author = {Limem, Abdelhakim},
	date = {2014},
	langid = {french},
}

% arXiv preprint. The identifier lives in eprint/eprinttype instead of being
% stuffed into journaltitle; the subject classes remain available in keywords.
% Ligature glyphs left over from PDF extraction in the abstract are normalised.
@online{gillis_why_2014,
	title = {The Why and How of Nonnegative Matrix Factorization},
	url = {http://arxiv.org/abs/1401.5226},
	abstract = {Nonnegative matrix factorization ({NMF}) has become a widely used tool for the analysis of high-dimensional data as it automatically extracts sparse and meaningful features from a set of nonnegative data vectors. We first illustrate this property of {NMF} on three applications, in image processing, text mining and hyperspectral imaging –this is the why. Then we address the problem of solving {NMF}, which is {NP}-hard in general. We review some standard {NMF} algorithms, and also present a recent subclass of {NMF} problems, referred to as near-separable {NMF}, that can be solved efficiently (that is, in polynomial time), even in the presence of noise –this is the how. Finally, we briefly describe some problems in mathematics and computer science closely related to {NMF} via the nonnegative rank.},
	eprint = {1401.5226},
	eprinttype = {arXiv},
	author = {Gillis, Nicolas},
	urldate = {2021-08-03},
	date = {2014-03},
	langid = {english},
	keywords = {Computer Science - Information Retrieval, Computer Science - Machine Learning, Statistics - Machine Learning, Mathematics - Optimization and Control},
}

% No journal is recorded for this PDF import, so it is typed as unpublished
% rather than as an article (which would print a dangling "In:").
% The bare "31" was the PDF page count and is stored as pagetotal.
% NOTE(review): looks like a working paper - add date and hosting institution if known.
@unpublished{cazalet_nonnegative_nodate,
	title = {Nonnegative Matrix Factorization and Financial Applications},
	abstract = {Nonnegative matrix factorization ({NMF}) is a recent tool to analyse multivariate data. It can be compared to other decomposition methods like principal component analysis ({PCA}) or independent component analysis ({ICA}). However, {NMF} differs from them because it requires and imposes the nonnegativity of matrices. In this paper, we use this special feature in order to identify patterns in stock market data. Indeed, we may use {NMF} to estimate common factors from the dynamics of stock prices. In this perspective, we compare {NMF} and clustering algorithms to identify endogenous equity sectors.},
	pagetotal = {31},
	author = {Cazalet, Zélia and Roncalli, Thierry},
	langid = {english},
}

% The place recorded by the export is where the conference was held, so it is
% stored in venue; location is reserved for the publisher's place of publication.
@inproceedings{shu_beyond_2019,
	venue = {Melbourne, {VIC}, Australia},
	title = {Beyond News Contents: The Role of Social Context for Fake News Detection},
	isbn = {978-1-4503-5940-5},
	url = {https://dl.acm.org/doi/10.1145/3289600.3290994},
	doi = {10.1145/3289600.3290994},
	shorttitle = {Beyond News Contents},
	abstract = {Social media is becoming popular for news consumption due to its fast dissemination, easy access, and low cost. However, it also enables the wide propagation of fake news, i.e., news with intentionally false information. Detecting fake news is an important task, which not only ensures users receive authentic information but also helps maintain a trustworthy news ecosystem. The majority of existing detection algorithms focus on finding clues from news contents, which are generally not effective because fake news is often intentionally written to mislead users by mimicking true news. Therefore, we need to explore auxiliary information to improve detection. The social context during news dissemination process on social media forms the inherent tri-relationship, the relationship among publishers, news pieces, and users, which has potential to improve fake news detection. For example, partisan-biased publishers are more likely to publish fake news, and low-credible users are more likely to share fake news. In this paper, we study the novel problem of exploiting social context for fake news detection. We propose a tri-relationship embedding framework {TriFN}, which models publisher-news relations and user-news interactions simultaneously for fake news classification. We conduct experiments on two real-world datasets, which demonstrate that the proposed approach significantly outperforms other baseline methods for fake news detection.},
	pages = {312--320},
	booktitle = {Proceedings of the Twelfth {ACM} International Conference on Web Search and Data Mining},
	publisher = {{ACM}},
	author = {Shu, Kai and Wang, Suhang and Liu, Huan},
	urldate = {2021-08-03},
	date = {2019-01},
	langid = {english},
}

% Middle initial of the third author now carries its period so that styles
% print "Victor R. Prybutok"; print and online ISSN are space-separated.
@article{evangelopoulos_latent_2012,
	title = {Latent Semantic Analysis: five methodological recommendations},
	volume = {21},
	issn = {0960-085X 1476-9344},
	url = {https://www.tandfonline.com/doi/full/10.1057/ejis.2010.61},
	doi = {10.1057/ejis.2010.61},
	shorttitle = {Latent Semantic Analysis},
	abstract = {The recent influx in generation, storage and availability of textual data presents researchers with the challenge of developing suitable methods for their analysis. Latent Semantic Analysis ({LSA}), a member of a family of methodological approaches that offers an opportunity to address this gap by describing the semantic content in textual data as a set of vectors, was pioneered by researchers in psychology, information retrieval, and bibliometrics. {LSA} involves a matrix operation called singular value decomposition, an extension of principal component analysis. {LSA} generates latent semantic dimensions that are either interpreted, if the researcher’s primary interest lies with the understanding of the thematic structure in the textual data, or used for purposes of clustering, categorisation and predictive modelling, if the interest lies with the conversion of raw text into numerical data, as a precursor to subsequent analysis. This paper reviews five methodological issues that need to be addressed by the researcher who will embark on {LSA}. We examine the dilemmas, present the choices, and discuss the considerations under which good methodological decisions are made. We illustrate these issues with the help of four small studies, involving the analysis of abstracts for papers published in the European Journal of Information Systems.},
	pages = {70--86},
	number = {1},
	journaltitle = {European Journal of Information Systems},
	author = {Evangelopoulos, Nicholas and Zhang, Xiaoni and Prybutok, Victor R.},
	urldate = {2021-08-09},
	date = {2012-01},
	langid = {english},
}

% ISSN written in the standard hyphenated NNNN-NNNN form, matching the other
% entries of this file that already hyphenate it.
@article{song_genetic_2009,
	title = {Genetic algorithm for text clustering based on latent semantic indexing},
	volume = {57},
	issn = {0898-1221},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0898122108005300},
	doi = {10.1016/j.camwa.2008.10.010},
	abstract = {In this paper, we develop a genetic algorithm method based on a latent semantic model ({GAL}) for text clustering. The main difficulty in the application of genetic algorithms ({GAs}) for document clustering is thousands or even tens of thousands of dimensions in feature space which is typical for textual data. Because the most straightforward and popular approach represents texts with the vector space model ({VSM}), that is, each unique term in the vocabulary represents one dimension. Latent semantic indexing ({LSI}) is a successful technology in information retrieval which attempts to explore the latent semantics implied by a query or a document through representing them in a dimension-reduced space. Meanwhile, {LSI} takes into account the effects of synonymy and polysemy, which constructs a semantic structure in textual data. {GA} belongs to search techniques that can efficiently evolve the optimal solution in the reduced space. We propose a variable string length genetic algorithm which has been exploited for automatically evolving the proper number of clusters as well as providing near optimal data set clustering. {GA} can be used in conjunction with the reduced latent semantic structure and improve clustering efficiency and accuracy. The superiority of {GAL} approach over conventional {GA} applied in {VSM} model is demonstrated by providing good Reuter document clustering results.},
	pages = {1901--1907},
	number = {11},
	journaltitle = {Computers \& Mathematics with Applications},
	author = {Song, Wei and Park, Soon Cheol},
	urldate = {2021-08-09},
	date = {2009-06},
	langid = {english},
}

% Chapter in the ECML 2002 proceedings volume. The export listed the LNCS series
% editors (Goos, Hartmanis, van Leeuwen) as if they had edited this volume; only
% the three volume editors are kept, and the series that "volume" refers to is
% named. Abstract: extraction errors (ligature, fused words) are repaired.
@incollection{buntine_variational_2002,
	location = {Berlin, Heidelberg},
	title = {Variational Extensions to {EM} and Multinomial {PCA}},
	series = {Lecture Notes in Computer Science},
	volume = {2430},
	isbn = {978-3-540-44036-9 978-3-540-36755-0},
	url = {http://link.springer.com/10.1007/3-540-36755-1_3},
	abstract = {Several authors in recent years have proposed discrete analogues to principle component analysis intended to handle discrete or positive only data, for instance suited to analyzing sets of documents. Methods include non-negative matrix factorization, probabilistic latent semantic analysis, and latent Dirichlet allocation. This paper begins with a review of the basic theory of the variational extension to the expectation-maximization algorithm, and then presents discrete component finding algorithms in that light. Experiments are conducted on both bigram word data and document bag-of-word to expose some of the subtleties of this new class of algorithms.},
	pages = {23--34},
	booktitle = {Machine Learning: {ECML} 2002},
	publisher = {Springer Berlin Heidelberg},
	author = {Buntine, Wray},
	editor = {Elomaa, Tapio and Mannila, Heikki and Toivonen, Hannu},
	urldate = {2021-08-09},
	date = {2002},
	langid = {english},
	doi = {10.1007/3-540-36755-1_3},
}

% arXiv preprint. The identifier lives in eprint/eprinttype instead of being
% stuffed into journaltitle; the subject classes remain available in keywords.
% Ligature glyphs left over from PDF extraction in the abstract are normalised.
@online{hassani_text_2020,
	title = {Text Mining using Nonnegative Matrix Factorization and Latent Semantic Analysis},
	url = {http://arxiv.org/abs/1911.04705},
	abstract = {Text clustering is arguably one of the most important topics in modern data mining. Nevertheless, text data require tokenization which usually yields a very large and highly sparse term-document matrix, which is usually difficult to process using conventional machine learning algorithms. Methods such as Latent Semantic Analysis have helped mitigate this issue, but are nevertheless not completely stable in practice. As a result, we propose a new feature agglomeration method based on Nonnegative Matrix Factorization, which is employed to separate the terms into groups, and then each group’s term vectors are agglomerated into a new feature vector. Together, these feature vectors create a new feature space much more suitable for clustering. In addition, we propose a new deterministic initialization for spherical K-Means, which proves very useful for this specific type of data. In order to evaluate the proposed method, we compare it to some of the latest research done in this field, as well as some of the most practiced methods. In our experiments, we conclude that the proposed method either significantly improves clustering performance, or maintains the performance of other methods, while improving stability in results.},
	eprint = {1911.04705},
	eprinttype = {arXiv},
	author = {Hassani, Ali and Iranmanesh, Amir and Mansouri, Najme},
	urldate = {2021-08-14},
	date = {2020-02},
	langid = {english},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}

% arXiv preprint. The identifier lives in eprint/eprinttype instead of being
% stuffed into journaltitle; the subject classes remain available in keywords.
% The ligature glyph left over from PDF extraction in the abstract is normalised.
@online{vavasis_complexity_2007,
	title = {On the complexity of nonnegative matrix factorization},
	url = {http://arxiv.org/abs/0708.4149},
	abstract = {Nonnegative matrix factorization ({NMF}) has become a prominent technique for the analysis of image databases, text databases and other information retrieval and clustering applications. In this report, we define an exact version of {NMF}. Then we establish several results about exact {NMF}: (1) that it is equivalent to a problem in polyhedral combinatorics; (2) that it is {NP}-hard; and (3) that a polynomial-time local search heuristic exists.},
	eprint = {0708.4149},
	eprinttype = {arXiv},
	author = {Vavasis, Stephen A.},
	urldate = {2021-08-14},
	date = {2007-09},
	langid = {english},
	keywords = {Computer Science - Information Retrieval, G.1.3, H.3.3, Mathematics - Numerical Analysis},
}

% Proceedings paper that the PDF import had typed as a journal-less article with
% the page count (7) in "pages". Author initials now carry periods.
% NOTE(review): booktitle, editors, publisher, year and page range are not in
% the original export; they follow the published NIPS 13 record - TODO confirm.
@inproceedings{lee_algorithms_nodate,
	title = {Algorithms for Non-negative Matrix Factorization},
	abstract = {Non-negative matrix factorization ({NMF}) has previously been shown to be a useful decomposition for multivariate data. Two different multiplicative algorithms for {NMF} are analyzed. They differ only slightly in the multiplicative factor used in the update rules. One algorithm can be shown to minimize the conventional least squares error while the other minimizes the generalized Kullback-Leibler divergence. The monotonic convergence of both algorithms can be proven using an auxiliary function analogous to that used for proving convergence of the Expectation-Maximization algorithm. The algorithms can also be interpreted as diagonally rescaled gradient descent, where the rescaling factor is optimally chosen to ensure convergence.},
	pages = {556--562},
	booktitle = {Advances in Neural Information Processing Systems 13},
	publisher = {{MIT} Press},
	author = {Lee, Daniel D. and Seung, H. Sebastian},
	editor = {Leen, Todd K. and Dietterich, Thomas G. and Tresp, Volker},
	date = {2001},
	langid = {english},
}

% The place recorded by the export is where the conference was held, so it is
% stored in venue; location is reserved for the publisher's place of publication.
% The ligature glyph left over from PDF extraction in the abstract is normalised.
@inproceedings{li_documents_2014,
	venue = {Lanzhou, China},
	title = {Documents clustering based on max-correntropy nonnegative matrix factorization},
	isbn = {978-1-4799-4215-2 978-1-4799-4216-9 978-1-4799-4217-6},
	url = {http://ieeexplore.ieee.org/document/7009720/},
	doi = {10.1109/ICMLC.2014.7009720},
	abstract = {Nonnegative matrix factorization ({NMF}) has been successfully applied to many areas for classification and clustering. Commonly-used {NMF} algorithms mainly target on minimizing the l2 distance or Kullback-Leibler ({KL}) divergence, which may not be suitable for nonlinear case. In this paper, we propose a new decomposition method by maximizing the correntropy between the original and the product of two low-rank matrices for document clustering. This method also allows us to learn the new basis vectors of the semantic feature space from the data. To our knowledge, we haven’t seen any work has been done by maximizing correntropy in {NMF} to cluster high dimensional document data. Our experiment results show the supremacy of our proposed method over other variants of {NMF} algorithm on Reuters21578 and {TDT}2 databasets.},
	pages = {850--855},
	booktitle = {2014 International Conference on Machine Learning and Cybernetics},
	publisher = {{IEEE}},
	author = {Li, Le and Yang, Jianjun and Xu, Yang and Qin, Zhen and Zhang, Honggang},
	urldate = {2021-08-14},
	date = {2014-07},
	langid = {english},
}

% PDF extraction had split the first author's surname ("Votte" plus a stray
% "FE") and detached the acute accents in the title; both are restored.
% No journal is recorded, so the entry is typed as unpublished, and the bare "4"
% (a page count) is stored as pagetotal. The citation key is left unchanged so
% existing citations keep resolving.
% NOTE(review): presumably a GRETSI conference paper - if confirmed, retype as
% inproceedings and add booktitle, date and page range.
@unpublished{votte_algorithmes_nodate,
	title = {Algorithmes de factorisation en matrices non-négatives fondée sur la β-divergence},
	abstract = {This paper describes algorithms for nonnegative matrix factorization ({NMF}) with the β-divergence (β-{NMF}). The β-divergence is a family of cost functions parametrized by a single shape parameter β that takes the Euclidean distance, the Kullback-Leibler divergence and the Itakura-Saito divergence as special cases (β = 2, 1, 0 respectively). The proposed algorithms are based on a surrogate auxiliary function (an upper bound of the objective function constructed locally). We first describe a majorization-minimization ({MM}) algorithm that leads to multiplicative updates. Then we introduce the concept of majorization-equalization ({ME}) algorithm which produces updates that move along constant level sets of the auxiliary function and lead to larger steps than {MM}. Simulations illustrate the faster convergence of the {ME} approach.},
	pagetotal = {4},
	author = {Févotte, Cédric and Idier, Jérôme},
	langid = {french},
}

% arXiv preprint. The identifier lives in eprint/eprinttype instead of being
% stuffed into journaltitle; the subject classes remain available in keywords.
% Ligature glyphs left over from PDF extraction in the abstract are normalised.
@online{shitov_nonnegative_2017,
	title = {The nonnegative rank of a matrix: Hard problems, easy solutions},
	url = {http://arxiv.org/abs/1605.04000},
	shorttitle = {The nonnegative rank of a matrix},
	abstract = {Using elementary linear algebra, we develop a technique that leads to solutions of two widely known problems on nonnegative matrices. First, we give a short proof of the result by Vavasis stating that the nonnegative rank of a matrix is {NP}-hard to compute. This proof is essentially contained in the paper by Jiang and Ravikumar, who discussed this topic in different terms fifteen years before the work of Vavasis. Secondly, we present a solution of the Cohen–Rothblum problem on rational nonnegative factorizations, which was posed in 1993 and remained open.},
	eprint = {1605.04000},
	eprinttype = {arXiv},
	author = {Shitov, Yaroslav},
	urldate = {2021-08-14},
	date = {2017-12},
	langid = {english},
	keywords = {Computer Science - Computational Complexity, Mathematics - Combinatorics},
}

% No journal is recorded for this PDF import, so it is typed as unpublished
% rather than as an article (which would print a dangling "In:").
% The bare "6" was the PDF page count and is stored as pagetotal; the first
% author's middle initial now carries its period.
@unpublished{zurada_nonnegative_2013,
	title = {Nonnegative Matrix Factorization and Its Application to Pattern Analysis and Text Mining},
	abstract = {Nonnegative Matrix Factorization ({NMF}) is one of the most promising techniques to reduce the dimensionality of the data. This presentation compares the method with other popular matrix decomposition approaches for various pattern analysis tasks. Among others, {NMF} has been also widely applied for clustering and latent feature extraction. Several types of the objective functions have been used for {NMF} in the literature. Instead of minimizing the common Euclidean Distance ({EucD}) error, we review an alternative method that maximizes the correntropy similarity measure to produce the factorization. Correntropy is an entropy-based criterion defined as a nonlinear similarity measure. Following the discussion of maximization of the correntropy function, we use it to cluster document data set and compare the clustering performance with the {EucD}-based {NMF}. Our approach was applied and illustrated for the clustering of documents in the 20-Newsgroups data set. The comparison is illustrated with 20-Newsgroups data set. The results show that our approach produces per average better clustering compared with other methods which use {EucD} as an objective function.},
	pagetotal = {6},
	author = {Zurada, Jacek M. and Ensari, Tolga and Asl, Ehsan Hosseini and Chorowski, Jan},
	date = {2013},
	langid = {english},
}

% Software vignette with no journal, so it is typed as unpublished rather than
% as an article (which would print a dangling "In:").
% The bare "12" was the PDF page count and is stored as pagetotal; ligature
% glyphs left over from PDF extraction in the abstract are normalised.
@unpublished{gaujoux_generating_nodate,
	title = {Generating heatmaps for Nonnegative Matrix Factorization},
	abstract = {This vignette describes how to produce different informative heatmaps from {NMF} objects, such as returned by the function nmf in the {NMF} package1 (Gaujoux et al. 2010). The main drawing engine is implemented by the function aheatmap, which is a highly enhanced modification of the function pheatmap from the pheatmap package2, and provides convenient and quick ways of producing high quality and customizable annotated heatmaps. Currently this function is part of the package {NMF} , but may eventually compose a separate package on its own.},
	pagetotal = {12},
	author = {Gaujoux, Renaud},
	langid = {english},
}

% Journal article; fields ordered author, title, venue, locator, identifiers.
@article{gaujoux_flexible_2010,
	author = {Gaujoux, Renaud and Seoighe, Cathal},
	title = {A flexible R package for nonnegative matrix factorization},
	journaltitle = {{BMC} Bioinformatics},
	volume = {11},
	number = {1},
	pages = {367},
	date = {2010-12},
	issn = {1471-2105},
	doi = {10.1186/1471-2105-11-367},
	url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-367},
	urldate = {2021-08-15},
	langid = {english},
}

% Edited volume; fields ordered editor, title, series, imprint, identifiers.
@collection{fellbaum_wordnet_1998,
	editor = {Fellbaum, Christiane},
	title = {{WordNet}: an electronic lexical database},
	shorttitle = {{WordNet}},
	series = {Language, speech, and communication},
	publisher = {{MIT} Press},
	location = {Cambridge, Mass},
	date = {1998},
	isbn = {978-0-262-06197-1},
	keywords = {Data processing, English language, Lexicology, Semantics, {WordNet}},
}

% Cleaned import artefacts: the hard-coded prefix is removed from volume (styles
% add their own label), the stray colon at the end of the title is dropped, and
% langid is set so the French title is hyphenated correctly.
@article{balech_masque_2022,
	title = {Le masque, figure polaire de la crise de la Covid-19 : une exploration par {NLP} du flux des conversations Twitter (février - mai 2020)},
	volume = {43},
	issn = {1953-6119},
	url = {https://www.cairn.info/revue-marche-et-organisations-2022-1-page-151.htm?ref=doi},
	doi = {10.3917/maorg.043.0151},
	shorttitle = {Le masque, figure polaire de la crise de la Covid-19},
	pages = {151--187},
	number = {1},
	journaltitle = {Marché et organisations},
	author = {Balech, Sophie and Calciu, Michel and Monnot, Julien and Benavent, Christophe},
	urldate = {2022-04-27},
	date = {2022-02-11},
	langid = {french},
	file = {Balech et al. - 2022 - Le masque, figure polaire de la crise de la Covid-.pdf:C\:\\Users\\33623\\Zotero\\storage\\MX6LJX55\\Balech et al. - 2022 - Le masque, figure polaire de la crise de la Covid-.pdf:application/pdf},
}

% The export had surname-only authors wrapped in braces (parsed as institutions)
% and only the first page; ISSN is written in hyphenated form.
% NOTE(review): given names and the end page are not in the original export;
% they follow the published journal record - TODO confirm.
@article{mudambi_research_2010,
	title = {Research Note: What Makes a Helpful Online Review? A Study of Customer Reviews on Amazon.com},
	volume = {34},
	issn = {0276-7783},
	url = {https://www.jstor.org/stable/10.2307/20721420},
	doi = {10.2307/20721420},
	shorttitle = {Research Note},
	pages = {185--200},
	number = {1},
	journaltitle = {{MIS} Quarterly},
	author = {Mudambi, Susan M. and Schuff, David},
	urldate = {2019-10-20},
	date = {2010},
	file = {Mudambi et Schuff - 2010 - Research Note What Makes a Helpful Online Review.pdf:C\:\\Users\\33623\\Zotero\\storage\\URTVSV5X\\Mudambi et Schuff - 2010 - Research Note What Makes a Helpful Online Review.pdf:application/pdf},
}

% Journal article; fields ordered author, title, venue, locator, identifiers, attachment.
@article{nielek_spiral_2010,
	author = {Nielek, Radoslaw and Wawer, Aleksander and Wierzbicki, Adam},
	title = {Spiral of hatred: social effects in Internet auctions. Between informativity and emotion},
	shorttitle = {Spiral of hatred},
	journaltitle = {Electronic Commerce Research},
	volume = {10},
	number = {3},
	pages = {313--330},
	date = {2010-12},
	issn = {1389-5753, 1572-9362},
	doi = {10.1007/s10660-010-9058-9},
	url = {http://link.springer.com/10.1007/s10660-010-9058-9},
	urldate = {2019-10-21},
	langid = {english},
	file = {Nielek et al. - 2010 - Spiral of hatred social effects in Internet aucti.pdf:C\:\\Users\\33623\\Zotero\\storage\\AKBJ66NM\\Nielek et al. - 2010 - Spiral of hatred social effects in Internet aucti.pdf:application/pdf},
}

% Journal article; fields ordered author, title, venue, locator, identifiers, attachment.
@article{dailey_retelling_2014,
	author = {Dailey, Stephanie L. and Browning, Larry},
	title = {Retelling Stories in Organizations: Understanding the Functions of Narrative Repetition},
	shorttitle = {Retelling Stories in Organizations},
	journaltitle = {Academy of Management Review},
	volume = {39},
	number = {1},
	pages = {22--43},
	date = {2014-01},
	issn = {0363-7425, 1930-3807},
	doi = {10.5465/amr.2011.0329},
	url = {http://journals.aom.org/doi/10.5465/amr.2011.0329},
	urldate = {2022-07-11},
	langid = {english},
	file = {Dailey et Browning - 2014 - Retelling Stories in Organizations Understanding .pdf:C\:\\Users\\33623\\Zotero\\storage\\634GUVKH\\Dailey et Browning - 2014 - Retelling Stories in Organizations Understanding .pdf:application/pdf},
}

% ISSN written in the standard hyphenated NNNN-NNNN form, matching the other
% entries of this file that already hyphenate it.
@article{rathore_pre-_2020,
	title = {Pre- and post-launch emotions in new product development: Insights from twitter analytics of three products},
	volume = {50},
	issn = {0268-4012},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0268401218310508},
	doi = {10.1016/j.ijinfomgt.2019.05.015},
	shorttitle = {Pre- and post-launch emotions in new product development},
	abstract = {The paper showcases the possible application of social media analytics in new product development ({NPD}). It compares users’ emotions before and after the launch of three new products in the market—a pizza, a car and a smart phone—for possible inputs for {NPD}. The user-generated content offers an alternative to conventional survey data and is cross-cultural in nature, relatively inexpensive and provides real-time information about user behaviour. A total of 302,632 tweets that mentioned the three new products before and after the launch were collected and analysed. Sentiment analysis of the tweets from two time periods was conducted and compared. The users’ responses to the pre- and post-launch of three products vary. The dissatisfaction with the new products represented by negative emotions aligns with the market performance. In the pre-launch period, trust and joy were more common for pizza, joy was more common for the car, and trust was more common for the phone. In the post-launch period, anger and disgust were more common for pizza, joy and trust were more common for the car, and joy was more common for only one aspect of the phone. Further analysis showed that for the car and the phone, firms need to focus on user attitudes towards product attributes, whereas for pizza, firms should concentrate on physiological changes, i.e., changes in product attributes, service and promotional sides. By using the proposed alternative approach, businesses can obtain real-time feedback about the expectations and experiences of the new products. The {NPD} process can be adjusted accordingly.},
	pages = {111--127},
	journaltitle = {International Journal of Information Management},
	author = {Rathore, Ashish Kumar and Ilavarasan, P. Vigneswara},
	urldate = {2022-07-11},
	date = {2020-02},
	langid = {english},
	file = {Rathore et Ilavarasan - 2020 - Pre- and post-launch emotions in new product devel.pdf:C\:\\Users\\33623\\Zotero\\storage\\7TWWJDTA\\Rathore et Ilavarasan - 2020 - Pre- and post-launch emotions in new product devel.pdf:application/pdf},
}

% No journal is recorded for this PDF import, so it is typed as unpublished
% rather than as an article (which would print a dangling "In:").
% The bare "9" was the PDF page count and is stored as pagetotal; mis-extracted
% ligatures in the abstract ("different", "reflecting") are repaired.
@unpublished{elsafoury_teargas_nodate,
	title = {Teargas, Water Cannons and Twitter: A case study on detecting protest repression events in Turkey 2013},
	abstract = {Since the Arab spring in 2011, protests have been spreading around the world for different reasons, often these protests are faced with violent repression. Studying protest repression requires appropriate datasets. Existing datasets like {GDELT} focus mainly on events reported in news media. However, news media reports have issues including censorship and coverage bias. Recently, social scientists have started using Machine Learning ({ML}) to detect political events, but it is costly and time consuming to hand label data for training {ML} models. This paper proposes using {ML} and crowdsourcing to detect protest repression events from Twitter. Our case study is the Turkish Gezi Park protest in 2013. Our results show that Twitter is a reliable source reflecting events happening on the ground as soon as they happen. Moreover, training conventional {ML} models on crowdsourced labelled data gave good results with an {AUC} score of 0.896 to detect protest events and 0.8189 to detect repression events.},
	pagetotal = {9},
	author = {Elsafoury, Fatma},
	langid = {english},
	file = {Elsafoury - Teargas, Water Cannons and Twitter A case study o.pdf:C\:\\Users\\33623\\Zotero\\storage\\UUML6D76\\Elsafoury - Teargas, Water Cannons and Twitter A case study o.pdf:application/pdf},
}

% Journal article; fields ordered author, title, venue, locator, identifiers, attachment.
@article{kang_natural_2020,
	author = {Kang, Yue and Cai, Zhao and Tan, Chee-Wee and Huang, Qian and Liu, Hefu},
	title = {Natural language processing ({NLP}) in management research: A literature review},
	shorttitle = {Natural language processing ({NLP}) in management research},
	journaltitle = {Journal of Management Analytics},
	volume = {7},
	number = {2},
	pages = {139--172},
	date = {2020-04},
	issn = {2327-0012, 2327-0039},
	doi = {10.1080/23270012.2020.1756939},
	url = {https://www.tandfonline.com/doi/full/10.1080/23270012.2020.1756939},
	urldate = {2022-07-11},
	langid = {english},
	file = {Kang et al. - 2020 - Natural language processing (NLP) in management re.pdf:C\:\\Users\\33623\\Zotero\\storage\\DI3CY33N\\Kang et al. - 2020 - Natural language processing (NLP) in management re.pdf:application/pdf},
}

% Abstract: a line-break hyphen lost in PDF extraction is restored
% ("state-of-the-art"); print and online ISSN are space-separated.
@article{kowalski_improving_2020,
	title = {Improving public services by mining citizen feedback: An application of natural language processing},
	volume = {98},
	issn = {0033-3298 1467-9299},
	url = {https://onlinelibrary.wiley.com/doi/10.1111/padm.12656},
	doi = {10.1111/padm.12656},
	shorttitle = {Improving public services by mining citizen feedback},
	abstract = {Research on user satisfaction has increased substantially in recent years. To date, most studies have tested the significance of predefined factors thought to influence user satisfaction, with no scalable means of verifying the validity of their assumptions. Digital technology has created new methods of collecting user feedback where service users post comments. As topic models can analyse large volumes of feedback, they have been proposed as a feasible approach to aggregating user opinions. This novel approach has been applied to process reviews of primary care practices in England. Findings from an analysis of more than 200,000 reviews show that the quality of interactions with staff and bureaucratic exigencies are the key drivers of user satisfaction. In addition, patient satisfaction is strongly influenced by factors that are not measured by state-of-the-art patient surveys. These results highlight the potential benefits of text mining and machine learning for public administration.},
	pages = {1011--1026},
	number = {4},
	journaltitle = {Public Administration},
	author = {Kowalski, Radoslaw and Esteve, Marc and Jankin Mikhaylov, Slava},
	urldate = {2022-07-11},
	date = {2020-12},
	langid = {english},
	file = {Kowalski et al. - 2020 - Improving public services by mining citizen feedba.pdf:C\:\\Users\\33623\\Zotero\\storage\\4ERVZS9Z\\Kowalski et al. - 2020 - Improving public services by mining citizen feedba.pdf:application/pdf},
}

@article{anastasopoulos_computational_nodate,
	title = {Computational Text Analysis for Public Management Research: An Annotated Application to County Budgets},
	abstract = {Organizations produce copious volumes of written documents, including position papers, meeting summaries, minutes from hearings, presentations, and budget justifications. These documents present a wealth of untapped information, which can shed light on a variety of organizational factors–individual and group behaviors, managerial and policy choices, and other key inter- and intra-organizational dynamics that are of great interest to public management scholars. Computational text analysis methods offer a highly generalizable means of tapping into these documents in order to generate objective organizational data. We demonstrate a general method for analyzing public texts by applying the Latent Dirichlet Allocation ({LDA}) approach to measuring budget orientations in county budget documents. {LDA} is a nonparametric Bayesian method, which is used to extract topical content from collections of documents. We demonstrate how this method can be utilized to measure the functions of ∗Assistant Professor, Department of Public Administration and Policy, Department of Political Science, University of Georgia. Microsoft Visiting Professor, Center for Information Technology Policy, Princeton University (2017–2018).},
	pages = {47},
	author = {Anastasopoulos, L Jason and Moldogaziev, Tima T and Scott, Tyler A},
	langid = {english},
	file = {Anastasopoulos et al. - Computational Text Analysis for Public Management .pdf:C\:\\Users\\33623\\Zotero\\storage\\8U436XBF\\Anastasopoulos et al. - Computational Text Analysis for Public Management .pdf:application/pdf},
}

@article{schoonvelde_friends_2019,
	title = {Friends with text as data benefits: Assessing and extending the use of automated text analysis in political science and political psychology},
	volume = {7},
	issn = {2195-3325},
	url = {https://jspp.psychopen.eu/index.php/jspp/article/view/5115},
	doi = {10.5964/jspp.v7i1.964},
	shorttitle = {Friends with text as data benefits},
	abstract = {Applications of automated text analysis measuring topics, ideology, sentiment or even personality are booming in fields like political science and political psychology. These developments are to be applauded as they bring about novel insights about politics using new sources of (unstructured) data. However, a divide exists between work in both disciplines using text as data. In this paper we argue in favor of more integration across disciplinary boundaries, structuring our case around four key issues in the research process: (i) sampling text; (ii) authorship as meta data; (iii) pre-processing text; (iv) analyzing text. Along the way we demonstrate that an assessment of speaker characteristics may crucially depend on the text sources under study, and that the use of sentiment words correlates with estimates of policy positions, with implications for interpretation of the latter. As such, this paper contributes to a critical discussion about the merits of automated text analysis methods in political psychology and political science, with an eye towards advancing the considerable potential of text as data in the study of politics.},
	pages = {124--143},
	number = {1},
	journaltitle = {Journal of Social and Political Psychology},
	author = {Schoonvelde, Martijn and Schumacher, Gijs and Bakker, Bert N.},
	urldate = {2022-07-11},
	date = {2019-02},
	langid = {english},
	file = {Schoonvelde et al. - 2019 - Friends with text as data benefits Assessing and .pdf:C\:\\Users\\33623\\Zotero\\storage\\2DQMXB89\\Schoonvelde et al. - 2019 - Friends with text as data benefits Assessing and .pdf:application/pdf},
}

@article{benavent_les_nodate,
	title = {Les techniques du {NLP} pour la recherche en sciences de gestion.},
	abstract = {The purpose of this chapter is to introduce natural language processing techniques and textual analysis, such as the developments of data mining and linguistics define it, automating it by taking advantage of the distributional properties of language. Largely automated, natural language processing techniques sequence a series of operations from the constitution of the corpus to its annotation, resulting in representation and qualification models. These methods are now widely available through the r and python language libraries. They make it possible to exploit the large corpus that digitisation makes it possible to build: consumer comments, news bases, activity reports, interview reports. The purpose of this text is essentially technical, however without giving any operating method. It indicates generic methods that can be used via r and their context of use. This is a short manual of modern textual analysis. For business research.},
	pages = {23},
	author = {Benavent, Christophe},
	langid = {french},
	file = {Benavent - Les techniques du NLP pour la recherche en science.pdf:C\:\\Users\\33623\\Zotero\\storage\\UFBMPAMY\\Benavent - Les techniques du NLP pour la recherche en science.pdf:application/pdf},
}

@article{gloor_put_2020,
	title = {Put your money where your mouth is: Using deep learning to identify consumer tribes from word usage},
	volume = {51},
	issn = {02684012},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0268401218313057},
	doi = {10.1016/j.ijinfomgt.2019.03.011},
	shorttitle = {Put your money where your mouth is},
	abstract = {Internet and social media offer firms novel ways of managing their marketing strategy and gain competitive advantage. The groups of users expressing themselves on the Internet about a particular topic, product, or brand are frequently called a virtual tribe or E-tribe. However, there are no automatic tools for identifying and studying the characteristics of these virtual tribes. Towards this aim, this paper presents Tribefinder, a system to reveal Twitter users’ tribal affiliations, by analyzing their tweets and language use. To show the potential of this instrument, we provide an example considering three specific tribal macro-categories: alternative realities, lifestyle, and recreation. In addition, we discuss the different characteristics of each identified tribe, in terms of use of language and social interaction metrics. Tribefinder illustrates the importance of adopting a new lens for studying virtual tribes, which is crucial for firms to properly design their marketing strategy, and for scholars to extend prior marketing research.},
	pages = {101924},
	journaltitle = {International Journal of Information Management},
	author = {Gloor, Peter and Fronzetti Colladon, Andrea and de Oliveira, Joao Marcos and Rovelli, Paola},
	urldate = {2022-07-11},
	date = {2020-04},
	langid = {english},
	file = {Gloor et al. - 2020 - Put your money where your mouth is Using deep lea.pdf:C\:\\Users\\33623\\Zotero\\storage\\ICH9A7BP\\Gloor et al. - 2020 - Put your money where your mouth is Using deep lea.pdf:application/pdf},
}

@article{kobayashi_text_2018,
	title = {Text Mining in Organizational Research},
	volume = {21},
	issn = {1094-4281, 1552-7425},
	url = {http://journals.sagepub.com/doi/10.1177/1094428117722619},
	doi = {10.1177/1094428117722619},
	abstract = {Despite the ubiquity of textual data, so far few researchers have applied text mining to answer organizational research questions. Text mining, which essentially entails a quantitative approach to the analysis of (usually) voluminous textual data, helps accelerate knowledge discovery by radically increasing the amount data that can be analyzed. This article aims to acquaint organizational researchers with the fundamental logic underpinning text mining, the analytical stages involved, and contemporary techniques that may be used to achieve different types of objectives. The specific analytical techniques reviewed are (a) dimensionality reduction, (b) distance and similarity computing, (c) clustering, (d) topic modeling, and (e) classification. We describe how text mining may extend contemporary organizational research by allowing the testing of existing or new research questions with data that are likely to be rich, contextualized, and ecologically valid. After an exploration of how evidence for the validity of text mining output may be generated, we conclude the article by illustrating the text mining process in a job analysis setting using a dataset composed of job vacancies.},
	pages = {733--765},
	number = {3},
	journaltitle = {Organizational Research Methods},
	author = {Kobayashi, Vladimer B. and Mol, Stefan T. and Berkers, Hannah A. and Kismihók, Gábor and Den Hartog, Deanne N.},
	urldate = {2022-07-11},
	date = {2018-07},
	langid = {english},
	file = {Kobayashi et al. - 2018 - Text Mining in Organizational Research.pdf:C\:\\Users\\33623\\Zotero\\storage\\QHN839BN\\Kobayashi et al. - 2018 - Text Mining in Organizational Research.pdf:application/pdf;Kobayashi et al. - 2018 - Text Mining in Organizational Research.pdf:C\:\\Users\\33623\\Zotero\\storage\\PYV3CRGA\\Kobayashi et al. - 2018 - Text Mining in Organizational Research.pdf:application/pdf},
}

@article{gentzkow_text_2019,
	title = {Text as Data},
	volume = {57},
	issn = {0022-0515},
	url = {https://pubs.aeaweb.org/doi/10.1257/jel.20181020},
	doi = {10.1257/jel.20181020},
	abstract = {An ever-increasing share of human interaction, communication, and culture is recorded as digital text. We provide an introduction to the use of text as an input to economic research. We discuss the features that make text different from other forms of data, offer a practical overview of relevant statistical methods, and survey a variety of applications. ({JEL} C38, C55, L82, Z13)},
	pages = {535--574},
	number = {3},
	journaltitle = {Journal of Economic Literature},
	author = {Gentzkow, Matthew and Kelly, Bryan and Taddy, Matt},
	urldate = {2022-07-11},
	date = {2019-09},
	langid = {english},
	file = {Gentzkow et al. - 2019 - Text as Data.pdf:C\:\\Users\\33623\\Zotero\\storage\\ECEJWHYA\\Gentzkow et al. - 2019 - Text as Data.pdf:application/pdf},
}

@article{evans_machine_2016,
	title = {Machine Translation: Mining Text for Social Theory},
	volume = {42},
	issn = {0360-0572, 1545-2115},
	url = {https://www.annualreviews.org/doi/10.1146/annurev-soc-081715-074206},
	doi = {10.1146/annurev-soc-081715-074206},
	shorttitle = {Machine Translation},
	abstract = {More of the social world lives within electronic text than ever before, from collective activity on the web, social media, and instant messaging to online transactions, government intelligence, and digitized libraries. This supply of text has elicited demand for natural language processing and machine learning tools to filter, search, and translate text into valuable data. We survey some of the most exciting computational approaches to text analysis, highlighting both supervised methods that extend old theories to new data and unsupervised techniques that discover hidden regularities worth theorizing. We then review recent research that uses these tools to develop social insight by exploring (a) collective attention and reasoning through the content of communication; (b) social relationships through the process of communication; and (c) social states, roles, and moves identified through heterogeneous signals within communication. We highlight social questions for which these advances could offer powerful new insight.},
	pages = {21--50},
	number = {1},
	journaltitle = {Annual Review of Sociology},
	author = {Evans, James A. and Aceves, Pedro},
	urldate = {2022-07-11},
	date = {2016-07},
	langid = {english},
	file = {Evans et Aceves - 2016 - Machine Translation Mining Text for Social Theory.pdf:C\:\\Users\\33623\\Zotero\\storage\\58IXWAWS\\Evans et Aceves - 2016 - Machine Translation Mining Text for Social Theory.pdf:application/pdf},
}

@article{kliegr_advances_2020,
	title = {Advances in Machine Learning for the Behavioral Sciences},
	volume = {64},
	issn = {0002-7642, 1552-3381},
	url = {http://arxiv.org/abs/1911.03249},
	doi = {10.1177/0002764219859639},
	abstract = {The areas of machine learning and knowledge discovery in databases have considerably matured in recent years. In this article, we briefly review recent developments as well as classical algorithms that stood the test of time. Our goal is to provide a general introduction into different tasks such as learning from tabular data, behavioral data, or textual data, with a particular focus on actual and potential applications in behavioral sciences. The supplemental appendix to the article also provides practical guidance for using the methods by pointing the reader to proven software implementations. The focus is on R, but we also cover some libraries in other programming languages as well as systems with easy-to-use graphical interfaces.},
	pages = {145--175},
	number = {2},
	journaltitle = {American Behavioral Scientist},
	author = {Kliegr, Tomáš and Bahník, Štěpán and Fürnkranz, Johannes},
	urldate = {2022-07-11},
	date = {2020-02},
	langid = {english},
	keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning, Statistics - Machine Learning},
	file = {Kliegr et al. - 2020 - Advances in Machine Learning for the Behavioral Sc.pdf:C\:\\Users\\33623\\Zotero\\storage\\CT623S7K\\Kliegr et al. - 2020 - Advances in Machine Learning for the Behavioral Sc.pdf:application/pdf},
}

@article{corciolani_does_2020,
	title = {Does involvement in corporate social irresponsibility affect the linguistic features of corporate social responsibility reports?},
	volume = {27},
	issn = {1535-3958, 1535-3966},
	url = {https://onlinelibrary.wiley.com/doi/10.1002/csr.1832},
	doi = {10.1002/csr.1832},
	abstract = {Companies publish corporate social responsibility ({CSR}) reports to inform their stakeholders of their {CSR} efforts. However, the literature has shown that these reports can be used as a way to offset companies' involvement in corporate social irresponsibility ({CSIR}). By relying on a cognitive‐linguistic perspective, we investigate whether firms respond to their own irresponsible business conduct by changing their {CSR} reports' linguistic features and, if so, how. We use a sample of 135 large corporations headquartered in developed countries between 1995 and 2014. An analysis of their {CSR} reports reveals that the more a firm is involved in irresponsible business conduct, the more likely it is to use narrative (instead of analytical) and deceptive (instead of authentic) language. Moreover, we show that these two trends are particularly evident for highly internationalised firms.},
	pages = {670--680},
	number = {2},
	journaltitle = {Corporate Social Responsibility and Environmental Management},
	author = {Corciolani, Matteo and Nieri, Federica and Tuan, Annamaria},
	urldate = {2022-07-11},
	date = {2020-03},
	langid = {english},
	file = {Corciolani et al. - 2020 - Does involvement in corporate social irresponsibil.pdf:C\:\\Users\\33623\\Zotero\\storage\\A4FGJU55\\Corciolani et al. - 2020 - Does involvement in corporate social irresponsibil.pdf:application/pdf},
}

@article{schafer_mapping_2020,
	title = {Mapping and Modeling of Discussions Related to Gastrointestinal Discomfort in French-Speaking Online Forums: Results of a 15-Year Retrospective Infodemiology Study},
	volume = {22},
	issn = {1438-8871},
	url = {https://www.jmir.org/2020/11/e17247},
	doi = {10.2196/17247},
	shorttitle = {Mapping and Modeling of Discussions Related to Gastrointestinal Discomfort in French-Speaking Online Forums},
	abstract = {Background: Gastrointestinal ({GI}) discomfort is prevalent and known to be associated with impaired quality of life. Real-world information on factors of {GI} discomfort and solutions used by people is, however, limited. Social media, including online forums, have been considered a new source of information to examine the health of populations in real-life settings. Objective: The aims of this retrospective infodemiology study are to identify discussion topics, characterize users, and identify perceived determinants of {GI} discomfort in web-based messages posted by users of French social media. Methods: Messages related to {GI} discomfort posted between January 2003 and August 2018 were extracted from 14 French-speaking general and specialized publicly available online forums. Extracted messages were cleaned and deidentified. Relevant medical concepts were determined on the basis of the Medical Dictionary for Regulatory Activities and vernacular terms. The identification of discussion topics was carried out by using a correlated topic model on the basis of the latent Dirichlet allocation. A nonsupervised clustering algorithm was applied to cluster forum users according to the reported symptoms of {GI} discomfort, discussion topics, and activity on online forums. Users’ age and gender were determined by linear regression and application of a support vector machine, respectively, to characterize the identified clusters according to demographic parameters. Perceived factors of {GI} discomfort were classified by a combined method on the basis of syntactic analysis to identify messages with causality terms and a second topic modeling in a relevant segment of phrases. Results: A total of 198,866 messages associated with {GI} discomfort were included in the analysis corpus after extraction and cleaning. These messages were posted by 36,989 separate web users, most of them being women younger than 40 years. 
Everyday life, diet, digestion, abdominal pain, impact on the quality of life, and tips to manage stress were among the most discussed topics. Segmentation of users identified 5 clusters corresponding to chronic and acute {GI} concerns. Diet topic was associated with each cluster, and stress was strongly associated with abdominal pain. Psychological factors, food, and allergens were perceived as the main causes of {GI} discomfort by web users. Conclusions: {GI} discomfort is actively discussed by web users. This study reveals a complex relationship between food, stress, and {GI} discomfort. Our approach has shown that identifying web-based discussion topics associated with {GI} discomfort and its perceived factors is feasible and can serve as a complementary source of real-world evidence for caregivers.},
	pages = {e17247},
	number = {11},
	journaltitle = {Journal of Medical Internet Research},
	author = {Schäfer, Florent and Faviez, Carole and Voillot, Paméla and Foulquié, Pierre and Najm, Matthieu and Jeanne, Jean-François and Fagherazzi, Guy and Schück, Stéphane and Le Nevé, Boris},
	urldate = {2022-07-11},
	date = {2020-11},
	langid = {english},
	file = {Schäfer et al. - 2020 - Mapping and Modeling of Discussions Related to Gas.pdf:C\:\\Users\\33623\\Zotero\\storage\\MXZW5NTH\\Schäfer et al. - 2020 - Mapping and Modeling of Discussions Related to Gas.pdf:application/pdf},
}

@article{cointet_ce_2018,
	title = {Ce que le big data fait à l’analyse sociologique des textes: Un panorama critique des recherches contemporaines},
	volume = {59},
	issn = {0035-2969},
	url = {https://www.cairn.info/revue-francaise-de-sociologie-2018-3-page-533.htm?ref=doi},
	doi = {10.3917/rfs.593.0533},
	shorttitle = {Ce que le big data fait à l’analyse sociologique des textes},
	pages = {533--557},
	number = {3},
	journaltitle = {Revue française de sociologie},
	author = {Cointet, Jean-Philippe and Parasie, Sylvain},
	urldate = {2022-07-11},
	date = {2018-09},
	langid = {french},
	file = {Cointet et Parasie - 2018 - Ce que le big data fait à l’analyse sociologique d.pdf:C\:\\Users\\33623\\Zotero\\storage\\9MDL84JC\\Cointet et Parasie - 2018 - Ce que le big data fait à l’analyse sociologique d.pdf:application/pdf},
}

@article{berger_uniting_2020,
	title = {Uniting the Tribes: Using Text for Marketing Insight},
	volume = {84},
	issn = {0022-2429, 1547-7185},
	url = {http://journals.sagepub.com/doi/10.1177/0022242919873106},
	doi = {10.1177/0022242919873106},
	shorttitle = {Uniting the Tribes},
	abstract = {Words are part of almost every marketplace interaction. Online reviews, customer service calls, press releases, marketing communications, and other interactions create a wealth of textual data. But how can marketers best use such data? This article provides an overview of automated textual analysis and details how it can be used to generate marketing insights. The authors discuss how text reflects qualities of the text producer (and the context in which the text was produced) and impacts the audience or text recipient. Next, they discuss how text can be a powerful tool both for prediction and for understanding (i.e., insights). Then, the authors overview methodologies and metrics used in text analysis, providing a set of guidelines and procedures. Finally, they further highlight some common metrics and challenges and discuss how researchers can address issues of internal and external validity. They conclude with a discussion of potential areas for future work. Along the way, the authors note how textual analysis can unite the tribes of marketing. While most marketing problems are interdisciplinary, the field is often fragmented. By involving skills and ideas from each of the subareas of marketing, text analysis has the potential to help unite the field with a common set of tools and approaches.},
	pages = {1--25},
	number = {1},
	journaltitle = {Journal of Marketing},
	author = {Berger, Jonah and Humphreys, Ashlee and Ludwig, Stephan and Moe, Wendy W. and Netzer, Oded and Schweidel, David A.},
	urldate = {2022-07-11},
	date = {2020-01},
	langid = {english},
	file = {Berger et al. - 2020 - Uniting the Tribes Using Text for Marketing Insig.pdf:C\:\\Users\\33623\\Zotero\\storage\\DX4DWXYP\\Berger et al. - 2020 - Uniting the Tribes Using Text for Marketing Insig.pdf:application/pdf},
}

@article{mcarthur_innovations_2015,
	title = {Innovations in the systematic review of text and opinion},
	volume = {13},
	issn = {1744-1609},
	url = {https://journals.lww.com/01787381-201509000-00011},
	doi = {10.1097/XEB.0000000000000060},
	abstract = {Background: Evidence-based healthcare focuses on the need to use interventions that are supported by the best available and most up-to-date evidence or knowledge. Many clinical questions cannot be fully answered by evidence derived from quantitative or qualitative research designs alone, since many areas in healthcare are supported by clinicians’ tacit knowledge derived from their clinical experiences. In this situation, evidence generated from a systematic review of text and opinion may be required as the best available evidence. The aim of this study is to highlight the importance and role of expert opinion synthesis in healthcare, and present results of an international methodological group review. Methods: A methodological group was formed to review this approach, and update the guidance and processes for undertaking a systematic review of text and opinion. Results: The methodology of systematic reviews of text and opinion had already been developed by the Joanna Briggs Institute. We reviewed and updated several steps in the process, such as inclusion criteria, search strategy, critical appraisal and data extraction. Conclusions: The Joanna Briggs Institute methodology for the systematic review of text and opinion is unique, and continuously evolving and being further developed. Systematic reviews of text and opinion may be considered as legitimate sources of evidence, especially when there is an absence of other research designs.},
	pages = {188--195},
	number = {3},
	journaltitle = {International Journal of Evidence-Based Healthcare},
	author = {{McArthur}, Alexa and Klugárová, Jitka and Yan, Hu and Florescu, Silvia},
	urldate = {2022-07-11},
	date = {2015-09},
	langid = {english},
	file = {McArthur et al. - 2015 - Innovations in the systematic review of text and o.pdf:C\:\\Users\\33623\\Zotero\\storage\\HFMBGZUI\\McArthur et al. - 2015 - Innovations in the systematic review of text and o.pdf:application/pdf},
}

@article{piepenbrink_topic_2017,
	title = {Topic models as a novel approach to identify themes in content analysis},
	volume = {2017},
	issn = {0065-0668, 2151-6561},
	url = {http://journals.aom.org/doi/10.5465/AMBPP.2017.141},
	doi = {10.5465/AMBPP.2017.141},
	abstract = {We present the key features of topic modeling based on Latent Dirichlet Allocation ({LDA}), and demonstrate its application by analyzing Organization Research Methods articles since its inception. Our analysis, based on 421 {ORM} articles reveals 15 topics, which are quite similar to other, more human intensive review exercises.},
	pages = {11335},
	number = {1},
	journaltitle = {Academy of Management Proceedings},
	author = {Piepenbrink, Anke and Gaur, Ajai Singh},
	urldate = {2022-07-11},
	date = {2017-08},
	langid = {english},
	file = {Piepenbrink et Gaur - 2017 - Topic models as a novel approach to identify theme.pdf:C\:\\Users\\33623\\Zotero\\storage\\RLMS3CU6\\Piepenbrink et Gaur - 2017 - Topic models as a novel approach to identify theme.pdf:application/pdf},
}

@article{atalay_natural_2019,
	title = {A Natural Language Processing Approach to Predicting the Persuasiveness of Marketing Communications},
	issn = {1556-5068},
	url = {https://www.ssrn.com/abstract=3410351},
	doi = {10.2139/ssrn.3410351},
	journaltitle = {{SSRN} Electronic Journal},
	shortjournal = {{SSRN} Journal},
	author = {Atalay, A. Selin and El Kihal, Siham and Ellsäßer, Florian},
	urldate = {2022-07-11},
	date = {2019},
	langid = {english},
	file = {Atalay et al. - 2019 - A Natural Language Processing Approach to Predicti.pdf:C\:\\Users\\33623\\Zotero\\storage\\M3LSLA4X\\Atalay et al. - 2019 - A Natural Language Processing Approach to Predicti.pdf:application/pdf},
}

@article{liu_examining_2021,
	title = {Examining the impact of luxury brand's social media marketing on customer engagement: Using big data analytics and natural language processing},
	volume = {125},
	issn = {01482963},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0148296319302954},
	doi = {10.1016/j.jbusres.2019.04.042},
	shorttitle = {Examining the impact of luxury brand's social media marketing on customer engagement},
	abstract = {This research utilizes big data in investigating the impact of a luxury brand's social media marketing activities on customer engagement. In particular, applying the dual perspective of customer engagement, this research examines the influence of focusing on the entertainment, interaction, trendiness, and customization dimensions of a luxury brand's social media activities on customer engagement with brand-related social media content. Using big data retrieved from a 60-month period on Twitter (July 2012 to June 2017), this paper analyzes 3.78 million tweets from the top 15 luxury brands with the highest number of Twitter followers. The results indicate that focusing on the entertainment, interaction, and trendiness dimensions of a luxury brand's social media marketing efforts significantly increases customer engagement, while focusing on the customization dimension does not. The findings have important implications for the design, delivery, and management of social media marketing for luxury brands to engage customers with social media content.},
	pages = {815--826},
	journaltitle = {Journal of Business Research},
	shortjournal = {Journal of Business Research},
	author = {Liu, Xia and Shin, Hyunju and Burns, Alvin C.},
	urldate = {2022-07-11},
	date = {2021-03},
	langid = {english},
	file = {Liu et al. - 2021 - Examining the impact of luxury brand's social medi.pdf:C\:\\Users\\33623\\Zotero\\storage\\Y9BIG4K7\\Liu et al. - 2021 - Examining the impact of luxury brand's social medi.pdf:application/pdf},
}

@article{piris_customer_2021,
	title = {Customer satisfaction and natural language processing},
	volume = {124},
	issn = {01482963},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0148296320308249},
	doi = {10.1016/j.jbusres.2020.11.065},
	abstract = {This study uses natural language processing in order to increase knowledge concerning customer satisfaction. A total of 12,000 customer returns were analyzed, 6,800 of which contained freely expressed qualitative feedback. Eight themes emerge from the analysis and bring to light the factors influencing satisfaction. It is also noted that satisfaction is not vertical or horizontal but can involve a more or less important combination of themes. This study also shows the link between the level of satisfaction and the number of themes addressed, thus challenging traditional approaches that do not seem to distinguish the discursive differences between satisfied and dissatisfied customers. Finally, this investigation lays the foundations for automatic and personalized processing of customer comments.},
	pages = {264--271},
	journaltitle = {Journal of Business Research},
	shortjournal = {Journal of Business Research},
	author = {Piris, Yolande and Gay, Anne-Cécile},
	urldate = {2022-07-11},
	date = {2021-01},
	langid = {english},
	file = {Piris et Gay - 2021 - Customer satisfaction and natural language process.pdf:C\:\\Users\\33623\\Zotero\\storage\\FDFQSVBW\\Piris et Gay - 2021 - Customer satisfaction and natural language process.pdf:application/pdf},
}

@article{hartmann_power_2021,
	title = {The Power of Brand Selfies},
	volume = {58},
	issn = {0022-2437, 1547-7193},
	url = {http://journals.sagepub.com/doi/10.1177/00222437211037258},
	doi = {10.1177/00222437211037258},
	abstract = {Smartphones have made it nearly effortless to share images of branded experiences. This research classifies social media brand imagery and studies user response. Aside from packshots (standalone product images), two types of brand-related selfie images appear online: consumer selfies (featuring brands and consumers’ faces) and an emerging phenomenon the authors term “brand selfies” (invisible consumers holding a branded product). The authors use convolutional neural networks to identify these archetypes and train language models to infer social media response to more than a quarter-million brand-image posts (185 brands on Twitter and Instagram). They find that consumer-selfie images receive more sender engagement (i.e., likes and comments), whereas brand selfies result in more brand engagement, expressed by purchase intentions. These results cast doubt on whether conventional social media metrics are appropriate indicators of brand engagement. Results for display ads are consistent with this observation, with higher click-through rates for brand selfies than for consumer selfies. A controlled lab experiment suggests that self-reference is driving the differential response to selfie images. Collectively, these results demonstrate how (interpretable) machine learning helps extract marketing-relevant information from unstructured multimedia content and that selfie images are a matter of perspective in terms of actual brand engagement.},
	pages = {1159--1177},
	number = {6},
	journaltitle = {Journal of Marketing Research},
	shortjournal = {Journal of Marketing Research},
	author = {Hartmann, Jochen and Heitmann, Mark and Schamp, Christina and Netzer, Oded},
	urldate = {2022-07-11},
	date = {2021-12},
	langid = {english},
	file = {Hartmann et al. - 2021 - The Power of Brand Selfies.pdf:C\:\\Users\\33623\\Zotero\\storage\\NXR69KH9\\Hartmann et al. - 2021 - The Power of Brand Selfies.pdf:application/pdf},
}

@article{grewal_marketing_2021,
	title = {Marketing Insights from Multimedia Data: Text, Image, Audio, and Video},
	volume = {58},
	issn = {0022-2437, 1547-7193},
	url = {http://journals.sagepub.com/doi/10.1177/00222437211054601},
	doi = {10.1177/00222437211054601},
	shorttitle = {Marketing Insights from Multimedia Data},
	pages = {1025--1033},
	number = {6},
	journaltitle = {Journal of Marketing Research},
	shortjournal = {Journal of Marketing Research},
	author = {Grewal, Rajdeep and Gupta, Sachin and Hamilton, Rebecca},
	urldate = {2022-07-11},
	date = {2021-12},
	langid = {english},
	file = {Grewal et al. - 2021 - Marketing Insights from Multimedia Data Text, Ima.pdf:C\:\\Users\\33623\\Zotero\\storage\\9REDRB5Q\\Grewal et al. - 2021 - Marketing Insights from Multimedia Data Text, Ima.pdf:application/pdf},
}

@inproceedings{fan_using_2019,
	location = {Hong Kong, China},
	title = {Using Local Knowledge Graph Construction to Scale {Seq2Seq} Models to Multi-Document Inputs},
	url = {https://www.aclweb.org/anthology/D19-1428},
	doi = {10.18653/v1/D19-1428},
	abstract = {Query-based open-domain {NLP} tasks require information synthesis from long and diverse web results. Current approaches extractively select portions of web text as input to Sequence-to-Sequence models using methods such as {TF}-{IDF} ranking. We propose constructing a local graph structured knowledge base for each query, which compresses the web search information and reduces redundancy. We show that by linearizing the graph into a structured input sequence, models can encode the graph representations within a standard Sequence-to-Sequence setting. For two generative tasks with very long text input, long-form question answering and multidocument summarization, feeding graph representations as input can achieve better performance than using retrieved text portions.},
	eventtitle = {2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing ({EMNLP}-{IJCNLP})},
	pages = {4184--4194},
	booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing ({EMNLP}-{IJCNLP})},
	publisher = {Association for Computational Linguistics},
	author = {Fan, Angela and Gardent, Claire and Braud, Chloé and Bordes, Antoine},
	urldate = {2022-07-11},
	date = {2019},
	langid = {english},
	file = {Fan et al. - 2019 - Using Local Knowledge Graph Construction to Scale .pdf:C\:\\Users\\33623\\Zotero\\storage\\4NXU9MHL\\Fan et al. - 2019 - Using Local Knowledge Graph Construction to Scale .pdf:application/pdf},
}

% NOTE(review): this looks like course slides or notes rather than a journal article. The exported journaltitle "Machine Learning" only repeated the tail of the title, and "pages = 29" was a page count. Confirm the source type and add a date and URL when known.
@misc{lascarides_semantics_nodate,
	title = {Semantics and Pragmatics of {NLP}},
	subtitle = {Lexical Semantics: Machine Learning},
	pagetotal = {29},
	author = {Lascarides, Alex},
	langid = {english},
	file = {Lascarides - Semantics and Pragmatics of NLP  Lexical Semantics.pdf:C\:\\Users\\33623\\Zotero\\storage\\XL4UQL4A\\Lascarides - Semantics and Pragmatics of NLP  Lexical Semantics.pdf:application/pdf},
}

@article{wagner-egger_truth_2007,
	title = {The Truth Lies Elsewhere: Correlates of Belief in Conspiracy Theories},
	volume = {20},
	issn = {0992-986X},
	shorttitle = {The Truth Lies Elsewhere},
	abstract = {Little empirical research about conspiracy theories ({CT}) has been conducted in psychology. By conspiracy theory we mean a lay theory about socially significant and negative events (i.e. assassinations, terrorist attacks, etc.), which often implies the intervention of one or more groups acting in secret, and contradicting the official account of the event. In this paper, we address two research questions: (1) Is the belief in conspiracy theories a single dimension, or is it possible to disentangle several subdimensions? (2) Which variables are likely to predict this belief? In a questionnaire submitted to 198 university students, we identified two partially distinct categories of conspiracy theories: (1) {CTs} which blame authorities (the system) and (2) the {CTs} which accuse a minority (e.g. Jews, Muslim terrorists). Finally, we showed that fear and distrust predicted the two types of {CTs}. Moreover, irrationality specifically predicted the belief in the System {CTs}, whereas political conservatism predicted the belief in the Minority {CTs}.},
	pages = {31--61},
	number = {4},
	journaltitle = {Revue internationale de psychologie sociale},
	author = {Wagner-Egger, Pascal and Bangerter, Adrian},
	date = {2007},
	langid = {french},
	note = {{OCLC}: 7290788924},
	file = {Wagner-Egger et Bangerter - 2007 - The Truth Lies Elsewhere Correlates of Belief in .pdf:C\:\\Users\\33623\\Zotero\\storage\\ADXNUNTS\\Wagner-Egger et Bangerter - 2007 - The Truth Lies Elsewhere Correlates of Belief in .pdf:application/pdf},
}

% The citation key keeps its original spelling ("quantication") so existing citations still resolve. The file path keeps the ligature characters because it has to match the attachment name on disk.
@article{deveaud_quantication_nodate,
	title = {Quantification et identification des concepts implicites d’une requête},
	abstract = {In this paper we introduce an unsupervised method for mining and modeling latent search concepts. We use Latent Dirichlet Allocation ({LDA}), a generative probabilistic topic model, to exhibit highly-specific query-related topics from pseudo-relevant feedback documents. Our approach automatically estimates the number of latent concepts as well as the needed amount of feedback documents, without any prior training step. Latent concepts are then weighted to reflect their relative adequacy and are further used to automatically reformulate the initial user query. We also explore the use of different types of sources of information for modeling the latent concepts. For this purpose, we use four general sources of information of various nature (web, news, encyclopedic) from which the feedback documents are extracted. We evaluate our approach over two large ad-hoc {TREC} collections, and results show that it significantly improves document retrieval effectiveness while best results are achieved by combining latent concepts modeled from all available sources.},
	pages = {16},
	author = {Deveaud, Romain and Bonnefoy, Ludovic and Bellot, Patrice},
	langid = {french},
	file = {Deveaud et al. - Quantiﬁcation et identiﬁcation des concepts implic.pdf:C\:\\Users\\33623\\Zotero\\storage\\DLZ9EAZQ\\Deveaud et al. - Quantiﬁcation et identiﬁcation des concepts implic.pdf:application/pdf},
}

% The citation key is kept unchanged so existing citations still resolve. The exporter had listed the authors' affiliations as extra authors; the author field now holds only the people.
@article{university_of_virginia_big_2016,
	title = {Big Data Research in Information Systems: Toward an Inclusive Research Agenda},
	volume = {17},
	issn = {15369323},
	url = {http://aisel.aisnet.org/jais/vol17/iss2/3/},
	doi = {10.17705/1jais.00423},
	shorttitle = {Big Data Research in Information Systems},
	abstract = {Big data has received considerable attention from the information systems ({IS}) discipline over the past few years, with several recent commentaries, editorials, and special issue introductions on the topic appearing in leading {IS} outlets. These papers present varying perspectives on promising big data research topics and highlight some of the challenges that big data poses. In this editorial, we synthesize and contribute further to this discourse. We offer a first step toward an inclusive big data research agenda for {IS} by focusing on the interplay between big data’s characteristics, the information value chain encompassing people-process-technology, and the three dominant {IS} research traditions (behavioral, design, and economics of {IS}). We view big data as a disruption to the value chain that has widespread impacts, which include but are not limited to changing the way academics conduct scholarly work. Importantly, we critically discuss the opportunities and challenges for behavioral, design science, and economics of {IS} research and the emerging implications for theory and methodology arising due to big data’s disruptive effects.},
	pages = {I--XXXII},
	number = {2},
	journaltitle = {Journal of the Association for Information Systems},
	shortjournal = {{JAIS}},
	author = {Abbasi, Ahmed and Sarker, Suprateek and Chiang, Roger},
	urldate = {2022-07-11},
	date = {2016-02},
	langid = {english},
	file = {University of Virginia et al. - 2016 - Big Data Research in Information Systems Toward a.pdf:C\:\\Users\\33623\\Zotero\\storage\\2X8TKJCA\\University of Virginia et al. - 2016 - Big Data Research in Information Systems Toward a.pdf:application/pdf},
}

@article{afful-dadzie_liberation_2017,
	title = {Liberation of public data: Exploring central themes in open government data and freedom of information research},
	volume = {37},
	issn = {02684012},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S026840121630487X},
	doi = {10.1016/j.ijinfomgt.2017.05.009},
	shorttitle = {Liberation of public data},
	abstract = {This paper conducts a comparative literature survey of Open Government Data ({OGD}) and Freedom of Information ({FOI}), with a view to tracking the central themes in the two civil society campaigns. With seeming similarities and a growing popularity in research, the major themes framing research on the two movements have not clearly emerged. Topic modelling, text mining and document analysis methods are used to extract the themes as well as key named entities. The topics are subsequently labeled and with expert guidance, their semantic meaning are provided. The results indicate that the major theme in {FOI} research borders on issues relating to disclosure, publishing, access and cost of requests. On the other hand, themes in {OGD} research have largely centered on technology and related concepts. The approach also helped in determining key similarities and differences in the two campaigns as reported in research.},
	pages = {664--672},
	number = {6},
	journaltitle = {International Journal of Information Management},
	shortjournal = {International Journal of Information Management},
	author = {Afful-Dadzie, Eric and Afful-Dadzie, Anthony},
	urldate = {2022-07-11},
	date = {2017-12},
	langid = {english},
	file = {Afful-Dadzie et Afful-Dadzie - 2017 - Liberation of public data Exploring central theme.pdf:C\:\\Users\\33623\\Zotero\\storage\\JQ79HGNR\\Afful-Dadzie et Afful-Dadzie - 2017 - Liberation of public data Exploring central theme.pdf:application/pdf},
}

@article{sharma_future_2018,
	author = {Sharma, Radhika and Ahuja, Vandana and Alavi, Shirin},
	title = {The Future Scope of Netnography and Social Network Analysis in the Field of Marketing},
	journaltitle = {Journal of Internet Commerce},
	shortjournal = {Journal of Internet Commerce},
	volume = {17},
	number = {1},
	pages = {26--45},
	date = {2018-01-02},
	issn = {1533-2861, 1533-287X},
	doi = {10.1080/15332861.2017.1423533},
	url = {https://www.tandfonline.com/doi/full/10.1080/15332861.2017.1423533},
	urldate = {2022-07-11},
	langid = {english},
	abstract = {The marketing landscape has evolved to a great extent with the advent of Internet strategy integration, refined Internet marketing metrics, increase in wireless networking, rising consumer ownership of computers, the era of big data and e-commerce, influencer marketing, and the evolution of the Internet. To tap this virtual environment, newer methods of Internet research are required. This article studies the usage of two Internet research methodologies—Netnography and Social Network Analysis—and explores their potential toward the domain of marketing. An exhaustive literature review was conducted and after analyzing the previously published literature in this domain, it is seen that a clear link between these methodologies and the field of marketing has not been established. With a detailed analysis of previously published research work, using these two methodologies the authors are able to derive correlations with marketing concepts and are able to establish the future potential of the two methodologies of Netnography and Social Network Analysis as marketing research methodologies.},
	file = {Sharma et al. - 2018 - The Future Scope of Netnography and Social Network.pdf:C\:\\Users\\33623\\Zotero\\storage\\7G48CRFM\\Sharma et al. - 2018 - The Future Scope of Netnography and Social Network.pdf:application/pdf},
}

@misc{das_cobweb_2019,
	title = {{CobWeb}: A Research Prototype for Exploring User Bias in Political Fact-Checking},
	url = {http://arxiv.org/abs/1907.03718},
	shorttitle = {{CobWeb}},
	abstract = {The effect of user bias in fact-checking has not been explored extensively from a user-experience perspective. We estimate the user bias as a function of the user’s perceived reputation of the news sources (e.g., a user with liberal beliefs may tend to trust liberal sources). We build an interface to communicate the role of estimated user bias in the context of a fact-checking task. We also explore the utility of helping users visualize their detected level of bias. 80\% of the users of our system find that the presence of an indicator for user bias is useful in judging the veracity of a political claim.},
	number = {{arXiv}:1907.03718},
	publisher = {{arXiv}},
	author = {Das, Anubrata and Mehta, Kunjan and Lease, Matthew},
	urldate = {2022-07-11},
	date = {2019-07-08},
	langid = {english},
	eprinttype = {arxiv},
	eprint = {1907.03718},
	eprintclass = {cs.IR},
	keywords = {Computer Science - Information Retrieval, Computer Science - Computers and Society},
	file = {Das et al. - 2019 - CobWeb A Research Prototype for Exploring User Bi.pdf:C\:\\Users\\33623\\Zotero\\storage\\H5LM4WRC\\Das et al. - 2019 - CobWeb A Research Prototype for Exploring User Bi.pdf:application/pdf},
}

@article{armstrong_social_2019,
	title = {The social life of data points: Antecedents of digital technologies},
	volume = {49},
	issn = {0306-3127, 1460-3659},
	url = {http://journals.sagepub.com/doi/10.1177/0306312718821726},
	doi = {10.1177/0306312718821726},
	shorttitle = {The social life of data points},
	abstract = {Recent technological advances such as microprocessors and random-access memory have had a significant role in gathering, storing and processing digital data, but the basic principles underpinning such data management were established in the century preceding the digital revolution. This paper maps the emergence of those older technologies to show that the logic and imperative for the surveillance potential of more recent digital technologies was laid down in a pre-digital age. The paper focuses on the development of the data point from its use in punch cards in the late 19th century through its manipulation in ideas about correlation to its collection via self-completion questionnaires. Some ways in which medicine and psychology have taken up and deployed the technology of data points are used as illustrative exemplars. The paper concludes with a discussion of the role of data points in defining human identity.},
	pages = {102--117},
	number = {1},
	journaltitle = {Social Studies of Science},
	shortjournal = {Soc Stud Sci},
	author = {Armstrong, David},
	urldate = {2022-07-11},
	date = {2019-02},
	langid = {english},
	file = {Armstrong - 2019 - The social life of data points Antecedents of dig.pdf:C\:\\Users\\33623\\Zotero\\storage\\SIWWPGF9\\Armstrong - 2019 - The social life of data points Antecedents of dig.pdf:application/pdf},
}

@article{vermeer_seeing_2019,
	title = {Seeing the wood for the trees: How machine learning can help firms in identifying relevant electronic word-of-mouth in social media},
	volume = {36},
	issn = {01678116},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0167811619300102},
	doi = {10.1016/j.ijresmar.2019.01.010},
	shorttitle = {Seeing the wood for the trees},
	abstract = {The increasing volume of firm-related conversations on social media has made it considerably more difficult for marketers to track and analyse electronic word-of-mouth ({eWOM}) about brands, products or services. Firms often use sentiment analysis to identify relevant {eWOM} that requires a response to consequently engage in webcare. In this paper, we show that sentiment analysis of any kind might not be ideal for this purpose, because it relies on the questionable assumption that only negative {eWOM} is response-worthy and it is not able to infer meaning from text. We propose and test an approach based on supervised machine learning that first decides whether {eWOM} is relevant for the brand to respond, and then—based on a categorization of seven different types of {eWOM} (e.g., question, complaint)—classifies three customer satisfaction dimensions. Using a dataset of approximately 60,000 Facebook comments and 11,000 tweets about 16 different brands in eight different industries, we test and compare the efficacy of various sentiment analysis, dictionary-based and machine learning techniques to detect relevant {eWOM}. In doing so, this study identifies response-worthy {eWOM} based on the content instead of its expressed sentiment. The results indicate that these machine learning techniques achieve considerably higher accuracy in detecting relevant {eWOM} on social media compared to any kind of sentiment analysis. Moreover, it is shown that industry-specific classifiers can further improve this process and that algorithms are applicable across different social networks.},
	pages = {492--508},
	number = {3},
	journaltitle = {International Journal of Research in Marketing},
	shortjournal = {International Journal of Research in Marketing},
	author = {Vermeer, Susan A. M. and Araujo, Theo and Bernritter, Stefan F. and van Noort, Guda},
	urldate = {2022-07-11},
	date = {2019-09},
	langid = {english},
	file = {Vermeer et al. - 2019 - Seeing the wood for the trees How machine learnin.pdf:C\:\\Users\\33623\\Zotero\\storage\\IYGHRZMB\\Vermeer et al. - 2019 - Seeing the wood for the trees How machine learnin.pdf:application/pdf},
}

@article{skinner_first-order_2020,
	title = {First-order transition in a model of prestige bias},
	volume = {8},
	issn = {2542-4653},
	url = {http://arxiv.org/abs/1910.05813},
	doi = {10.21468/SciPostPhys.8.2.030},
	abstract = {One of the major benefits of belonging to a prestigious group is that it affects the way you are viewed by others. Here I use a simple mathematical model to explore the implications of this "prestige bias" when candidates undergo repeated rounds of evaluation. In the model, candidates who are evaluated most highly are admitted to a "prestige class", and their membership biases future rounds of evaluation in their favor. I use the language of Bayesian inference to describe this bias, and show that it can lead to a runaway effect in which the weight given to the prior expectation associated with a candidate's class becomes stronger with each round. Most dramatically, the strength of the prestige bias after many rounds undergoes a first-order transition as a function of the precision of the examination on which the evaluation is based.},
	pages = {030},
	number = {2},
	journaltitle = {{SciPost} Physics},
	shortjournal = {{SciPost} Phys.},
	author = {Skinner, Brian},
	urldate = {2022-07-11},
	date = {2020-02-19},
	langid = {english},
	eprinttype = {arxiv},
	eprint = {1910.05813},
	eprintclass = {physics.soc-ph},
	keywords = {Physics - Physics and Society, Condensed Matter - Statistical Mechanics},
	file = {Skinner - 2020 - First-order transition in a model of prestige bias.pdf:C\:\\Users\\33623\\Zotero\\storage\\6J8IIP94\\Skinner - 2020 - First-order transition in a model of prestige bias.pdf:application/pdf},
}

@misc{schuster_cross-lingual_2019,
	title = {Cross-Lingual Transfer Learning for Multilingual Task Oriented Dialog},
	url = {http://arxiv.org/abs/1810.13327},
	abstract = {One of the first steps in the utterance interpretation pipeline of many task-oriented conversational {AI} systems is to identify user intents and the corresponding slots. Since data collection for machine learning models for this task is time-consuming, it is desirable to make use of existing data in a high-resource language to train models in low-resource languages. However, development of such models has largely been hindered by the lack of multilingual training data. In this paper, we present a new data set of 57k annotated utterances in English (43k), Spanish (8.6k) and Thai (5k) across the domains weather, alarm, and reminder. We use this data set to evaluate three different cross-lingual transfer methods: (1) translating the training data, (2) using cross-lingual pre-trained embeddings, and (3) a novel method of using a multilingual machine translation encoder as contextual word representations. We find that given several hundred training examples in the the target language, the latter two methods outperform translating the training data. Further, in very low-resource settings, multilingual contextual word representations give better results than using cross-lingual static embeddings. We also compare the cross-lingual methods to using monolingual resources in the form of contextual {ELMo} representations and find that given just small amounts of target language data, this method outperforms all cross-lingual methods, which highlights the need for more sophisticated cross-lingual methods.},
	number = {{arXiv}:1810.13327},
	publisher = {{arXiv}},
	author = {Schuster, Sebastian and Gupta, Sonal and Shah, Rushin and Lewis, Mike},
	urldate = {2022-07-11},
	date = {2019-04-01},
	langid = {english},
	eprinttype = {arxiv},
	eprint = {1810.13327},
	eprintclass = {cs.CL},
	keywords = {Computer Science - Computation and Language},
	file = {Schuster et al. - 2019 - Cross-Lingual Transfer Learning for Multilingual T.pdf:C\:\\Users\\33623\\Zotero\\storage\\HV5WP8E4\\Schuster et al. - 2019 - Cross-Lingual Transfer Learning for Multilingual T.pdf:application/pdf},
}

@article{jiang_clicking_2020,
	author = {Jiang, Guoyin and Feng, Xiaodong and Liu, Wenping and Liu, Xingjun},
	title = {Clicking position and user posting behavior in online review systems: A data-driven agent-based modeling approach},
	shorttitle = {Clicking position and user posting behavior in online review systems},
	journaltitle = {Information Sciences},
	shortjournal = {Information Sciences},
	volume = {512},
	pages = {161--174},
	date = {2020-02},
	issn = {00200255},
	doi = {10.1016/j.ins.2019.09.053},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0020025519309089},
	urldate = {2022-07-11},
	langid = {english},
	file = {Jiang et al. - 2020 - Clicking position and user posting behavior in onl.pdf:C\:\\Users\\33623\\Zotero\\storage\\TVFVG4IK\\Jiang et al. - 2020 - Clicking position and user posting behavior in onl.pdf:application/pdf},
}

@misc{ettinger_what_2020,
	title = {What {BERT} is not: Lessons from a new suite of psycholinguistic diagnostics for language models},
	url = {http://arxiv.org/abs/1907.13528},
	shorttitle = {What {BERT} is not},
	abstract = {Pre-training by language modeling has become a popular and successful approach to {NLP} tasks, but we have yet to understand exactly what linguistic capacities these pretraining processes confer upon models. In this paper we introduce a suite of diagnostics drawn from human language experiments, which allow us to ask targeted questions about information used by language models for generating predictions in context. As a case study, we apply these diagnostics to the popular {BERT} model, finding that it can generally distinguish good from bad completions involving shared category or role reversal, albeit with less sensitivity than humans, and it robustly retrieves noun hypernyms, but it struggles with challenging inference and role-based event prediction—and in particular, it shows clear insensitivity to the contextual impacts of negation.},
	number = {{arXiv}:1907.13528},
	publisher = {{arXiv}},
	author = {Ettinger, Allyson},
	urldate = {2022-07-11},
	date = {2020-07-13},
	langid = {english},
	eprinttype = {arxiv},
	eprint = {1907.13528},
	eprintclass = {cs.CL},
	keywords = {Computer Science - Computation and Language, Computer Science - Artificial Intelligence},
	file = {Ettinger - 2020 - What BERT is not Lessons from a new suite of psyc.pdf:C\:\\Users\\33623\\Zotero\\storage\\JNV6D2EV\\Ettinger - 2020 - What BERT is not Lessons from a new suite of psyc.pdf:application/pdf},
}

@article{naseem_transformer_2020,
	title = {Transformer based Deep Intelligent Contextual Embedding for {Twitter} sentiment analysis},
	volume = {113},
	issn = {0167739X},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0167739X2030306X},
	doi = {10.1016/j.future.2020.06.050},
	abstract = {Along with the emergence of the Internet, the rapid development of handheld devices has democratized content creation due to the extensive use of social media and has resulted in an explosion of short informal texts. Although a sentiment analysis of these texts is valuable for many reasons, this task is often perceived as a challenge given that these texts are often short, informal, noisy, and rich in language ambiguities, such as polysemy. Moreover, most of the existing sentiment analysis methods are based on clean data. In this paper, we present {DICET}, a transformer-based method for sentiment analysis that encodes representation from a transformer and applies deep intelligent contextual embedding to enhance the quality of tweets by removing noise while taking word sentiments, polysemy, syntax, and semantic knowledge into account. We also use the bidirectional long- and short-term memory network to determine the sentiment of a tweet. To validate the performance of the proposed framework, we perform extensive experiments on three benchmark datasets, and results show that {DICET} considerably outperforms the state of the art in sentiment classification.},
	pages = {58--69},
	journaltitle = {Future Generation Computer Systems},
	shortjournal = {Future Generation Computer Systems},
	author = {Naseem, Usman and Razzak, Imran and Musial, Katarzyna and Imran, Muhammad},
	urldate = {2022-07-11},
	date = {2020-12},
	langid = {english},
	file = {Naseem et al. - 2020 - Transformer based Deep Intelligent Contextual Embe.pdf:C\:\\Users\\33623\\Zotero\\storage\\KU4ZJBNN\\Naseem et al. - 2020 - Transformer based Deep Intelligent Contextual Embe.pdf:application/pdf},
}

@misc{asai_learning_2020,
	title = {Learning to Retrieve Reasoning Paths over {Wikipedia} Graph for Question Answering},
	url = {http://arxiv.org/abs/1911.10470},
	abstract = {Answering questions that require multi-hop reasoning at web-scale necessitates retrieving multiple evidence documents, one of which often has little lexical or semantic relationship to the question. This paper introduces a new graph-based recurrent retrieval approach that learns to retrieve reasoning paths over the Wikipedia graph to answer multi-hop open-domain questions. Our retriever model trains a recurrent neural network that learns to sequentially retrieve evidence paragraphs in the reasoning path by conditioning on the previously retrieved documents. Our reader model ranks the reasoning paths and extracts the answer span included in the best reasoning path. Experimental results show state-of-the-art results in three open-domain {QA} datasets, showcasing the effectiveness and robustness of our method. Notably, our method achieves significant improvement in {HotpotQA}, outperforming the previous best model by more than 14 points.},
	number = {{arXiv}:1911.10470},
	publisher = {{arXiv}},
	author = {Asai, Akari and Hashimoto, Kazuma and Hajishirzi, Hannaneh and Socher, Richard and Xiong, Caiming},
	urldate = {2022-07-11},
	date = {2020-02-14},
	langid = {english},
	eprinttype = {arxiv},
	eprint = {1911.10470},
	eprintclass = {cs.CL},
	keywords = {Computer Science - Computation and Language},
	file = {Asai et al. - 2020 - Learning to Retrieve Reasoning Paths over Wikipedi.pdf:C\:\\Users\\33623\\Zotero\\storage\\QS8P5TNM\\Asai et al. - 2020 - Learning to Retrieve Reasoning Paths over Wikipedi.pdf:application/pdf},
}

% NOTE(review): the exporter split the compound surname and left "VERDELHAN" in the given-name slot; the author is now entered as one family name. Confirm the spelling (with or without a hyphen) against the publication, and add the missing journaltitle.
@article{bourgade_lucien_2020,
	title = {Lucien Tesnière, professeur de linguistique à Montpellier de 1937 à 1954 : L’aventure d’une grammaire},
	volume = {51},
	abstract = {Professor Lucien Tesnière, one of the most celebrated linguists of his time, taught at the School of Humanities in Montpellier, in the south of France, for seventeen years. He was forceful, eccentric, prodigious in his knowledge of and capacity to analyze languages, and pedagogically innovative in how he taught them. Trained in historic and comparative grammar as well structural linguistics, he developed an entirely original theory of syntaxic parsing. Extensions of his theory have moved well beyond the academy, becoming the basis of natural language processing and its applications.},
	pages = {15},
	author = {Verdelhan Bourgade, Michèle},
	date = {2020},
	langid = {french},
	file = {Bourgade - 2020 - Lucien Tesnière, professeur de linguistique à Mont.pdf:C\:\\Users\\33623\\Zotero\\storage\\W2E78EGX\\Bourgade - 2020 - Lucien Tesnière, professeur de linguistique à Mont.pdf:application/pdf},
}

@inproceedings{wolf_transformers_2020,
	location = {Online},
	title = {Transformers: State-of-the-Art Natural Language Processing},
	url = {https://www.aclweb.org/anthology/2020.emnlp-demos.6},
	doi = {10.18653/v1/2020.emnlp-demos.6},
	shorttitle = {Transformers},
	abstract = {Recent progress in natural language processing has been driven by advances in both model architecture and model pretraining. Transformer architectures have facilitated building higher-capacity models and pretraining has made it possible to effectively utilize this capacity for a wide variety of tasks. Transformers is an open-source library with the goal of opening up these advances to the wider machine learning community. The library consists of carefully engineered state-of-the-art Transformer architectures under a unified {API}. Backing this library is a curated collection of pretrained models made by and available for the community. Transformers is designed to be extensible by researchers, simple for practitioners, and fast and robust in industrial deployments. The library is available at https://github.com/huggingface/transformers.},
	eventtitle = {2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
	pages = {38--45},
	booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
	publisher = {Association for Computational Linguistics},
	author = {Wolf, Thomas and Debut, Lysandre and Sanh, Victor and Chaumond, Julien and Delangue, Clement and Moi, Anthony and Cistac, Pierric and Rault, Tim and Louf, Remi and Funtowicz, Morgan and Davison, Joe and Shleifer, Sam and von Platen, Patrick and Ma, Clara and Jernite, Yacine and Plu, Julien and Xu, Canwen and Le Scao, Teven and Gugger, Sylvain and Drame, Mariama and Lhoest, Quentin and Rush, Alexander},
	urldate = {2022-07-11},
	date = {2020},
	langid = {english},
	file = {Wolf et al. - 2020 - Transformers State-of-the-Art Natural Language Pr.pdf:C\:\\Users\\33623\\Zotero\\storage\\RE377VIA\\Wolf et al. - 2020 - Transformers State-of-the-Art Natural Language Pr.pdf:application/pdf},
}

@article{rustam_performance_2021,
	title = {A performance comparison of supervised machine learning models for {Covid}-19 tweets sentiment analysis},
	volume = {16},
	issn = {1932-6203},
	url = {https://dx.plos.org/10.1371/journal.pone.0245909},
	doi = {10.1371/journal.pone.0245909},
	abstract = {The spread of Covid-19 has resulted in worldwide health concerns. Social media is increasingly used to share news and opinions about it. A realistic assessment of the situation is necessary to utilize resources optimally and appropriately. In this research, we perform Covid-19 tweets sentiment analysis using a supervised machine learning approach. Identification of Covid-19 sentiments from tweets would allow informed decisions for better handling the current pandemic situation. The used dataset is extracted from Twitter using {IDs} as provided by the {IEEE} data port. Tweets are extracted by an in-house built crawler that uses the Tweepy library. The dataset is cleaned using the preprocessing techniques and sentiments are extracted using the {TextBlob} library. The contribution of this work is the performance evaluation of various machine learning classifiers using our proposed feature set. This set is formed by concatenating the bag-of-words and the term frequency-inverse document frequency. Tweets are classified as positive, neutral, or negative. Performance of classifiers is evaluated on the accuracy, precision, recall, and F1 score. For completeness, further investigation is made on the dataset using the Long Short-Term Memory ({LSTM}) architecture of the deep learning model. The results show that Extra Trees Classifiers outperform all other models by achieving a 0.93 accuracy score using our proposed concatenated features set. The {LSTM} achieves low accuracy as compared to machine learning classifiers. To demonstrate the effectiveness of our proposed feature set, the results are compared with the Vader sentiment analysis technique based on the {GloVe} feature extraction approach.},
	pages = {e0245909},
	number = {2},
	journaltitle = {{PLOS} {ONE}},
	shortjournal = {{PLoS} {ONE}},
	author = {Rustam, Furqan and Khalid, Madiha and Aslam, Waqar and Rupapara, Vaibhav and Mehmood, Arif and Choi, Gyu Sang},
	editor = {Mumtaz, Wajid},
	urldate = {2022-07-11},
	date = {2021-02-25},
	langid = {english},
	file = {Rustam et al. - 2021 - A performance comparison of supervised machine lea.pdf:C\:\\Users\\33623\\Zotero\\storage\\WLBHK2JV\\Rustam et al. - 2021 - A performance comparison of supervised machine lea.pdf:application/pdf},
}

% NOTE(review): this entry has no journaltitle or date. The old pages value looked like
% a PDF page count, so it is stored as pagetotal; confirm against the published version.
@article{dowlagar_graph_nodate,
	title = {Graph Convolutional Networks with Multi-headed Attention for Code-Mixed Sentiment Analysis},
	abstract = {Code-mixing is a frequently observed phenomenon in multilingual communities where a speaker uses multiple languages in an utterance or sentence. Code-mixed texts are abundant, especially in social media, and pose a problem for {NLP} tools as they are typically trained on monolingual corpora. Recently, finding the sentiment from code-mixed text has been attempted by some researchers in {SentiMix} {SemEval} 2020 and {DravidianCodeMix} {FIRE} 2020 shared tasks. Mostly, the attempts include traditional methods, long short term memory, convolutional neural networks, and transformer models for code-mixed sentiment analysis ({CMSA}). However, no study has explored graph convolutional neural networks on {CMSA}. In this paper, we propose the graph convolutional networks ({GCN}) for sentiment analysis on code-mixed text. We have used the datasets from the {DravidianCodeMix} {FIRE} 2020. Our experimental results on multiple {CMSA} datasets demonstrate that the {GCN} with multi-headed attention model has shown an improvement in classification metrics.},
	pagetotal = {8},
	author = {Dowlagar, Suman and Mamidi, Radhika},
	langid = {english},
	file = {Dowlagar et Mamidi - Graph Convolutional Networks with Multi-headed Att.pdf:C\:\\Users\\33623\\Zotero\\storage\\Z5FWGUUW\\Dowlagar et Mamidi - Graph Convolutional Networks with Multi-headed Att.pdf:application/pdf},
}

@inproceedings{mcconnell_case-level_2021,
	location = {São Paulo, Brazil},
	title = {Case-level prediction of motion outcomes in civil litigation},
	isbn = {978-1-4503-8526-8},
	url = {https://dl.acm.org/doi/10.1145/3462757.3466101},
	doi = {10.1145/3462757.3466101},
	abstract = {Lawyers regularly predict court outcomes to make strategic decisions, including when, if at all, to sue or settle, what to argue, and how to reduce their clients’ liability risk. Yet, lawyer predictions tend to be poorly calibrated and biased, which exacerbate unjustifiable disparities in civil case outcomes. Current machine learning ({ML}) approaches for predicting court outcomes are typically constrained to final dispositions or are based on features unavailable in real-time during litigation, like judicial opinions. Here, we present the first {ML}-based methods to support lawyer and client decision making in real-time for motion filings in civil proceedings. Using the State of Connecticut Judicial Branch administrative data and court case documents, we trained six classifiers to predict motion to strike outcomes in tort and vehicular cases between July 1, 2004 and February 18, 2019. Integrating dense word embeddings from complaint documents, which contain information specific to the claims alleged, with the Judicial Branch data improved classification accuracy across all models. Subsequent models defined using a novel attorney case-entropy feature, dense word embeddings using corpus specific {TF}-{IDF} weightings, and algorithmic classification rules yielded the best predictor, Adaboost, with a classification accuracy of 64.4\%. An analysis of feature importance weights confirmed the usefulness of incorporating attorney case-entropy and natural language features from complaint documents. Since all features used in model training are available during litigation, these methods will help lawyers make better predictions than they otherwise could given disparities in lawyer and client resources. All {ML} models, training code, and evaluation scripts are available at https://github.com/aguiarlab/motionpredict.},
	eventtitle = {{ICAIL} '21: Eighteenth International Conference for Artificial Intelligence and Law},
	pages = {99--108},
	booktitle = {Proceedings of the Eighteenth International Conference on Artificial Intelligence and Law},
	publisher = {{ACM}},
	author = {{McConnell}, Devin J. and Zhu, James and Pandya, Sachin and Aguiar, Derek},
	urldate = {2022-07-11},
	date = {2021-06-21},
	langid = {english},
	file = {McConnell et al. - 2021 - Case-level prediction of motion outcomes in civil .pdf:C\:\\Users\\33623\\Zotero\\storage\\NVF4YEF3\\McConnell et al. - 2021 - Case-level prediction of motion outcomes in civil .pdf:application/pdf},
}

@article{tarnowska_nlp-based_2021,
	title = {{NLP}-Based Customer Loyalty Improvement Recommender System ({CLIRS}2)},
	volume = {5},
	issn = {2504-2289},
	url = {https://www.mdpi.com/2504-2289/5/1/4},
	doi = {10.3390/bdcc5010004},
	abstract = {Structured data on customer feedback is becoming more costly and timely to collect and organize. On the other hand, unstructured opinionated data, e.g., in the form of free-text comments, is proliferating and available on public websites, such as social media websites, blogs, forums, and websites that provide recommendations. This research proposes a novel method to develop a knowledge-based recommender system from unstructured (text) data. The method is based on applying an opinion mining algorithm, extracting aspect-based sentiment score per text item, and transforming text into a structured form. An action rule mining algorithm is applied to the data table constructed from sentiment mining. The proposed application of the method is the problem of improving customer satisfaction ratings. The results obtained from the dataset of customer comments related to the repair services were evaluated with accuracy and coverage. Further, the results were incorporated into the framework of a web-based user-friendly recommender system to advise the business on how to maximally increase their profits by introducing minimal sets of changes in their service. Experiments and evaluation results from comparing the structured data-based version of the system {CLIRS} (Customer Loyalty Improvement Recommender System) with the unstructured data-based version of the system ({CLIRS}2) are provided.},
	pages = {4},
	number = {1},
	journaltitle = {Big Data and Cognitive Computing},
	shortjournal = {{BDCC}},
	author = {Tarnowska, Katarzyna Anna and Ras, Zbigniew},
	urldate = {2022-07-11},
	date = {2021-01-19},
	langid = {english},
	file = {Tarnowska et Ras - 2021 - NLP-Based Customer Loyalty Improvement Recommender.pdf:C\:\\Users\\33623\\Zotero\\storage\\UBG2V3UQ\\Tarnowska et Ras - 2021 - NLP-Based Customer Loyalty Improvement Recommender.pdf:application/pdf},
}

% NOTE(review): removed the publisher's "EXPRESS:" online-first tag from title and shorttitle,
% and dropped a pages value that was only a truncated DOI suffix. Add volume, number and
% pages from the final issue once verified.
@article{boegershausen_express_2022,
	title = {Fields of Gold: Scraping Web Data for Marketing Insights},
	issn = {0022-2429, 1547-7185},
	url = {http://journals.sagepub.com/doi/10.1177/00222429221100750},
	doi = {10.1177/00222429221100750},
	shorttitle = {Fields of Gold},
	abstract = {Marketing scholars increasingly use web scraping and Application Programming Interfaces ({APIs}) to collect data from the internet. Yet, despite the widespread use of such web data, the idiosyncratic and sometimes insidious challenges in its collection have received limited attention. How can researchers ensure that the datasets generated via web scraping and {APIs} are valid? While existing resources emphasize technical details of extracting web data, the authors propose a novel methodological framework focused on enhancing its validity. In particular, the framework highlights how addressing validity concerns requires the joint consideration of idiosyncratic technical and legal/ethical questions along the three stages of collecting web data: selecting data sources, designing the data collection, and extracting the data. The authors further review more than 300 articles using web data published in the top five marketing journals and offer a typology of how web data has advanced marketing thought. The article concludes with directions for future research to identify promising web data sources and to embrace novel approaches for using web data to capture and describe evolving marketplace realities.},
	journaltitle = {Journal of Marketing},
	shortjournal = {Journal of Marketing},
	author = {Boegershausen, Johannes and Datta, Hannes and Borah, Abhishek and Stephen, Andrew T.},
	urldate = {2022-07-11},
	date = {2022-05-02},
	langid = {english},
	file = {Boegershausen et al. - 2022 - EXPRESS Fields of Gold Scraping Web Data for Mar.pdf:C\:\\Users\\33623\\Zotero\\storage\\G57JIZXB\\Boegershausen et al. - 2022 - EXPRESS Fields of Gold Scraping Web Data for Mar.pdf:application/pdf},
}

@misc{hamilton_diachronic_2018,
	title = {Diachronic Word Embeddings Reveal Statistical Laws of Semantic Change},
	url = {http://arxiv.org/abs/1605.09096},
	abstract = {Understanding how words change their meanings over time is key to models of language and cultural evolution, but historical data on meaning is scarce, making theories hard to develop and test. Word embeddings show promise as a diachronic tool, but have not been carefully evaluated. We develop a robust methodology for quantifying semantic change by evaluating word embeddings ({PPMI}, {SVD}, word2vec) against known historical changes. We then use this methodology to reveal statistical laws of semantic evolution. Using six historical corpora spanning four languages and two centuries, we propose two quantitative laws of semantic change: (i) the law of conformity—the rate of semantic change scales with an inverse power-law of word frequency; (ii) the law of innovation—independent of frequency, words that are more polysemous have higher rates of semantic change.},
	number = {{arXiv}:1605.09096},
	publisher = {{arXiv}},
	author = {Hamilton, William L. and Leskovec, Jure and Jurafsky, Dan},
	urldate = {2022-07-11},
	date = {2018-10-25},
	langid = {english},
	eprinttype = {arxiv},
	eprint = {1605.09096},
	eprintclass = {cs.CL},
	keywords = {Computer Science - Computation and Language},
	file = {Hamilton et al. - 2018 - Diachronic Word Embeddings Reveal Statistical Laws.pdf:C\:\\Users\\33623\\Zotero\\storage\\3N25AGFK\\Hamilton et al. - 2018 - Diachronic Word Embeddings Reveal Statistical Laws.pdf:application/pdf},
}

@article{jensen_credibility_2013,
	title = {Credibility of Anonymous Online Product Reviews: A Language Expectancy Perspective},
	volume = {30},
	issn = {0742-1222, 1557-928X},
	url = {https://www.tandfonline.com/doi/full/10.2753/MIS0742-1222300109},
	doi = {10.2753/MIS0742-1222300109},
	shorttitle = {Credibility of Anonymous Online Product Reviews},
	abstract = {Online reviews play a significant role in forming and shaping perceptions about a product. With the credibility of online reviewers a frequent question, this research investigates how potential buyers assess the credibility of anonymous reviewers. Technology separates the reviewer from the review, and potential buyers are left to rely on characteristics of the review itself to determine the credibility of the reviewer. By extending the language expectancy theory to the online setting, we develop hypotheses about how expectancy violations of lexical complexity, two-sidedness (highlighting positive and negative aspects of a product), and affect intensity influence credibility attributions. We present an experiment in which favorable experimental reviews were generated based on actual reviews for a digital camera. The results indicate that two-sidedness caused a positive expectancy violation resulting in greater credibility attribution. High affect intensity caused a negative expectancy violation resulting in lower credibility attribution. Finally, high reviewer credibility significantly improved perceptions of product quality. Our results demonstrate the importance of expectancies and violations when attributing credibility to anonymous individuals. Even small expectancy violations can meaningfully influence reviewer credibility and perceptions of products.},
	pages = {293--324},
	number = {1},
	journaltitle = {Journal of Management Information Systems},
	shortjournal = {Journal of Management Information Systems},
	author = {Jensen, Matthew L. and Averbeck, Joshua M. and Zhang, Zhu and Wright, Kevin B.},
	urldate = {2022-07-11},
	date = {2013-07},
	langid = {english},
	file = {Jensen et al. - 2013 - Credibility of Anonymous Online Product Reviews A.pdf:C\:\\Users\\33623\\Zotero\\storage\\8T7Q5RD7\\Jensen et al. - 2013 - Credibility of Anonymous Online Product Reviews A.pdf:application/pdf},
}

% NOTE(review): this entry has no journaltitle or date. The old pages value looked like
% a PDF page count, so it is stored as pagetotal; confirm against the published version.
@article{he_selection_nodate,
	title = {Selection bias in documenting online conversations},
	abstract = {Analyzing Twitter content for topic-specific interest and opinion among the public or even predicting outcomes of real-world events, such as elections or sports events, is a popular research topic. This paper investigates a more fundamental problem underlying the research on Twitter data — extracting topic-related documents with both high precision and high recall from this noisy online discussion. We propose an automated sequential approach to tackle this retrieval problem as well as a method for document classification and informative keyword selection. We illustrate the selection bias associated with unreliable retrieval method (e.g., considering only specific names) with four different variables — size, keywords, sentiment, and user interaction — which portray different stories depending on how a researcher controls the selection issue. In addition, we examine the ex-ante scenario and compare it with the more common ex-post examination. We conclude by arguing the potential improvements and applications of our system in this popular and growing field of drawing conclusions from online and social media data.},
	pagetotal = {27},
	author = {He, Ran and Rothschild, David},
	langid = {english},
	file = {He et Rothschild - Selection bias in documenting online conversations.pdf:C\:\\Users\\33623\\Zotero\\storage\\W2PDGRFL\\He et Rothschild - Selection bias in documenting online conversations.pdf:application/pdf},
}

% NOTE(review): this entry has no journaltitle or date, and the abstract describes a course
% handout, so the article type may be wrong. The old pages value looked like a PDF page
% count, so it is stored as pagetotal; confirm.
@article{briscoe_introduction_nodate,
	title = {Introduction to Linguistics for Natural Language Processing},
	abstract = {This handout is a guide to the linguistic theory and techniques of analysis that will be useful for the {ACS} language and speech modules. If you have done some (computational) linguistics, then reading it and attempting the questions interspersed in the text as well as the exercises will help you decide if you need to do any supplementary reading. If not, you will need to do some additional reading and then check your understanding by attempting the exercises. See the end of the handout for suggested readings – this handout is not meant to replace them. I will set additional (ticked) exercises during sessions which will be due in the following week. Ticks will contribute 20\% of the final mark assigned for the module. Successful completion of the assessed practicals will require an understanding of much of the material presented, so you are advised to attend all the sessions and do the supplementary exercises and reading.},
	pagetotal = {37},
	author = {Briscoe, Ted},
	langid = {english},
	file = {Briscoe - Introduction to Linguistics for Natural Language P.pdf:C\:\\Users\\33623\\Zotero\\storage\\EYAA9KZI\\Briscoe - Introduction to Linguistics for Natural Language P.pdf:application/pdf},
}

% NOTE(review): this entry has no journaltitle or date, and the abstract describes a talk,
% so the article type may be wrong. The old pages value looked like a PDF page count,
% so it is stored as pagetotal; confirm.
@article{miranda_tailoring_nodate,
	title = {Tailoring Media Monitoring with User Feedback},
	abstract = {Media monitoring is the activity of monitoring the output of the print, online and broadcast media to power the decision-making process of people and organizations (e.g., analysis of emerging technologies, competitive intelligence, public reputation, brand awareness). This is a resource intensive task which raises several challenges – The main issue discussed in this talk is how we can process and aggregate a vast amount of multilingual data to discover relevant stories, entities, topics and events; while at the same time meeting the specific information needs of each user. These can range from monitoring specific entities (competitors, brands, influencers), to coarse topics (e.g. “Aerospace Industry”) and even to fine-grained or ephemeral queries (e.g., “Return of the 737 Max model to service”). In this talk we will discuss how we can empower users with relevant and personalized content in the context of the media monitoring setting and introduce the approach Priberam is taking to the problems at hand; in particular by training text retrieval models on-the-fly from user feedback and integrating them in a media monitoring workflow.},
	pagetotal = {1},
	author = {Miranda, Sebastiao},
	langid = {english},
	file = {Miranda - Tailoring Media Monitoring with User Feedback.pdf:C\:\\Users\\33623\\Zotero\\storage\\U74ESPYT\\Miranda - Tailoring Media Monitoring with User Feedback.pdf:application/pdf},
}

@misc{eisenschlos_multifit_2020,
	title = {{MultiFiT}: Efficient Multi-lingual Language Model Fine-tuning},
	url = {http://arxiv.org/abs/1909.04761},
	shorttitle = {{MultiFiT}},
	abstract = {Pretrained language models are promising particularly for low-resource languages as they only require unlabelled data. However, training existing models requires huge amounts of compute, while pretrained cross-lingual models often underperform on low-resource languages. We propose Multi-lingual language model Fine-Tuning ({MultiFiT}) to enable practitioners to train and fine-tune language models efficiently in their own language. In addition, we propose a zero-shot method using an existing pretrained cross-lingual model. We evaluate our methods on two widely used cross-lingual classification datasets where they outperform models pretrained on orders of magnitude more data and compute. We release all models and code.},
	number = {{arXiv}:1909.04761},
	publisher = {{arXiv}},
	author = {Eisenschlos, Julian Martin and Ruder, Sebastian and Czapla, Piotr and Kardas, Marcin and Gugger, Sylvain and Howard, Jeremy},
	urldate = {2022-07-11},
	date = {2020-06-03},
	langid = {english},
	eprinttype = {arxiv},
	eprint = {1909.04761},
	eprintclass = {cs.CL},
	keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning},
	file = {Eisenschlos et al. - 2020 - MultiFiT Efficient Multi-lingual Language Model F.pdf:C\:\\Users\\33623\\Zotero\\storage\\YG7CYZJ4\\Eisenschlos et al. - 2020 - MultiFiT Efficient Multi-lingual Language Model F.pdf:application/pdf},
}

@inproceedings{huang_context-aware_2021,
	location = {São Paulo, Brazil},
	title = {Context-aware legal citation recommendation using deep learning},
	isbn = {978-1-4503-8526-8},
	url = {https://dl.acm.org/doi/10.1145/3462757.3466066},
	doi = {10.1145/3462757.3466066},
	abstract = {Lawyers and judges spend a large amount of time researching the proper legal authority to cite while drafting decisions. In this paper, we develop a citation recommendation tool that can help improve efficiency in the process of opinion drafting. We train four types of machine learning models, including a citation-list based method (collaborative filtering) and three context-based methods (text similarity, {BiLSTM} and {RoBERTa} classifiers). Our experiments show that leveraging local textual context improves recommendation, and that deep neural models achieve decent performance. We show that non-deep text-based methods benefit from access to structured case metadata, but deep models only benefit from such access when predicting from context of insufficient length. We also find that, even after extensive training, {RoBERTa} does not outperform a recurrent neural model, despite its benefits of pretraining. Our behavior analysis of the {RoBERTa} model further shows that predictive performance is stable across time and citation classes.},
	eventtitle = {{ICAIL} '21: Eighteenth International Conference for Artificial Intelligence and Law},
	pages = {79--88},
	booktitle = {Proceedings of the Eighteenth International Conference on Artificial Intelligence and Law},
	publisher = {{ACM}},
	author = {Huang, Zihan and Low, Charles and Teng, Mengqiu and Zhang, Hongyi and Ho, Daniel E. and Krass, Mark S. and Grabmair, Matthias},
	urldate = {2022-07-11},
	date = {2021-06-21},
	langid = {english},
	file = {Huang et al. - 2021 - Context-aware legal citation recommendation using .pdf:C\:\\Users\\33623\\Zotero\\storage\\MBRT6N6J\\Huang et al. - 2021 - Context-aware legal citation recommendation using .pdf:application/pdf},
}

@article{li_semantics_2021,
	title = {From semantics to pragmatics: where {IS} can lead in Natural Language Processing ({NLP}) research},
	volume = {30},
	issn = {0960-085X, 1476-9344},
	url = {https://www.tandfonline.com/doi/full/10.1080/0960085X.2020.1816145},
	doi = {10.1080/0960085X.2020.1816145},
	shorttitle = {From semantics to pragmatics},
	abstract = {Natural Language Processing ({NLP}) is now widely integrated into web and mobile applications, enabling natural interactions between humans and computers. Although there is a large body of {NLP} studies published in Information Systems ({IS}), a comprehensive review of how {NLP} research is conceptualised and realised in the context of {IS} has not been conducted. To assess the current state of {NLP} research in {IS}, we use a variety of techniques to analyse a literature corpus comprising 356 {NLP} research articles published in {IS} journals between 2004 and 2018. Our analysis indicates the need to move from semantics to pragmatics. More importantly, our findings unpack the challenges and assumptions underlying current research trends in {NLP}. We argue that overcoming these challenges will require a renewed disciplinary {IS} focus. By proposing a roadmap of {NLP} research in {IS}, we draw attention to three {NLP} research perspectives and present future directions that {IS} researchers are uniquely positioned to address.},
	pages = {569--590},
	number = {5},
	journaltitle = {European Journal of Information Systems},
	shortjournal = {European Journal of Information Systems},
	author = {Li, Yan and Thomas, Manoj A. and Liu, Dapeng},
	urldate = {2022-07-11},
	date = {2021-09-03},
	langid = {english},
	file = {Li et al. - 2021 - From semantics to pragmatics where IS can lead in.pdf:C\:\\Users\\33623\\Zotero\\storage\\SQEKSYVJ\\Li et al. - 2021 - From semantics to pragmatics where IS can lead in.pdf:application/pdf},
}

@article{liu_monitoring_2021,
	title = {Monitoring {COVID}-19 pandemic through the lens of social media using natural language processing and machine learning},
	volume = {9},
	issn = {2047-2501},
	url = {https://link.springer.com/10.1007/s13755-021-00158-4},
	doi = {10.1007/s13755-021-00158-4},
	abstract = {Purpose:  It has been over a year since the first known case of coronavirus disease ({COVID}-19) emerged, yet the pandemic is far from over. To date, the coronavirus pandemic has infected over eighty million people and has killed more than 1.78 million worldwide. This study aims to explore “how useful is Reddit social media platform to surveil {COVID}-19 pandemic?” and “how do people’s concerns/behaviors change over the course of {COVID}-19 pandemic in North Carolina?”. The purpose of this study was to compare people’s thoughts, behavior changes, discussion topics, and the number of confirmed cases and deaths by applying natural language processing ({NLP}) to {COVID}-19 related data.
Methods:  In this study, we collected {COVID}-19 related data from 18 subreddits of North Carolina from March to August 2020. Next, we applied methods from natural language processing and machine learning to analyze collected Reddit posts using feature engineering, topic modeling, custom named-entity recognition ({NER}), and {BERT}-based (Bidirectional Encoder Representations from Transformers) sentence clustering. Using these methods, we were able to glean people’s responses and their concerns about {COVID}-19 pandemic in North Carolina.
Results:  We observed a positive change in attitudes towards masks for residents in North Carolina. The high-frequency words in all subreddit corpora for each of the {COVID}-19 mitigation strategy categories are: Distancing ({DIST})—“social distance/distancing”, “lockdown”, and “work from home”; Disinfection ({DIT})—“(hand) sanitizer/soap”, “hygiene”, and “wipe”; Personal Protective Equipment ({PPE})—“mask/facemask(s)/face shield”, “n95(s)/kn95”, and “cloth/gown”; Symptoms ({SYM})—“death”, “flu/influenza”, and “cough/coughed”; Testing ({TEST})—“cases”, “(antibody) test”, and “test results (positive/negative)”.
Conclusion:  The findings in our study show that the use of Reddit data to monitor {COVID}-19 pandemic in North Carolina ({NC}) was effective. The study shows the utility of {NLP} methods (e.g. cosine similarity, Latent Dirichlet Allocation ({LDA}) topic modeling, custom {NER} and {BERT}-based sentence clustering) in discovering the change of the public’s concerns/behaviors over the course of {COVID}-19 pandemic in {NC} using Reddit data. Moreover, the results show that social media data can be utilized to surveil the epidemic situation in a specific community.},
	pages = {25},
	number = {1},
	journaltitle = {Health Information Science and Systems},
	shortjournal = {Health Inf Sci Syst},
	author = {Liu, Yang and Whitfield, Christopher and Zhang, Tianyang and Hauser, Amanda and Reynolds, Taeyonn and Anwar, Mohd},
	urldate = {2022-07-11},
	date = {2021-12},
	langid = {english},
	file = {Liu et al. - 2021 - Monitoring COVID-19 pandemic through the lens of s.pdf:C\:\\Users\\33623\\Zotero\\storage\\2SL6JCH4\\Liu et al. - 2021 - Monitoring COVID-19 pandemic through the lens of s.pdf:application/pdf},
}

% NOTE(review): this entry has no journaltitle or date. The old pages value looked like
% a PDF page count, so it is stored as pagetotal; confirm against the published version.
@article{radovanovic_small_nodate,
	title = {Small talk in the Digital Age: Making Sense of Phatic Posts},
	abstract = {This paper presents some practical implications of a theoretical web desktop analysis and addresses microposts in the Social Web contextual sense and their role contributing diverse information to the Web as part of informal and semi-formal communication and social activities on Social Networking Sites ({SNS}). We reflect upon and present the most pervasive and relevant sociocommunication function of an online presence on microposts and social networks: the phatic communication function. Although some theorists such as Malinowski say these microposts have no practical information value, we argue that they have semantic and social value for the interlocutors, determined by sociotechnological and cultural factors such as online presence and social awareness. We investigate and offer new implications for emerging social and communication dynamics formed around microposts, what we call here “phatic posts”. We suggest that apparently trivial uses and features of {SNS} actually play an important role in setting the social and informational context of the rest of the conversation - a “phatic” function - and thus that these phatic posts are key to the success of {SNS}.},
	pagetotal = {4},
	author = {Radovanovic, Danica},
	langid = {english},
	file = {Radovanovic - Small talk in the Digital Age Making Sense of Pha.pdf:C\:\\Users\\33623\\Zotero\\storage\\H7MJWR4K\\Radovanovic - Small talk in the Digital Age Making Sense of Pha.pdf:application/pdf},
}

% NOTE(review): dropped a pages value that was a publisher item identifier (PII), not a
% page range. Add volume and pages from the journal issue once verified.
@article{li_data_2022,
	title = {Data Augmentation Approaches in Natural Language Processing: A Survey},
	issn = {2666-6510},
	url = {http://arxiv.org/abs/2110.01852},
	doi = {10.1016/j.aiopen.2022.03.001},
	shorttitle = {Data Augmentation Approaches in Natural Language Processing},
	abstract = {As an effective strategy, data augmentation ({DA}) alleviates data scarcity scenarios where deep learning techniques may fail. It is widely applied in computer vision then introduced to natural language processing and achieves improvements in many tasks. One of the main focuses of the {DA} methods is to improve the diversity of training data, thereby helping the model to better generalize to unseen testing data. In this survey, we frame {DA} methods into three categories based on the diversity of augmented data, including paraphrasing, noising, and sampling. Our paper sets out to analyze {DA} methods in detail according to the above categories. Further, we also introduce their applications in {NLP} tasks as well as the challenges.},
	journaltitle = {{AI} Open},
	shortjournal = {{AI} Open},
	author = {Li, Bohan and Hou, Yutai and Che, Wanxiang},
	urldate = {2022-07-11},
	date = {2022-03},
	langid = {english},
	eprinttype = {arxiv},
	eprint = {2110.01852},
	eprintclass = {cs.CL},
	keywords = {Computer Science - Computation and Language, Computer Science - Artificial Intelligence, Computer Science - Machine Learning},
	file = {Li et al. - 2022 - Data Augmentation Approaches in Natural Language P.pdf:C\:\\Users\\33623\\Zotero\\storage\\MQKSXYQJ\\Li et al. - 2022 - Data Augmentation Approaches in Natural Language P.pdf:application/pdf},
}

@article{watanabe_theory-driven_2022,
	author = {Watanabe, Kohei and Zhou, Yuan},
	title = {Theory-Driven Analysis of Large Corpora: Semisupervised Topic Classification of the {UN} Speeches},
	shorttitle = {Theory-Driven Analysis of Large Corpora},
	journaltitle = {Social Science Computer Review},
	shortjournal = {Social Science Computer Review},
	volume = {40},
	number = {2},
	pages = {346--366},
	date = {2022-04},
	issn = {0894-4393, 1552-8286},
	doi = {10.1177/0894439320907027},
	url = {http://journals.sagepub.com/doi/10.1177/0894439320907027},
	urldate = {2022-07-11},
	langid = {english},
	abstract = {There is a growing interest in quantitative analysis of large corpora among the international relations ({IR}) scholars, but many of them find it difficult to perform analysis consistently with existing theoretical frameworks using unsupervised machine learning models to further develop the field. To solve this problem, we created a set of techniques that utilize a semisupervised model that allows researchers to classify documents into predefined categories efficiently. We propose a dictionary making procedure to avoid inclusion of words that are likely to confuse the model and deteriorate the its classification performance classification accuracy using a new entropy-based diagnostic tool. In our experiments, we classify sentences of the United Nations General Assembly speeches into six predefined categories using the seeded Latent Dirichlet allocation and Newsmap, which were trained with a small “seed word dictionary” that we created following the procedure. The result shows that, while keyword dictionary can only classify 25\% of sentences, Newsmap can classify over 60\% of them accurately correctly and; its accuracy exceeds 70\% when contextual information is taken into consideration by kernel smoothing of topic likelihoods. We argue that once seed word dictionaries are created by the international relations community, semisupervised models would become more useful than unsupervised models for theory-driven text analysis.},
	file = {Watanabe et Zhou - 2022 - Theory-Driven Analysis of Large Corpora Semisuper.pdf:C\:\\Users\\33623\\Zotero\\storage\\E44GTF2M\\Watanabe et Zhou - 2022 - Theory-Driven Analysis of Large Corpora Semisuper.pdf:application/pdf},
}

% NOTE(review): imported from a bare PDF; no journal or date is recorded, so the
% entry is typed as misc and the page count is kept in pagetotal. Add the real
% venue and date once confirmed.
@misc{bouillon_description_nodate,
	title = {The Description of Adjectives for Natural Language Processing: Theoretical and Applied Perspectives},
	pagetotal = {18},
	author = {Bouillon, Pierrette and Viegas, Evelyne},
	langid = {english},
	file = {Bouillon et Viegas - The Description of Adjectives for Natural Language.pdf:C\:\\Users\\33623\\Zotero\\storage\\E3SNDZWI\\Bouillon et Viegas - The Description of Adjectives for Natural Language.pdf:application/pdf},
}

% NOTE(review): imported from a bare PDF; no journal or date is recorded, so the
% entry is typed as misc and the page count is kept in pagetotal.
@misc{streb_personality_nodate,
	title = {Personality and Political Orientation in {Germany}: A {NLP}-based Analysis using {Twitter} Data},
	abstract = {Despite having found some clear tendencies, research about personality and political orientation has been rather fragmented. We compliment the picture by examining systematic personality differences among sympathisers of Germany’s seven main political parties as well as the relationship between personality traits and political views on economic and social issues. While previous research has only covered personality traits included in the Big Five model, we extend this by also covering fundamental needs and values. A natural language processing approach in combination with machine learning is applied in order to generate personality profiles based on politicians’ public Twitter posts. We identify various personality aspects significantly related to certain political views, including Openness, Conscientiousness and Agreeabless as well as the needs Curiosity, Excitement and Love and the values Conservation, Hedonism and Self-transcendence. Furthermore, we find remarkable similarities between members of the {AfD} and Die Linke, despite their major ideological differences.},
	pagetotal = {26},
	author = {Streb, Marius and Wiesner, Dominik},
	langid = {english},
	file = {Streb et Wiesner - Personality and Political Orientation in Germany .pdf:C\:\\Users\\33623\\Zotero\\storage\\6LEVH7AG\\Streb et Wiesner - Personality and Political Orientation in Germany .pdf:application/pdf},
}

@article{deichmann_ideas_2020,
	title = {Ideas with impact: How connectivity shapes idea diffusion},
	volume = {49},
	issn = {0048-7333},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0048733319302008},
	doi = {10.1016/j.respol.2019.103881},
	shorttitle = {Ideas with impact},
	abstract = {Despite a growing body of research on idea diffusion, there is a lack of knowledge on why some ideas successfully diffuse and stand out from the crowd while others do not surface or remain unnoticed. We address this question by looking into the characteristics of an idea, specifically its connectivity in a content network. In a content network, ideas connect to other ideas through their content—the words that the ideas have in common. We hypothesize that a high connectivity of an idea in a content network is beneficial for idea diffusion because this idea will more likely be conceived as novel yet at the same time also as more useful because it appears as more familiar to the audience. Moreover, we posit that a high social connectivity of the team working on the idea further enhances the effect of high content connectivity on idea diffusion. Our study focuses on academic conference publications and the co-authorship data of a community of computer science researchers from 2006 to 2012. We find confirmation for our hypotheses and discuss the implications of these findings.},
	pages = {103881},
	number = {1},
	journaltitle = {Research Policy},
	shortjournal = {Research Policy},
	author = {Deichmann, Dirk and Moser, Christine and Birkholz, Julie M. and Nerghes, Adina and Groenewegen, Peter and Wang, Shenghui},
	urldate = {2022-07-11},
	date = {2020-02},
	langid = {english},
	file = {Deichmann et al. - 2020 - Ideas with impact How connectivity shapes idea di.pdf:C\:\\Users\\33623\\Zotero\\storage\\PEL4R7W6\\Deichmann et al. - 2020 - Ideas with impact How connectivity shapes idea di.pdf:application/pdf},
}

@article{lee_message_2015,
	title = {Message diffusion through social network service: The case of rumor and non-rumor related tweets during {Boston} bombing 2013},
	volume = {17},
	issn = {1387-3326, 1572-9419},
	url = {http://link.springer.com/10.1007/s10796-015-9568-z},
	doi = {10.1007/s10796-015-9568-z},
	shorttitle = {Message diffusion through social network service},
	abstract = {Social Network Services ({SNS}) such as Twitter play a significant role in reporting media, particularly during the extreme events. We examined the impact of tweet features on the diffusion of two types of messages during 2013 Boston marathon tragedy—rumor related and non-rumor related (both in the context of the Boston tragedy). Negative binomial analysis revealed that tweet features such as reaction time, number of followers, and usage of hashtag have an impact on tweet message diffusion during the tragedy. The number of followers showed a positive relationship with message diffusion. However, the relationship between tweet reaction time and message diffusion was negative. Finally, tweet messages that did not include hashtags diffused more than messages that contained hashtags. This paper contributes by adapting the innovation diffusion model to explore tweet message diffusion in Twitter space during extreme events.},
	pages = {997--1005},
	number = {5},
	journaltitle = {Information Systems Frontiers},
	shortjournal = {Inf Syst Front},
	author = {Lee, Jaeung and Agrawal, Manish and Rao, H. R.},
	urldate = {2022-07-11},
	date = {2015-10},
	langid = {english},
	file = {Lee et al. - 2015 - Message diffusion through social network service .pdf:C\:\\Users\\33623\\Zotero\\storage\\D6FUZ4K5\\Lee et al. - 2015 - Message diffusion through social network service .pdf:application/pdf},
}

@inproceedings{fan_eli5_2019,
	venue = {Florence, Italy},
	title = {{ELI}5: Long Form Question Answering},
	url = {https://www.aclweb.org/anthology/P19-1346},
	doi = {10.18653/v1/P19-1346},
	shorttitle = {{ELI}5},
	eventtitle = {57th Annual Meeting of the Association for Computational Linguistics},
	pages = {3558--3567},
	booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
	publisher = {Association for Computational Linguistics},
	author = {Fan, Angela and Jernite, Yacine and Perez, Ethan and Grangier, David and Weston, Jason and Auli, Michael},
	urldate = {2022-07-11},
	date = {2019},
	langid = {english},
	file = {Fan et al. - 2019 - ELI5 Long Form Question Answering.pdf:C\:\\Users\\33623\\Zotero\\storage\\Z9PBAP96\\Fan et al. - 2019 - ELI5 Long Form Question Answering.pdf:application/pdf},
}

@article{eichstaedt_facebook_2018,
	title = {Facebook language predicts depression in medical records},
	volume = {115},
	issn = {0027-8424, 1091-6490},
	url = {https://pnas.org/doi/full/10.1073/pnas.1802331115},
	doi = {10.1073/pnas.1802331115},
	abstract = {Significance: Depression is disabling and treatable, but underdiagnosed. In this study, we show that the content shared by consenting users on Facebook can predict a future occurrence of depression in their medical records. Language predictive of depression includes references to typical symptoms, including sadness, loneliness, hostility, rumination, and increased self-reference. This study suggests that an analysis of social media data could be used to screen consenting individuals for depression. Further, social media content may point clinicians to specific symptoms of depression. Abstract: Depression, the most prevalent mental illness, is underdiagnosed and undertreated, highlighting the need to extend the scope of current screening methods. Here, we use language from Facebook posts of consenting individuals to predict depression recorded in electronic medical records. We accessed the history of Facebook statuses posted by 683 patients visiting a large urban academic emergency department, 114 of whom had a diagnosis of depression in their medical records. Using only the language preceding their first documentation of a diagnosis of depression, we could identify depressed patients with fair accuracy [area under the curve ({AUC}) = 0.69], approximately matching the accuracy of screening surveys benchmarked against medical records. Restricting Facebook data to only the 6 months immediately preceding the first documented diagnosis of depression yielded a higher prediction accuracy ({AUC} = 0.72) for those users who had sufficient Facebook data. Significant prediction of future depression status was possible as far as 3 months before its first documentation. We found that language predictors of depression include emotional (sadness), interpersonal (loneliness, hostility), and cognitive (preoccupation with the self, rumination) processes. Unobtrusive depression assessment through social media of consenting individuals may become feasible as a scalable complement to existing screening and monitoring procedures.},
	pages = {11203--11208},
	number = {44},
	journaltitle = {Proceedings of the National Academy of Sciences},
	shortjournal = {Proc. Natl. Acad. Sci. U.S.A.},
	author = {Eichstaedt, Johannes C. and Smith, Robert J. and Merchant, Raina M. and Ungar, Lyle H. and Crutchley, Patrick and Preoţiuc-Pietro, Daniel and Asch, David A. and Schwartz, H. Andrew},
	urldate = {2022-07-11},
	date = {2018-10-30},
	langid = {english},
	file = {Eichstaedt et al. - 2018 - Facebook language predicts depression in medical r.pdf:C\:\\Users\\33623\\Zotero\\storage\\QWXE82VV\\Eichstaedt et al. - 2018 - Facebook language predicts depression in medical r.pdf:application/pdf},
}

@article{jung_caution_2020,
	title = {Caution: Rumors ahead—A case study on the debunking of false information on {Twitter}},
	volume = {7},
	issn = {2053-9517},
	url = {http://journals.sagepub.com/doi/10.1177/2053951720980127},
	doi = {10.1177/2053951720980127},
	shorttitle = {Caution},
	abstract = {As false information may spread rapidly on social media, a profound understanding of how it can be debunked is required. This study offers empirical insights into the development of rumors after they are debunked, the various user groups who are involved in the process, and their network structures. As crisis situations are highly sensitive to the spread of rumors, Twitter posts from during the 2017 G20 summit are examined. Tweets regarding five rumors that were debunked during this event were manually coded into the following categories: rumor, debunking message, uncertainty about rumor, uncertainty about debunking message, and others. Our findings show that rumors which are debunked early and vehemently by official sources are the most likely to be stopped. When individuals participate in the process, they typically do so by sharing uncommented media content, as opposed to contributing user-generated content. Depending on the conditions in which a rumor arises, different network structures can be found. Since some rumors are easier for individuals to verify than others, our results have implications for the priorities of journalists and official sources.},
	eid = {2053951720980127},
	number = {2},
	journaltitle = {Big Data \& Society},
	shortjournal = {Big Data \& Society},
	author = {Jung, Anna-Katharina and Ross, Björn and Stieglitz, Stefan},
	urldate = {2022-07-11},
	date = {2020-07},
	langid = {english},
	file = {Jung et al. - 2020 - Caution Rumors ahead—A case study on the debunkin.pdf:C\:\\Users\\33623\\Zotero\\storage\\W8XUAHU8\\Jung et al. - 2020 - Caution Rumors ahead—A case study on the debunkin.pdf:application/pdf},
}

% NOTE(review): imported from a bare PDF; no venue or date is recorded, so the
% entry is typed as misc and the page count is kept in pagetotal.
@misc{guibon_emojis_nodate,
	title = {From Emojis to Sentiment Analysis},
	abstract = {Studies on Twitter are becoming quite common these years. Even so, the majority of them did not focused on emoticons, even less on emojis. An overview of emoticons related work has been made recently [11]. However there is still too little research work related to emojis. In this paper we draw up the work and future approaches worth considering for emoji usage in Sentiment Analysis. We aim to put necessary theoretical background before using emojis for sentiment analysis. Thus, we present an emoji usage typology along with linguistic and socio-linguistic studies on the interpretation of emojis. We also introduce approaches exploiting emojis in Sentiment Analysis. We conclude by presenting our perspectives in this domain considering the evolution of emoji usages.},
	pagetotal = {8},
	author = {Guibon, Gaël and Ochs, Magalie and Bellot, Patrice},
	langid = {english},
	file = {Guibon et al. - From Emojis to Sentiment Analysis.pdf:C\:\\Users\\33623\\Zotero\\storage\\IQ2HET7H\\Guibon et al. - From Emojis to Sentiment Analysis.pdf:application/pdf},
}

@inproceedings{liu_roadmap_2017,
	title = {A Roadmap for Natural Language Processing Research in Information Systems},
	url = {http://hdl.handle.net/10125/41285},
	doi = {10.24251/HICSS.2017.132},
	abstract = {Natural Language Processing ({NLP}) is now widely integrated into web and mobile applications, enabling natural interactions between human and computers. Although many {NLP} studies have been published, none have comprehensively reviewed or synthesized tasks most commonly addressed in {NLP} research. We conduct a thorough review of {IS} literature to assess the current state of {NLP} research, and identify 12 prototypical tasks that are widely researched. Our analysis of 238 articles in Information Systems ({IS}) journals between 2004 and 2015 shows an increasing trend in {NLP} research, especially since 2011. Based on our analysis, we propose a roadmap for {NLP} research, and detail how it may be useful to guide future {NLP} research in {IS}. In addition, we employ Association Rules ({AR}) mining for data analysis to investigate co-occurrence of prototypical tasks and discuss insights from the findings.},
	eventtitle = {Hawaii International Conference on System Sciences},
	booktitle = {Proceedings of the 50th Hawaii International Conference on System Sciences},
	author = {Liu, Dapeng and Li, Yan and Thomas, Manoj A.},
	urldate = {2022-07-11},
	date = {2017},
	langid = {english},
	file = {Liu et al. - 2017 - A Roadmap for Natural Language Processing Research.pdf:C\:\\Users\\33623\\Zotero\\storage\\NP4MCXPZ\\Liu et al. - 2017 - A Roadmap for Natural Language Processing Research.pdf:application/pdf},
}

% NOTE(review): imported from a bare PDF; no venue or date is recorded, so the
% entry is typed as misc and the page count is kept in pagetotal.
@misc{musto_natural_nodate,
	title = {Natural Language Justifications for Recommender Systems Exploiting Text Summarization and Sentiment Analysis},
	abstract = {This paper reports and summarizes the methodology presented in [16] and accepted for publication at {ACM} {RecSys} 20191. In this work we present a methodology to justify recommendations that relies on the information extracted from users’ reviews discussing the available items. The intuition behind the approach is to conceive the justification as a summary of the most relevant and distinguishing aspects of the item, automatically obtained by analyzing its reviews.},
	pagetotal = {11},
	author = {Musto, Cataldo and Rossiello, Gaetano and de Gemmis, Marco and Lops, Pasquale and Semeraro, Giovanni},
	langid = {english},
	file = {Musto et al. - Natural Language Justiﬁcations for Recommender Sys.pdf:C\:\\Users\\33623\\Zotero\\storage\\W2XF6HIM\\Musto et al. - Natural Language Justiﬁcations for Recommender Sys.pdf:application/pdf},
}

@inproceedings{mahedero_natural_2005,
	venue = {Hilton, Singapore},
	title = {Natural language processing of lyrics},
	isbn = {978-1-59593-044-6},
	url = {http://portal.acm.org/citation.cfm?doid=1101149.1101255},
	doi = {10.1145/1101149.1101255},
	abstract = {We report experiments on the use of standard natural language processing ({NLP}) tools for the analysis of music lyrics. A significant amount of music audio has lyrics. Lyrics encode an important part of the semantics of a song, therefore their analysis complements that of acoustic and cultural metadata and is fundamental for the development of complete music information retrieval systems. Moreover, a textual analysis of a song can generate ground truth data that can be used to validate results from purely acoustic methods. Preliminary results on language identification, structure extraction, categorization and similarity searches suggests that a lot of profit can be gained from the analysis of lyrics.},
	eventtitle = {13th Annual {ACM} International Conference on Multimedia},
	pages = {475},
	booktitle = {Proceedings of the 13th annual {ACM} international conference on Multimedia - {MULTIMEDIA} '05},
	publisher = {{ACM} Press},
	author = {Mahedero, Jose P. G. and Martínez, Álvaro and Cano, Pedro and Koppenberger, Markus and Gouyon, Fabien},
	urldate = {2022-07-11},
	date = {2005},
	langid = {english},
	file = {Mahedero et al. - 2005 - Natural language processing of lyrics.pdf:C\:\\Users\\33623\\Zotero\\storage\\7FMBKV62\\Mahedero et al. - 2005 - Natural language processing of lyrics.pdf:application/pdf},
}

% NOTE(review): imported from a bare PDF; no venue or date is recorded, so the
% entry is typed as misc and the page count is kept in pagetotal.
@misc{shutova_models_nodate,
	title = {Models of Metaphor in {NLP}},
	abstract = {Automatic processing of metaphor can be clearly divided into two subtasks: metaphor recognition (distinguishing between literal and metaphorical language in a text) and metaphor interpretation (identifying the intended literal meaning of a metaphorical expression). Both of them have been repeatedly addressed in {NLP}. This paper is the first comprehensive and systematic review of the existing computational models of metaphor, the issues of metaphor annotation in corpora and the available resources.},
	pagetotal = {10},
	author = {Shutova, Ekaterina},
	langid = {english},
	file = {Shutova - Models of Metaphor in NLP.pdf:C\:\\Users\\33623\\Zotero\\storage\\KJVMNPUU\\Shutova - Models of Metaphor in NLP.pdf:application/pdf},
}

% NOTE(review): the title looks like a book-series name rather than the title of
% the work itself, and no venue or date is recorded; verify against the PDF.
% Typed as misc with the page count in pagetotal until the real metadata is known.
@misc{hirst_synthesis_nodate,
	title = {Synthesis Lectures on Human Language Technologies},
	pagetotal = {47},
	author = {Hirst, Graeme},
	langid = {english},
	file = {Hirst - Synthesis Lectures on Human Language Technologies.pdf:C\:\\Users\\33623\\Zotero\\storage\\N8SJZN9Z\\Hirst - Synthesis Lectures on Human Language Technologies.pdf:application/pdf},
}

% NOTE(review): the importer split the surname particle "Le" into the given name;
% the 265-page work is recorded here as a PhD dissertation. Institution, year and
% the accented spelling of the surname should be confirmed against the title page.
@thesis{grezause_um_nodate,
	title = {Um and Uh, and the expression of stance in conversational speech},
	type = {phdthesis},
	institution = {University of Washington},
	pagetotal = {265},
	author = {Le Grézause, Esther},
	date = {2017},
	langid = {english},
	file = {Grezause - Um and Uh, and the expression of stance in convers.pdf:C\:\\Users\\33623\\Zotero\\storage\\I4VHSNEM\\Grezause - Um and Uh, and the expression of stance in convers.pdf:application/pdf},
}

% NOTE(review): imported from a bare PDF; at 309 pages this is presumably a thesis
% or monograph, but no institution or date is recorded. Typed as misc with the page
% count in pagetotal until the real metadata is confirmed.
@misc{smith_expression_nodate,
	title = {L'expression de la fonction phatique en français et en allemand: du concept de phaticité au pilotage du coénonciateur à l'aide des expressions phatiques},
	pagetotal = {309},
	author = {Smith, Anja},
	langid = {french},
	file = {Smith - L'expression de la fonction phatique en français e.pdf:C\:\\Users\\33623\\Zotero\\storage\\GU6BJAR4\\Smith - L'expression de la fonction phatique en français e.pdf:application/pdf},
}

% NOTE(review): no journal is recorded for this item, so it is typed as misc and
% the page count is kept in pagetotal.
@misc{miehakanda_proprietes_2018,
	title = {Propriétés et fonctions des actes de langage},
	pagetotal = {16},
	author = {Miehakanda, M'Badi},
	date = {2018},
	langid = {french},
	file = {Miehakanda - 2018 - Propriétés et fonctions des actes de langage.pdf:C\:\\Users\\33623\\Zotero\\storage\\2GLYPEW6\\Miehakanda - 2018 - Propriétés et fonctions des actes de langage.pdf:application/pdf},
}

@inproceedings{bhattasali_automatic_2015,
	venue = {Beijing, China},
	title = {Automatic Identification of Rhetorical Questions},
	url = {http://aclweb.org/anthology/P15-2122},
	doi = {10.3115/v1/P15-2122},
	abstract = {A question may be asked not only to elicit information, but also to make a statement. Questions serving the latter purpose, called rhetorical questions, are often lexically and syntactically indistinguishable from other types of questions. Still, it is desirable to be able to identify rhetorical questions, as it is relevant for many {NLP} tasks, including information extraction and text summarization. In this paper, we explore the largely understudied problem of rhetorical question identification. Specifically, we present a simple n-gram based language model to classify rhetorical questions in the Switchboard Dialogue Act Corpus. We find that a special treatment of rhetorical questions which incorporates contextual information achieves the highest performance.},
	eventtitle = {53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing},
	pages = {743--749},
	booktitle = {Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
	publisher = {Association for Computational Linguistics},
	author = {Bhattasali, Shohini and Cytryn, Jeremy and Feldman, Elana and Park, Joonsuk},
	urldate = {2022-07-11},
	date = {2015},
	langid = {english},
	file = {Bhattasali et al. - 2015 - Automatic Identification of Rhetorical Questions.pdf:C\:\\Users\\33623\\Zotero\\storage\\MDRZRJ9A\\Bhattasali et al. - 2015 - Automatic Identification of Rhetorical Questions.pdf:application/pdf},
}

% NOTE(review): imported from a bare PDF; no venue or date is recorded, so the
% entry is typed as misc and the page count is kept in pagetotal.
@misc{dessus_implementing_nodate,
	title = {Implementing {Bakhtin's} Dialogism Theory with {NLP} Techniques in Distance Learning Environments},
	abstract = {The purpose of this paper is to present and discuss a Natural Language Processing-based operationalization of Bakhtin’s dialogism ideas, and to insert it in the context of distance learning. Three core concepts from Bakhin are introduced and their application in the domain of distance learning discussed. Then, two computer-based systems based on these ideas, {PolyCAFe} and Pensum are presented.},
	pagetotal = {9},
	author = {Dessus, Philippe and Trausan-Matu, Stefan},
	langid = {english},
	file = {Dessus et Trausan-Matu - Implementing Bakhtin's Dialogism Theory with NLP T.pdf:C\:\\Users\\33623\\Zotero\\storage\\FIAP6HJ8\\Dessus et Trausan-Matu - Implementing Bakhtin's Dialogism Theory with NLP T.pdf:application/pdf},
}

@inproceedings{smith_overview_2007,
	venue = {Curitiba, Parana, Brazil},
	title = {An Overview of the {Tesseract} {OCR} Engine},
	isbn = {978-0-7695-2822-9},
	issn = {1520-5363},
	url = {http://ieeexplore.ieee.org/document/4376991/},
	doi = {10.1109/ICDAR.2007.4376991},
	abstract = {The Tesseract {OCR} engine, as was the {HP} Research Prototype in the {UNLV} Fourth Annual Test of {OCR} Accuracy[1], is described in a comprehensive overview. Emphasis is placed on aspects that are novel or at least unusual in an {OCR} engine, including in particular the line finding, features/classification methods, and the adaptive classifier.},
	eventtitle = {Ninth International Conference on Document Analysis and Recognition ({ICDAR} 2007)},
	pages = {629--633},
	booktitle = {Ninth International Conference on Document Analysis and Recognition ({ICDAR} 2007) Vol 2},
	publisher = {{IEEE}},
	author = {Smith, R.},
	urldate = {2022-07-11},
	date = {2007-09},
	langid = {english},
	file = {Smith - 2007 - An Overview of the Tesseract OCR Engine.pdf:C\:\\Users\\33623\\Zotero\\storage\\879WH7A8\\Smith - 2007 - An Overview of the Tesseract OCR Engine.pdf:application/pdf},
}

@article{conway_validating_2020,
	title = {Validating automated integrative complexity: Natural language processing and the {Donald Trump Test}},
	volume = {8},
	issn = {2195-3325},
	url = {https://jspp.psychopen.eu/index.php/jspp/article/view/5261},
	doi = {10.5964/jspp.v8i2.1307},
	shorttitle = {Validating automated integrative complexity},
	abstract = {Computer algorithms that analyze language (natural language processing systems) have seen a great increase in usage recently. While use of these systems to score key constructs in social and political psychology has many advantages, it is also dangerous if we do not fully evaluate the validity of these systems. In the present article, we evaluate a natural language processing system for one particular construct that has implications for solving key societal issues: Integrative complexity. We first review the growing body of evidence for the validity of the Automated Integrative Complexity ({AutoIC}) method for computer-scoring integrative complexity. We then provide five new validity tests: {AutoIC} successfully distinguished fourteen classic philosophic works from a large sample of both lay populations and political leaders (Test 1) and further distinguished classic philosophic works from the rhetoric of Donald Trump at higher rates than an alternative system (Test 2). Additionally, {AutoIC} successfully replicated key findings from the hand-scored {IC} literature on smoking cessation (Test 3), U.S. Presidents’ State of the Union Speeches (Test 4), and the ideology-complexity relationship (Test 5). Taken in total, this large body of evidence not only suggests that {AutoIC} is a valid system for scoring integrative complexity, but it also reveals important theory-building insights into key issues at the intersection of social and political psychology (health, leadership, and ideology). We close by discussing the broader contributions of the present validity tests to our understanding of issues vital to natural language processing.},
	pages = {504--524},
	number = {2},
	journaltitle = {Journal of Social and Political Psychology},
	shortjournal = {J. Soc. Polit. Psych.},
	author = {Conway, Lucian Gideon and Conway, Kathrene R. and Houck, Shannon C.},
	urldate = {2022-07-11},
	date = {2020-09-02},
	langid = {english},
	file = {Conway et al. - 2020 - Validating automated integrative complexity Natur.pdf:C\:\\Users\\33623\\Zotero\\storage\\KAFQNYXL\\Conway et al. - 2020 - Validating automated integrative complexity Natur.pdf:application/pdf},
}

% NOTE(review): imported from a bare PDF; no venue or date is recorded, so the
% entry is typed as misc and the page count is kept in pagetotal.
@misc{lukito_using_nodate,
	title = {Using time series and natural language processing to identify viral moments in the 2016 {U.S.} Presidential Debate},
	abstract = {This paper proposes a method for identifying and studying viral moments or highlights during a political debate. Using a combined strategy of time series analysis and domain adapted word embeddings, this study provides an in-depth analysis of several key moments during the 2016 U.S. Presidential election. First, a time series outlier analysis is used to identify key moments during the debate. These moments had to result in a long-term shift in attention towards either Hillary Clinton or Donald Trump (i.e., a transient change outlier or an intervention, resulting in a permanent change in the time series). To assess whether these moments also resulted in a discursive shift, two corpora are produced for each potential viral moment (a pre-viral corpus and post-viral corpus). A domain adaptation layer learns weights to combine a generic and domain specific ({DS}) word embedding into a domain adapted ({DA}) embedding. Words are then classified using a generic encoder+classifier framework that relies on these word embeddings as inputs. Results suggest that both Clinton and Trump were able to induced discourse-shifting viral moments, though the former is much better at producing a topically-specific discursive shift.},
	pagetotal = {11},
	author = {Lukito, Josephine and Sarma, Prathusha K. and Foley, Jordan and Abhishek, Aman},
	langid = {english},
	file = {Lukito et al. - Using time series and natural language processing .pdf:C\:\\Users\\33623\\Zotero\\storage\\SZ97UEEQ\\Lukito et al. - Using time series and natural language processing .pdf:application/pdf},
}

@inproceedings{brenier_detection_2005,
	author = {Brenier, Jason M. and Cer, Daniel M. and Jurafsky, Daniel},
	title = {The detection of emphatic words using acoustic and lexical features},
	booktitle = {Interspeech 2005},
	eventtitle = {Interspeech 2005},
	publisher = {{ISCA}},
	pages = {3297--3300},
	date = {2005-09-04},
	doi = {10.21437/Interspeech.2005-576},
	url = {https://www.isca-speech.org/archive/interspeech_2005/brenier05_interspeech.html},
	urldate = {2022-07-11},
	langid = {english},
	abstract = {In this study, we describe an automatic detector for prosodically salient or emphasized words in speech. Knowledge of whether a word is emphatic or not could improve Text-to-Speech synthesis as well as spoken language summarization. Previous work on emphasis detection has focused on the automatic recognition of pitch accents. Our model extends earlier research by automatically identifying emphatic pitch accents, a subset of pitch accents that mark special discourse functions with extreme degrees of salience. The overall best performance achieved by our system was 87.8\% correct, 8.0\% above baseline performance. The results of a feature selection algorithm show that the top-performing features in our models are primarily acoustic measures. Our work identiﬁes important cues for emphasis in speech and shows that it is possible for an automated system to distinguish between two levels of perceived prominence in pitch accents with a high degree of accuracy.},
	file = {Brenier et al. - 2005 - The detection of emphatic words using acoustic and.pdf:C\:\\Users\\33623\\Zotero\\storage\\VA8I5X9M\\Brenier et al. - 2005 - The detection of emphatic words using acoustic and.pdf:application/pdf},
}

@article{bendle_forging_2019,
	author = {Bendle, Neil Thomas and Bagga, Charan K. and Nastasoiu, Alina},
	title = {Forging a Stronger Academic-Practitioner Partnership–The Case of Net Promoter Score ({NPS})},
	journaltitle = {Journal of Marketing Theory and Practice},
	shortjournal = {Journal of Marketing Theory and Practice},
	volume = {27},
	number = {2},
	pages = {210--226},
	date = {2019-04-03},
	issn = {1069-6679, 1944-7175},
	doi = {10.1080/10696679.2019.1577689},
	url = {https://www.tandfonline.com/doi/full/10.1080/10696679.2019.1577689},
	urldate = {2022-07-11},
	langid = {english},
	file = {Bendle et al. - 2019 - Forging a Stronger Academic-Practitioner Partnersh.pdf:C\:\\Users\\33623\\Zotero\\storage\\32W3VJJJ\\Bendle et al. - 2019 - Forging a Stronger Academic-Practitioner Partnersh.pdf:application/pdf},
}

@article{lewis_does_2020,
	title = {Does the {NPS}® reflect consumer sentiment? A qualitative examination of the {NPS} using a sentiment analysis approach},
	volume = {62},
	issn = {1470-7853, 2515-2173},
	url = {http://journals.sagepub.com/doi/10.1177/1470785319863623},
	doi = {10.1177/1470785319863623},
	shorttitle = {Does the {NPS}® reflect consumer sentiment?},
	abstract = {The Net Promoter Score ({NPS}®) is extensively used as a key performance indicator in practice. Although the scale was initially considered to be a predictor of growth, the literature has disproved this assertion. Despite this, it is argued here that the {NPS} could be used as a measure of brand health if it provided an effective representation of consumer sentiment toward the brand. This research took a respondent perspective to examine if the {NPS} effectively captured the consumer’s sentiment. Using a questionnaire design, participants were asked to provide a response on an {NPS} scale, followed by which they were asked to explain why they gave that score. Therein, a sentiment analysis approach was applied and the open-ended responses were coded based on the type and strength of the attitude. The results indicate that at an overall level, the {NPS} captures the sentiment participants feel toward a brand. However, caution should be used when classifying participants into detractors, passives, and promoters.},
	pages = {9--17},
	number = {1},
	journaltitle = {International Journal of Market Research},
	shortjournal = {International Journal of Market Research},
	author = {Lewis, Clifford and Mehmet, Michael},
	urldate = {2022-07-11},
	date = {2020-01},
	langid = {english},
	file = {Lewis et Mehmet - 2020 - Does the NPS ® reflect consumer sentime.pdf:C\:\\Users\\33623\\Zotero\\storage\\MUWHGWBV\\Lewis et Mehmet - 2020 - Does the NPS ® reflect consumer sentime.pdf:application/pdf},
}

@report{palminteri_choice-confirmation_2021,
	author = {Palminteri, Stefano},
	title = {Choice-confirmation bias and gradual perseveration in human reinforcement learning},
	type = {preprint},
	institution = {{PsyArXiv}},
	date = {2021-07-06},
	doi = {10.31234/osf.io/dpqj6},
	url = {https://osf.io/dpqj6},
	urldate = {2022-07-11},
	langid = {english},
	abstract = {Do we preferentially learn from outcomes that confirm our choices? This is one of the most basic, and yet consequence-bearing, questions concerning reinforcement learning. In recent years, we investigated this question in a series of studies implementing increasingly complex behavioral protocols. The learning rates fitted in experiments featuring partial or complete feedback, as well as free and forced choices, were systematically found to be consistent with a choice-confirmation bias. This result is robust across a broad range of outcome contingencies and response modalities. One of the prominent behavioral consequences of the confirmatory learning rate pattern is choice hysteresis: that is the tendency of repeating previous choices, despite contradictory evidence. As robust and replicable as they have proven to be, these findings were (legitimately) challenged by a couple of studies pointing out that a choice-confirmatory pattern of learning rates may spuriously arise from not taking into consideration an explicit choice autocorrelation term in the model. In the present study, we re-analyze data from four previously published papers (in total nine experiments; N=363), originally included in the studies demonstrating (or criticizing) the choice-confirmation bias in human participants. We fitted two models: one featured valence-specific updates (i.e., different learning rates for confirmatory and disconfirmatory outcomes) and one additionally including an explicit choice autocorrelation process (gradual perseveration). Our analysis confirms that the inclusion of the gradual perseveration process in the model significantly reduces the estimated choice-confirmation bias. However, in all considered experiments, the choice-confirmation bias remains present at the meta-analytical level, and significantly different from zero in most experiments. 
Our results demonstrate that the choice-confirmation bias resists the inclusion of an explicit choice autocorrelation term, thus proving to be a robust feature of human reinforcement learning.  We conclude by discussing the psychological plausibility of the gradual perseveration process in the context of these behavioral paradigms and by pointing to additional computational processes that may play an important role in estimating and interpreting the computational biases under scrutiny.},
	file = {Palminteri - 2021 - Choice-confirmation bias and gradual perseveration.pdf:C\:\\Users\\33623\\Zotero\\storage\\LYRMUM2V\\Palminteri - 2021 - Choice-confirmation bias and gradual perseveration.pdf:application/pdf},
}

@article{zhou_r-transformer_2019,
	title = {{R-Transformer} Network Based on Position and Self-Attention Mechanism for Aspect-Level Sentiment Classification},
	volume = {7},
	issn = {2169-3536},
	url = {https://ieeexplore.ieee.org/document/8822480/},
	doi = {10.1109/ACCESS.2019.2938854},
	abstract = {Aspect-level sentiment classification ({ASC}) is a research hotspot in natural language processing, which aims to infer the sentiment polarity of a particular aspect in an opinion sentence. There are three main influence factors in the aspect-level sentiment classification: the semantic information of the context; the interaction information of the context and aspect; the position information between the aspect and the context. Some researchers have proposed way to solve aspect-level sentiment classification. However, previous work mainly used the average vector of the aspect to calculate the attention score of the context, which introduced the influence of noise words. Moreover, these attention-based approaches simply used relative positions to calculate positional information for contextual and aspect terms and did not provided better semantic information. Based on these above questions, in this paper, we propose the {PSRTN} model. Firstly, obtaining the position-aware influence propagate between words and aspects by Gaussian kernel and generating the influence vector for each context word. Secondly, capturing global and local information of the context by the R-Transformer, and using the self-attention mechanism to obtain the keywords in the aspect. Finally, context representation of a particular aspect is generated for classification. In order to evaluate the validity of the model, we conduct experiments on {SemEval}2014 and Twitter. The results show that the accuracy of the {PSRTN} model can reach 83.8\%, 80.9\%, and 75.1\% on three data sets, respectively.},
	pages = {127754--127764},
	journaltitle = {{IEEE} Access},
	shortjournal = {{IEEE} Access},
	author = {Zhou, Ziyu and Liu, Fang'ai and Wang, Qianqian},
	urldate = {2022-07-11},
	date = {2019},
	langid = {english},
	file = {Zhou et al. - 2019 - R-Transformer Network Based on Position and Self-A.pdf:C\:\\Users\\33623\\Zotero\\storage\\FLZYQYDV\\Zhou et al. - 2019 - R-Transformer Network Based on Position and Self-A.pdf:application/pdf},
}

@incollection{vanderveken_chapter_2001,
	location = {Amsterdam},
	title = {Speech act theory and the analysis of conversation},
	chapter = {12},
	booktitle = {Essays in Speech Act Theory},
	series = {Pragmatics \& Beyond New Series},
	number = {77},
	isbn = {978-90-272-5093-3 978-1-55619-835-9 978-90-272-5094-0 978-1-55619-836-6 978-90-272-9815-7},
	url = {https://benjamins.com/catalog/pbns.77.15moe},
	doi = {10.1075/pbns.77.15moe},
	pages = {239--261},
	publisher = {John Benjamins Publishing Company},
	author = {Moeschler, Jacques},
	editor = {Vanderveken, Daniel and Kubo, Susumu},
	urldate = {2022-07-11},
	date = {2001},
	langid = {english},
	file = {Moeschler - 2001 - Chapter 12. Speech act theory and the analysis of .pdf:C\:\\Users\\33623\\Zotero\\storage\\HRBKD4NE\\Moeschler - 2001 - Chapter 12. Speech act theory and the analysis of .pdf:application/pdf},
}

@article{nadeau_social_2020,
	title = {Social media responses and brand personality in product and moral harm crises: why waste a good crisis?},
	volume = {36},
	issn = {0267-257X, 1472-1376},
	url = {https://www.tandfonline.com/doi/full/10.1080/0267257X.2020.1764080},
	doi = {10.1080/0267257X.2020.1764080},
	shorttitle = {Social media responses and brand personality in product and moral harm crises},
	abstract = {The purpose of this research is to understand the process of attitudinal changes towards a brand in crisis and the brand’s communication around the crisis by utilising balance theory and brand personality. Four crisis case studies were selected and data was collected from brands’ Twitter platforms on either side of the crisis event horizon. Results demonstrate an opportunity to update the balance theory approach in a crisis by considering the type of crisis (product harm vs. moral harm) relative to brand personality (brand competence vs. brand character). Balance theory helps explain how consumer attitude changes occur through a crisis. Further, the mapping of brand communications in social media over four selected case studies show that brand personality identity can change as a result of a crisis and demonstrate how brand managers can actively frame their online communication to help the brand to recover more effectively from a crisis.},
	pages = {1031--1054},
	number = {11},
	journaltitle = {Journal of Marketing Management},
	shortjournal = {Journal of Marketing Management},
	author = {Nadeau, John and Rutter, Richard and Lettice, Fiona},
	urldate = {2022-07-11},
	date = {2020-07-23},
	langid = {english},
	file = {Nadeau et al. - 2020 - Social media responses and brand personality in pr.pdf:C\:\\Users\\33623\\Zotero\\storage\\TR6X6CHN\\Nadeau et al. - 2020 - Social media responses and brand personality in pr.pdf:application/pdf},
}

% NOTE(review): this article entry has no journaltitle and no date. The 25 below is the PDF page count, not a page range.
% Presumably published in Traitement Automatique des Langues 60(2), 2019, pp. 19-43 -- TODO confirm before adding those fields.
@article{abeille_corpus_nodate,
	title = {Un corpus arboré pour le français : le {French Treebank}},
	abstract = {We present a review of the French Treebank ({FTB}) (1996-2016), a lexical and syntactic resource with rich annotation and manual validation, which is usable by linguists and for {NLP} and has about 300 users in the world. We summarize the building principles and the main annotation choices, and describe the final version, the different formats and a first evaluation. We also present some derived resources and some query examples.},
	pagetotal = {25},
	author = {Abeillé, Anne and Clément, Lionel and Liégeois, Loïc},
	langid = {french},
	file = {Abeillé et al. - Un corpus arboré pour le français  le French Tree.pdf:C\:\\Users\\33623\\Zotero\\storage\\65YAF8B9\\Abeillé et al. - Un corpus arboré pour le français  le French Tree.pdf:application/pdf},
}

% NOTE(review): this article entry has no journaltitle and no date. The 25 below is the PDF page count, not a page range.
% Presumably published in Traitement Automatique des Langues 47(3), 2006, pp. 115-139 -- TODO confirm before adding those fields.
@article{tran_dictionnaire_nodate,
	title = {Un dictionnaire relationnel multilingue de noms propres},
	abstract = {This paper presents the modelling of Proper Name domain defined by the Prolex project. This modelling is based on two main concepts: the Conceptual Proper Name and the Prolexeme. The Conceptual Proper Name do not represents the referent, but a point of view on this referent. It has a specific concept in each language, the Prolexeme, that is a structured family of lexemes. Around them, we have defined other concepts and relations (synonymy, meronymy, accessibility, eponymy...). Each Conceptual Proper Name is an hyponym of a type and an existence within an ontology.},
	pagetotal = {25},
	author = {Tran, Mickael and Maurel, Denis},
	langid = {french},
	file = {Tran et Maurel - Un dictionnaire relationnel multilingue de noms pr.pdf:C\:\\Users\\33623\\Zotero\\storage\\V7TFN5VQ\\Tran et Maurel - Un dictionnaire relationnel multilingue de noms pr.pdf:application/pdf},
}

@article{gabel_p2v-map_2019,
	title = {{P2V-MAP}: Mapping Market Structures for Large Retail Assortments},
	volume = {56},
	issn = {0022-2437, 1547-7193},
	url = {http://journals.sagepub.com/doi/10.1177/0022243719833631},
	doi = {10.1177/0022243719833631},
	shorttitle = {{P2V-MAP}},
	abstract = {The authors propose a new, exploratory approach for analyzing market structures that leverages two recent methodological advances in natural language processing and machine learning. They customize a neural network language model to derive latent product attributes by analyzing the co-occurrences of products in shopping baskets. Applying dimensionality reduction to the latent attributes yields a two-dimensional product map. This method is well-suited to retailers because it relies on data that are readily available from their checkout systems and facilitates their analyses of cross-category product complementarity, in addition to within-category substitution. The approach has high usability because it is automated, is scalable and does not require a priori assumptions. Its results are easy to interpret and update as new market basket data are collected. The authors validate their approach both by conducting an extensive simulation study and by comparing their results with those of state-of-the-art, econometric methods for modeling product relationships. The application of this approach using data collected at a leading German grocery retailer underlines its usefulness and provides novel findings that are relevant to assortment-related decisions.},
	pages = {557--580},
	number = {4},
	journaltitle = {Journal of Marketing Research},
	shortjournal = {Journal of Marketing Research},
	author = {Gabel, Sebastian and Guhl, Daniel and Klapper, Daniel},
	urldate = {2022-07-11},
	date = {2019-08},
	langid = {english},
}

@article{toubia_extracting_2019,
	title = {Extracting Features of Entertainment Products: A Guided Latent {Dirichlet} Allocation Approach Informed by the Psychology of Media Consumption},
	volume = {56},
	issn = {0022-2437, 1547-7193},
	url = {http://journals.sagepub.com/doi/10.1177/0022243718820559},
	doi = {10.1177/0022243718820559},
	shorttitle = {Extracting Features of Entertainment Products},
	abstract = {The authors propose a quantitative approach for describing entertainment products, in a way that allows for improving the predictive performance of consumer choice models for these products. Their approach is based on the media psychology literature, which suggests that people’s consumption of entertainment products is influenced by the psychological themes featured in these products. They classify psychological themes on the basis of the “character strengths” taxonomy from the positive psychology literature (Peterson and Seligman 2004). They develop a natural language processing tool, guided latent Dirichlet allocation ({LDA}), that automatically extracts a set of features of entertainment products from their descriptions. Guided {LDA} is flexible enough to allow features to be informed by psychological themes while allowing other relevant dimensions to emerge. The authors apply this tool to movies and show that guided {LDA} features help better predict movie-watching behavior at the individual level. They find this result with both award-winning movies and blockbuster movies. They illustrate the potential of the proposed approach in pure content-based predictive models of consumer behavior, as well as in hybrid predictive models that combine content-based models with collaborative filtering. They also show that guided {LDA} can improve the performance of models that predict aggregate outcomes.},
	pages = {18--36},
	number = {1},
	journaltitle = {Journal of Marketing Research},
	shortjournal = {Journal of Marketing Research},
	author = {Toubia, Olivier and Iyengar, Garud and Bunnell, Renée and Lemaire, Alain},
	urldate = {2022-07-11},
	date = {2019-02},
	langid = {english},
}

@article{melumad_dynamics_2021,
	author = {Melumad, Shiri and Meyer, Robert and Kim, Yoon Duk},
	title = {The Dynamics of Distortion: How Successive Summarization Alters the Retelling of News},
	shorttitle = {The Dynamics of Distortion},
	journaltitle = {Journal of Marketing Research},
	shortjournal = {Journal of Marketing Research},
	volume = {58},
	number = {6},
	pages = {1058--1078},
	date = {2021-12},
	issn = {0022-2437, 1547-7193},
	doi = {10.1177/0022243720987147},
	url = {http://journals.sagepub.com/doi/10.1177/0022243720987147},
	urldate = {2022-07-11},
	langid = {english},
	abstract = {This work advances and tests a theory of how news information evolves as it is successively retold by consumers. Drawing on data from over 11,000 participants across ten experiments, the authors offer evidence that when news is repeatedly retold, it undergoes a stylistic transformation termed “disagreeable personalization,” wherein original facts are increasingly supplanted by opinions and interpretations with a slant toward negativity. The central thesis is that when retellers believe they are more (vs. less) knowledgeable than their recipient about the information they are relaying, they feel more compelled to provide guidance on its meaning and to do so in a persuasive manner. This enhanced motivation to guide persuasively, in turn, leads retellers to not only select the subset of facts they deem most essential but, critically, to provide their interpretations and opinions on those facts, with negativity being used as a means of grabbing their audience’s attention. Implications of this work for research on retelling and consumer information diffusion are explored.},
}

@article{woolley_incentives_2021,
	author = {Woolley, Kaitlin and Sharif, Marissa A.},
	title = {Incentives Increase Relative Positivity of Review Content and Enjoyment of Review Writing},
	journaltitle = {Journal of Marketing Research},
	shortjournal = {Journal of Marketing Research},
	volume = {58},
	number = {3},
	pages = {539--558},
	date = {2021-06},
	issn = {0022-2437, 1547-7193},
	doi = {10.1177/00222437211010439},
	url = {http://journals.sagepub.com/doi/10.1177/00222437211010439},
	urldate = {2022-07-11},
	langid = {english},
	abstract = {A series of controlled experiments examine how the strategy of incentivizing reviews influences consumers’ expressions of positivity. Incentivized (vs. unincentivized) reviews contained a greater proportion of positive relative to negative emotion across a variety of product and service experiences (e.g., videos, service providers, consumer packaged goods companies). This effect occurred for both financial and nonfinancial incentives and when assessing review content across multiple natural language processing tools and human judgments. Incentives influence review content by modifying the experience of writing reviews. That is, when incentives are associated with review writing, they cause the positive affect that results from receiving an incentive to transfer to the review-writing experience, making review writing more enjoyable. In line with this process, the effect of an incentive on review positivity attenuates when incentives are weakly (vs. strongly) associated with review writing (i.e., incentive for “participating in an experiment” vs. “writing a review”) and when the incentive does not transfer positive affect (i.e., when an incentive is provided by a disliked company). By examining when incentives do (vs. do not) adjust the relative positivity of written reviews, this research offers theoretical insight into the literature on incentives, motivation, and word of mouth, with practical implications for managers.},
}

@article{liu_large-scale_2019,
	author = {Liu, Xiao and Lee, Dokyun and Srinivasan, Kannan},
	title = {Large-Scale Cross-Category Analysis of Consumer Review Content on Sales Conversion Leveraging Deep Learning},
	journaltitle = {Journal of Marketing Research},
	shortjournal = {Journal of Marketing Research},
	volume = {56},
	number = {6},
	pages = {918--943},
	date = {2019-12},
	issn = {0022-2437, 1547-7193},
	doi = {10.1177/0022243719866690},
	url = {http://journals.sagepub.com/doi/10.1177/0022243719866690},
	urldate = {2022-07-11},
	langid = {english},
	abstract = {How consumers use review content has remained opaque due to the unstructured nature of text and the lack of review-reading behavior data. The authors overcome this challenge by applying deep learning–based natural language processing on data that tracks individual-level review reading, searching, and purchasing behaviors on an e-commerce site to investigate how consumers use review content. They extract quality and price content from 500,000 reviews of 600 product categories and achieve two objectives. First, the authors describe consumers’ review-content-reading behaviors. Although consumers do not read review content all the time, they do rely on it for products that are expensive or of uncertain quality. Second, the authors quantify the causal impact of read-review content on sales by using supervised deep learning to tag six theory-driven content dimensions and applying a regression discontinuity in time design. They find that aesthetics and price content significantly increase conversion across almost all product categories. Review content has a higher impact on sales when the average rating is higher, ratings variance is lower, the market is more competitive or immature, or brand information is not accessible. A counterfactual simulation suggests that reordering reviews based on content can have the same effect as a 1.6\% price cut for boosting conversion.},
}

@article{melumad_selectively_2019,
	author = {Melumad, Shiri and Inman, J. Jeffrey and Pham, Michel Tuan},
	title = {Selectively Emotional: How Smartphone Use Changes User-Generated Content},
	shorttitle = {Selectively Emotional},
	journaltitle = {Journal of Marketing Research},
	shortjournal = {Journal of Marketing Research},
	volume = {56},
	number = {2},
	pages = {259--275},
	date = {2019-04},
	issn = {0022-2437, 1547-7193},
	doi = {10.1177/0022243718815429},
	url = {http://journals.sagepub.com/doi/10.1177/0022243718815429},
	urldate = {2022-07-11},
	langid = {english},
	abstract = {User-generated content has become ubiquitous and very influential in the marketplace. Increasingly, this content is generated on smartphones rather than personal computers ({PCs}). This article argues that because of its physically constrained nature, smartphone (vs. {PC}) use leads consumers to generate briefer content, which encourages them to focus on the overall gist of their experiences. This focus on gist, in turn, tends to manifest as reviews that emphasize the emotional aspects of an experience in lieu of more specific details. Across five studies—two field studies and three controlled experiments—the authors use natural language processing tools and human assessments to analyze the linguistic characteristics of user-generated content. The findings support the thesis that smartphone use results in the creation of content that is less specific and privileges affect—especially positive affect—relative to {PC}-generated content. The findings also show that differences in emotional content are driven by the tendency to generate briefer content on smartphones rather than user self-selection, differences in topical content, or timing of writing. Implications for research and practice are discussed.},
}

@article{pitt_new_2020,
	title = {New approaches to psychographic consumer segmentation: Exploring fine art collectors using artificial intelligence, automated text analysis and correspondence analysis},
	pubstate = {prepublished},
	issn = {0309-0566},
	url = {https://www.emerald.com/insight/content/doi/10.1108/EJM-01-2019-0083/full/html},
	doi = {10.1108/EJM-01-2019-0083},
	shorttitle = {New approaches to psychographic consumer segmentation},
	abstract = {Purpose
While the motivation for collecting art has received considerable attention in the literature, less is known about the characteristics of the typical art collector. This paper aims to explore these characteristics to develop a typology of art consumers using a mixed method approach over several studies.

Design/methodology/approach
This is achieved by analyzing qualitative data, gathered via semi-structured interviews of art collectors, and quantitatively by means of natural language processing analysis and automated text analysis and using correspondence analysis to analyze and present the results.

Findings
The study’s findings reveal four distinct clusters of art collectors based on their “Big Five” personality traits, as well as uncovering insights into how these types talk about their possessions.

Research limitations/implications
In addition to contributing to the arts marketing literature, the findings provide a more nuanced understanding of consumers that managers can use for market segmentation and target marketing decisions in other markets. The paper also offers a methodological contribution to the literature on correspondence analysis by demonstrating the “doubling” procedure to deal with percentile data.

Practical implications
In addition to contributing to the arts marketing literature, the findings provide a more nuanced understanding of art collectors that managers can use for market segmentation and target marketing decisions. The paper also offers a methodological contribution to the literature on correspondence analysis by demonstrating a non-traditional application of correspondence analysis using the “doubling” procedure. Buyer behavior in the fine art market is not exhaustively studied. By understanding the personality traits of consumers in the art market, sales forces can better provide assistance and product to consumers. Further, understanding the personalities of consumers is better for art retail spaces to better serve consumers.

Originality/value
This paper demonstrates a unique mixed methods approach to analyzing unstructured qualitative data. It shows how text data can be used to identify measurable market segments for which targeted strategies can be developed.},
	journaltitle = {European Journal of Marketing},
	shortjournal = {{EJM}},
	author = {Pitt, Christine S. and Bal, Anjali Suniti and Plangger, Kirk},
	urldate = {2022-07-11},
	date = {2020-01-02},
	langid = {english},
}

@article{kauffmann_framework_2020,
	title = {A framework for big data analytics in commercial social networks: A case study on sentiment analysis and fake review detection for marketing decision-making},
	volume = {90},
	issn = {0019-8501},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0019850118307612},
	doi = {10.1016/j.indmarman.2019.08.003},
	shorttitle = {A framework for big data analytics in commercial social networks},
	pages = {523--537},
	journaltitle = {Industrial Marketing Management},
	shortjournal = {Industrial Marketing Management},
	author = {Kauffmann, Erick and Peral, Jesús and Gil, David and Ferrández, Antonio and Sellers, Ricardo and Mora, Higinio},
	urldate = {2022-07-11},
	date = {2020-10},
	langid = {english},
}

@article{peng_speaking_2022,
	author = {Peng, Ling and Cui, Geng and Bao, Ziru and Liu, Shuman},
	title = {Speaking the same language: the power of words in crowdfunding success and failure},
	shorttitle = {Speaking the same language},
	journaltitle = {Marketing Letters},
	shortjournal = {Mark Lett},
	volume = {33},
	number = {2},
	pages = {311--323},
	date = {2022-06},
	issn = {0923-0645, 1573-059X},
	doi = {10.1007/s11002-021-09595-3},
	url = {https://link.springer.com/10.1007/s11002-021-09595-3},
	urldate = {2022-07-11},
	langid = {english},
}

@article{zaki_text_2020,
	title = {Text mining analysis roadmap ({TMAR}) for service research},
	volume = {34},
	issn = {0887-6045},
	url = {https://www.emerald.com/insight/content/doi/10.1108/JSM-02-2019-0074/full/html},
	doi = {10.1108/JSM-02-2019-0074},
	abstract = {Purpose
The purpose of this paper is to offer a step-by-step text mining analysis roadmap ({TMAR}) for service researchers. The paper provides guidance on how to choose between alternative tools, using illustrative examples from a range of business contexts.

Design/methodology/approach
The authors provide a six-stage {TMAR} on how to use text mining methods in practice. At each stage, the authors provide a guiding question, articulate the aim, identify a range of methods and demonstrate how machine learning and linguistic techniques can be used in practice with illustrative examples drawn from business, from an array of data types, services and contexts.

Findings
At each of the six stages, this paper demonstrates useful insights that result from the text mining techniques to provide an in-depth understanding of the phenomenon and actionable insights for research and practice.

Originality/value
There is little research to guide scholars and practitioners on how to gain insights from the extensive “big data” that arises from the different data sources. In a first, this paper addresses this important gap highlighting the advantages of using text mining to gain useful insights for theory testing and practice in different service contexts.},
	pages = {30--47},
	number = {1},
	journaltitle = {Journal of Services Marketing},
	shortjournal = {{JSM}},
	author = {Zaki, Mohamed and {McColl}-Kennedy, Janet R.},
	urldate = {2022-07-11},
	date = {2020-01-08},
	langid = {english},
}

@article{shankar_overview_2022,
	author = {Shankar, Venkatesh and Parsana, Sohil},
	title = {An overview and empirical comparison of natural language processing ({NLP}) models and an introduction to and empirical application of autoencoder models in marketing},
	journaltitle = {Journal of the Academy of Marketing Science},
	shortjournal = {J. of the Acad. Mark. Sci.},
	date = {2022-03-04},
	issn = {0092-0703, 1552-7824},
	doi = {10.1007/s11747-022-00840-3},
	url = {https://link.springer.com/10.1007/s11747-022-00840-3},
	urldate = {2022-07-11},
	langid = {english},
}

@article{alantari_empirical_2022,
	title = {An empirical comparison of machine learning methods for text-based sentiment analysis of online consumer reviews},
	volume = {39},
	issn = {0167-8116},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0167811621000926},
	doi = {10.1016/j.ijresmar.2021.10.011},
	pages = {1--19},
	number = {1},
	journaltitle = {International Journal of Research in Marketing},
	shortjournal = {International Journal of Research in Marketing},
	author = {Alantari, Huwail J. and Currim, Imran S. and Deng, Yiting and Singh, Sameer},
	urldate = {2022-07-11},
	date = {2022-03},
	langid = {english},
}

@article{hartmann_comparing_2019,
	title = {Comparing automated text classification methods},
	volume = {36},
	issn = {0167-8116},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0167811618300545},
	doi = {10.1016/j.ijresmar.2018.09.009},
	pages = {20--38},
	number = {1},
	journaltitle = {International Journal of Research in Marketing},
	shortjournal = {International Journal of Research in Marketing},
	author = {Hartmann, Jochen and Huppertz, Juliana and Schamp, Christina and Heitmann, Mark},
	urldate = {2022-07-11},
	date = {2019-03},
	langid = {english},
}

@article{hovy_wordify_2021,
	title = {{Wordify}: A Tool for Discovering and Differentiating Consumer Vocabularies},
	volume = {48},
	issn = {0093-5301, 1537-5277},
	url = {https://academic.oup.com/jcr/article/48/3/394/6199426},
	doi = {10.1093/jcr/ucab018},
	shorttitle = {{Wordify}},
	abstract = {This work describes and illustrates a free and easy-to-use online text-analysis tool for understanding how consumer word use varies across contexts. The tool, Wordify, uses randomized logistic regression ({RLR}) to identify the words that best discriminate texts drawn from different pre-classified corpora, such as posts written by men versus women, or texts containing mostly negative versus positive valence. We present illustrative examples to show how the tool can be used for such diverse purposes as (1) uncovering the distinctive vocabularies that consumers use when writing reviews on smartphones versus {PCs}, (2) discovering how the words used in Tweets differ between presumed supporters and opponents of a controversial ad, and (3) expanding the dictionaries of dictionary-based sentiment-measurement tools. We show empirically that Wordify’s {RLR} algorithm performs better at discriminating vocabularies than support vector machines and chi-square selectors, while offering significant advantages in computing time. A discussion is also provided on the use of Wordify in conjunction with other text-analysis tools, such as probabilistic topic modeling and sentiment analysis, to gain more profound knowledge of the role of language in consumer behavior.},
	pages = {394--414},
	number = {3},
	journaltitle = {Journal of Consumer Research},
	author = {Hovy, Dirk and Melumad, Shiri and Inman, J Jeffrey},
	editor = {Lutz, Richard J and Hofacker, Charles F},
	urldate = {2022-07-11},
	date = {2021-10-22},
	langid = {english},
}

@article{shumanov_using_2022,
	title = {Using {AI} predicted personality to enhance advertising effectiveness},
	volume = {56},
	issn = {0309-0566},
	url = {https://www.emerald.com/insight/content/doi/10.1108/EJM-12-2019-0941/full/html},
	doi = {10.1108/EJM-12-2019-0941},
	abstract = {Purpose
              The purpose of this study is twofold: first to demonstrate the application of an algorithm using contextual data to ascertain consumer personality traits; and second to explore the factors impacting the relationship between personality traits and advertisement persuasiveness.
            
            
              Design/methodology/approach
              A mixed-method approach that comprises two distinct yet complementary studies. The first uses quantitative methods and is based on a sample of 35,264 retail banking customers. Study 2 explores the findings that emerge from Study 1 using qualitative methods.
            
            
              Findings
              This paper finds that matching consumer personality with congruent advertising messages can lead to more effective consumer persuasion for most personality types. For consumers who exhibit neurotic personality traits, ameliorating perceived risks during purchasing and providing cues for social acceptance and goal attainment are important factors for advertising effectiveness. These factors also had a positive impact on the purchasing behaviour of extroverted consumers.
            
            
              Research limitations/implications
              This research focusses on understanding purchasing behaviour based on the most dominant personality trait. However, people are likely to exhibit a combination of most or even all of the Big Five personality traits.
            
            
              Practical implications
              Building on advances in natural language processing, enabling the identification of personality from language, this study demonstrates the possibility of influencing consumer behaviour by matching machine inferred personality to congruent persuasive advertising. It is one of the few studies to use contextual instead of social media data to capture individual personality. Such data serves to capture an authentic rather than contrived persona. Further, the study identifies the factors that may moderate this relationship and thereby provides an explanation of why some personality traits exhibit differences in purchasing behaviour from those that are anticipated by existing theory.
            
            
              Originality/value
              Although the idea that people are more likely to be responsive to advertising messages that are congruent with their personality type has already been successfully applied by advertising practitioners and documented by advertising scholars, this study extends existing research by identifying the factors that may moderate this relationship and thereby provides an explanation why some personality traits may exhibit differences in purchasing behaviour from those that are anticipated by existing theory.},
	pages = {1590--1609},
	number = {6},
	journaltitle = {European Journal of Marketing},
	shortjournal = {{EJM}},
	author = {Shumanov, Michael and Cooper, Holly and Ewing, Mike},
	urldate = {2022-07-11},
	date = {2022-06-07},
	langid = {english},
}

% NOTE(review): the exported pages value (002224292110478) was a DOI-derived placeholder, not a page range, and has been dropped; add volume, number and pages once confirmed against the publisher record.
@article{wang_attribute_2021,
	title = {Attribute Embedding: Learning Hierarchical Representations of Product Attributes from Consumer Reviews},
	issn = {0022-2429, 1547-7185},
	url = {http://journals.sagepub.com/doi/10.1177/00222429211047822},
	doi = {10.1177/00222429211047822},
	shorttitle = {Attribute Embedding},
	abstract = {Sales, product design, and engineering teams benefit immensely from better understanding customer perspectives. How do customers combine a product's technical specifications (i.e., engineered attributes) to form abstract product benefits (i.e., meta-attributes)? To address this question, the authors use machine learning and natural language processing to develop a methodological framework that extracts a hierarchy of product attributes based on contextual information of how attributes are expressed in consumer reviews. The attribute hierarchy reveals linkages between engineered attributes and meta-attributes within a product category, enabling flexible sentiment analysis that can identify how consumers receive meta-attributes, and which engineered attributes are main drivers. The framework can guide managers to monitor only portions of review content that are relevant to specific attributes of interest. Moreover, managers can compare products within and between brands, where different names and attribute combinations are often associated with similar benefits. The authors apply the framework to the tablet computer category to generate dashboards and perceptual maps and provide validations of the attribute hierarchy using both primary and secondary data. Resultant insights allow the exploration of substantive questions, such as how Apple improved successive generations of {iPads} and why Hewlett-Packard and Toshiba discontinued their tablet product lines.},
	journaltitle = {Journal of Marketing},
	shortjournal = {Journal of Marketing},
	author = {Wang, Xin (Shane) and He, Jiaxiu and Curry, David J. and Ryoo, Jun Hyun (Joseph)},
	urldate = {2022-07-11},
	date = {2021-11-17},
	langid = {english},
}

% NOTE(review): the exported pages value (mksc.2021.1293) merely repeated the DOI suffix and has been dropped; add volume, number and pages once confirmed against the publisher record.
@article{dhillon_modeling_2021,
	title = {Modeling Dynamic User Interests: A Neural Matrix Factorization Approach},
	issn = {0732-2399, 1526-548X},
	url = {http://pubsonline.informs.org/doi/10.1287/mksc.2021.1293},
	doi = {10.1287/mksc.2021.1293},
	shorttitle = {Modeling Dynamic User Interests},
	abstract = {We propose an interpretable model that combines the simplicity of matrix factorization with the flexibility of neural networks to model evolving user interests by efficiently extracting nonlinear patterns from massive text data collections.
          , 
            In recent years, there has been significant interest in understanding users’ online content consumption patterns. But the unstructured, high-dimensional, and dynamic nature of such data makes extracting valuable insights challenging. Here we propose a model that combines the simplicity of matrix factorization with the flexibility of neural networks to efficiently extract nonlinear patterns from massive text data collections relevant to consumers’ online consumption patterns. Our model decomposes a user’s content consumption journey into nonlinear user and content factors that are used to model their dynamic interests. This natural decomposition allows us to summarize each user’s content consumption journey with a dynamic probabilistic weighting over a set of underlying content attributes. The model is fast to estimate, easy to interpret, and can harness external data sources as an empirical prior. These advantages make our method well suited to the challenges posed by modern data sets used by digital marketers. We use our model to understand the dynamic news consumption interests of Boston Globe readers over five years. Thorough qualitative studies, including a crowdsourced evaluation, highlight our model’s ability to accurately identify nuanced and coherent consumption patterns. These results are supported by our model’s superior and robust predictive performance over several competitive baseline methods.},
	journaltitle = {Marketing Science},
	shortjournal = {Marketing Science},
	author = {Dhillon, Paramveer S. and Aral, Sinan},
	urldate = {2022-07-11},
	date = {2021-09-16},
	langid = {english},
}

@article{berger_marketing_2022,
	author = {Berger, Jonah and Packard, Grant and Boghrati, Reihane and Hsu, Ming and Humphreys, Ashlee and Luangrath, Andrea and Moore, Sarah and Nave, Gideon and Olivola, Christopher and Rocklage, Matthew},
	title = {Marketing insights from text analysis},
	journaltitle = {Marketing Letters},
	shortjournal = {Mark Lett},
	date = {2022-06-10},
	issn = {0923-0645, 1573-059X},
	doi = {10.1007/s11002-022-09635-6},
	url = {https://link.springer.com/10.1007/s11002-022-09635-6},
	urldate = {2022-07-11},
	langid = {english},
}

@article{timoshenko_identifying_2019,
	author = {Timoshenko, Artem and Hauser, John R.},
	title = {Identifying Customer Needs from User-Generated Content},
	journaltitle = {Marketing Science},
	shortjournal = {Marketing Science},
	date = {2019-01},
	volume = {38},
	number = {1},
	pages = {1--20},
	issn = {0732-2399, 1526-548X},
	doi = {10.1287/mksc.2018.1123},
	url = {http://pubsonline.informs.org/doi/10.1287/mksc.2018.1123},
	urldate = {2022-07-11},
	langid = {english},
	abstract = {We evaluate user-generated content as a source of customer needs and propose and test a machine-learning approach for identifying customer needs more efficiently.
          , 
            Firms traditionally rely on interviews and focus groups to identify customer needs for marketing strategy and product development. User-generated content ({UGC}) is a promising alternative source for identifying customer needs. However, established methods are neither efficient nor effective for large {UGC} corpora because much content is noninformative or repetitive. We propose a machine-learning approach to facilitate qualitative analysis by selecting content for efficient review. We use a convolutional neural network to filter out noninformative content and cluster dense sentence embeddings to avoid sampling repetitive content. We further address two key questions: Are {UGC}-based customer needs comparable to interview-based customer needs? Do the machine-learning methods improve customer-need identification? These comparisons are enabled by a custom data set of customer needs for oral care products identified by professional analysts using industry-standard experiential interviews. The analysts also coded 12,000 {UGC} sentences to identify which previously identified customer needs and/or new customer needs were articulated in each sentence. We show that (1) {UGC} is at least as valuable as a source of customer needs for product development, likely more valuable, compared with conventional methods, and (2) machine-learning methods improve efficiency of identifying customer needs from {UGC} (unique customer needs per unit of professional services cost).
            Data are available at https://doi.org/10.1287/mksc.2018.1123 .},
}

@article{srivastava_enhancing_2019,
	title = {Enhancing the Helpfulness of Online Consumer Reviews: The Role of Latent (Content) Factors},
	volume = {48},
	issn = {1094-9968},
	url = {https://journals.sagepub.com/doi/full/10.1016/j.intmar.2018.12.003},
	doi = {10.1016/j.intmar.2018.12.003},
	shorttitle = {Enhancing the Helpfulness of Online Consumer Reviews},
	pages = {33--50},
	journaltitle = {Journal of Interactive Marketing},
	shortjournal = {Journal of Interactive Marketing},
	author = {Srivastava, Vartika and Kalro, Arti D.},
	urldate = {2022-07-11},
	date = {2019-11},
	langid = {english},
}

@article{kopalle_examining_2022,
	title = {Examining artificial intelligence ({AI}) technologies in marketing via a global lens: Current trends and future research opportunities},
	volume = {39},
	issn = {0167-8116},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S016781162100094X},
	doi = {10.1016/j.ijresmar.2021.11.002},
	shorttitle = {Examining artificial intelligence ({AI}) technologies in marketing via a global lens},
	pages = {522--540},
	number = {2},
	journaltitle = {International Journal of Research in Marketing},
	shortjournal = {International Journal of Research in Marketing},
	author = {Kopalle, Praveen K. and Gangwar, Manish and Kaplan, Andreas and Ramachandran, Divya and Reinartz, Werner and Rindfleisch, Aric},
	urldate = {2022-07-19},
	date = {2022-06},
	langid = {english},
}

@article{ma_machine_2020,
	title = {Machine learning and {AI} in marketing – Connecting computing power to human insights},
	volume = {37},
	issn = {0167-8116},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0167811620300410},
	doi = {10.1016/j.ijresmar.2020.04.005},
	pages = {481--504},
	number = {3},
	journaltitle = {International Journal of Research in Marketing},
	shortjournal = {International Journal of Research in Marketing},
	author = {Ma, Liye and Sun, Baohong},
	urldate = {2022-07-19},
	date = {2020-09},
	langid = {english},
}

@article{eliashberg_story_2007,
	author = {Eliashberg, Jehoshua and Hui, Sam K. and Zhang, Z. John},
	title = {From Story Line to Box Office: A New Approach for Green-Lighting Movie Scripts},
	shorttitle = {From Story Line to Box Office},
	journaltitle = {Management Science},
	shortjournal = {Management Science},
	date = {2007-06},
	volume = {53},
	number = {6},
	pages = {881--893},
	issn = {0025-1909, 1526-5501},
	doi = {10.1287/mnsc.1060.0668},
	url = {http://pubsonline.informs.org/doi/10.1287/mnsc.1060.0668},
	urldate = {2022-07-19},
	langid = {english},
	abstract = {Movie studios often have to choose among thousands of scripts to decide which ones to turn into movies. Despite the huge amount of money at stake, this process—known as green-lighting in the movie industry—is largely a guesswork based on experts’ experience and intuitions. In this paper, we propose a new approach to help studios evaluate scripts that will then lead to more profitable green-lighting decisions. Our approach combines screenwriting domain knowledge, natural-language processing techniques, and statistical learning methods to forecast a movie’s return on investment ({ROI}) based only on textual information available in movie scripts. We test our model in a holdout decision task to show that our model is able to significantly improve a studio’s gross {ROI}.},
}

@article{netzer_mine_2012,
	author = {Netzer, Oded and Feldman, Ronen and Goldenberg, Jacob and Fresko, Moshe},
	title = {Mine Your Own Business: Market-Structure Surveillance Through Text Mining},
	shorttitle = {Mine Your Own Business},
	journaltitle = {Marketing Science},
	shortjournal = {Marketing Science},
	date = {2012-05},
	volume = {31},
	number = {3},
	pages = {521--543},
	issn = {0732-2399, 1526-548X},
	doi = {10.1287/mksc.1120.0713},
	url = {http://pubsonline.informs.org/doi/10.1287/mksc.1120.0713},
	urldate = {2022-07-19},
	langid = {english},
	abstract = {Web 2.0 provides gathering places for Internet users in blogs, forums, and chat rooms. These gathering places leave footprints in the form of colossal amounts of data regarding consumers' thoughts, beliefs, experiences, and even interactions. In this paper, we propose an approach for firms to explore online user-generated content and “listen” to what customers write about their and their competitors' products. Our objective is to convert the user-generated content to market structures and competitive landscape insights. The difficulty in obtaining such market-structure insights from online user-generated content is that consumers' postings are often not easy to syndicate. To address these issues, we employ a text-mining approach and combine it with semantic network analysis tools. We demonstrate this approach using two cases—sedan cars and diabetes drugs—generating market-structure perceptual maps and meaningful insights without interviewing a single consumer. We compare a market structure based on user-generated content data with a market structure derived from more traditional sales and survey-based data to establish validity and highlight meaningful differences.},
}

% NOTE(review): the exported pages value (S016781162200009X) was the publisher item identifier already present in the url, not a page range, and has been dropped; add volume, number and pages once confirmed against the publisher record.
@article{carlson_complementing_2022,
	title = {Complementing human effort in online reviews: A deep learning approach to automatic content generation and review synthesis},
	issn = {0167-8116},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S016781162200009X},
	doi = {10.1016/j.ijresmar.2022.02.004},
	shorttitle = {Complementing human effort in online reviews},
	journaltitle = {International Journal of Research in Marketing},
	shortjournal = {International Journal of Research in Marketing},
	author = {Carlson, Keith and Kopalle, Praveen K. and Riddell, Allen and Rockmore, Daniel and Vana, Prasad},
	urldate = {2022-07-19},
	date = {2022-02},
	langid = {english},
}

@article{vadalkar_critical_2021,
	author = {Vadalkar, Suniti and Chavan, Gitesh and Chaudhuri, Ranjan and Vrontis, Demetris},
	title = {A critical review of international print advertisements: evolutionary analysis, assessment and elucidations, from 1965 to 2020},
	shorttitle = {A critical review of international print advertisements},
	journaltitle = {International Marketing Review},
	shortjournal = {{IMR}},
	date = {2021-09-07},
	volume = {38},
	number = {5},
	pages = {806--839},
	issn = {0265-1335},
	doi = {10.1108/IMR-11-2020-0257},
	url = {https://www.emerald.com/insight/content/doi/10.1108/IMR-11-2020-0257/full/html},
	urldate = {2022-07-19},
	langid = {english},
	abstract = {Purpose
              Amidst the plethora of mass communication methods that technology bestowed business with, print advertisements still remain an effective and widely utilized advertising tool, and retain a diachronically venerable position in international marketing practice. Bar and transcending mere academic fascination or curiosity, this research provides insights into the past, an understanding of the present and an outlook into the future. In this vein, through a methodical and comprehensive critical review of extant literature on print advertisements since 1965, this research aims to identify gaps in extant knowledge, to map its trends and divergences, to trace its paradigm shifts and to ultimately develop agendas for truly significant future research.
            
            
              Design/methodology/approach
              This spatial-temporal study reviews 256 methodically selected articles, using {VantagePoint} software, and adopts a novel methodology through natural language processing ({NLP}), text mining, auto-correlation maps, and bubble maps to conduct and present a robust analysis and explicit findings.
            
            
              Findings
              Using also the {VOSviewer} for density and network visualization, the results identify the predominant literature themes and, conversely, the relatively under-researched areas, and provide a more insightful collective interpretation of extant works, while laying the foundation for future research of greater value and significance to academia and industry.
            
            
              Originality/value
              This study transcends the partial and/or limited analyses and perspectives of extant literature to present scholars with the first comprehensive and long term meta-analysis or systematic study of print advertising, with explicit findings of both scholarly and executive worth.},
}

@article{ray_exploring_2021,
	author = {Ray, Arghya and Bala, Pradip Kumar and Dwivedi, Yogesh K},
	title = {Exploring values affecting e-Learning adoption from the user-generated-content: A consumption-value-theory perspective},
	shorttitle = {Exploring values affecting e-Learning adoption from the user-generated-content},
	journaltitle = {Journal of Strategic Marketing},
	shortjournal = {Journal of Strategic Marketing},
	date = {2021-07-04},
	volume = {29},
	number = {5},
	pages = {430--452},
	issn = {0965-254X, 1466-4488},
	doi = {10.1080/0965254X.2020.1749875},
	url = {https://www.tandfonline.com/doi/full/10.1080/0965254X.2020.1749875},
	urldate = {2022-07-19},
	langid = {english},
}

@article{li_is_2020,
	author = {Li, Yiyi and Xie, Ying},
	title = {Is a Picture Worth a Thousand Words? An Empirical Study of Image Content and Social Media Engagement},
	shorttitle = {Is a Picture Worth a Thousand Words?},
	journaltitle = {Journal of Marketing Research},
	shortjournal = {Journal of Marketing Research},
	date = {2020-02},
	volume = {57},
	number = {1},
	pages = {1--19},
	issn = {0022-2437, 1547-7193},
	doi = {10.1177/0022243719881113},
	url = {http://journals.sagepub.com/doi/10.1177/0022243719881113},
	urldate = {2022-07-19},
	langid = {english},
	abstract = {Are social media posts with pictures more popular than those without? Why do pictures with certain characteristics induce higher engagement than some other pictures? Using data sets of social media posts about major airlines and sport utility vehicle brands collected from Twitter and Instagram, the authors empirically examine the influence of image content on social media engagement. After accounting for selection bias on the inclusion of image content, the authors find a significant and robust positive mere presence effect of image content on user engagement in both product categories on Twitter. They also find that high-quality and professionally shot pictures consistently lead to higher engagement on both platforms for both product categories. However, the effect of colorfulness varies by product category, while the presence of human face and image–text fit can induce higher user engagement on Twitter but not on Instagram. These findings shed light on how to improve social media engagement using image content.},
}

@article{kozlowski_geometry_2019,
	author = {Kozlowski, Austin C. and Taddy, Matt and Evans, James A.},
	title = {The Geometry of Culture: Analyzing the Meanings of Class through Word Embeddings},
	shorttitle = {The Geometry of Culture},
	journaltitle = {American Sociological Review},
	shortjournal = {Am Sociol Rev},
	date = {2019-10},
	volume = {84},
	number = {5},
	pages = {905--949},
	issn = {0003-1224, 1939-8271},
	doi = {10.1177/0003122419877135},
	url = {http://journals.sagepub.com/doi/10.1177/0003122419877135},
	urldate = {2022-07-19},
	langid = {english},
	abstract = {We argue word embedding models are a useful tool for the study of culture using a historical analysis of shared understandings of social class as an empirical case. Word embeddings represent semantic relations between words as relationships between vectors in a high-dimensional space, specifying a relational model of meaning consistent with contemporary theories of culture. Dimensions induced by word differences ( rich – poor) in these spaces correspond to dimensions of cultural meaning, and the projection of words onto these dimensions reflects widely shared associations, which we validate with surveys. Analyzing text from millions of books published over 100 years, we show that the markers of class continuously shifted amidst the economic transformations of the twentieth century, yet the basic cultural dimensions of class remained remarkably stable. The notable exception is education, which became tightly linked to affluence independent of its association with cultivated taste.},
}

@misc{mikolov_efficient_2013,
	title = {Efficient Estimation of Word Representations in Vector Space},
	url = {http://arxiv.org/abs/1301.3781},
	abstract = {We propose two novel model architectures for computing continuous vector representations of words from very large data sets. The quality of these representations is measured in a word similarity task, and the results are compared to the previously best performing techniques based on different types of neural networks. We observe large improvements in accuracy at much lower computational cost, i.e. it takes less than a day to learn high quality word vectors from a 1.6 billion words data set. Furthermore, we show that these vectors provide state-of-the-art performance on our test set for measuring syntactic and semantic word similarities.},
	number = {{arXiv}:1301.3781},
	publisher = {{arXiv}},
	author = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey},
	urldate = {2022-07-19},
	date = {2013-09-06},
	eprinttype = {arxiv},
	eprint = {1301.3781},
	eprintclass = {cs.CL},
	keywords = {Computer Science - Computation and Language},
	file = {arXiv.org Snapshot:C\:\\Users\\33623\\Zotero\\storage\\B23WHTTA\\1301.html:text/html},
}

@article{shi_hype_2022,
	author = {Shi, Zijun (June) and Liu, Xiao and Srinivasan, Kannan},
	title = {Hype News Diffusion and Risk of Misinformation: The Oz Effect in Health Care},
	shorttitle = {Hype News Diffusion and Risk of Misinformation},
	journaltitle = {Journal of Marketing Research},
	shortjournal = {Journal of Marketing Research},
	date = {2022-04},
	volume = {59},
	number = {2},
	pages = {327--352},
	issn = {0022-2437, 1547-7193},
	doi = {10.1177/00222437211044472},
	url = {http://journals.sagepub.com/doi/10.1177/00222437211044472},
	urldate = {2022-07-20},
	langid = {english},
	abstract = {Consumers’ choices about health products are heavily influenced by public information, such as news articles, research articles, online customer reviews, online product discussion, and {TV} shows. Dr. Oz, a celebrity physician, often makes medical recommendations with limited or marginal scientific evidence. Although reputable news agencies have traditionally acted as gatekeepers of reliable information, they face the intense pressure of “the eyeball game.” Customer reviews, despite their authenticity, may come from deceived consumers. Therefore, it remains unclear whether public information sources can correct the misleading health information. In the context of over-the-counter weight loss products, the authors carefully analyze the cascading of information post endorsement. The analysis of extensive textual content with deep-learning methods reveals that legitimate news outlets respond to Dr. Oz's endorsement by generating more news articles about the ingredient; on average, articles after the endorsement contain higher sentiment, so news agencies seem to amplify rather than rectify the misleading endorsement. The finding highlights a serious concern: the risk of hype news diffusion. Research articles react too slowly to mitigate the problem, and online customer reviews and product discussions provide only marginal corrections. The findings underscore the importance of oversight to mitigate the risk of cascading hype news.},
}

@article{melumad_full_2020,
	author = {Melumad, Shiri and Meyer, Robert},
	title = {Full Disclosure: How Smartphones Enhance Consumer Self-Disclosure},
	shorttitle = {Full Disclosure},
	journaltitle = {Journal of Marketing},
	shortjournal = {Journal of Marketing},
	date = {2020-05},
	volume = {84},
	number = {3},
	pages = {28--45},
	issn = {0022-2429, 1547-7185},
	doi = {10.1177/0022242920912732},
	url = {http://journals.sagepub.com/doi/10.1177/0022242920912732},
	urldate = {2022-07-20},
	langid = {english},
	abstract = {Results from three large-scale field studies and two controlled experiments show that consumers tend to be more self-disclosing when generating content on their smartphone versus personal computer. This tendency is found in a wide range of domains including social media posts, online restaurant reviews, open-ended survey responses, and compliance with requests for personal information in web advertisements. The authors show that this increased willingness to self-disclose on one’s smartphone arises from the psychological effects of two distinguishing properties of the device: (1) feelings of comfort that many associate with their smartphone and (2) a tendency to narrowly focus attention on the disclosure task at hand due to the relative difficulty of generating content on the smaller device. The enhancing effect of smartphones on self-disclosure yields several important marketing implications, including the creation of content that is perceived as more persuasive by outside readers. The authors explore implications for how these findings can be strategically leveraged by managers, including how they may generalize to other emerging technologies.},
}

@article{dotzel_relative_2019,
	author = {Dotzel, Thomas and Shankar, Venkatesh},
	title = {The Relative Effects of Business-to-Business (vs. Business-to-Consumer) Service Innovations on Firm Value and Firm Risk: An Empirical Analysis},
	shorttitle = {The Relative Effects of Business-to-Business (vs. Business-to-Consumer) Service Innovations on Firm Value and Firm Risk},
	journaltitle = {Journal of Marketing},
	shortjournal = {Journal of Marketing},
	date = {2019-09},
	volume = {83},
	number = {5},
	pages = {133--152},
	issn = {0022-2429, 1547-7185},
	doi = {10.1177/0022242919847221},
	url = {http://journals.sagepub.com/doi/10.1177/0022242919847221},
	urldate = {2022-07-20},
	langid = {english},
	abstract = {Many firms introduce both business-to-business service innovations (B2B-{SIs}) and business-to-consumer service innovations (B2C-{SIs}) and need to better allocate their resources. However, they are unsure about B2B-{SIs}’ effects on firm value or risk, especially relative to those of B2C-{SIs}. The authors address this problem by developing hypotheses that relate the number of B2B-{SIs} and B2C-{SIs} to firm value and firm risk together with the moderators (the number of product innovations and customer-focus innovations). To test the hypotheses, the authors develop and estimate a model using unique panel data of 2,263 {SIs} across 15 industries over eight years assembled from multiple data sources and controlling for firm- and market-specific factors, heterogeneity, and endogeneity. They analyze innovation announcements using natural language processing. The results show that B2B-{SIs} have a positive effect on firm value and an insignificant influence on firm risk. Importantly, the effect of a B2B-{SI} on firm value is significantly greater than that of a B2C-{SI}. Unlike B2C-{SIs}, the effect of B2B-{SIs} on firm value is greater when the firm has more product innovations. Surprisingly, unlike B2C-{SIs}, the effect of B2B-{SIs} on firm value is less positive when the {SIs} emphasize customers. These findings offer important insights about the relative value of B2B-{SIs}.},
}

@article{xu_understanding_2021,
	title = {Understanding changes in a brand’s core positioning and customer engagement: a sentiment analysis of a brand-owned {Facebook} site},
	volume = {9},
	issn = {2050-3318, 2050-3326},
	url = {http://link.springer.com/10.1057/s41270-020-00099-z},
	doi = {10.1057/s41270-020-00099-z},
	shorttitle = {Understanding changes in a brand’s core positioning and customer engagement},
	pages = {3--16},
	number = {1},
	journaltitle = {Journal of Marketing Analytics},
	shortjournal = {J Market Anal},
	author = {Xu, Zhenning and Vail, Colin and Kohli, Amarpreet S. and Tajdini, Saeed},
	urldate = {2022-07-20},
	date = {2021-03},
	langid = {english},
}

@article{zierau_voice_2022,
	title = {Voice bots on the frontline: Voice-based interfaces enhance flow-like consumer experiences \& boost service outcomes},
	issn = {0092-0703, 1552-7824},
	url = {https://link.springer.com/10.1007/s11747-022-00868-5},
	doi = {10.1007/s11747-022-00868-5},
	shorttitle = {Voice bots on the frontline},
	abstract = {Voice-based interfaces provide new opportunities for firms to interact with consumers along the customer journey. The current work demonstrates across four studies that voice-based (as opposed to text-based) interfaces promote more flow-like user experiences, resulting in more positively-valenced service experiences, and ultimately more favorable behavioral firm outcomes (i.e., contract renewal, conversion rates, and consumer sentiment). Moreover, we also provide evidence for two important boundary conditions that reduce such flow-like user experiences in voice-based interfaces (i.e., semantic disfluency and the amount of conversational turns). The findings of this research highlight how fundamental theories of human communication can be harnessed to create more experiential service experiences with positive downstream consequences for consumers and firms. These findings have important practical implications for firms that aim at leveraging the potential of voice-based interfaces to improve consumers’ service experiences and the theory-driven “conversational design” of voice-based interfaces.},
	journaltitle = {Journal of the Academy of Marketing Science},
	shortjournal = {J. of the Acad. Mark. Sci.},
	author = {Zierau, Naim and Hildebrand, Christian and Bergner, Anouk and Busquet, Francesc and Schmitt, Anuschka and Leimeister, Jan Marco},
	urldate = {2022-07-20},
	date = {2022-06-21},
	langid = {english},
}

@article{biswas_critical_2022,
	title = {A critical assessment of consumer reviews: A hybrid {NLP}-based methodology},
	volume = {159},
	issn = {0167-9236},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0167923622000707},
	doi = {10.1016/j.dss.2022.113799},
	shorttitle = {A critical assessment of consumer reviews},
	pages = {113799},
	journaltitle = {Decision Support Systems},
	shortjournal = {Decision Support Systems},
	author = {Biswas, Baidyanath and Sengupta, Pooja and Kumar, Ajay and Delen, Dursun and Gupta, Shivam},
	urldate = {2022-07-20},
	date = {2022-08},
	langid = {english},
}

@article{van_dinter_automation_2021,
	title = {Automation of systematic literature reviews: A systematic literature review},
	volume = {136},
	issn = {0950-5849},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0950584921000690},
	doi = {10.1016/j.infsof.2021.106589},
	shorttitle = {Automation of systematic literature reviews},
	pages = {106589},
	journaltitle = {Information and Software Technology},
	shortjournal = {Information and Software Technology},
	author = {van Dinter, Raymon and Tekinerdogan, Bedir and Catal, Cagatay},
	urldate = {2022-07-20},
	date = {2021-08},
	langid = {english},
}

@article{lee_influencer-generated_2022,
	title = {Influencer-Generated Reference Groups},
	volume = {49},
	issn = {0093-5301, 1537-5277},
	url = {https://academic.oup.com/jcr/article/49/1/25/6380146},
	doi = {10.1093/jcr/ucab056},
	abstract = {This article explores the idea that consumer influencers can shape reference group meanings in social media. Through a survey in which over 5,000 participants provided open-ended reference group associations for 25 major brands, the authors find that social media influencers can either strengthen or change brand reference group associations. Specifically, the typicality of the influencer (relative to a brand’s stereotypical consumer) can shape ideas about the perceived homogeneity of the brand’s consumers, which ultimately influences the strength and tightness of brand associations. This research combines seminal theories regarding cultural and sociological influences on branding, concepts relating to stereotype change, and a multi-method approach to assess new digital flows of cultural meaning from consumer influencers to brands.},
	pages = {25--45},
	number = {1},
	journaltitle = {Journal of Consumer Research},
	author = {Lee, Jeffrey K. and {Junqué de Fortuny}, Enric},
	editor = {Inman, J. Jeffrey and Moore, Sarah G.},
	urldate = {2022-07-20},
	date = {2022-05-19},
	langid = {english},
}

@article{berger_what_2021,
	title = {What Makes Content Engaging? How Emotional Dynamics Shape Success},
	volume = {48},
	issn = {0093-5301, 1537-5277},
	url = {https://academic.oup.com/jcr/article/48/2/235/6146928},
	doi = {10.1093/jcr/ucab010},
	shorttitle = {What Makes Content Engaging?},
	abstract = {Some cultural products (e.g., books and movies) catch on and become popular, while others fail. Why? While some have argued that success is unpredictable, we suggest that period-to-period shifts in sentiment—what we term sentiment volatility—enhance engagement. Automated sentiment analysis of over 4,000 movies demonstrates that more volatile movies are evaluated more positively. Consistent with the notion that sentiment volatility makes experiences more stimulating, the effect is stronger in genres where evaluations are more likely to be driven stimulation (i.e., thrillers rather than romance). Further, analysis of over 30,000 online articles demonstrate that people are more likely to continue reading more volatile articles. By manipulating sentiment volatility in follow-up experiments, we underscore its causal impact on evaluations, and provide evidence for the role of stimulation in these effects. Taken together, the results shed light on what drives engagement, the time dynamics of sentiment, and cultural analytics or why some cultural items are more successful.},
	pages = {235--250},
	number = {2},
	journaltitle = {Journal of Consumer Research},
	author = {Berger, Jonah and Kim, Yoon Duk and Meyer, Robert},
	editor = {Inman, J. Jeffrey and Stephen, Andrew T.},
	urldate = {2022-07-20},
	date = {2021-08-13},
	langid = {english},
}

@article{mosteller_inference_1963,
	title = {Inference in an Authorship Problem},
	volume = {58},
	issn = {0162-1459},
	url = {https://www.jstor.org/stable/2283270?origin=crossref},
	doi = {10.2307/2283270},
	pages = {275--309},
	number = {302},
	journaltitle = {Journal of the American Statistical Association},
	shortjournal = {Journal of the American Statistical Association},
	author = {Mosteller, Frederick and Wallace, David L.},
	urldate = {2022-07-21},
	date = {1963-06},
}

@article{sawyer_readability_2008,
	author = {Sawyer, Alan G. and Laran, Juliano and Xu, Jun},
	title = {The Readability of Marketing Journals: Are Award-Winning Articles Better Written?},
	shorttitle = {The Readability of Marketing Journals},
	journaltitle = {Journal of Marketing},
	shortjournal = {Journal of Marketing},
	date = {2008-01},
	volume = {72},
	number = {1},
	pages = {108--117},
	issn = {0022-2429, 1547-7185},
	doi = {10.1509/jmkg.72.1.108},
	url = {http://journals.sagepub.com/doi/10.1509/jmkg.72.1.108},
	urldate = {2022-07-21},
	langid = {english},
	abstract = {This is a study of the readability of articles in four marketing journals: Journal of Marketing, Journal of Marketing Research, Journal of International Marketing, and Journal of Public Policy \& Marketing. For each journal, the authors compare articles that have won an award with articles that have not. The authors find that award-winning articles are more readable, as measured by indexes focusing on sentence and word length, than nonwinning articles. The authors also identify and analyze other characteristics of more readable journal articles and discuss the importance of good writing.},
}

@article{huang_illusion_2016,
	title = {Illusion of variety: Lower readability enhances perceived variety},
	volume = {33},
	issn = {0167-8116},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0167811615001445},
	doi = {10.1016/j.ijresmar.2015.11.006},
	shorttitle = {Illusion of variety},
	pages = {674--687},
	number = {3},
	journaltitle = {International Journal of Research in Marketing},
	shortjournal = {International Journal of Research in Marketing},
	author = {Huang, Zhongqiang (Tak) and Kwong, Jessica Y. Y.},
	urldate = {2022-07-21},
	date = {2016-09},
	langid = {english},
}

@article{beard_increasing_1988,
	author = {Beard, John D. and Williams, David L.},
	title = {Increasing the effectiveness of direct mail copy through the use of readability measures},
	journaltitle = {Journal of Direct Marketing},
	shortjournal = {Journal of Direct Marketing},
	date = {1988-05},
	volume = {2},
	number = {2},
	pages = {6--15},
	issn = {1522-7138, 0892-0591},
	doi = {10.1002/dir.4000020204},
	url = {http://journals.sagepub.com/doi/10.1002/dir.4000020204},
	urldate = {2022-07-21},
	langid = {english},
	abstract = {This article presents a potentially valuable, yet seldom used tool for direct marketing: readability formulas. They can be used to increase the effectiveness of direct mail copy for technical products and services. Among the many theoretical tools available to determine if copy is readable for the intended audience, readability formulas provide a logical starting place to measure copy for its clarity and style. For the marketing practitioner, readability formulas are objective measures that can save time and money in field testing; for the marketing researcher, readability formulas lend themselves well to experimentation because they quantify differences in copy; and for the marketing educator, readability measures are valuable pedagogical tools to train copy writers.},
}

@article{zhu_online_2021,
	title = {Online critical review classification in response strategy and service provider rating: Algorithms from heuristic processing, sentiment analysis to deep learning},
	volume = {129},
	issn = {0148-2963},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0148296320307505},
	doi = {10.1016/j.jbusres.2020.11.007},
	shorttitle = {Online critical review classification in response strategy and service provider rating},
	pages = {860--877},
	journaltitle = {Journal of Business Research},
	shortjournal = {Journal of Business Research},
	author = {Zhu, John Jianjun and Chang, Yung-Chun and Ku, Chih-Hao and Li, Stella Yiyan and Chen, Chi-Jen},
	urldate = {2022-07-24},
	date = {2021-05},
	langid = {english},
}

@article{mustak_artificial_2021,
	title = {Artificial intelligence in marketing: Topic modeling, scientometric analysis, and research agenda},
	volume = {124},
	issn = {0148-2963},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0148296320307165},
	doi = {10.1016/j.jbusres.2020.10.044},
	shorttitle = {Artificial intelligence in marketing},
	pages = {389--404},
	journaltitle = {Journal of Business Research},
	shortjournal = {Journal of Business Research},
	author = {Mustak, Mekhail and Salminen, Joni and Plé, Loïc and Wirtz, Jochen},
	urldate = {2022-07-24},
	date = {2021-01},
	langid = {english},
}

@article{hildebrand_voice_2020,
	title = {Voice analytics in business research: Conceptual foundations, acoustic feature extraction, and applications},
	volume = {121},
	issn = {0148-2963},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0148296320306044},
	doi = {10.1016/j.jbusres.2020.09.020},
	shorttitle = {Voice analytics in business research},
	pages = {364--374},
	journaltitle = {Journal of Business Research},
	shortjournal = {Journal of Business Research},
	author = {Hildebrand, Christian and Efthymiou, Fotis and Busquet, Francesc and Hampton, William H. and Hoffman, Donna L. and Novak, Thomas P.},
	urldate = {2022-07-24},
	date = {2020-12},
	langid = {english},
}

@article{birim_detecting_2022,
	title = {Detecting fake reviews through topic modelling},
	volume = {149},
	issn = {0148-2963},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0148296322005264},
	doi = {10.1016/j.jbusres.2022.05.081},
	pages = {884--900},
	journaltitle = {Journal of Business Research},
	shortjournal = {Journal of Business Research},
	author = {Birim, Şule Öztürk and Kazancoglu, Ipek and Mangla, Sachin Kumar and Kahraman, Aysun and Kumar, Satish and Kazancoglu, Yigit},
	urldate = {2022-07-24},
	date = {2022-10},
	langid = {english},
}

@article{mitra_obim_2020,
	title = {{OBIM}: A computational model to estimate brand image from online consumer review},
	volume = {114},
	issn = {0148-2963},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0148296320302204},
	doi = {10.1016/j.jbusres.2020.04.003},
	shorttitle = {{OBIM}},
	pages = {213--226},
	journaltitle = {Journal of Business Research},
	shortjournal = {Journal of Business Research},
	author = {Mitra, Satanik and Jenamani, Mamata},
	urldate = {2022-07-24},
	date = {2020-06},
	langid = {english},
}

@article{saura_exploring_2022,
	title = {Exploring the challenges of remote work on {Twitter} users' sentiments: From digital technology development to a post-pandemic era},
	volume = {142},
	issn = {0148-2963},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0148296321009668},
	doi = {10.1016/j.jbusres.2021.12.052},
	shorttitle = {Exploring the challenges of remote work on {Twitter} users' sentiments},
	pages = {242--254},
	journaltitle = {Journal of Business Research},
	shortjournal = {Journal of Business Research},
	author = {Saura, Jose Ramon and Ribeiro-Soriano, Domingo and Zegarra Saldaña, Pablo},
	urldate = {2022-07-24},
	date = {2022-03},
	langid = {english},
}

@article{haenlein_artificial_2021,
	title = {Artificial intelligence and robotics: Shaking up the business world and society at large},
	volume = {124},
	issn = {0148-2963},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0148296320307037},
	doi = {10.1016/j.jbusres.2020.10.042},
	shorttitle = {Artificial intelligence and robotics},
	pages = {405--407},
	journaltitle = {Journal of Business Research},
	shortjournal = {Journal of Business Research},
	author = {Haenlein, Michael and Kaplan, Andreas},
	urldate = {2022-07-24},
	date = {2021-01},
	langid = {english},
}

@article{lutz_are_2022,
	title = {Are longer reviews always more helpful? Disentangling the interplay between review length and line of argumentation},
	volume = {144},
	issn = {0148-2963},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0148296322001278},
	doi = {10.1016/j.jbusres.2022.02.010},
	shorttitle = {Are longer reviews always more helpful?},
	pages = {888--901},
	journaltitle = {Journal of Business Research},
	shortjournal = {Journal of Business Research},
	author = {Lutz, Bernhard and Pröllochs, Nicolas and Neumann, Dirk},
	urldate = {2022-07-24},
	date = {2022-05},
	langid = {english},
}

@article{bilro_exploring_2019,
	author = {Bilro, Ricardo Godinho and Loureiro, Sandra Maria Correia and Guerreiro, João},
	title = {Exploring online customer engagement with hospitality products and its relationship with involvement, emotional states, experience and brand advocacy},
	journaltitle = {Journal of Hospitality Marketing \& Management},
	shortjournal = {Journal of Hospitality Marketing \& Management},
	date = {2019-02-17},
	volume = {28},
	number = {2},
	pages = {147--171},
	issn = {1936-8623, 1936-8631},
	doi = {10.1080/19368623.2018.1506375},
	url = {https://www.tandfonline.com/doi/full/10.1080/19368623.2018.1506375},
	urldate = {2022-07-26},
	langid = {english},
}

@article{berezina_understanding_2016,
	author = {Berezina, Katerina and Bilgihan, Anil and Cobanoglu, Cihan and Okumus, Fevzi},
	title = {Understanding Satisfied and Dissatisfied Hotel Customers: Text Mining of Online Hotel Reviews},
	shorttitle = {Understanding Satisfied and Dissatisfied Hotel Customers},
	journaltitle = {Journal of Hospitality Marketing \& Management},
	shortjournal = {Journal of Hospitality Marketing \& Management},
	date = {2016-01-02},
	volume = {25},
	number = {1},
	pages = {1--24},
	issn = {1936-8623, 1936-8631},
	doi = {10.1080/19368623.2015.983631},
	url = {http://www.tandfonline.com/doi/full/10.1080/19368623.2015.983631},
	urldate = {2022-07-26},
	langid = {english},
}

@article{baek_determinants_2020,
	author = {Baek, Jooa and Choe, Yeongbae and Ok, Chihyung Michael},
	title = {Determinants of hotel guests’ service experiences: an examination of differences between lifestyle and traditional hotels},
	shorttitle = {Determinants of hotel guests’ service experiences},
	journaltitle = {Journal of Hospitality Marketing \& Management},
	shortjournal = {Journal of Hospitality Marketing \& Management},
	date = {2020-01-02},
	volume = {29},
	number = {1},
	pages = {88--105},
	issn = {1936-8623, 1936-8631},
	doi = {10.1080/19368623.2019.1580173},
	url = {https://www.tandfonline.com/doi/full/10.1080/19368623.2019.1580173},
	urldate = {2022-07-26},
	langid = {english},
}

@article{luo_fine-grained_2021,
	title = {A fine-grained sentiment analysis of online guest reviews of economy hotels in {China}},
	volume = {30},
	issn = {1936-8623, 1936-8631},
	url = {https://www.tandfonline.com/doi/full/10.1080/19368623.2020.1772163},
	doi = {10.1080/19368623.2020.1772163},
	pages = {71--95},
	number = {1},
	journaltitle = {Journal of Hospitality Marketing \& Management},
	shortjournal = {Journal of Hospitality Marketing \& Management},
	author = {Luo, Jiaqi and Huang, Songshan (Sam) and Wang, Renwu},
	urldate = {2022-07-26},
	date = {2021-01-02},
	langid = {english},
}

@article{ho_customer_2022,
	author = {Ho, Chaang-Iuan and Chen, Ming-Chih and Shih, Ya-Wei},
	title = {Customer engagement behaviours in a social media context revisited: using both the formative measurement model and text mining techniques},
	shorttitle = {Customer engagement behaviours in a social media context revisited},
	journaltitle = {Journal of Marketing Management},
	shortjournal = {Journal of Marketing Management},
	date = {2022-05-04},
	volume = {38},
	number = {7},
	pages = {740--770},
	issn = {0267-257X, 1472-1376},
	doi = {10.1080/0267257X.2021.2003421},
	url = {https://www.tandfonline.com/doi/full/10.1080/0267257X.2021.2003421},
	urldate = {2022-07-26},
	langid = {english},
}

@article{jedidi_r2m_2021,
	title = {{R2M} {Index} 1.0: Assessing the Practical Relevance of Academic Marketing Articles},
	volume = {85},
	issn = {0022-2429, 1547-7185},
	url = {http://journals.sagepub.com/doi/10.1177/00222429211028145},
	doi = {10.1177/00222429211028145},
	shorttitle = {{R2M} {Index} 1.0},
	abstract = {Using text-mining, the authors develop version 1.0 of the Relevance to Marketing (R2M) Index, a dynamic index that measures the topical and timely relevance of academic marketing articles to marketing practice. The index assesses topical relevance drawing on a dictionary of marketing terms derived from 50,000 marketing articles published in practitioner outlets from 1982 to 2019. Timely relevance is based on the prevalence of academic marketing topics in practitioner publications at a given time. The authors classify topics into four quadrants based on their low/high popularity in academia and practice —“Desert,” “Academic Island,” “Executive Fields,” and “Highlands”—and score academic articles and journals: Journal of Marketing has the highest R2M score, followed by Marketing Science, Journal of Marketing Research, and Journal of Consumer Research. The index correlates with practitioner judgments of practical relevance and other relevance measures. Because the index is a work in progress, the authors discuss how to overcome current limitations and suggest correlating the index with citation counts, altmetrics, and readability measures. Marketing practitioners, authors, and journal editors can use the index to assess article relevance, and academic administrators can use it for promotion and tenure decisions (see www.R2Mindex.com). The R2M Index is thus not only a measurement instrument but also a tool for change.},
	pages = {22--41},
	number = {5},
	journaltitle = {Journal of Marketing},
	shortjournal = {Journal of Marketing},
	author = {Jedidi, Kamel and Schmitt, Bernd H. and Ben Sliman, Malek and Li, Yanyan},
	urldate = {2022-07-26},
	date = {2021-09},
	langid = {english},
}

@article{riedel_consumers_2022,
	title = {Consumers experiencing vulnerability: a state of play in the literature},
	volume = {36},
	issn = {0887-6045},
	url = {https://www.emerald.com/insight/content/doi/10.1108/JSM-12-2020-0496/full/html},
	doi = {10.1108/JSM-12-2020-0496},
	shorttitle = {Consumers experiencing vulnerability},
	abstract = {Purpose: The purpose of this paper is to provide a state-of-the-art review of research on consumers experiencing vulnerability to describe the current situation of the consumers experiencing vulnerability literature and develop an up-to-date synthesised definition of consumers experiencing vulnerability. Design/methodology/approach: This systematic review, guided by the {PRISMA} framework, takes a multi-disciplinary approach to identify 310 articles published between 2010 and 2019 examining consumers experiencing vulnerability. Descriptive analysis of the data is undertaken in combination with a thematic and text mining approach using Leximancer software. Findings: A definition of consumers experiencing vulnerability is developed- “unique and subjective experiences where characteristics such as states, conditions and/or external factors lead to a consumer experiencing a sense of powerlessness in consumption settings”. The findings reveal consumers experiencing vulnerability have often been classified using a uni-dimensional approach (opposed to a multi-dimensional), focussing on one factor of vulnerability, the most prevalent of these being economic and age factors. A lack of research has examined consumers experiencing vulnerability based upon geographical remoteness, gender and sexual exploitation. Originality/value: This paper is one of the first to examine consumers experiencing vulnerability using a systematic approach and text mining analysis to synthesise a large set of articles, which subsequently reduces the potential for researchers’ interpretative bias. Further, it is the first to generate a data-driven definition of consumers experiencing vulnerability. It provides targeted recommendations to allow further scholarly, policy and practical contributions to this area.},
	pages = {110--128},
	number = {2},
	journaltitle = {Journal of Services Marketing},
	shortjournal = {{JSM}},
	author = {Riedel, Aimee and Messenger, Dana and Fleischman, David and Mulcahy, Rory},
	urldate = {2022-07-26},
	date = {2022-03-28},
	langid = {english},
}

@article{nam_harvesting_2017,
	title = {Harvesting Brand Information from Social Tags},
	volume = {81},
	issn = {0022-2429, 1547-7185},
	url = {http://journals.sagepub.com/doi/10.1509/jm.16.0044},
	doi = {10.1509/jm.16.0044},
	abstract = {Social tags are user-defined keywords associated with online content that reflect consumers’ perceptions of various objects, including products and brands. This research presents a new approach for harvesting rich, qualitative information on brands from user-generated social tags. The authors first compare their proposed approach with conventional techniques such as brand concept maps and text mining. They highlight the added value of their approach that results from the unconstrained, open-ended, and synoptic nature of consumer-generated content contained within social tags. The authors then apply existing text-mining and data-reduction methods to analyze disaggregate-level social tagging data for marketing research and demonstrate how marketers can utilize the information in social tags by extracting key representative topics, monitoring common dynamic trends, and understanding heterogeneous perceptions of a brand.},
	pages = {88--108},
	number = {4},
	journaltitle = {Journal of Marketing},
	shortjournal = {Journal of Marketing},
	author = {Nam, Hyoryung and Joshi, Yogesh V. and Kannan, P. K.},
	urldate = {2022-07-26},
	date = {2017-07},
	langid = {english},
}

@article{ludwig_more_2013,
	title = {More than Words: The Influence of Affective Content and Linguistic Style Matches in Online Reviews on Conversion Rates},
	volume = {77},
	issn = {0022-2429, 1547-7185},
	url = {http://journals.sagepub.com/doi/10.1509/jm.11.0560},
	doi = {10.1509/jm.11.0560},
	shorttitle = {More than Words},
	abstract = {Customers increasingly rely on other consumers' reviews to make purchase decisions online. New insights into the customer review phenomenon can be derived from studying the semantic content and style properties of verbatim customer reviews to examine their influence on online retail sites' conversion rates. The authors employ text mining to extract changes in affective content and linguistic style properties of customer book reviews on Amazon.com. A dynamic panel data model reveals that the influence of positive affective content on conversion rates is asymmetrical, such that greater increases in positive affective content in customer reviews have a smaller effect on subsequent increases in conversion rate. No such tapering-off effect occurs for changes in negative affective content in reviews. Furthermore, positive changes in affective cues and increasing congruence with the product interest group's typical linguistic style directly and conjointly increase conversion rates. These findings suggest that managers should identify and promote the most influential reviews in a given product category, provide instructions to stimulate reviewers to write powerful reviews, and adapt the style of their own editorial reviews to the relevant product category.},
	pages = {87--103},
	number = {1},
	journaltitle = {Journal of Marketing},
	shortjournal = {Journal of Marketing},
	author = {Ludwig, Stephan and de Ruyter, Ko and Friedman, Mike and Brüggen, Elisabeth C. and Wetzels, Martin and Pfann, Gerard},
	urldate = {2022-07-26},
	date = {2013-01},
	langid = {english},
}

@article{tang_digging_2015,
	author = {Tang, Chuanyi and Guo, Lin},
	title = {Digging for gold with a simple tool: Validating text mining in studying electronic word-of-mouth ({eWOM}) communication},
	shorttitle = {Digging for gold with a simple tool},
	journaltitle = {Marketing Letters},
	shortjournal = {Mark Lett},
	date = {2015-03},
	volume = {26},
	number = {1},
	pages = {67--80},
	issn = {0923-0645, 1573-059X},
	doi = {10.1007/s11002-013-9268-8},
	url = {http://link.springer.com/10.1007/s11002-013-9268-8},
	urldate = {2022-07-26},
	langid = {english},
}

@article{mahr_making_2019,
	title = {Making sense of customer service experiences: a text mining review},
	volume = {33},
	issn = {0887-6045},
	url = {https://www.emerald.com/insight/content/doi/10.1108/JSM-10-2018-0295/full/html},
	doi = {10.1108/JSM-10-2018-0295},
	shorttitle = {Making sense of customer service experiences},
	abstract = {Purpose: The purpose of this paper is to systematically review the concepts and theories underlying customer service experience ({CSE}) and its underlying five dimensions (physical, social, cognitive, affective and sensorial). In this research, the contribution of the sensorial dimension to {CSE} research is emphasized. Senses are especially important in forming perceptions within servicescapes that are typically rich in sensory stimuli. Design/methodology/approach: This study systematically identifies 258 articles published between 1994 and 2018 in services and marketing journals. The analysis uses a text mining approach with the Leximancer software to extract research concepts and their relationships. Findings: The results demonstrate a shift from {CSE} research focused on brands and products toward value and interaction, around three focal areas: service system architecture, with its value creation processes; servicescape, with an increasingly digital interaction interface and outcome measures, with a stronger focus on emotional and relational metrics. In {CSE} research, the physical, social and cognitive dimensions are mostly researched in the focal areas of servicescape and outcome measures. Although important in practice, the sensorial dimension is the least investigated {CSE} dimension in service marketing research. Text mining insights demonstrate rich opportunities for sensorial research, particularly in studies on servicescape. Practical implications: The synthesis will inform managers and service providers which elements of {CSE} are most relevant to customers when forming perceptions. These insights help service providers to control, manage and design (multi)-sensory stimuli that influence how customers will make sense of the servicescape. Originality/value: This research is one of the first studies to examine the conceptual structure of {CSE} with a text mining approach that systematically analyzes a large set of articles, therein reducing the potential for researchers’ interpretative bias. The paper provides an assessment of the role of the largely neglected but crucial sensorial dimension, and offers future research suggestions into this emerging topic.},
	pages = {88--103},
	number = {1},
	journaltitle = {Journal of Services Marketing},
	shortjournal = {{JSM}},
	author = {Mahr, Dominik and Stead, Susan and Odekerken-Schröder, Gaby},
	urldate = {2022-07-26},
	date = {2019-02-11},
	langid = {english},
}

@article{balducci_unstructured_2018,
	author = {Balducci, Bitty and Marinova, Detelina},
	title = {Unstructured data in marketing},
	journaltitle = {Journal of the Academy of Marketing Science},
	shortjournal = {J. of the Acad. Mark. Sci.},
	date = {2018-07},
	volume = {46},
	number = {4},
	pages = {557--590},
	issn = {0092-0703, 1552-7824},
	doi = {10.1007/s11747-018-0581-x},
	url = {http://link.springer.com/10.1007/s11747-018-0581-x},
	urldate = {2022-07-26},
	langid = {english},
}

@article{kumar_examining_2022,
	title = {Examining the research on social media in business-to-business marketing with a focus on sales and the selling process},
	volume = {102},
	issn = {0019-8501},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0019850122000086},
	doi = {10.1016/j.indmarman.2022.01.008},
	pages = {122--140},
	journaltitle = {Industrial Marketing Management},
	shortjournal = {Industrial Marketing Management},
	author = {Kumar, Bipul and Sharma, Arun},
	urldate = {2022-07-26},
	date = {2022-04},
	langid = {english},
}

@article{moon_cultural_2016,
	title = {Cultural and Economic Impacts on Global Cultural Products: Evidence from {U.S.} Movies},
	volume = {24},
	issn = {1069-031X, 1547-7215},
	url = {http://journals.sagepub.com/doi/10.1509/jim.15.0080},
	doi = {10.1509/jim.15.0080},
	shorttitle = {Cultural and Economic Impacts on Global Cultural Products},
	abstract = {Existing international product diffusion studies have identified economic and cultural factors that influence consumers’ acceptance of new products, but they have not fully examined these factors’ roles in the international diffusion of global cultural products. The authors examine country-level economic and cultural factors that influence consumers’ acceptance of new global cultural products across countries. Using 846 recent U.S. movies’ box office performances in 48 national markets as the empirical context, the authors obtain the following key novel findings on product sales: (1) an inverse U-shaped impact of economic development status, (2) a positive impact of the cultural compatibility of the product and the market, and (3) a U-shaped impact of intercountry cultural distance in the presence of cultural compatibility and a decreasing linear impact of cultural distance in the absence of cultural compatibility.},
	pages = {78--97},
	number = {3},
	journaltitle = {Journal of International Marketing},
	shortjournal = {Journal of International Marketing},
	author = {Moon, Sangkil and Mishra, Arul and Mishra, Himanshu and Kang, Moon Young},
	urldate = {2022-07-26},
	date = {2016-09},
	langid = {english},
}

@article{dahl_current_2010,
	title = {Current Themes in Social Marketing Research: Text-Mining the Past Five Years},
	volume = {16},
	issn = {1524-5004, 1539-4093},
	url = {http://journals.sagepub.com/doi/10.1080/15245001003746790},
	doi = {10.1080/15245001003746790},
	shorttitle = {Current Themes in Social Marketing Research},
	abstract = {Social marketing has advanced rapidly from its beginnings almost 40 years ago. This commentary takes a look at the current themes in published social marketing research by using text-mining to analyze articles published in the past 5 years. It also discusses the areas for future research, especially the need for published social marketing research to expand from mostly health-related application to other areas.},
	pages = {128--136},
	number = {2},
	journaltitle = {Social Marketing Quarterly},
	shortjournal = {Social Marketing Quarterly},
	author = {Dahl, Stephan},
	urldate = {2022-07-26},
	date = {2010-06},
	langid = {english},
}

@article{huber_topical_2014,
	title = {A Topical History of \textit{{JMR}}},
	volume = {51},
	issn = {0022-2437, 1547-7193},
	url = {http://journals.sagepub.com/doi/10.1509/jmr.51.1.02},
	doi = {10.1509/jmr.51.1.02},
	abstract = {Using subject indexes and text mining of author abstracts, the authors track the evolution of content in Journal of Marketing Research since its inception 50 years ago. These data reveal that the journal has expanded beyond its initial emphasis on marketing research methods and advertising to increase its coverage of other substantive topics and consumer behavior. Moreover, a joint space of topics and editors reveals that editorial orientations appear largely evolutionary rather than revolutionary and that a major shift in journal coverage occurs at the time Marketing Science began publication. The authors conclude their analysis with several policy recommendations.},
	pages = {84--91},
	number = {1},
	journaltitle = {Journal of Marketing Research},
	shortjournal = {Journal of Marketing Research},
	author = {Huber, Joel and Kamakura, Wagner and Mela, Carl F.},
	urldate = {2022-07-26},
	date = {2014-02},
	langid = {english},
}

@article{lee_automated_2011,
	title = {Automated Marketing Research Using Online Customer Reviews},
	volume = {48},
	issn = {0022-2437, 1547-7193},
	url = {http://journals.sagepub.com/doi/10.1509/jmkr.48.5.881},
	doi = {10.1509/jmkr.48.5.881},
	abstract = {Market structure analysis is a basic pillar of marketing research. Classic challenges in marketing such as pricing, campaign management, brand positioning, and new product development are rooted in an analysis of product substitutes and complements inferred from market structure. In this article, the authors present a method to support the analysis and visualization of market structure by automatically eliciting product attributes and brand's relative positions from online customer reviews. First, the method uncovers attributes and attribute dimensions using the “voice of the consumer,” as reflected in customer reviews, rather than that of manufacturers. Second, the approach runs automatically. Third, the process supports rather than supplants managerial judgment by reinforcing or augmenting attributes and dimensions found through traditional surveys and focus groups. The authors test the approach on six years of customer reviews for digital cameras during a period of rapid market evolution. They analyze and visualize results in several ways, including comparisons with expert buying guides, a laboratory survey, and correspondence analysis of automatically discovered product attributes. The authors evaluate managerial insights drawn from the analysis with respect to proprietary market research reports from the same period analyzing digital imaging products.},
	pages = {881--894},
	number = {5},
	journaltitle = {Journal of Marketing Research},
	shortjournal = {Journal of Marketing Research},
	author = {Lee, Thomas Y. and Bradlow, Eric T.},
	urldate = {2022-07-26},
	date = {2011-10},
	langid = {english},
}

@article{lee_service_2011,
	title = {Service failures and recovery actions in the hotel industry: A text-mining approach},
	volume = {17},
	issn = {1356-7667, 1479-1870},
	url = {http://journals.sagepub.com/doi/10.1177/1356766711409182},
	doi = {10.1177/1356766711409182},
	shorttitle = {Service failures and recovery actions in the hotel industry},
	abstract = {The purpose of this article is to cluster service failures and recovery actions in the hotel industry. Keywords were extracted from the descriptive responses of hotel guests and systematically clustered to identify major areas of service failures and recovery actions in the hotel industry. Two sets of textual data (service failure and service recovery) were collected from 75 hotel guests using the critical incident technique and content-analyzed with a text-mining program. Text-mining analysis identified 50 keywords in eight clusters from the service failure data and 50 keywords in seven clusters from the service recovery data. The identified keywords were conceptually graphed to map meaningful findings that are logically precise and computationally tractable. The major theoretical and practical implications are also discussed in this study.},
	pages = {197--207},
	number = {3},
	journaltitle = {Journal of Vacation Marketing},
	shortjournal = {Journal of Vacation Marketing},
	author = {Lee, Myong Jae and Singh, Neha and Chan, Eric S. W.},
	urldate = {2022-07-26},
	date = {2011-07},
	langid = {english},
}

@article{singh_efficient_2011,
	title = {Efficient Methods for Sampling Responses from Large-Scale Qualitative Data},
	volume = {30},
	issn = {0732-2399, 1526-548X},
	url = {http://pubsonline.informs.org/doi/10.1287/mksc.1100.0632},
	doi = {10.1287/mksc.1100.0632},
	abstract = {The World Wide Web contains a vast corpus of consumer-generated content that holds invaluable insights for improving the product and service offerings of firms. Yet the typical method for extracting diagnostic information from online content—text mining—has limitations. As a starting point, we propose analyzing a sample of comments before initiating text mining. Using a combination of real data and simulations, we demonstrate that a sampling procedure that selects respondents whose comments contain a large amount of information is superior to the two most popular sampling methods—simple random sampling and stratified random sampling—-in gaining insights from the data. In addition, we derive a method that determines the probability of observing diagnostic information repeated a specific number of times in the population, which will enable managers to base sample size decisions on the trade-off between obtaining additional diagnostic information and the added expense of a larger sample. We provide an illustration of one of the methods using a real data set from a website containing qualitative comments about staying at a hotel and demonstrate how sampling qualitative comments can be a useful first step in text mining.},
	pages = {532--549},
	number = {3},
	journaltitle = {Marketing Science},
	shortjournal = {Marketing Science},
	author = {Singh, Surendra N. and Hillmer, Steve and Wang, Ze},
	urldate = {2022-07-26},
	date = {2011-05},
	langid = {english},
}

@article{ludwig_decoding_2016,
	title = {Decoding social media speak: developing a speech act theory research agenda},
	volume = {33},
	issn = {0736-3761},
	url = {https://www.emerald.com/insight/content/doi/10.1108/JCM-04-2015-1405/full/html},
	doi = {10.1108/JCM-04-2015-1405},
	shorttitle = {Decoding social media speak},
	abstract = {Purpose
              – Drawing on the theoretical domain of speech act theory ({SAT}) and a discussion of its suitability for setting the agenda for social media research, this study aims to explore a range of research directions that are both relevant and conceptually robust, to stimulate the advancement of knowledge and understanding of online verbatim data.
            
            
              Design/methodology/approach
              – Examining previously published cross-disciplinary research, the study identifies how recent conceptual and empirical advances in {SAT} may further guide the development of text analytics in a social media context.
            
            
              Findings
              – Decoding content and function word use in customers’ social media communication can enhance the efficiency of determining potential impacts of customer reviews, sentiment strength, the quality of contributions in social media, customers’ socialization perceptions in online communities and deceptive messages.
            
            
              Originality/value
              – Considering the variety of managerial demand, increasing and diverging social media formats, expanding archives, rapid development of software tools and fast-paced market changes, this study provides an urgently needed, theory-driven, coherent research agenda to guide the conceptual development of text analytics in a social media context.},
	pages = {124--134},
	number = {2},
	journaltitle = {Journal of Consumer Marketing},
	author = {Ludwig, Stephan and de Ruyter, Ko},
	urldate = {2022-07-26},
	date = {2016-03-21},
	langid = {english},
}

@article{buzova_cross-cultural_2016,
	title = {Cross-cultural Perceptions of Onshore Guided Tours: A Qualitative Approach Based on {eWOM}},
	volume = {33},
	issn = {07426046},
	url = {https://onlinelibrary.wiley.com/doi/10.1002/mar.20939},
	doi = {10.1002/mar.20939},
	shorttitle = {Cross-cultural Perceptions of Onshore Guided Tours},
	pages = {1054--1061},
	number = {12},
	journaltitle = {Psychology \& Marketing},
	shortjournal = {Psychol. Mark.},
	author = {Buzova, Daniela and Sanz-Blas, Silvia and Cervera-Taulet, Amparo},
	urldate = {2022-07-26},
	date = {2016-12},
	langid = {english},
}

@article{singh_self_2016,
	title = {Self or Simulacra of Online Reviews: An Empirical Perspective},
	volume = {33},
	issn = {07426046},
	url = {https://onlinelibrary.wiley.com/doi/10.1002/mar.20946},
	doi = {10.1002/mar.20946},
	shorttitle = {Self or Simulacra of Online Reviews},
	pages = {1112--1118},
	number = {12},
	journaltitle = {Psychology \& Marketing},
	shortjournal = {Psychol. Mark.},
	author = {Singh, Vivek Kumar and Nishant, Rohit and Kitchen, Philip J.},
	urldate = {2022-07-26},
	date = {2016-12},
	langid = {english},
}

@article{chung_mining_2022,
	title = {Mining Consumer Minds: Downstream Consequences of Host Motivations for Home-Sharing Platforms},
	volume = {48},
	issn = {0093-5301, 1537-5277},
	url = {https://academic.oup.com/jcr/article/48/5/817/6278355},
	doi = {10.1093/jcr/ucab034},
	shorttitle = {Mining Consumer Minds},
	abstract = {
            This research sheds light on consumer motivations for participating in the sharing economy and examines downstream consequences of the uncovered motivations. We use text-mining techniques to extract Airbnb hosts’ motivations from their responses to the question “why did you start hosting.” We find that hosts are driven not only by the monetary motivation “to earn cash” but also by intrinsic motivations such as “to share beauty” and “to meet people.” Using extensive transaction-level data, we find that hosts with intrinsic motivations post more property photos and write longer property descriptions, demonstrating greater engagement with the platform. Consequently, these hosts receive higher guest satisfaction ratings. Compared to hosts who want to earn cash, hosts motivated to meet people are more likely to keep hosting and to stay active on the platform, and hosts motivated to share beauty charge higher prices. As a result, these intrinsically motivated hosts have a higher customer lifetime value compared to those with a monetary motivation. We employ a multimethod approach including text mining, Bayesian latent attrition models, and lab experiments to derive these insights. Our research provides an easy-to-implement approach to uncovering consumer motivations in practice and highlights the consequential role of these motivations for firms.},
	pages = {817--838},
	number = {5},
	journaltitle = {Journal of Consumer Research},
	author = {Chung, Jaeyeon (Jae) and Johar, Gita Venkataramani and Li, Yanyan and Netzer, Oded and Pearson, Matthew},
	editor = {Inman, J. Jeffrey and Winer, Russell S.},
	urldate = {2022-07-26},
	date = {2022-01-29},
	langid = {english},
}

@article{yi_informational_2022,
	title = {The informational value of multi-attribute online consumer reviews: A text mining approach},
	volume = {65},
	issn = {09696989},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0969698921000850},
	doi = {10.1016/j.jretconser.2021.102519},
	shorttitle = {The informational value of multi-attribute online consumer reviews},
	pages = {102519},
	journaltitle = {Journal of Retailing and Consumer Services},
	shortjournal = {Journal of Retailing and Consumer Services},
	author = {Yi, Jisu and Oh, Yun Kyung},
	urldate = {2022-07-26},
	date = {2022-03},
	langid = {english},
}

@article{wang_journal_2015,
	title = {The \textit{Journal of Consumer Research} at 40: A Historical Analysis},
	volume = {42},
	issn = {0093-5301, 1537-5277},
	url = {https://academic.oup.com/jcr/article-lookup/doi/10.1093/jcr/ucv009},
	doi = {10.1093/jcr/ucv009},
	shorttitle = {The \textit{Journal of Consumer Research} at 40},
	pages = {5--18},
	number = {1},
	journaltitle = {Journal of Consumer Research},
	shortjournal = {J Consum Res},
	author = {Wang, Xin (Shane) and Bendle, Neil T. and Mai, Feng and Cotte, June},
	urldate = {2022-07-26},
	date = {2015-06},
	langid = {english},
}

@article{villarroel_ordenes_cutting_2019,
	title = {Cutting through Content Clutter: How Speech and Image Acts Drive Consumer Sharing of Social Media Brand Messages},
	volume = {45},
	issn = {0093-5301, 1537-5277},
	url = {https://academic.oup.com/jcr/article/45/5/988/4964963},
	doi = {10.1093/jcr/ucy032},
	shorttitle = {Cutting through Content Clutter},
	pages = {988--1012},
	number = {5},
	journaltitle = {Journal of Consumer Research},
	author = {Villarroel Ordenes, Francisco and Grewal, Dhruv and Ludwig, Stephan and de Ruyter, Ko and Mahr, Dominik and Wetzels, Martin},
	editor = {Morwitz, Vicki and Kopalle, Praveen},
	urldate = {2022-07-26},
	date = {2019-02-01},
	langid = {english},
}

@article{swaminathan_language_2022,
	title = {The Language of Brands in Social Media: Using Topic Modeling on Social Media Conversations to Drive Brand Strategy},
	volume = {57},
	issn = {1094-9968, 1520-6653},
	url = {http://journals.sagepub.com/doi/10.1177/10949968221088275},
	doi = {10.1177/10949968221088275},
	shorttitle = {The Language of Brands in Social Media},
	abstract = {This article highlights how social media data and language analysis can help managers understand brand positioning and brand competitive spaces to enable them to make various strategic and tactical decisions about brands. The authors use the output of topic models at the brand level to evaluate similarities between brands and to identify potential cobrand partners. In addition to using average topic probabilities to assess brands’ relationships to each other, they incorporate a differential language analysis framework, which implements scientific inference with multi-test-corrected hypothesis testing, to evaluate positive and negative topic correlates of brand names. The authors highlight the various applications of these approaches in decision making for brand management, including the assessment of brand positioning and future cobranding partnerships, design of marketing communication, identification of new product introductions, and identification of potential negative brand associations that can pose a threat to a brand's image. Moreover, they introduce a new metric, “temporal topic variability,” that can serve as an early warning of future changes in consumer preference. The authors evaluate social media analytic contributions against offline survey data. They demonstrate their approach with a sample of 193 brands, representing a broad set of categories, and discuss its implications.},
	pages = {255--277},
	number = {2},
	journaltitle = {Journal of Interactive Marketing},
	shortjournal = {Journal of Interactive Marketing},
	author = {Swaminathan, Vanitha and Schwartz, H. Andrew and Menezes, Rowan and Hill, Shawndra},
	urldate = {2022-07-26},
	date = {2022-05},
	langid = {english},
}

@article{srivastava_enhancing_2019-1,
	title = {Enhancing the Helpfulness of Online Consumer Reviews: The Role of Latent (Content) Factors},
	volume = {48},
	issn = {10949968},
	url = {https://journals.sagepub.com/doi/full/10.1016/j.intmar.2018.12.003},
	doi = {10.1016/j.intmar.2018.12.003},
	shorttitle = {Enhancing the Helpfulness of Online Consumer Reviews},
	pages = {33--50},
	journaltitle = {Journal of Interactive Marketing},
	shortjournal = {Journal of Interactive Marketing},
	author = {Srivastava, Vartika and Kalro, Arti D.},
	urldate = {2022-07-26},
	date = {2019-11},
	langid = {english},
}

@article{verma_past_2021,
	title = {Past, Present, and Future of Electronic Word of Mouth ({EWOM})},
	volume = {53},
	issn = {10949968},
	url = {https://journals.sagepub.com/doi/full/10.1016/j.intmar.2020.07.001},
	doi = {10.1016/j.intmar.2020.07.001},
	pages = {111--128},
	journaltitle = {Journal of Interactive Marketing},
	shortjournal = {Journal of Interactive Marketing},
	author = {Verma, Sanjeev and Yadav, Neha},
	urldate = {2022-07-26},
	date = {2021-02},
	langid = {english},
}

@article{cho_popular_2017,
	title = {Popular Research Topics in Marketing Journals, 1995–2014},
	volume = {40},
	issn = {1094-9968, 1520-6653},
	url = {http://journals.sagepub.com/doi/10.1016/j.intmar.2017.06.003},
	doi = {10.1016/j.intmar.2017.06.003},
	abstract = {During the past two decades, the focus of marketing has moved from the tactics of persuasion to the strategies of value cocreation. After moving toward cognitive science and corporate strategies in the early 2000s, marketing research returned to its traditional domains of consumer psychologies and customer management. While conscientious consumers are gradually restraining themselves from selfish indulgence, marketers have refocused on a new set of values that encompass mental, experiential, and societal well-being. In this regard, we adopt an unprecedented approach by incorporating topic modeling with social network analysis. The results show that, in terms of topic heterogeneity, the most impactful journals are the most diverse, whereas each runner-up has a unique focus. Among the journals, we detect two major co-authorship communities, and among the topics, we detect three. Further, we find that the communities of the most cited papers are composed of heterogeneous clusters of similar topics. The pivots within, and the bridges between, these communities are also reported. In the spirit of collaborative research, our topic model and network analysis are shared via online collaboration and visualization platforms that readers can use to explore our models interactively and to download the dataset for further studies.},
	pages = {52--72},
	number = {1},
	journaltitle = {Journal of Interactive Marketing},
	shortjournal = {Journal of Interactive Marketing},
	author = {Cho, Yung-Jan and Fu, Pei-Wen and Wu, Chi-Cheng},
	urldate = {2022-07-26},
	date = {2017-11},
	langid = {english},
}

@article{felbermayr_role_2016,
	title = {The Role of Emotions for the Perceived Usefulness in Online Customer Reviews},
	volume = {36},
	issn = {10949968},
	url = {https://journals.sagepub.com/doi/full/10.1016/j.intmar.2016.05.004},
	doi = {10.1016/j.intmar.2016.05.004},
	pages = {60--76},
	journaltitle = {Journal of Interactive Marketing},
	shortjournal = {Journal of Interactive Marketing},
	author = {Felbermayr, Armin and Nanopoulos, Alexandros},
	urldate = {2022-07-26},
	date = {2016-11},
	langid = {english},
}

@article{mangio_branding_2021,
	title = {Branding Rhetoric in Times of a Global Pandemic: A Text-Mining Analysis},
	volume = {50},
	issn = {0091-3367, 1557-7805},
	url = {https://www.tandfonline.com/doi/full/10.1080/00913367.2021.1927912},
	doi = {10.1080/00913367.2021.1927912},
	shorttitle = {Branding Rhetoric in Times of a Global Pandemic},
	pages = {240--252},
	number = {3},
	journaltitle = {Journal of Advertising},
	shortjournal = {Journal of Advertising},
	author = {Mangiò, Federico and Pedeliento, Giuseppe and Andreini, Daniela},
	urldate = {2022-07-26},
	date = {2021-05-27},
	langid = {english},
}

@article{liu_investigation_2017,
	title = {An Investigation of Brand-Related User-Generated Content on {Twitter}},
	volume = {46},
	issn = {0091-3367, 1557-7805},
	url = {https://www.tandfonline.com/doi/full/10.1080/00913367.2017.1297273},
	doi = {10.1080/00913367.2017.1297273},
	pages = {236--247},
	number = {2},
	journaltitle = {Journal of Advertising},
	shortjournal = {Journal of Advertising},
	author = {Liu, Xia and Burns, Alvin C. and Hou, Yingjian},
	urldate = {2022-07-26},
	date = {2017-04-03},
	langid = {english},
}

@article{donthu_journal_2022,
	title = {The \textit{Journal of Advertising}’s Production and Dissemination of Advertising Knowledge: A 50th Anniversary Commemorative Review},
	volume = {51},
	issn = {0091-3367, 1557-7805},
	url = {https://www.tandfonline.com/doi/full/10.1080/00913367.2021.2006100},
	doi = {10.1080/00913367.2021.2006100},
	shorttitle = {The \textit{Journal of Advertising}’s Production and Dissemination of Advertising Knowledge},
	pages = {153--187},
	number = {2},
	journaltitle = {Journal of Advertising},
	shortjournal = {Journal of Advertising},
	author = {Donthu, Naveen and Lim, Weng Marc and Kumar, Satish and Pattnaik, Debidutta},
	urldate = {2022-07-26},
	date = {2022-03-15},
	langid = {english},
}

@article{kietzmann_artificial_2018,
	title = {Artificial Intelligence in Advertising: How Marketers Can Leverage Artificial Intelligence Along the Consumer Journey},
	volume = {58},
	issn = {0021-8499},
	url = {http://www.journalofadvertisingresearch.com/lookup/doi/10.2501/JAR-2018-035},
	doi = {10.2501/JAR-2018-035},
	shorttitle = {Artificial Intelligence in Advertising},
	pages = {263--267},
	number = {3},
	journaltitle = {Journal of Advertising Research},
	shortjournal = {{JAR}},
	author = {Kietzmann, Jan and Paschen, Jeannette and Treen, Emily},
	urldate = {2022-07-26},
	date = {2018-09},
	langid = {english},
}

@article{rodgers_themed_2021,
	title = {Themed Issue Introduction: Promises and Perils of Artificial Intelligence and Advertising},
	volume = {50},
	issn = {0091-3367, 1557-7805},
	url = {https://www.tandfonline.com/doi/full/10.1080/00913367.2020.1868233},
	doi = {10.1080/00913367.2020.1868233},
	shorttitle = {Themed Issue Introduction},
	pages = {1--10},
	number = {1},
	journaltitle = {Journal of Advertising},
	shortjournal = {Journal of Advertising},
	author = {Rodgers, Shelly},
	urldate = {2022-07-26},
	date = {2021-01-01},
	langid = {english},
}

@article{lee_engineering_2018,
	title = {Engineering {doc2vec} for automatic classification of product descriptions on {O2O} applications},
	volume = {18},
	issn = {1389-5753, 1572-9362},
	url = {http://link.springer.com/10.1007/s10660-017-9268-5},
	doi = {10.1007/s10660-017-9268-5},
	pages = {433--456},
	number = {3},
	journaltitle = {Electronic Commerce Research},
	shortjournal = {Electron Commer Res},
	author = {Lee, Hana and Yoon, Young},
	urldate = {2022-07-26},
	date = {2018-09},
	langid = {english},
}

@article{teh_textual_2022,
	title = {Textual variations affect human judgements of sentiment values},
	volume = {53},
	issn = {15674223},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S1567422322000333},
	doi = {10.1016/j.elerap.2022.101149},
	pages = {101149},
	journaltitle = {Electronic Commerce Research and Applications},
	shortjournal = {Electronic Commerce Research and Applications},
	author = {Teh, Phoey Lee and Rayson, Paul and Pak, Irina and Piao, Scott and Ho, Jessica Sze Yin and Moore, Andrew and Cheah, Yu-N},
	urldate = {2022-07-26},
	date = {2022-05},
	langid = {english},
}

@article{mohammadi_sensetrust_2021,
	title = {{SenseTrust}: A Sentiment Based Trust Model in Social Network},
	volume = {16},
	issn = {0718-1876},
	url = {https://www.mdpi.com/0718-1876/16/6/114},
	doi = {10.3390/jtaer16060114},
	shorttitle = {{SenseTrust}},
	abstract = {Online social networks, as popular media and communications tools with their own extensive uses, play key roles in public opinion polls, politics, economy, and even governance. An important issue regarding these networks is the use of multiple sources of publishing or re-publishing news and propositions that can influence audiences depending on the level of trust in these sources between users. Therefore, estimating the level of trust in social networks between users can predict the extent of social networks’ impact on news and different publication and re-publication sources, and correspondingly provide effective strategies in news dissemination, advertisements, and other diverse contents for trustees. Therefore, trust is introduced and interpreted in the present study. A large portion of interactions in social networks is based on sending and receiving texts employing natural language processing techniques. A Hidden Markov Model ({HMM}) was designed via an efficient model, namely {SenseTrust}, to estimate the level of trust between users in social networks.},
	pages = {2031--2050},
	number = {6},
	journaltitle = {Journal of Theoretical and Applied Electronic Commerce Research},
	shortjournal = {{JTAER}},
	author = {Mohammadi, Alireza and Hashemi Golpayegani, Seyyed Alireza},
	urldate = {2022-07-26},
	date = {2021-07-27},
	langid = {english},
}

@article{nielek_spiral_2010-1,
	title = {Spiral of hatred: social effects in {Internet} auctions. Between informativity and emotion},
	volume = {10},
	issn = {1389-5753, 1572-9362},
	url = {http://link.springer.com/10.1007/s10660-010-9058-9},
	doi = {10.1007/s10660-010-9058-9},
	shorttitle = {Spiral of hatred},
	pages = {313--330},
	number = {3},
	journaltitle = {Electronic Commerce Research},
	shortjournal = {Electron Commer Res},
	author = {Nielek, Radoslaw and Wawer, Aleksander and Wierzbicki, Adam},
	urldate = {2022-07-26},
	date = {2010-12},
	langid = {english},
}

@article{ngai_intelligent_2021,
	title = {An intelligent knowledge-based chatbot for customer service},
	volume = {50},
	issn = {15674223},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S1567422321000703},
	doi = {10.1016/j.elerap.2021.101098},
	pages = {101098},
	journaltitle = {Electronic Commerce Research and Applications},
	shortjournal = {Electronic Commerce Research and Applications},
	author = {Ngai, Eric W. T. and Lee, Maggie C. M. and Luo, Mei and Chan, Patrick S. L. and Liang, Tenglu},
	urldate = {2022-07-26},
	date = {2021-11},
	langid = {english},
}

@article{ou_training_2018,
	title = {Training attractive attribute classifiers based on opinion features extracted from review data},
	volume = {32},
	issn = {15674223},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S1567422318300747},
	doi = {10.1016/j.elerap.2018.10.003},
	pages = {13--22},
	journaltitle = {Electronic Commerce Research and Applications},
	shortjournal = {Electronic Commerce Research and Applications},
	author = {Ou, Wei and Huynh, Van-Nam and Sriboonchitta, Songsak},
	urldate = {2022-07-26},
	date = {2018-11},
	langid = {english},
}

@article{he_evaluation_2018,
	title = {The evaluation for perceived quality of products based on text mining and fuzzy comprehensive evaluation},
	volume = {18},
	issn = {1389-5753, 1572-9362},
	url = {http://link.springer.com/10.1007/s10660-018-9292-0},
	doi = {10.1007/s10660-018-9292-0},
	pages = {277--289},
	number = {2},
	journaltitle = {Electronic Commerce Research},
	shortjournal = {Electron Commer Res},
	author = {He, Lifeng and Zhang, Ning and Yin, Lemin},
	urldate = {2022-07-26},
	date = {2018-06},
	langid = {english},
}

@article{yao_relationship_2019,
	title = {The relationship between soft information in loan titles and online peer-to-peer lending: evidence from {RenRenDai} platform},
	volume = {19},
	issn = {1389-5753, 1572-9362},
	url = {http://link.springer.com/10.1007/s10660-018-9293-z},
	doi = {10.1007/s10660-018-9293-z},
	shorttitle = {The relationship between soft information in loan titles and online peer-to-peer lending},
	pages = {111--129},
	number = {1},
	journaltitle = {Electronic Commerce Research},
	shortjournal = {Electron Commer Res},
	author = {Yao, Jianrong and Chen, Jiarui and Wei, June and Chen, Yuangao and Yang, Shuiqing},
	urldate = {2022-07-26},
	date = {2019-03},
	langid = {english},
}

@article{yoon_what_2019,
	title = {What content and context factors lead to selection of a video clip? The heuristic route perspective},
	volume = {19},
	issn = {1389-5753, 1572-9362},
	url = {http://link.springer.com/10.1007/s10660-019-09355-6},
	doi = {10.1007/s10660-019-09355-6},
	shorttitle = {What content and context factors lead to selection of a video clip?},
	pages = {603--627},
	number = {3},
	journaltitle = {Electronic Commerce Research},
	shortjournal = {Electron Commer Res},
	author = {Yoon, Sang-Hyeak and Kim, Hee-Woong},
	urldate = {2022-07-26},
	date = {2019-09},
	langid = {english},
}

@article{chen_visualizing_2015,
	title = {Visualizing market structure through online product reviews: Integrate topic modeling, {TOPSIS}, and multi-dimensional scaling approaches},
	volume = {14},
	issn = {15674223},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S156742231400088X},
	doi = {10.1016/j.elerap.2014.11.004},
	shorttitle = {Visualizing market structure through online product reviews},
	pages = {58--74},
	number = {1},
	journaltitle = {Electronic Commerce Research and Applications},
	shortjournal = {Electronic Commerce Research and Applications},
	author = {Chen, Kun and Kou, Gang and Shang, Jennifer and Chen, Yang},
	urldate = {2022-07-26},
	date = {2015-01},
	langid = {english},
}

@article{chakraborty_empirical_2022,
	author = {Chakraborty, Aindrila and Shankar, Ramesh and Marsden, James R.},
	title = {An empirical analysis of consumer-unfriendly E-commerce terms of service agreements: Implications for customer satisfaction and business survival},
	shorttitle = {An empirical analysis of consumer-unfriendly E-commerce terms of service agreements},
	journaltitle = {Electronic Commerce Research and Applications},
	shortjournal = {Electronic Commerce Research and Applications},
	volume = {53},
	pages = {101151},
	date = {2022-05},
	langid = {english},
	issn = {15674223},
	doi = {10.1016/j.elerap.2022.101151},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S1567422322000357},
	urldate = {2022-07-26},
}

@article{shen_mining_2021,
	author = {Shen, Zheng},
	title = {Mining sustainable fashion e-commerce: social media texts and consumer behaviors},
	shorttitle = {Mining sustainable fashion e-commerce},
	journaltitle = {Electronic Commerce Research},
	shortjournal = {Electron Commer Res},
	date = {2021-06-29},
	langid = {english},
	issn = {1389-5753, 1572-9362},
	doi = {10.1007/s10660-021-09498-5},
	url = {https://link.springer.com/10.1007/s10660-021-09498-5},
	urldate = {2022-07-26},
}

@article{mou_understanding_2019,
	author = {Mou, Jian and Ren, Gang and Qin, Chunxiu and Kurcz, Kerry},
	title = {Understanding the topics of export cross-border e-commerce consumers feedback: an {LDA} approach},
	shorttitle = {Understanding the topics of export cross-border e-commerce consumers feedback},
	journaltitle = {Electronic Commerce Research},
	shortjournal = {Electron Commer Res},
	volume = {19},
	number = {4},
	pages = {749--777},
	date = {2019-12},
	langid = {english},
	issn = {1389-5753, 1572-9362},
	doi = {10.1007/s10660-019-09338-7},
	url = {http://link.springer.com/10.1007/s10660-019-09338-7},
	urldate = {2022-07-26},
}

@article{abu-shanab_e-government_2019,
	author = {Abu-Shanab, Emad and Harb, Yousra},
	title = {E-government research insights: Text mining analysis},
	shorttitle = {E-government research insights},
	journaltitle = {Electronic Commerce Research and Applications},
	shortjournal = {Electronic Commerce Research and Applications},
	volume = {38},
	pages = {100892},
	date = {2019-11},
	langid = {english},
	issn = {15674223},
	doi = {10.1016/j.elerap.2019.100892},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S1567422319300699},
	urldate = {2022-07-26},
}

@article{he_novel_2022,
	author = {He, Zhen and Zheng, Lu and He, Shuguang},
	title = {A novel approach for product competitive analysis based on online reviews},
	journaltitle = {Electronic Commerce Research},
	shortjournal = {Electron Commer Res},
	date = {2022-02-14},
	langid = {english},
	issn = {1389-5753, 1572-9362},
	doi = {10.1007/s10660-022-09534-y},
	url = {https://link.springer.com/10.1007/s10660-022-09534-y},
	urldate = {2022-07-26},
}

@article{fang_impact_2020,
	author = {Fang, Jiaming and Hu, Lixue and Liu, Xiangqian and Prybutok, Victor R.},
	title = {Impact of air quality on online restaurant review comprehensiveness},
	journaltitle = {Electronic Commerce Research},
	shortjournal = {Electron Commer Res},
	date = {2020-11-13},
	langid = {english},
	issn = {1389-5753, 1572-9362},
	doi = {10.1007/s10660-020-09445-w},
	url = {http://link.springer.com/10.1007/s10660-020-09445-w},
	urldate = {2022-07-26},
}

% shorttitle carries the complete quoted title so the curly quotes stay balanced.
@article{zhang_2020_2020,
	title = {A 2020 perspective on “From buzz to bucks: The impact of social media opinions on the locus of innovation”: From surfaces to essences},
	volume = {40},
	issn = {15674223},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S1567422320300417},
	doi = {10.1016/j.elerap.2020.100964},
	shorttitle = {A 2020 perspective on “From buzz to bucks: The impact of social media opinions on the locus of innovation”},
	pages = {100964},
	journaltitle = {Electronic Commerce Research and Applications},
	shortjournal = {Electronic Commerce Research and Applications},
	author = {Zhang, Wenping and Kang, Lele and Jiang, Qiqi and Pei, Lei},
	urldate = {2022-07-26},
	date = {2020-03},
	langid = {english},
}

@article{ou_training_2018-1,
	author = {Ou, Wei and Huynh, Van-Nam and Sriboonchitta, Songsak},
	title = {Training attractive attribute classifiers based on opinion features extracted from review data},
	journaltitle = {Electronic Commerce Research and Applications},
	shortjournal = {Electronic Commerce Research and Applications},
	volume = {32},
	pages = {13--22},
	date = {2018-11},
	langid = {english},
	issn = {15674223},
	doi = {10.1016/j.elerap.2018.10.003},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S1567422318300747},
	urldate = {2022-07-26},
}

% pages holds the full page range of the article, not only its first page.
@article{mosteller_inference_1963-1,
	title = {Inference in an Authorship Problem},
	volume = {58},
	issn = {01621459},
	url = {https://www.jstor.org/stable/2283270?origin=crossref},
	doi = {10.2307/2283270},
	pages = {275--309},
	number = {302},
	journaltitle = {Journal of the American Statistical Association},
	shortjournal = {Journal of the American Statistical Association},
	author = {Mosteller, Frederick and Wallace, David L.},
	urldate = {2022-07-28},
	date = {1963-06},
	langid = {english},
}

% volume holds the bare issue number (43); the bibliography style supplies any label.
@article{balech_masque_2022-1,
	title = {Le masque, figure polaire de la crise de la Covid-19 : une exploration par {NLP} du flux des conversations Twitter (février - mai 2020)},
	volume = {43},
	issn = {1953-6119},
	url = {https://www.cairn.info/revue-marche-et-organisations-2022-1-page-151.htm?ref=doi},
	doi = {10.3917/maorg.043.0151},
	shorttitle = {Le masque, figure polaire de la crise de la Covid-19},
	pages = {151--187},
	number = {1},
	journaltitle = {Marché et organisations},
	author = {Balech, Sophie and Calciu, Michel and Monnot, Julien and Benavent, Christophe},
	urldate = {2022-07-28},
	date = {2022-02-11},
	langid = {french},
}

% The compound surname Perez-Vega is written with an ASCII hyphen-minus so it sorts and typesets under any engine.
@article{mariani_ai_2022,
	title = {{AI} in marketing, consumer research and psychology: A systematic literature review and research agenda},
	volume = {39},
	issn = {0742-6046, 1520-6793},
	url = {https://onlinelibrary.wiley.com/doi/10.1002/mar.21619},
	doi = {10.1002/mar.21619},
	shorttitle = {{AI} in marketing, consumer research and psychology},
	pages = {755--776},
	number = {4},
	journaltitle = {Psychology \& Marketing},
	shortjournal = {Psychology and Marketing},
	author = {Mariani, Marcello M. and Perez-Vega, Rodrigo and Wirtz, Jochen},
	urldate = {2022-07-28},
	date = {2022-04},
	langid = {english},
}

@article{marshall_latent_2022,
	author = {Marshall, Pablo},
	title = {A Latent Allocation Model for Brand Awareness and Mindset Metrics},
	journaltitle = {International Journal of Market Research},
	shortjournal = {International Journal of Market Research},
	volume = {64},
	number = {4},
	pages = {526--540},
	date = {2022-07},
	langid = {english},
	issn = {1470-7853, 2515-2173},
	doi = {10.1177/14707853211040052},
	url = {http://journals.sagepub.com/doi/10.1177/14707853211040052},
	urldate = {2022-07-28},
	abstract = {Mindset metrics, the measurement of consumers’ perceptions, attitudes, and intentions, have a long tradition in marketing, particularly in advertising and branding. Some of the most usual mindset metrics are brand awareness, brand image, personality traits, and attribute importance. Brand awareness and other mindset measures have the form of texts (bag of words). And, a natural methodology for analyzing these variables is topic modeling and the popular Latent Dirichlet allocation ({LDA}) model. The {LDA} methodology assumes that brands or concepts are represented by clusters of brands in consumers’ minds. This study proposes an extension/modification of the {LDA} model for brand awareness and other mindset variables that incorporate Bernoulli observations instead of the Multinomial specification present in the usual {LDA} specification. This extension is relevant since, unlike words in texts, brands and mindset concepts are not repeated within a document and have a dichotomous form, present or absent. The proposed model is applied to two brand awareness datasets. The results show significant gains in both managerial insights in analyzing brand clusters and consumers’ profiles.},
}

@article{tang_digging_2015-1,
	author = {Tang, Chuanyi and Guo, Lin},
	title = {Digging for gold with a simple tool: Validating text mining in studying electronic word-of-mouth ({eWOM}) communication},
	shorttitle = {Digging for gold with a simple tool},
	journaltitle = {Marketing Letters},
	shortjournal = {Mark Lett},
	volume = {26},
	number = {1},
	pages = {67--80},
	date = {2015-03},
	langid = {english},
	issn = {0923-0645, 1573-059X},
	doi = {10.1007/s11002-013-9268-8},
	url = {http://link.springer.com/10.1007/s11002-013-9268-8},
	urldate = {2022-07-31},
}

% Structured abstract kept on a single line, with its section headings inline, so the field contains no paragraph breaks.
@article{azimi_how_2022,
	title = {How fakes make it through: the role of review features versus consumer characteristics},
	volume = {39},
	issn = {0736-3761},
	url = {https://www.emerald.com/insight/content/doi/10.1108/JCM-04-2021-4597/full/html},
	doi = {10.1108/JCM-04-2021-4597},
	shorttitle = {How fakes make it through},
	abstract = {Purpose: This study aims to examine how characteristics of an online review and a consumer reading the review influence the probability that the consumer will assess the review as authentic (real) or inauthentic (fake). This study further examines the specific factors that increase or decrease a consumer’s ability to detect a review’s authenticity and reasons a consumer makes these authenticity assessments. Design/methodology/approach: Hypothesized relationships were tested using an online experiment of over 400 respondents who collectively provided 3,224 authenticity assessments along with 3,181 written self-report reasons for assessing a review as authentic or inauthentic. Findings: The findings indicate that specific combinations of factors including review valence, length, readability, type of content and consumer personality traits and demographics lead to systematic bias in assessing review authenticity. Using qualitative analysis, this paper provided further insight into why consumers are deceived. Research limitations/implications: This research showed there are important differences in the way the authenticity assessment process works for positive versus negative reviews and identified factors that can make a fake review hard to spot or a real review hard to believe. Practical implications: This research has implications for both consumers and businesses by emphasizing areas of vulnerability for fake information and providing guidance for how to design review systems for improved veracity. Originality/value: This research is one of the few works that explicates how people assess information authenticity and their consequent assessment accuracy in the context of online reviews.},
	pages = {523--537},
	number = {5},
	journaltitle = {Journal of Consumer Marketing},
	shortjournal = {{JCM}},
	author = {Azimi, Shabnam and Chan, Kwong and Krasnikov, Alexander},
	urldate = {2022-07-31},
	date = {2022-07-25},
	langid = {english},
}

% The proper noun Twitter is braced so sentence-casing styles keep it capitalised.
@article{mcshane_emoji_2021,
	title = {Emoji, Playfulness, and Brand Engagement on {Twitter}},
	volume = {53},
	issn = {10949968},
	url = {https://journals.sagepub.com/doi/full/10.1016/j.intmar.2020.06.002},
	doi = {10.1016/j.intmar.2020.06.002},
	pages = {96--110},
	journaltitle = {Journal of Interactive Marketing},
	shortjournal = {Journal of Interactive Marketing},
	author = {{McShane}, Lindsay and Pancer, Ethan and Poole, Maxwell and Deng, Qi},
	urldate = {2022-07-31},
	date = {2021-02},
	langid = {english},
}

% The abstract field starts directly with the abstract text (no leading heading label).
@article{duggento_text_2022,
	title = {A text data mining approach to the study of emotions triggered by new advertising formats during the {COVID}-19 pandemic},
	issn = {0033-5177, 1573-7845},
	url = {https://link.springer.com/10.1007/s11135-022-01460-3},
	doi = {10.1007/s11135-022-01460-3},
	abstract = {Under the influence of the health emergency triggered by the {COVID}-19 pandemic, many brands changed their communication strategy and included more or less explicit references to the principles of solidarity and fraternity in their {TV} commercials to boost the confidence and hope of Italian families during the lockdown. The traditional attitudes of the advertising format, which focused on product characteristics, were relegated to the background in order to reinforce the “brand image” through words, signs, hashtags and music that spread empathetic messages to all those who needed to regain hope and trust in a time of extreme emotional fragility. The objective of this paper is to identify the emotions and brand awareness during the lockdown using text mining techniques by measuring customer sentiment expressed on the Twitter social network. Our proposal starts from an unstructured corpus of 20,982 tweets processed with text data mining techniques to identify patterns and trends in people’s posts related to specific hashtags and {TV} ads produced during the {COVID}-19 pandemic. The innovations in the brand’s advertising among consumers seem to have triggered some sense of appreciation and gratitude, as well as a strong sense of belonging that was not present before, as the {TV} ads were perceived as a disruptive element in consumers’ tweets. Although this effect is clearly documented, in this paper we demonstrate its transitory nature, in the sense that the frequency of occurrence of terms associated with an emotional dimension peaks during the weeks of lockdown, and then gradually decreases.},
	journaltitle = {Quality \& Quantity},
	shortjournal = {Qual Quant},
	author = {D’Uggento, Angela Maria and Biafora, Albino and Manca, Fabio and Marin, Claudia and Bilancia, Massimo},
	urldate = {2022-08-01},
	date = {2022-06-30},
	langid = {english},
}

% The proceedings issue is stored in the number field (volume 32, issue 2) rather than as free text in note.
@inproceedings{le_distributed_2014,
	location = {Beijing, China},
	title = {Distributed Representations of Sentences and Documents},
	volume = {32},
	number = {2},
	url = {https://proceedings.mlr.press/v32/le14.html},
	series = {Proceedings of Machine Learning Research},
	abstract = {Many machine learning algorithms require the input to be represented as a fixed length feature vector. When it comes to texts, one of the most common representations is bag-of-words. Despite their popularity, bag-of-words models have two major weaknesses: they lose the ordering of the words and they also ignore semantics of the words. For example, "powerful," "strong" and "Paris" are equally distant. In this paper, we propose an unsupervised algorithm that learns vector representations of sentences and text documents. This algorithm represents each document by a dense vector which is trained to predict words in the document. Its construction gives our algorithm the potential to overcome the weaknesses of bag-of-words models. Empirical results show that our technique outperforms bag-of-words models as well as other techniques for text representations. Finally, we achieve new state-of-the-art results on several text classification and sentiment analysis tasks.},
	pages = {1188--1196},
	booktitle = {Proceedings of the 31st International Conference on Machine Learning},
	publisher = {{PMLR}},
	author = {Le, Quoc and Mikolov, Tomas},
	editor = {Xing, Eric P. and Jebara, Tony},
	date = {2014-06-22},
}

% The abstract field starts directly with the abstract text (no leading heading label).
@article{gennaro_emotion_2022,
	title = {Emotion and Reason in Political Language},
	volume = {132},
	issn = {0013-0133, 1468-0297},
	url = {https://academic.oup.com/ej/article/132/643/1037/6490125},
	doi = {10.1093/ej/ueab104},
	abstract = {This paper studies the use of emotion and reason in political discourse. Adopting computational-linguistics techniques to construct a validated text-based scale, we measure emotionality in six million speeches given in U.S. Congress over the years 1858–2014. Intuitively, emotionality spikes during times of war and is highest in speeches about patriotism. In the time series, emotionality was relatively low and stable in earlier years but increased significantly starting in the late 1970s. Across Congress members, emotionality is higher for Democrats, for women, for ethnic/religious minorities, for the opposition party and for members with ideologically extreme roll-call voting records.},
	pages = {1037--1059},
	number = {643},
	journaltitle = {The Economic Journal},
	author = {Gennaro, Gloria and Ash, Elliott},
	urldate = {2023-01-30},
	date = {2022-04-01},
	langid = {english},
}