-
Notifications
You must be signed in to change notification settings - Fork 4
/
book.bib
4331 lines (3994 loc) · 326 KB
/
book.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@article{piolat_version_2011,
  author = {Piolat, A. and Booth, R. J. and Chung, C. K. and Davids, M. and Pennebaker, J. W.},
  title = {La version française du dictionnaire pour le {LIWC} : modalités de construction et exemples d’utilisation},
  shorttitle = {La version française du dictionnaire pour le {LIWC}},
  journaltitle = {Psychologie Française},
  shortjournal = {Psychologie Française},
  volume = {56},
  number = {3},
  pages = {145--159},
  date = {2011-09},
  issn = {00332984},
  doi = {10.1016/j.psfr.2011.07.002},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S0033298411000355},
  urldate = {2020-07-01},
  langid = {french},
}
@inproceedings{denecke_using_2008,
  author = {Denecke, Kerstin},
  title = {Using {SentiWordNet} for multilingual sentiment analysis},
  booktitle = {2008 {IEEE} 24th International Conference on Data Engineering Workshop},
  eventtitle = {2008 {IEEE} 24th International Conference on Data Engineering Workshop ({ICDE} Workshop 2008)},
  venue = {Cancun, Mexico},
  publisher = {{IEEE}},
  pages = {507--512},
  date = {2008-04},
  isbn = {978-1-4244-2161-9 978-1-4244-2162-6},
  doi = {10.1109/ICDEW.2008.4498370},
  url = {http://ieeexplore.ieee.org/document/4498370/},
  urldate = {2020-07-02},
}
@book{boullier_opinion_2012,
  author = {Boullier, Dominique and Lohard, Audrey},
  title = {Opinion mining et sentiment analysis : méthodes et outils},
  publisher = {OpenEdition Press},
  location = {Marseille},
  date = {2012},
  isbn = {978-2-8218-1887-3 978-2-8218-1227-7 978-2-8218-1226-0},
  langid = {french},
  note = {{OCLC}: 1096948624},
}
@inproceedings{baccianella_sentiwordnet_2010,
  author = {Baccianella, Stefano and Esuli, Andrea and Sebastiani, Fabrizio},
  title = {{SentiWordNet} 3.0: An Enhanced Lexical Resource for Sentiment Analysis and Opinion Mining},
  booktitle = {Proceedings of the 7th Conference on Language Resources and Evaluation},
  eventtitle = {{LREC} 2010},
  venue = {Valletta, {MT}},
  pages = {2200--2204},
  date = {2010},
}
@article{chan_sentiment_2017,
  author = {Chan, Samuel W. K. and Chong, Mickey W. C.},
  title = {Sentiment analysis in financial texts},
  journaltitle = {Decision Support Systems},
  shortjournal = {Decision Support Systems},
  volume = {94},
  pages = {53--64},
  date = {2017-02},
  issn = {01679236},
  doi = {10.1016/j.dss.2016.10.006},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S0167923616301828},
  urldate = {2020-07-12},
  langid = {english},
}
@article{duval_analyse_2016,
  author = {Duval, Dominic and Pétry, François},
  title = {L'analyse automatisée du ton médiatique : construction et utilisation de la version française du {Lexicoder Sentiment Dictionary}},
  shorttitle = {L'analyse automatisée du ton médiatique},
  journaltitle = {Canadian Journal of Political Science},
  volume = {49},
  number = {2},
  pages = {197--220},
  date = {2016-06},
  issn = {0008-4239, 1744-9324},
  doi = {10.1017/S000842391600055X},
  url = {https://www.cambridge.org/core/product/identifier/S000842391600055X/type/journal_article},
  urldate = {2019-05-24},
  langid = {french},
  abstract = {Résumé Cet article introduit un nouveau dictionnaire permettant l'analyse automatisée du ton des médias francophones, que nous avons appelé Lexicoder Sentiment Dictionnaire Français ( {LSDFr} ) en référence au lexique anglophone de Young et Soroka (2012), Lexicoder Sentiment Dictionary ( {LSD} ) à partir duquel le {LSDFr} a été construit. Une fois construit, nous comparons le {LSDFr} au seul autre dictionnaire francophone existant de ce genre, Linguistic Inquiry and Word Count ( {LIWC} ). Nous testons ensuite la validité interne du {LSDFr} en le comparant avec un corpus de textes codés manuellement. Nous testons enfin la validité externe du {LSDFr} en mesurant jusqu'où le ton médiatique, calculé à l'aide de notre dictionnaire, prédit les intentions de vote des Québécois lors des quatre dernières campagnes électorales. En développant cet outil, notre objectif est de permettre à d'autres chercheurs d'effectuer des analyses médiatiques dans un corpus de textes comparables en français. , Abstract This article introduces a new dictionary for the automated analysis of the tone of French media. We named it the French Lexicoder Sentiment Dictionary ( {LSDFr} ) in reference to the English lexicon developed by Young and Soroka (2012), the Lexicoder Sentiment Dictionary ( {LSD} ), from which the {LSDFr} was built. We compare the {LSDFr} to the only other French sentiment lexicon, Linguistic Inquiry and Word Count ( {LIWC} ). First, we detail the construction of the dictionary. We then test the internal validity of the {LSDFr} comparing it with a corpus of manually coded texts. Finally, we test the external validity of {LSDFr} by measuring how the media tone, calculated using our dictionary, predicts voting intentions in the last four Quebec elections. Our goal is to enable other researchers to conduct media analyses with a comparable corpus of texts in French.},
}
@article{fruchterman_graph_1991,
  author = {Fruchterman, Thomas M. J. and Reingold, Edward M.},
  title = {Graph drawing by force-directed placement},
  journaltitle = {Software: Practice and Experience},
  volume = {21},
  number = {11},
  pages = {1129--1164},
  date = {1991-11},
  issn = {00380644, 1097024X},
  doi = {10.1002/spe.4380211102},
  url = {http://doi.wiley.com/10.1002/spe.4380211102},
  urldate = {2019-08-11},
  langid = {english},
}
@article{arnold_tidy_2017,
  author = {Arnold, Taylor},
  title = {A Tidy Data Model for Natural Language Processing using {cleanNLP}},
  journaltitle = {The R Journal},
  volume = {9},
  number = {2},
  pages = {248},
  date = {2017},
  issn = {2073-4859},
  doi = {10.32614/RJ-2017-035},
  url = {https://journal.r-project.org/archive/2017/RJ-2017-035/index.html},
  urldate = {2019-08-11},
  langid = {english},
  abstract = {Recent advances in natural language processing have produced libraries that extract low-level features from a collection of raw texts. These features, known as annotations, are usually stored internally in hierarchical, tree-based data structures. This paper proposes a data model to represent annotations as a collection of normalized relational data tables optimized for exploratory data analysis and predictive modeling. The R package {cleanNLP}, which calls one of two state of the art {NLP} libraries ({CoreNLP} or {spaCy}), is presented as an implementation of this data model. It takes raw text as an input and returns a list of normalized tables. Specific annotations provided include tokenization, part of speech tagging, named entity recognition, sentiment analysis, dependency parsing, coreference resolution, and word embeddings. The package currently supports input text in English, German, French, and Spanish.},
}
@article{van_der_maaten_laurens_visualizing_2008,
  author = {van der Maaten, Laurens and Hinton, Geoffrey},
  title = {Visualizing Data using t-{SNE}},
  journaltitle = {Journal of Machine Learning Research},
  volume = {9},
  number = {86},
  pages = {2579--2605},
  date = {2008},
}
@article{shirdastian_using_2019,
  author = {Shirdastian, Hamid and Laroche, Michel and Richard, Marie-Odile},
  title = {Using big data analytics to study brand authenticity sentiments: The case of Starbucks on Twitter},
  shorttitle = {Using big data analytics to study brand authenticity sentiments},
  journaltitle = {International Journal of Information Management},
  volume = {48},
  pages = {291--307},
  date = {2019-10},
  issn = {02684012},
  doi = {10.1016/j.ijinfomgt.2017.09.007},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S0268401217302657},
  urldate = {2019-10-08},
  langid = {english},
}
@article{plutchik_psychoevolutionary_1982,
  author = {Plutchik, Robert},
  title = {A psychoevolutionary theory of emotions},
  journaltitle = {Social Science Information},
  volume = {21},
  number = {4},
  pages = {529--553},
  date = {1982-07},
  issn = {0539-0184, 1461-7412},
  doi = {10.1177/053901882021004003},
  url = {http://journals.sagepub.com/doi/10.1177/053901882021004003},
  urldate = {2019-01-18},
  langid = {english},
}
@article{blei_latent_2003,
  author = {Blei, David M. and Ng, Andrew Y. and Jordan, Michael I.},
  title = {Latent Dirichlet Allocation},
  journaltitle = {Journal of Machine Learning Research},
  shortjournal = {J. Mach. Learn. Res.},
  volume = {3},
  pages = {993--1022},
  date = {2003-03},
  issn = {1532-4435},
  url = {http://dl.acm.org/citation.cfm?id=944919.944937},
}
@article{gunter_sentiment_2014,
  author = {Gunter, Barrie and Koteyko, Nelya and Atanasova, Dimitrinka},
  title = {Sentiment Analysis: A Market-Relevant and Reliable Measure of Public Feeling?},
  shorttitle = {Sentiment Analysis},
  journaltitle = {International Journal of Market Research},
  volume = {56},
  number = {2},
  pages = {231--247},
  date = {2014-03},
  issn = {1470-7853, 2515-2173},
  doi = {10.2501/IJMR-2014-014},
  url = {http://journals.sagepub.com/doi/10.2501/IJMR-2014-014},
  urldate = {2019-10-20},
  langid = {english},
  abstract = {This paper critically examines emergent research with sentiment analysis tools to assess their current status and relevance to applied opinion and behaviour measurement. The rapid spread of online news and online chatter in blogs, micro-blogs and social media sites has created a potentially rich source of public opinion. Waves of public feeling are vented spontaneously on a wide range of issues on a minute-by-minute basis in the online world. These online discourses are continually being refreshed, and businesses and advertisers, governments and policy makers have woken up to the fact that this universe of self-perpetuating human sentiment could represent a valuable resource to guide political and business decisions. The massive size of this repository of emotional content renders manual analysis of it feasible only for tiny portions of its totality, and even then can be labour intensive. Computer scientists have however produced software tools that can apply linguistic rules to provide electronic readings of meanings and emotions. These tools are now being utilised by applied social science and market researchers to yield sentiment profiles from online discourses created within specific platforms that purport to represent reliable substitutes for more traditional, offline measures of public opinion. This paper considers what these tools have demonstrated so far and where caution in their application is still called for.},
}
@article{tausczik_psychological_2010,
  author = {Tausczik, Yla R. and Pennebaker, James W.},
  title = {The Psychological Meaning of Words: {LIWC} and Computerized Text Analysis Methods},
  shorttitle = {The Psychological Meaning of Words},
  journaltitle = {Journal of Language and Social Psychology},
  volume = {29},
  number = {1},
  pages = {24--54},
  date = {2010-03},
  issn = {0261-927X, 1552-6526},
  doi = {10.1177/0261927X09351676},
  url = {http://journals.sagepub.com/doi/10.1177/0261927X09351676},
  urldate = {2019-11-07},
  langid = {english},
}
@inproceedings{nielsen_new_2011,
  author = {Nielsen, Finn Årup},
  editor = {Rowe, Matthew and Stankovic, Milan and Dadzie, Aba-Sah and Hardey, Mariann},
  title = {A New {ANEW}: Evaluation of a Word List for Sentiment Analysis in Microblogs},
  booktitle = {Proceedings of the {ESWC2011} Workshop on ‘Making Sense of Microposts’: Big Things Come in Small Packages},
  series = {{CEUR} Workshop Proceedings},
  volume = {718},
  publisher = {{CEUR}-{WS}.org},
  pages = {93--98},
  date = {2011},
  url = {http://dblp.uni-trier.de/db/conf/msm/msm2011.html#Nielsen11},
  keywords = {dblp},
}
@inproceedings{ding_holistic_2008,
  author = {Ding, Xiaowen and Liu, Bing and Yu, Philip S.},
  title = {A Holistic Lexicon-based Approach to Opinion Mining},
  booktitle = {Proceedings of the 2008 International Conference on Web Search and Data Mining},
  series = {{WSDM} '08},
  publisher = {{ACM}},
  location = {New York, {NY}, {USA}},
  pages = {231--240},
  date = {2008},
  isbn = {978-1-59593-927-2},
  doi = {10.1145/1341531.1341561},
  url = {http://doi.acm.org/10.1145/1341531.1341561},
  keywords = {context dependent opinions, opinion mining, sentiment analysis},
}
@article{puschmann_turning_2018,
  author = {Puschmann, Cornelius and Powell, Alison},
  title = {Turning Words Into Consumer Preferences: How Sentiment Analysis Is Framed in Research and the News Media},
  shorttitle = {Turning Words Into Consumer Preferences},
  journaltitle = {Social Media + Society},
  volume = {4},
  number = {3},
  pages = {205630511879772},
  date = {2018-07},
  issn = {2056-3051},
  doi = {10.1177/2056305118797724},
  url = {http://journals.sagepub.com/doi/10.1177/2056305118797724},
  urldate = {2019-01-15},
  langid = {english},
}
@dataset{banda_large-scale_2020,
  author = {Banda, Juan M. and Tekumalla, Ramya and Wang, Guanyu and Yu, Jingyuan and Liu, Tuo and Ding, Yuning and Chowell, Gerardo},
  title = {A large-scale {COVID}-19 Twitter chatter dataset for open scientific research - an international collaboration},
  publisher = {Zenodo},
  date = {2020-04},
  doi = {10.5281/zenodo.3757272},
  url = {https://zenodo.org/record/3757272},
  urldate = {2020-04-25},
  rights = {Open Access},
  langid = {english},
  abstract = {Due to the relevance of the {COVID}-19 global pandemic, we are releasing our dataset of tweets acquired from the Twitter Stream related to {COVID}-19 chatter. Since our first release we have received additional data from our new collaborators, allowing this resource to grow to its current size. Dedicated data gathering started from March 11th yielding over 4 million tweets a day. We have added additional data provided by our new collaborators from January 27th to March 27th, to provide extra longitudinal coverage. The data collected from the stream captures all languages, but the higher prevalence are: English, Spanish, and French. We release all tweets and retweets on the full\_dataset.tsv file (205,409,413 unique tweets), and a cleaned version with no retweets on the full\_dataset-clean.tsv file (44,726,568 unique tweets). There are several practical reasons for us to leave the retweets, tracing important tweets and their dissemination is one of them. For {NLP} tasks we provide the top 1000 frequent terms in frequent\_terms.csv, the top 1000 bigrams in frequent\_bigrams.csv, and the top 1000 trigrams in frequent\_trigrams.csv. Some general statistics per day are included for both datasets in the statistics-full\_dataset.tsv and statistics-full\_dataset-clean.tsv files. More details can be found (and will be updated faster at: https://github.com/thepanacealab/covid19\_twitter) and our pre-print about the dataset (https://arxiv.org/abs/2004.03688) As always, the tweets distributed here are only tweet identifiers (with date and time added) due to the terms and conditions of Twitter to re-distribute Twitter data {ONLY} for research purposes. The need to be hydrated to be used.},
  keywords = {covid-19, covid19, nlp, social media, twitter},
}
@article{pearce_no_2003,
  author = {Pearce, Laurie},
  title = {Disaster Management and Community Planning, and Public Participation: How to Achieve Sustainable Hazard Mitigation},
  journaltitle = {Natural Hazards},
  volume = {28},
  number = {2},
  pages = {211--228},
  date = {2003},
  issn = {0921030X},
  doi = {10.1023/A:1022917721797},
  url = {http://link.springer.com/10.1023/A:1022917721797},
  urldate = {2020-08-22},
}
@article{cambria_jumping_2014,
  author = {Cambria, Erik and White, Bebo},
  title = {Jumping {NLP} Curves: A Review of Natural Language Processing Research},
  shorttitle = {Jumping {NLP} Curves},
  journaltitle = {{IEEE} Computational Intelligence Magazine},
  shortjournal = {{IEEE} Comput. Intell. Mag.},
  volume = {9},
  number = {2},
  pages = {48--57},
  date = {2014-05},
  issn = {1556-603X},
  doi = {10.1109/MCI.2014.2307227},
  url = {http://ieeexplore.ieee.org/document/6786458/},
  urldate = {2020-08-23},
  file = {Cambria et White - 2014 - Jumping NLP Curves A Review of Natural Language P.pdf:C\:\\Users\\33623\\Zotero\\storage\\ZGN9TBJU\\Cambria et White - 2014 - Jumping NLP Curves A Review of Natural Language P.pdf:application/pdf},
}
@article{anastasopoulos_computational_2017,
  author = {Anastasopoulos, Lefteris Jason and Moldogaziev, Tima T. and Scott, Tyler},
  title = {Computational Text Analysis for Public Management Research},
  journaltitle = {{SSRN} Electronic Journal},
  shortjournal = {{SSRN} Journal},
  date = {2017},
  issn = {1556-5068},
  doi = {10.2139/ssrn.3269520},
  url = {https://www.ssrn.com/abstract=3269520},
  urldate = {2020-08-23},
  langid = {english},
}
@article{bourdieu_opinion_1973,
  author = {Bourdieu, Pierre},
  title = {L'opinion publique n'existe pas},
  journaltitle = {Les Temps modernes},
  number = {318},
  pages = {1292--1309},
  date = {1973-01},
}
@article{humphreys_automated_2018,
  author = {Humphreys, Ashlee and Wang, Rebecca Jen-Hui},
  editor = {Fischer, Eileen and Price, Linda},
  title = {Automated Text Analysis for Consumer Research},
  journaltitle = {Journal of Consumer Research},
  volume = {44},
  number = {6},
  pages = {1274--1306},
  date = {2018-04-01},
  issn = {0093-5301, 1537-5277},
  doi = {10.1093/jcr/ucx104},
  url = {https://academic.oup.com/jcr/article/44/6/1274/4283031},
  urldate = {2020-11-15},
  langid = {english},
  abstract = {The amount of digital text available for analysis by consumer researchers has risen dramatically. Consumer discussions on the internet, product reviews, and digital archives of news articles and press releases are just a few potential sources for insights about consumer attitudes, interaction, and culture. Drawing from linguistic theory and methods, this article presents an overview of automated text analysis, providing integration of linguistic theory with constructs commonly used in consumer research, guidance for choosing amongst methods, and advice for resolving sampling and statistical issues unique to text analysis. We argue that although automated text analysis cannot be used to study all phenomena, it is a useful tool for examining patterns in text that neither researchers nor consumers can detect unaided. Text analysis can be used to examine psychological and sociological constructs in consumer-produced digital text by enabling discovery or by providing ecological validity.},
  file = {Humphreys et Wang - 2018 - Automated Text Analysis for Consumer Research.pdf:C\:\\Users\\33623\\Zotero\\storage\\GRJZQ84D\\Humphreys et Wang - 2018 - Automated Text Analysis for Consumer Research.pdf:application/pdf},
}
@article{lock_quantitative_2015,
  author = {Lock, Irina and Seele, Peter},
  title = {Quantitative content analysis as a method for business ethics research},
  journaltitle = {Business Ethics: A European Review},
  shortjournal = {Bus Ethics Eur Rev},
  volume = {24},
  pages = {S24--S40},
  date = {2015-07},
  issn = {09628770},
  doi = {10.1111/beer.12095},
  url = {http://doi.wiley.com/10.1111/beer.12095},
  urldate = {2020-11-15},
  langid = {english},
  file = {Lock et Seele - 2015 - Quantitative content analysis as a method for busi.pdf:C\:\\Users\\33623\\Zotero\\storage\\G33JEG6F\\Lock et Seele - 2015 - Quantitative content analysis as a method for busi.pdf:application/pdf},
}
@article{coleman_computer_1975,
  author = {Coleman, Meri and Liau, T. L.},
  title = {A computer readability formula designed for machine scoring},
  journaltitle = {Journal of Applied Psychology},
  volume = {60},
  number = {2},
  pages = {283--284},
  date = {1975},
  issn = {0021-9010},
  doi = {10.1037/h0076540},
  url = {http://content.apa.org/journals/apl/60/2/283},
  urldate = {2019-01-16},
  langid = {english},
}
@inproceedings{canini_online_2009,
  author = {Canini, Kevin and Shi, Lei and Griffiths, Thomas},
  editor = {van Dyk, David and Welling, Max},
  title = {Online Inference of Topics with Latent Dirichlet Allocation},
  booktitle = {Proceedings of the Twelth International Conference on Artificial Intelligence and Statistics},
  series = {Proceedings of Machine Learning Research},
  volume = {5},
  venue = {Hilton Clearwater Beach Resort, Clearwater Beach, Florida {USA}},
  publisher = {{PMLR}},
  pages = {65--72},
  date = {2009-04},
  url = {http://proceedings.mlr.press/v5/canini09a.html},
  abstract = {Inference algorithms for topic models are typically designed to be run over an entire collection of documents after they have been observed. However, in many applications of these models, the collection grows over time, making it infeasible to run batch algorithms repeatedly. This problem can be addressed by using online algorithms, which update estimates of the topics as each document is observed. We introduce two related Rao-Blackwellized online inference algorithms for the latent Dirichlet allocation ({LDA}) model – incremental Gibbs samplers and particle filters – and compare their runtime and performance to that of existing algorithms.},
}
@online{suster_investigation_2015,
  author = {Šuster, Simon},
  title = {An investigation into language complexity of World-of-Warcraft game-external texts},
  date = {2015-02},
  eprint = {1502.02655},
  eprinttype = {arXiv},
  eprintclass = {cs.CL},
  url = {http://arxiv.org/abs/1502.02655},
  urldate = {2019-01-21},
  abstract = {We present a language complexity analysis of World of Warcraft ({WoW}) community texts, which we compare to texts from a general corpus of web English. Results from several complexity types are presented, including lexical diversity, density, readability and syntactic complexity. The language of {WoW} texts is found to be comparable to the general corpus on some complexity measures, yet more specialized on other measures. Our findings can be used by educators willing to include game-related activities into school curricula.},
  keywords = {Computer Science - Computation and Language},
}
@inproceedings{sievert_ldavis_2014,
  author = {Sievert, Carson and Shirley, Kenneth},
  title = {{LDAvis}: A method for visualizing and interpreting topics},
  booktitle = {Proceedings of the Workshop on Interactive Language Learning, Visualization, and Interfaces},
  venue = {Baltimore, Maryland, {USA}},
  publisher = {Association for Computational Linguistics},
  pages = {63--70},
  date = {2014-06},
}
@article{liu_towards_2017,
  author = {Liu, Shixia and Wang, Xiting and Liu, Mengchen and Zhu, Jun},
  title = {Towards better analysis of machine learning models: A visual analytics perspective},
  shorttitle = {Towards better analysis of machine learning models},
  journaltitle = {Visual Informatics},
  volume = {1},
  number = {1},
  pages = {48--56},
  date = {2017-03},
  issn = {2468502X},
  doi = {10.1016/j.visinf.2017.01.006},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S2468502X17300086},
  urldate = {2018-12-22},
  langid = {english},
}
@article{tibshirani_regression_2011,
  author = {Tibshirani, Robert},
  title = {Regression shrinkage and selection via the lasso: a retrospective},
  shorttitle = {Regression shrinkage and selection via the lasso},
  journaltitle = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume = {73},
  number = {3},
  pages = {273--282},
  date = {2011-06},
  issn = {13697412},
  doi = {10.1111/j.1467-9868.2011.00771.x},
  url = {http://doi.wiley.com/10.1111/j.1467-9868.2011.00771.x},
  urldate = {2018-12-22},
  langid = {english},
}
@article{pekar_discovery_2008,
  author = {Pekar, Viktor and Ou, Shiyan},
  title = {Discovery of subjective evaluations of product features in hotel reviews},
  journaltitle = {Journal of Vacation Marketing},
  volume = {14},
  number = {2},
  pages = {145--155},
  date = {2008-04},
  issn = {1356-7667, 1479-1870},
  doi = {10.1177/1356766707087522},
  url = {http://journals.sagepub.com/doi/10.1177/1356766707087522},
  urldate = {2019-01-15},
  langid = {english},
}
@article{hug_loi_2004,
  author = {Hug, Marc},
  title = {La loi de Menzerath appliquée à un ensemble de textes},
  journaltitle = {Lexicometrica},
  date = {2004},
}
@report{senter_automated_1967,
  author = {Senter, R. J. and Smith, E. A.},
  title = {Automated Readability Index},
  type = {Technical Report},
  number = {AMRL-TR-66-220},
  institution = {Aerospace Medical Research Laboratories, Wright-Patterson Air Force Base},
  date = {1967-11},
}
@article{tweedie_how_1998,
  author = {Tweedie, Fiona J. and Baayen, R. Harald},
  title = {How Variable May a Constant be? Measures of Lexical Richness in Perspective},
  journaltitle = {Computers and the Humanities},
  volume = {32},
  pages = {323--352},
  date = {1998},
}
@report{bennani_les_2019,
  author = {Bennani, Hamza and Gandré, Pauline and Monnery, Benjamin},
  title = {Les déterminants locaux de la participation numérique au Grand débat national: une analyse économétrique},
  type = {{EconomiX} Working Papers},
  number = {2019-7},
  institution = {University of Paris Nanterre, {EconomiX}},
  date = {2019},
  url = {https://EconPapers.repec.org/RePEc:drm:wpaper:2019-7},
  abstract = {This paper analyses the local determinants of the electronic participation to the "Grand débat". First, we highlight the spatial heterogeneity of the participants using their zip code. Second, we use an econometric approach to assess the local determinants of the general participation and the participation on each of the four topics of the "Grand débat". The results show that the median standard of living and the education level are the main determinants of the general participation, whereas some specific variables explain the participation of each of the four topics.},
  keywords = {electronic participation, Grand débat, local determinants},
}
@book{isabelle_serca_les_2010,
  author = {Serca, Isabelle},
  title = {Les coutures apparentes de la {Recherche} : {Proust} et la ponctuation},
  series = {Recherches proustiennes},
  publisher = {Honoré Champion},
  location = {Paris},
  date = {2010},
}
@article{canut_sociolinguistique_2000,
  author = {Canut, Cécile},
  title = {De la sociolinguistique à la sociologie du langage : de l'usage des frontières},
  shorttitle = {De la sociolinguistique à la sociologie du langage},
  journaltitle = {Langage et société},
  volume = {91},
  number = {1},
  pages = {89},
  date = {2000},
  issn = {0181-4095, 2101-0382},
  doi = {10.3917/ls.091.0089},
  url = {http://www.cairn.info/revue-langage-et-societe-2000-1-page-89.htm},
  urldate = {2019-07-14},
  langid = {french},
}
@article{abdaoui_feel_2017,
  author = {Abdaoui, Amine and Azé, Jérôme and Bringay, Sandra and Poncelet, Pascal},
  title = {{FEEL}: a French Expanded Emotion Lexicon},
  shorttitle = {{FEEL}},
  journaltitle = {Language Resources and Evaluation},
  volume = {51},
  number = {3},
  pages = {833--855},
  date = {2017-09},
  issn = {1574-020X, 1574-0218},
  doi = {10.1007/s10579-016-9364-5},
  url = {http://link.springer.com/10.1007/s10579-016-9364-5},
  urldate = {2019-07-14},
  langid = {english},
}
@incollection{firth_synopsis_1957,
  author = {Firth, J. R.},
  title = {A synopsis of linguistic theory 1930--55},
  booktitle = {Studies in Linguistic Analysis},
  publisher = {Blackwell},
  location = {Oxford},
  pages = {1--32},
  date = {1957},
  note = {Special volume of the Philological Society},
  abstract = {Reprinted in: Palmer, F. R. (ed.) (1968). Selected Papers of J. R. Firth 1952-59, pages 168-205. Longmans, London.},
  keywords = {classic linguistics meaning relatedness semantic},
}
@book{chomsky_aspects_1969,
  author = {Chomsky, Noam},
  title = {Aspects of the Theory of Syntax},
  publisher = {{MIT} Press},
  date = {1969},
  isbn = {978-0-262-26050-3},
  url = {https://books.google.fr/books?id=u0ksbFqagU8C},
}
@article{grishman_message_1997,
  author = {Grishman, Ralph and Sundheim, Beth},
  title = {Message Understanding Conference-6: A Brief History},
  pagetotal = {6},
  date = {1997},
  langid = {english},
  abstract = {We have recently completed the sixth in a series of "Message Understanding Conferences" which are designed to promote and evaluate research in information extraction. {MUC}-6 introduced several innovations over prior {MUCs}, most notably in the range of different tasks for which evaluations were conducted. We describe some of the motivations for the new format and briefly discuss some of the results of the evaluations.},
}
@article{verdelhan-bourgade_lucien_2020,
  author = {Verdelhan-Bourgade, M.},
  title = {Lucien Tesnière, professeur de linguistique à Montpellier de 1937 à 1954. L’aventure d’une grammaire},
  journaltitle = {Bulletin de l'Académie des sciences et lettres de Montpellier},
  volume = {51},
  number = {4562},
  date = {2020-12-14},
}
@article{flesch_new_1948,
  author = {Flesch, Rudolph},
  title = {A new readability yardstick},
  journaltitle = {Journal of Applied Psychology},
  shortjournal = {Journal of Applied Psychology},
  volume = {32},
  number = {3},
  pages = {221--233},
  date = {1948},
  issn = {1939-1854, 0021-9010},
  doi = {10.1037/h0057532},
  url = {http://doi.apa.org/getdoi.cfm?doi=10.1037/h0057532},
  urldate = {2021-07-08},
  langid = {english},
}
@article{thompson_programming_1968,
  author = {Thompson, Ken},
  title = {Programming Techniques: Regular expression search algorithm},
  shorttitle = {Programming Techniques},
  journaltitle = {Communications of the {ACM}},
  shortjournal = {Commun. {ACM}},
  volume = {11},
  number = {6},
  pages = {419--422},
  date = {1968-06},
  issn = {0001-0782, 1557-7317},
  doi = {10.1145/363347.363387},
  url = {https://dl.acm.org/doi/10.1145/363347.363387},
  urldate = {2021-07-08},
  langid = {english},
  abstract = {A method for locating specific character strings embedded in character text is described and an implementation of this method in the form of a compiler is discussed. The compiler accepts a regular expression as source language and produces an {IBM} 7094 program as object language. The object program then accepts the text to be searched as input and produces a signal every time an embedded string in the text matches the given regular expression. Examples, problems, and solutions are also presented.},
}
@inbook{jakobson_linguistics_1981,
  author = {Jakobson, Roman},
  bookauthor = {Jakobson, Roman},
  title = {Linguistics and Poetics},
  booktitle = {Poetry of Grammar and Grammar of Poetry},
  pages = {18--51},
  publisher = {De Gruyter Mouton},
  date = {1981-12-31},
  isbn = {978-90-279-3178-8},
  doi = {10.1515/9783110802122.18},
  url = {https://www.degruyter.com/document/doi/10.1515/9783110802122.18/html},
  urldate = {2021-07-29},
}
@article{benzecri_analyse_2006,
  author = {Benzécri, Jean-Paul},
  title = {L'analyse de données : Histoire, Bilan, Projets et Perspectives},
  pagetotal = {5},
  date = {2006},
  langid = {french},
}
@report{fodor_survey_2002,
  author = {Fodor, Imola K.},
  title = {A Survey of Dimension Reduction Techniques},
  type = {techreport},
  institution = {Lawrence Livermore National Laboratory},
  number = {UCRL-ID-148494},
  date = {2002-05-09},
  doi = {10.2172/15002155},
  url = {http://www.osti.gov/servlets/purl/15002155-mumfPN/native/},
}
@article{roberts_model_2016,
  author = {Roberts, Margaret E. and Stewart, Brandon M. and Airoldi, Edoardo M.},
  title = {A Model of Text for Experimentation in the Social Sciences},
  journaltitle = {Journal of the American Statistical Association},
  volume = {111},
  number = {515},
  pages = {988--1003},
  date = {2016-07-02},
  doi = {10.1080/01621459.2016.1141684},
  url = {https://www.tandfonline.com/doi/full/10.1080/01621459.2016.1141684},
}
@article{balech_nlp_2019,
  author = {Balech, Sophie and Benavent, Christophe},
  title = {{NLP} text mining V4.0 - une introduction - cours programme doctoral},
  date = {2019},
  doi = {10.13140/RG.2.2.34248.06405},
  url = {http://rgdoi.net/10.13140/RG.2.2.34248.06405},
  urldate = {2021-08-14},
  langid = {english},
  abstract = {The purpose of this chapter is to introduce natural language processing techniques and textual analysis, such as the developments of data mining and linguistics define it, automating it by taking advantage of the distributional properties of language. Largely automated, natural language processing techniques sequence a series of operations from the constitution of the corpus to its annotation, resulting in representation and qualification models. These methods are now widely available through the r and python language libraries. They make it possible to exploit the large corpus that digitisation makes it possible to build: consumer comments, news bases, activity reports, interview reports. The purpose of this text is essentially technical, however without giving any operating method. It indicates generic methods that can be used via r and their context of use. This is a short manual of modern textual analysis. For business research.},
}
@article{beaudouin_retour_2016,
  author = {Beaudouin, Valérie},
  title = {Retour aux origines de la statistique textuelle: Benzécri et l'école française d'analyse des données},
  pagetotal = {21},
  date = {2016},
  langid = {french},
  abstract = {In this article, we have attempted to trace the history of the statistical analysis of textual data, focusing on the influence of Benzécri’s work and school, and to make explicit their theoretical positions, clearly opposed to {AI} and to Chomskyan linguistics. After a presentation of the intellectual project, as an inductive approach to language based on the exploration of corpora, we present the principles of correspondence analysis, which is the main method developed in the Data Analysis School, used for corpus analysis but also for many other types of datasets. Then, we will focus on textual data analysis. Based on the fact that software programmes have played a major role in the use of these statistical techniques, we shall examine a selection of these, display their specificities and their underlying theoretical bases.},
  file = {Beaudouin - 2016 - Retour aux origines de la statistique textuelle B.pdf:C\:\\Users\\33623\\Zotero\\storage\\SPN698W9\\Beaudouin - 2016 - Retour aux origines de la statistique textuelle B.pdf:application/pdf},
}
@article{chen_nonnegative_1984,
  author = {Chen, Ji-Cheng},
  title = {The nonnegative rank factorizations of nonnegative matrices},
  journaltitle = {Linear Algebra and its Applications},
  volume = {62},
  pages = {207--217},
  date = {1984-11},
  issn = {0024-3795},
  doi = {10.1016/0024-3795(84)90096-X},
  url = {https://linkinghub.elsevier.com/retrieve/pii/002437958490096X},
  urldate = {2021-08-03},
  langid = {english},
}
@article{limem_methodes_nodate,
  author = {Limem, Abdelhakim},
  title = {Méthodes informées de factorisation matricielle non-négative. Application à l'identification de sources de particules industrielles},
  pagetotal = {232},
  langid = {french},
}
@online{gillis_why_2014,
  author = {Gillis, Nicolas},
  title = {The Why and How of Nonnegative Matrix Factorization},
  eprinttype = {arXiv},
  eprint = {1401.5226},
  date = {2014-03},
  url = {http://arxiv.org/abs/1401.5226},
  urldate = {2021-08-03},
  langid = {english},
  abstract = {Nonnegative matrix factorization ({NMF}) has become a widely used tool for the analysis of high-dimensional data as it automatically extracts sparse and meaningful features from a set of nonnegative data vectors. We first illustrate this property of {NMF} on three applications, in image processing, text mining and hyperspectral imaging –this is the why. Then we address the problem of solving {NMF}, which is {NP}-hard in general. We review some standard {NMF} algorithms, and also present a recent subclass of {NMF} problems, referred to as near-separable {NMF}, that can be solved efficiently (that is, in polynomial time), even in the presence of noise –this is the how. Finally, we briefly describe some problems in mathematics and computer science closely related to {NMF} via the nonnegative rank.},
  keywords = {Computer Science - Information Retrieval, Computer Science - Machine Learning, Statistics - Machine Learning, Mathematics - Optimization and Control},
}
@article{cazalet_nonnegative_nodate,
  author = {Cazalet, Zélia and Roncalli, Thierry},
  title = {Nonnegative Matrix Factorization and Financial Applications},
  pagetotal = {31},
  langid = {english},
  abstract = {Nonnegative matrix factorization ({NMF}) is a recent tool to analyse multivariate data. It can be compared to other decomposition methods like principal component analysis ({PCA}) or independent component analysis ({ICA}). However, {NMF} differs from them because it requires and imposes the nonnegativity of matrices. In this paper, we use this special feature in order to identify patterns in stock market data. Indeed, we may use {NMF} to estimate common factors from the dynamics of stock prices. In this perspective, we compare {NMF} and clustering algorithms to identify endogenous equity sectors.},
}
@inproceedings{shu_beyond_2019,
  author = {Shu, Kai and Wang, Suhang and Liu, Huan},
  title = {Beyond News Contents: The Role of Social Context for Fake News Detection},
  shorttitle = {Beyond News Contents},
  booktitle = {Proceedings of the Twelfth {ACM} International Conference on Web Search and Data Mining},
  venue = {Melbourne, {VIC}, Australia},
  pages = {312--320},
  publisher = {{ACM}},
  date = {2019-01},
  isbn = {978-1-4503-5940-5},
  doi = {10.1145/3289600.3290994},
  url = {https://dl.acm.org/doi/10.1145/3289600.3290994},
  urldate = {2021-08-03},
  langid = {english},
  abstract = {Social media is becoming popular for news consumption due to its fast dissemination, easy access, and low cost. However, it also enables the wide propagation of fake news, i.e., news with intentionally false information. Detecting fake news is an important task, which not only ensures users receive authentic information but also helps maintain a trustworthy news ecosystem. The majority of existing detection algorithms focus on finding clues from news contents, which are generally not effective because fake news is often intentionally written to mislead users by mimicking true news. Therefore, we need to explore auxiliary information to improve detection. The social context during news dissemination process on social media forms the inherent tri-relationship, the relationship among publishers, news pieces, and users, which has potential to improve fake news detection. For example, partisan-biased publishers are more likely to publish fake news, and low-credible users are more likely to share fake news. In this paper, we study the novel problem of exploiting social context for fake news detection. We propose a tri-relationship embedding framework {TriFN}, which models publisher-news relations and user-news interactions simultaneously for fake news classification. We conduct experiments on two real-world datasets, which demonstrate that the proposed approach significantly outperforms other baseline methods for fake news detection.},
}
@article{evangelopoulos_latent_2012,
  author = {Evangelopoulos, Nicholas and Zhang, Xiaoni and Prybutok, Victor R},
  title = {Latent Semantic Analysis: five methodological recommendations},
  shorttitle = {Latent Semantic Analysis},
  journaltitle = {European Journal of Information Systems},
  volume = {21},
  number = {1},
  pages = {70--86},
  date = {2012-01},
  issn = {0960-085X, 1476-9344},
  doi = {10.1057/ejis.2010.61},
  url = {https://www.tandfonline.com/doi/full/10.1057/ejis.2010.61},
  urldate = {2021-08-09},
  langid = {english},
  abstract = {The recent influx in generation, storage and availability of textual data presents researchers with the challenge of developing suitable methods for their analysis. Latent Semantic Analysis ({LSA}), a member of a family of methodological approaches that offers an opportunity to address this gap by describing the semantic content in textual data as a set of vectors, was pioneered by researchers in psychology, information retrieval, and bibliometrics. {LSA} involves a matrix operation called singular value decomposition, an extension of principal component analysis. {LSA} generates latent semantic dimensions that are either interpreted, if the researcher’s primary interest lies with the understanding of the thematic structure in the textual data, or used for purposes of clustering, categorisation and predictive modelling, if the interest lies with the conversion of raw text into numerical data, as a precursor to subsequent analysis. This paper reviews five methodological issues that need to be addressed by the researcher who will embark on {LSA}. We examine the dilemmas, present the choices, and discuss the considerations under which good methodological decisions are made. We illustrate these issues with the help of four small studies, involving the analysis of abstracts for papers published in the European Journal of Information Systems.},
}
@article{song_genetic_2009,
  author = {Song, Wei and Park, Soon Cheol},
  title = {Genetic algorithm for text clustering based on latent semantic indexing},
  journaltitle = {Computers \& Mathematics with Applications},
  volume = {57},
  number = {11},
  pages = {1901--1907},
  date = {2009-06},
  issn = {0898-1221},
  doi = {10.1016/j.camwa.2008.10.010},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S0898122108005300},
  urldate = {2021-08-09},
  langid = {english},
  abstract = {In this paper, we develop a genetic algorithm method based on a latent semantic model ({GAL}) for text clustering. The main difficulty in the application of genetic algorithms ({GAs}) for document clustering is thousands or even tens of thousands of dimensions in feature space which is typical for textual data. Because the most straightforward and popular approach represents texts with the vector space model ({VSM}), that is, each unique term in the vocabulary represents one dimension. Latent semantic indexing ({LSI}) is a successful technology in information retrieval which attempts to explore the latent semantics implied by a query or a document through representing them in a dimension-reduced space. Meanwhile, {LSI} takes into account the effects of synonymy and polysemy, which constructs a semantic structure in textual data. {GA} belongs to search techniques that can efficiently evolve the optimal solution in the reduced space. We propose a variable string length genetic algorithm which has been exploited for automatically evolving the proper number of clusters as well as providing near optimal data set clustering. {GA} can be used in conjunction with the reduced latent semantic structure and improve clustering efficiency and accuracy. The superiority of {GAL} approach over conventional {GA} applied in {VSM} model is demonstrated by providing good Reuter document clustering results.},
}
@incollection{buntine_variational_2002,
  author = {Buntine, Wray},
  editor = {Elomaa, Tapio and Mannila, Heikki and Toivonen, Hannu},
  title = {Variational Extensions to {EM} and Multinomial {PCA}},
  booktitle = {Machine Learning: {ECML} 2002},
  series = {Lecture Notes in Computer Science},
  volume = {2430},
  pages = {23--34},
  publisher = {Springer Berlin Heidelberg},
  location = {Berlin, Heidelberg},
  date = {2002},
  isbn = {978-3-540-44036-9 978-3-540-36755-0},
  doi = {10.1007/3-540-36755-1_3},
  url = {http://link.springer.com/10.1007/3-540-36755-1_3},
  urldate = {2021-08-09},
  langid = {english},
  abstract = {Several authors in recent years have proposed discrete analogues to principle component analysis intended to handle discrete or positive only data, for instance suited to analyzing sets of documents. Methods include non-negative matrix factorization, probabilistic latent semantic analysis, and latent Dirichlet allocation. This paper begins with a review of the basic theory of the variational extension to the expectation-maximization algorithm, and then presents discrete component finding algorithms in that light. Experiments are conducted on both bigram word data and document bag-of-word to expose some of the subtleties of this new class of algorithms.},
}
@online{hassani_text_2020,
  author = {Hassani, Ali and Iranmanesh, Amir and Mansouri, Najme},
  title = {Text Mining using Nonnegative Matrix Factorization and Latent Semantic Analysis},
  eprinttype = {arXiv},
  eprint = {1911.04705},
  date = {2020-02},
  url = {http://arxiv.org/abs/1911.04705},
  urldate = {2021-08-14},
  langid = {english},
  abstract = {Text clustering is arguably one of the most important topics in modern data mining. Nevertheless, text data require tokenization which usually yields a very large and highly sparse term-document matrix, which is usually difficult to process using conventional machine learning algorithms. Methods such as Latent Semantic Analysis have helped mitigate this issue, but are nevertheless not completely stable in practice. As a result, we propose a new feature agglomeration method based on Nonnegative Matrix Factorization, which is employed to separate the terms into groups, and then each group’s term vectors are agglomerated into a new feature vector. Together, these feature vectors create a new feature space much more suitable for clustering. In addition, we propose a new deterministic initialization for spherical K-Means, which proves very useful for this specific type of data. In order to evaluate the proposed method, we compare it to some of the latest research done in this field, as well as some of the most practiced methods. In our experiments, we conclude that the proposed method either significantly improves clustering performance, or maintains the performance of other methods, while improving stability in results.},
  keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}
@online{vavasis_complexity_2007,
  author = {Vavasis, Stephen A.},
  title = {On the complexity of nonnegative matrix factorization},
  eprinttype = {arXiv},
  eprint = {0708.4149},
  date = {2007-09},
  url = {http://arxiv.org/abs/0708.4149},
  urldate = {2021-08-14},
  langid = {english},
  abstract = {Nonnegative matrix factorization ({NMF}) has become a prominent technique for the analysis of image databases, text databases and other information retrieval and clustering applications. In this report, we define an exact version of {NMF}. Then we establish several results about exact {NMF}: (1) that it is equivalent to a problem in polyhedral combinatorics; (2) that it is {NP}-hard; and (3) that a polynomial-time local search heuristic exists.},
  keywords = {Computer Science - Information Retrieval, G.1.3, H.3.3, Mathematics - Numerical Analysis},
}
@inproceedings{lee_algorithms_nodate,
  author = {Lee, Daniel D. and Seung, H. Sebastian},
  title = {Algorithms for Non-negative Matrix Factorization},
  booktitle = {Advances in Neural Information Processing Systems 13},
  pages = {556--562},
  pagetotal = {7},
  publisher = {{MIT} Press},
  date = {2001},
  langid = {english},
  abstract = {Non-negative matrix factorization ({NMF}) has previously been shown to be a useful decomposition for multivariate data. Two different multiplicative algorithms for {NMF} are analyzed. They differ only slightly in the multiplicative factor used in the update rules. One algorithm can be shown to minimize the conventional least squares error while the other minimizes the generalized Kullback-Leibler divergence. The monotonic convergence of both algorithms can be proven using an auxiliary function analogous to that used for proving convergence of the {ExpectationMaximization} algorithm. The algorithms can also be interpreted as diagonally rescaled gradient descent, where the rescaling factor is optimally chosen to ensure convergence.},
}
@inproceedings{li_documents_2014,
  author = {Li, Le and Yang, Jianjun and Xu, Yang and Qin, Zhen and Zhang, Honggang},
  title = {Documents clustering based on max-correntropy nonnegative matrix factorization},
  booktitle = {2014 International Conference on Machine Learning and Cybernetics},
  venue = {Lanzhou, China},
  pages = {850--855},
  publisher = {{IEEE}},
  date = {2014-07},
  isbn = {978-1-4799-4215-2 978-1-4799-4216-9 978-1-4799-4217-6},
  doi = {10.1109/ICMLC.2014.7009720},
  url = {http://ieeexplore.ieee.org/document/7009720/},
  urldate = {2021-08-14},
  langid = {english},
  abstract = {Nonnegative matrix factorization ({NMF}) has been successfully applied to many areas for classification and clustering. Commonly-used {NMF} algorithms mainly target on minimizing the l2 distance or Kullback-Leibler ({KL}) divergence, which may not be suitable for nonlinear case. In this paper, we propose a new decomposition method by maximizing the correntropy between the original and the product of two low-rank matrices for document clustering. This method also allows us to learn the new basis vectors of the semantic feature space from the data. To our knowledge, we haven’t seen any work has been done by maximizing correntropy in {NMF} to cluster high dimensional document data. Our experiment results show the supremacy of our proposed method over other variants of {NMF} algorithm on Reuters21578 and {TDT}2 databasets.},
}
@article{votte_algorithmes_nodate,
  author = {Févotte, Cédric and Idier, Jérôme},
  title = {Algorithmes de factorisation en matrices non-négatives fondée sur la β-divergence},
  pagetotal = {4},
  langid = {french},
  abstract = {This paper describes algorithms for nonnegative matrix factorization ({NMF}) with the β-divergence (β-{NMF}). The β-divergence is a family of cost functions parametrized by a single shape parameter β that takes the Euclidean distance, the Kullback-Leibler divergence and the Itakura-Saito divergence as special cases (β = 2, 1, 0 respectively). The proposed algorithms are based on a surrogate auxiliary function (an upper bound of the objective function constructed locally). We first describe a majorization-minimization ({MM}) algorithm that leads to multiplicative updates. Then we introduce the concept of majorization-equalization ({ME}) algorithm which produces updates that move along constant level sets of the auxiliary function and lead to larger steps than {MM}. Simulations illustrate the faster convergence of the {ME} approach.},
}
@online{shitov_nonnegative_2017,
  author = {Shitov, Yaroslav},
  title = {The nonnegative rank of a matrix: Hard problems, easy solutions},
  shorttitle = {The nonnegative rank of a matrix},
  eprinttype = {arXiv},
  eprint = {1605.04000},
  date = {2017-12},
  url = {http://arxiv.org/abs/1605.04000},
  urldate = {2021-08-14},
  langid = {english},
  abstract = {Using elementary linear algebra, we develop a technique that leads to solutions of two widely known problems on nonnegative matrices. First, we give a short proof of the result by Vavasis stating that the nonnegative rank of a matrix is {NP}-hard to compute. This proof is essentially contained in the paper by Jiang and Ravikumar, who discussed this topic in different terms fifteen years before the work of Vavasis. Secondly, we present a solution of the Cohen–Rothblum problem on rational nonnegative factorizations, which was posed in 1993 and remained open.},
  keywords = {Computer Science - Computational Complexity, Mathematics - Combinatorics},
}
@article{zurada_nonnegative_2013,
  author = {Zurada, Jacek M. and Ensari, Tolga and Asl, Ehsan Hosseini and Chorowski, Jan},
  title = {Nonnegative Matrix Factorization and Its Application to Pattern Analysis and Text Mining},
  pagetotal = {6},
  date = {2013},
  langid = {english},
  abstract = {Nonnegative Matrix Factorization ({NMF}) is one of the most promising techniques to reduce the dimensionality of the data. This presentation compares the method with other popular matrix decomposition approaches for various pattern analysis tasks. Among others, {NMF} has been also widely applied for clustering and latent feature extraction. Several types of the objective functions have been used for {NMF} in the literature. Instead of minimizing the common Euclidean Distance ({EucD}) error, we review an alternative method that maximizes the correntropy similarity measure to produce the factorization. Correntropy is an entropy-based criterion defined as a nonlinear similarity measure. Following the discussion of maximization of the correntropy function, we use it to cluster document data set and compare the clustering performance with the {EucD}-based {NMF}. Our approach was applied and illustrated for the clustering of documents in the 20-Newsgroups data set. The comparison is illustrated with 20-Newsgroups data set. The results show that our approach produces per average better clustering compared with other methods which use {EucD} as an objective function.},
}
@article{gaujoux_generating_nodate,
  author = {Gaujoux, Renaud},
  title = {Generating heatmaps for Nonnegative Matrix Factorization},
  pagetotal = {12},
  langid = {english},
  abstract = {This vignette describes how to produce different informative heatmaps from {NMF} objects, such as returned by the function nmf in the {NMF} package (Gaujoux et al. 2010). The main drawing engine is implemented by the function aheatmap, which is a highly enhanced modification of the function pheatmap from the pheatmap package, and provides convenient and quick ways of producing high quality and customizable annotated heatmaps. Currently this function is part of the package {NMF}, but may eventually compose a separate package on its own.},
}
@article{gaujoux_flexible_2010,
  author = {Gaujoux, Renaud and Seoighe, Cathal},
  title = {A flexible R package for nonnegative matrix factorization},
  journaltitle = {{BMC} Bioinformatics},
  volume = {11},
  number = {1},
  pages = {367},
  date = {2010-12},
  issn = {1471-2105},
  doi = {10.1186/1471-2105-11-367},
  url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-367},
  urldate = {2021-08-15},
  langid = {english},
}
@collection{fellbaum_wordnet_1998,
  editor = {Fellbaum, Christiane},
  title = {{WordNet}: an electronic lexical database},
  shorttitle = {{WordNet}},
  series = {Language, speech, and communication},
  publisher = {{MIT} Press},
  location = {Cambridge, Mass},
  date = {1998},
  isbn = {978-0-262-06197-1},
  keywords = {Data processing, English language, Lexicology, Semantics, {WordNet}},
}
@article{balech_masque_2022,
  author = {Balech, Sophie and Calciu, Michel and Monnot, Julien and Benavent, Christophe},
  title = {Le masque, figure polaire de la crise de la Covid-19 : une exploration par {NLP} du flux des conversations Twitter (février - mai 2020)},
  shorttitle = {Le masque, figure polaire de la crise de la Covid-19},
  journaltitle = {Marché et organisations},
  volume = {43},
  number = {1},
  pages = {151--187},
  date = {2022-02-11},
  issn = {1953-6119},
  doi = {10.3917/maorg.043.0151},
  url = {https://www.cairn.info/revue-marche-et-organisations-2022-1-page-151.htm?ref=doi},
  urldate = {2022-04-27},
  langid = {french},
  file = {Balech et al. - 2022 - Le masque, figure polaire de la crise de la Covid-.pdf:C\:\\Users\\33623\\Zotero\\storage\\MX6LJX55\\Balech et al. - 2022 - Le masque, figure polaire de la crise de la Covid-.pdf:application/pdf},
}
@article{mudambi_research_2010,
  author = {Mudambi, Susan M. and Schuff, David},
  title = {Research Note: What Makes a Helpful Online Review? A Study of Customer Reviews on Amazon.com},
  shorttitle = {Research Note},
  journaltitle = {{MIS} Quarterly},
  volume = {34},
  number = {1},
  pages = {185--200},
  date = {2010},
  issn = {0276-7783},
  doi = {10.2307/20721420},
  url = {https://www.jstor.org/stable/10.2307/20721420},
  urldate = {2019-10-20},
  file = {Mudambi et Schuff - 2010 - Research Note What Makes a Helpful Online Review.pdf:C\:\\Users\\33623\\Zotero\\storage\\URTVSV5X\\Mudambi et Schuff - 2010 - Research Note What Makes a Helpful Online Review.pdf:application/pdf},
}
@article{nielek_spiral_2010,
  author = {Nielek, Radoslaw and Wawer, Aleksander and Wierzbicki, Adam},
  title = {Spiral of hatred: social effects in Internet auctions. Between informativity and emotion},
  shorttitle = {Spiral of hatred},
  journaltitle = {Electronic Commerce Research},
  volume = {10},
  number = {3},
  pages = {313--330},
  date = {2010-12},
  issn = {1389-5753, 1572-9362},
  doi = {10.1007/s10660-010-9058-9},
  url = {http://link.springer.com/10.1007/s10660-010-9058-9},
  urldate = {2019-10-21},
  langid = {english},
  file = {Nielek et al. - 2010 - Spiral of hatred social effects in Internet aucti.pdf:C\:\\Users\\33623\\Zotero\\storage\\AKBJ66NM\\Nielek et al. - 2010 - Spiral of hatred social effects in Internet aucti.pdf:application/pdf},
}
@article{dailey_retelling_2014,
  author = {Dailey, Stephanie L. and Browning, Larry},
  title = {Retelling Stories in Organizations: Understanding the Functions of Narrative Repetition},
  shorttitle = {Retelling Stories in Organizations},
  journaltitle = {Academy of Management Review},
  volume = {39},
  number = {1},
  pages = {22--43},
  date = {2014-01},
  issn = {0363-7425, 1930-3807},
  doi = {10.5465/amr.2011.0329},
  url = {http://journals.aom.org/doi/10.5465/amr.2011.0329},
  urldate = {2022-07-11},
  langid = {english},
  file = {Dailey et Browning - 2014 - Retelling Stories in Organizations Understanding .pdf:C\:\\Users\\33623\\Zotero\\storage\\634GUVKH\\Dailey et Browning - 2014 - Retelling Stories in Organizations Understanding .pdf:application/pdf},
}
@article{rathore_pre-_2020,
  author = {Rathore, Ashish Kumar and Ilavarasan, P. Vigneswara},
  title = {Pre- and post-launch emotions in new product development: Insights from twitter analytics of three products},
  shorttitle = {Pre- and post-launch emotions in new product development},
  journaltitle = {International Journal of Information Management},
  volume = {50},
  pages = {111--127},
  date = {2020-02},
  issn = {0268-4012},
  doi = {10.1016/j.ijinfomgt.2019.05.015},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S0268401218310508},
  urldate = {2022-07-11},
  langid = {english},
  abstract = {The paper showcases the possible application of social media analytics in new product development ({NPD}). It compares users’ emotions before and after the launch of three new products in the market—a pizza, a car and a smart phone—for possible inputs for {NPD}. The user-generated content offers an alternative to conventional survey data and is cross-cultural in nature, relatively inexpensive and provides real-time information about user behaviour. A total of 302,632 tweets that mentioned the three new products before and after the launch were collected and analysed. Sentiment analysis of the tweets from two time periods was conducted and compared. The users’ responses to the pre- and post-launch of three products vary. The dissatisfaction with the new products represented by negative emotions aligns with the market performance. In the pre-launch period, trust and joy were more common for pizza, joy was more common for the car, and trust was more common for the phone. In the post-launch period, anger and disgust were more common for pizza, joy and trust were more common for the car, and joy was more common for only one aspect of the phone. Further analysis showed that for the car and the phone, firms need to focus on user attitudes towards product attributes, whereas for pizza, firms should concentrate on physiological changes, i.e., changes in product attributes, service and promotional sides. By using the proposed alternative approach, businesses can obtain real-time feedback about the expectations and experiences of the new products. The {NPD} process can be adjusted accordingly.},
  file = {Rathore et Ilavarasan - 2020 - Pre- and post-launch emotions in new product devel.pdf:C\:\\Users\\33623\\Zotero\\storage\\7TWWJDTA\\Rathore et Ilavarasan - 2020 - Pre- and post-launch emotions in new product devel.pdf:application/pdf},
}
@article{elsafoury_teargas_nodate,
  author = {Elsafoury, Fatma},
  title = {Teargas, Water Cannons and Twitter: A case study on detecting protest repression events in Turkey 2013},
  pagetotal = {9},
  langid = {english},
  abstract = {Since the Arab spring in 2011, protests have been spreading around the world for different reasons, often these protests are faced with violent repression. Studying protest repression requires appropriate datasets. Existing datasets like {GDELT} focus mainly on events reported in news media. However, news media reports have issues including censorship and coverage bias. Recently, social scientists have started using Machine Learning ({ML}) to detect political events, but it is costly and time consuming to hand label data for training {ML} models. This paper proposes using {ML} and crowdsourcing to detect protest repression events from Twitter. Our case study is the Turkish Gezi Park protest in 2013. Our results show that Twitter is a reliable source reflecting events happening on the ground as soon as they happen. Moreover, training conventional {ML} models on crowdsourced labelled data gave good results with an {AUC} score of 0.896 to detect protest events and 0.8189 to detect repression events.},
  file = {Elsafoury - Teargas, Water Cannons and Twitter A case study o.pdf:C\:\\Users\\33623\\Zotero\\storage\\UUML6D76\\Elsafoury - Teargas, Water Cannons and Twitter A case study o.pdf:application/pdf},
}
% Kang et al. (2020): literature review of natural language processing in management research, Journal of Management Analytics 7(2).
@article{kang_natural_2020,
  author       = {Kang, Yue and Cai, Zhao and Tan, Chee-Wee and Huang, Qian and Liu, Hefu},
  title        = {Natural language processing ({NLP}) in management research: A literature review},
  shorttitle   = {Natural language processing ({NLP}) in management research},
  journaltitle = {Journal of Management Analytics},
  volume       = {7},
  number       = {2},
  pages        = {139--172},
  date         = {2020-04},
  doi          = {10.1080/23270012.2020.1756939},
  url          = {https://www.tandfonline.com/doi/full/10.1080/23270012.2020.1756939},
  urldate      = {2022-07-11},
  issn         = {2327-0012, 2327-0039},
  langid       = {english},
  file         = {Kang et al. - 2020 - Natural language processing (NLP) in management re.pdf:C\:\\Users\\33623\\Zotero\\storage\\DI3CY33N\\Kang et al. - 2020 - Natural language processing (NLP) in management re.pdf:application/pdf},
}
% Kowalski, Esteve and Jankin Mikhaylov (2020): topic-model analysis of citizen feedback on primary care practices in England, Public Administration 98(4).
@article{kowalski_improving_2020,
title = {Improving public services by mining citizen feedback: An application of natural language processing},
volume = {98},
issn = {0033-3298, 1467-9299},
url = {https://onlinelibrary.wiley.com/doi/10.1111/padm.12656},
doi = {10.1111/padm.12656},
shorttitle = {Improving public services by mining citizen feedback},
abstract = {Research on user satisfaction has increased substantially in recent years. To date, most studies have tested the significance of predefined factors thought to influence user satisfaction, with no scalable means of verifying the validity of their assumptions. Digital technology has created new methods of collecting user feedback where service users post comments. As topic models can analyse large volumes of feedback, they have been proposed as a feasible approach to aggregating user opinions. This novel approach has been applied to process reviews of primary care practices in England. Findings from an analysis of more than 200,000 reviews show that the quality of interactions with staff and bureaucratic exigencies are the key drivers of user satisfaction. In addition, patient satisfaction is strongly influenced by factors that are not measured by state-of-the-art patient surveys. These results highlight the potential benefits of text mining and machine learning for public administration.},
pages = {1011--1026},
number = {4},
journaltitle = {Public Administration},
author = {Kowalski, Radoslaw and Esteve, Marc and Jankin Mikhaylov, Slava},
urldate = {2022-07-11},
date = {2020-12},
langid = {english},
file = {Kowalski et al. - 2020 - Improving public services by mining citizen feedba.pdf:C\:\\Users\\33623\\Zotero\\storage\\4ERVZS9Z\\Kowalski et al. - 2020 - Improving public services by mining citizen feedba.pdf:application/pdf},
}
@article{anastasopoulos_computational_nodate,
title = {Computational Text Analysis for Public Management Research: An Annotated Application to County Budgets},