-
Notifications
You must be signed in to change notification settings - Fork 0
/
Preprocessing.m
53 lines (39 loc) · 1.32 KB
/
Preprocessing.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
%% Preprocessing data
% Builds the input/target matrices for the models from the per-news
% '<id>-meta.txt' files located in the current folder.
%
% Outputs left in the workspace:
%   metaInputsFake  - one column per imported news item
%   metaTargetsFake - matching target columns, [0; 1] for every item
%   metaInputs      - [metaInputsTrue metaInputsFake]
%   metaTargets     - [metaTargetsTrue metaTargetsFake]
%
% NOTE(review): metaInputsTrue / metaTargetsTrue are not produced by this
% pass. The original workflow appears to be: run the loop once with the
% "True" assignments (target [1; 0]) enabled, then run it again for the
% "Fake" set - confirm against how the script is actually used.

% Highest news id to look for.
totalNews = 3602;

% Ids of news that for some reason don't exist and must be skipped.
% The original code deleted N(697) and then N(1467); because the first
% deletion shifts every later element by one, the second id actually
% removed was 1468. The ids are written out explicitly here so the
% behaviour is visible.
% NOTE(review): if 1467 was the intended id, change it here.
missingIds = [697 1468];

% setdiff returns the remaining ids in ascending order.
newsIds = setdiff(1:totalNews, missingIds);
numNews = numel(newsIds);

% Columns are indexed by position in newsIds rather than by news id, so
% skipped ids do not leave all-zero columns in the inputs or [0; 0]
% columns in the targets.
fakeColumns = cell(1, numNews);
for k = 1:numNews
    % importfile opens the txt file and returns its contents as a
    % MATLAB variable; each result is stored as one column.
    features = importfile(sprintf('%d-meta.txt', newsIds(k)));
    fakeColumns{k} = reshape(features, [], 1);
end

% metaInputsFake will be the input for the models.
metaInputsFake = [fakeColumns{:}];
% metaTargetsFake will be the targets of the neural network.
metaTargetsFake = repmat([0; 1], 1, numNews);

% The "True" set has to be in the workspace already; fail with a clear
% message instead of an undefined-variable error.
if ~exist('metaInputsTrue', 'var') || ~exist('metaTargetsTrue', 'var')
    error('Preprocessing:missingTrueSet', ...
        ['metaInputsTrue and/or metaTargetsTrue are not defined in the ' ...
         'workspace. Build the "True" set first, then run this script ' ...
         'again.']);
end

metaInputs = [metaInputsTrue metaInputsFake];
metaTargets = [metaTargetsTrue metaTargetsFake];
%metaInputs data:
% number of tokens good
% number of words without punctuation good
% number of types good
% number of links inside the news
% number of words in upper case
% number of verbs discard
% number of subjunctive and imperative verbs
% number of nouns discard
% number of adjectives discard
% number of adverbs discard
% number of modal verbs (mainly auxiliary verbs)
% number of singular first and second personal pronouns
% number of plural first personal pronouns
% number of pronouns discard
% pausality discard
% number of characters
% average sentence length
% average word length
% percentage of news with spelling errors good
% emotiveness
% diversity