-
Notifications
You must be signed in to change notification settings - Fork 0
/
ch2_strings.R
132 lines (95 loc) · 2.84 KB
/
ch2_strings.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# ch2_strings
# Single-quoted literals create character values.
s1 <- 'c'
typeof(s1)
s2 <- 'learning R'
typeof(s2)

# Double-quoted literals create character values as well; the same
# names are reassigned here.
s1 <- "C"
typeof(s1)
s2 <- "learning r"
typeof(s2)

# Number of characters in the string.
nchar(s2)
# Character vector holding the days of the week.
days <- c(
  "monday", "tuesday", "wednesday", "thursday",
  "friday", "saturday", "sunday"
)
# Total number of elements in the vector.
length(days)
# Number of characters in each element of the vector.
nchar(days)
print(days)

# Convert the character vector into categorical data (a factor).
days1 <- as.factor(days)
days1
levels(days1)
nlevels(days1)
# Compact description of the factor's structure.
str(days1)

# Accessing elements of the character vector.
days[1]        # 1st element
days[1:3]      # first 3 elements
days[c(2, 5)]  # 2nd and 5th elements
# Starting from the 1st element, get every alternate element without
# hard-coding the vector length.
days[seq(1, length(days), by = 2)]

# Concatenating strings.
str1 <- "my name is"
str2 <- "sriraman"
paste(str1, str2, sep = " ")

# Handling NA in a character vector: append three missing values.
days <- c(days, NA, NA, NA)
days
# Get the actual number of elements (excluding NA).
# Method 1: total length minus the number of NA entries.
length(days) - length(days[is.na(days)])
# Method 2: length of the subset that is not NA.
length(days[!is.na(days)])
# Method 3: sum the logical mask directly (each TRUE counts as 1).
sum(!is.na(days))
# install.packages("stringi",dependencies = T)
# install.packages("stringr",dependencies = T)
library(stringi)
library(stringr)
# String operations on a value padded with one space on each side.
str3 <- " R programming "

# Character count: stringr's str_length() next to base nchar().
str_length(str3)
nchar(str3)

# Strip the leading and trailing whitespace.
str_trim(str3, side = "both")

# Upper-case version of the trimmed string.
toupper(str_trim(str3, side = "both"))

# Lower-case version of the untrimmed string.
tolower(str3)
# Pattern matching over a vector of lower-case words.
lov <- c(
  "jayesh", "sanjay", "delhi", "agra",
  "jayaram", "pune", "mouse", "pointer"
)
lov
# Elements that contain the lower-cased pattern anywhere in the string.
grep(tolower("Jay"), lov, value = TRUE)
# Exact match: positions of elements equal to the whole string "jay".
which(lov %in% "jay")
# Word count.
para1 <- "machine learning is making predictions. machine does not learn on its own. a machine does not think. machine is like a black box"
# Count the number of occurrences of the pattern "machine" in the text.
ctr <- str_count(para1, pattern = "machine")
print(paste("the word 'machine' occurs", ctr, "times", sep = " "))

# Prefix checks: TRUE when the string begins with the given text.
startsWith(para1, "train")  # FALSE
startsWith(para1, "mac")    # TRUE
# Suffix checks: TRUE when the string ends with the given text.
endsWith(para1, "machine")  # FALSE
endsWith(para1, "box")      # TRUE

# Substring: extract a portion of the string by character position.
substr(para1, 1, 10)   # positions 1 through 10
substr(para1, 20, 35)  # positions 20 through 35
# Character translation.
sent2 <- "An Apple A Day keeps the Doctor Away"
# Translate every upper-case "A" to "E"; lower-case "a" is left as is.
sent2 <- chartr(old = "A", new = "E", x = sent2)
sent2

# Find and replace a word in a sentence.
sent3 <- "the house is very old. the house owner does not stay here. this house was last used 10 years back."
# Replace every occurrence of "house" with "property".
gsub(pattern = "house", replacement = "property", x = sent3)
# Split one delimited record into its individual fields.
rec <- "194~sriraman~24~data scientist~pune"
strsplit(rec, split = "~", fixed = TRUE)
# Generate 10 random strings of 5 characters each, drawn from the
# character class of ASCII letters and digits.
stri_rand_strings(n = 10, length = 5, pattern = "[A-Za-z0-9]")
# Logical vector. TRUE and FALSE are spelled out because the shorthands
# T and F are ordinary variables that can be reassigned.
flag <- c(TRUE, FALSE)
flag[1]
flag[2]