stringr包的主要功能是處理字符串,對字符串進行分割、連接、轉換等操作。
install.packages('stringr')
library(stringr)
1、字符串拼接函數
str_c(..., sep = "", collapse = NULL),與str_join完全相同(str_join為舊版別名,新版stringr已棄用),與paste()行為不完全一致
參數:
sep: 把多個字符串拼接為一個大的字符串,用于字符串的分割符
collapse: 把多個向量參數拼接為一個大的字符串,用于字符串的分割符
#把多個字符串拼接為一個大的字符串
> str_c('a','b')
[1] "ab"
> str_c('a','b',sep='-')
[1] "a-b"
> str_c(c('a','a1'),c('b','b1'),sep='-')
[1] "a-b" "a1-b1"
#把多個向量參數拼接為一個大的字符串
> str_c(head(letters), collapse = "")
[1] "abcdef"
> str_c(head(letters), collapse = ", ")
[1] "a, b, c, d, e, f"
#collapse參數,對多個字符串無效
> str_c('a','b',collapse = "-")
[1] "ab"
> str_c(c('a','a1'),c('b','b1'),collapse='-')
[1] "ab-a1b1"
#拼接有NA值的字符串向量時,NA還是NA
> str_c(c("a", NA, "b"), "-d")
[1] "a-d" NA "b-d"
#對比str_c()函數和paste()函數之間的不同點
> str_c('a','b')
[1] "ab"
> paste('a','b') #多字符串拼接,默認的sep參數行為不一致
[1] "a b"
> str_c(c("a", NA, "b"), "-d")
[1] "a-d" NA "b-d"
> paste(c("a", NA, "b"), "-d") #拼接有NA值的字符串向量,對NA的處理行為不一致
[1] "a -d" "NA -d" "b -d"
2、去掉字符串的空格和TAB(\t)
str_trim(string, side = c("both", "left", "right"))
string: 字符串,字符串向量
side: 過濾方式,both兩邊都過濾,left左邊過濾,right右邊過濾
#只過濾左邊的空格
> str_trim(" left space\t\n",side='left')
[1] "left space\t\n"
#只過濾右邊的空格
> str_trim(" left space\t\n",side='right')
[1] " left space"
#過濾兩邊的空格
> str_trim(" left space\t\n",side='both')
[1] "left space"
> str_trim("\nno space\n\t")
[1] "no space"
3、補充字符串的長度
str_pad(string, width, side = c("left", "right", "both"), pad = " ")
string: 字符串,字符串向量
width: 字符串填充后的長度
side: 填充方向,both兩邊都填充,left左邊填充,right右邊填充
pad: 用于填充的字符
#從左邊補充空格,直到字符串長度為20
> str_pad("conan", 20, "left")
[1] "               conan"
#從右邊補充空格,直到字符串長度為20
> str_pad("conan", 20, "right")
[1] "conan               "
#從左右兩邊各補充空格,直到字符串長度為20
> str_pad("conan", 20, "both")
[1] "       conan        "
#從左右兩邊各補充x字符,直到字符串長度為20
> str_pad("conan", 20, "both",'x')
[1] "xxxxxxxconanxxxxxxxx"
4、復制字符串
str_dup(string, times)
參數:
string: 字符串,字符串向量
times: 復制數量
#復制一個字符串向量
> val <- c("abca4", 123, "cba2")
> str_dup(val, 2) #復制2次
[1] "abca4abca4" "123123" "cba2cba2"
> str_dup(val, 1:3) #按位置復制
[1] "abca4" "123123" "cba2cba2cba2"
5、截取字符串
str_sub(string, start = 1L, end = -1L)
參數:
string: 字符串,字符串向量
start : 開始位置
end : 結束位置
> txt <- "I am Conan."
#截取1-4的索引位置的字符串
> str_sub(txt, 1, 4)
[1] "I am"
#截取1-6的索引位置的字符串
> str_sub(txt, end=6)
[1] "I am C"
#截取6到結束的索引位置的字符串
> str_sub(txt, 6)
[1] "Conan."
#分2段截取字符串
> str_sub(txt, c(1, 4), c(6, 8))
[1] "I am C" "m Con"
#通過負坐標截取字符串
> str_sub(txt, -3)
[1] "an."
> str_sub(txt, end = -3)
[1] "I am Cona"
#對截取的字符串進行賦值
> x <- "AAABBBCCC"
> str_sub(x, 1, 1) <- 1; x #在字符串的1的位置賦值為1
[1] "1AABBBCCC"
> str_sub(x, 2, -2) <- "2345"; x #在字符串從2到-2的位置賦值為2345
[1] "12345C"
6、字符串計算函數
str_count(string, pattern = "")
參數:
string: 字符串,字符串向量
pattern: 匹配的字符
#對字符串中匹配的字符計數
> str_count('aaa444sssddd', "a")
[1] 3
#對字符串向量中匹配的字符計數
> fruit <- c("apple", "banana", "pear", "pineapple")
> str_count(fruit, "a")
[1] 1 3 1 1
> str_count(fruit, "p")
[1] 2 0 1 3
#對字符串中的'.'字符計數
> str_count(c("a.", ".", ".a.",NA), ".") #由于.是正則表達式的匹配符,直接判斷計數的結果是不對的
[1] 2 1 3 NA
> str_count(c("a.", ".", ".a.",NA), fixed(".")) #用fixed匹配字符
[1] 1 1 2 NA
> str_count(c("a.", ".", ".a.",NA), "\\.") #用\\匹配字符
[1] 1 1 2 NA
7、字符串長度
str_length(string)
參數:
string: 字符串,字符串向量
#計算字符串的長度
> str_length(c("I", "am", "張丹", NA))
[1] 1 2 2 NA
8、字符串值排序,同str_order索引排序
str_sort(x, decreasing = FALSE, na_last = TRUE, locale = "", ...)
參數:
x: 字符串,字符串向量
decreasing: 排序方向
na_last:NA值的存放位置,一共3個值,TRUE放到最后,FALSE放到最前,NA過濾處理
locale:按哪種語言習慣排序
#按ASCII字母排序
> str_sort(c('a',1,2,'11'), locale = "en")
[1] "1" "11" "2" "a"
#倒序排序
> str_sort(letters,decreasing=TRUE)
[1] "z" "y" "x" "w" "v" "u" "t" "s" "r" "q" "p" "o" "n" "m" "l" "k" "j" "i" "h" "g" "f" "e" "d" "c" "b" "a"
#按拼音排序
> str_sort(c('你','好','粉','絲','日','志'),locale = "zh")
[1] "粉" "好" "你" "日" "絲" "志"
#對NA值排序
> str_sort(c(NA,'1',NA),na_last=TRUE) #把NA放最后面
[1] "1" NA NA
> str_sort(c(NA,'1',NA),na_last=FALSE) #把NA放最前面
[1] NA NA "1"
> str_sort(c(NA,'1',NA),na_last=NA) #去掉NA值
[1] "1"
9、字符串分割,同str_split_fixed
str_split(string, pattern, n = Inf)
參數:
string: 字符串,字符串向量
pattern: 匹配的字符
n: 分割個數
> val <- "abc,123,234,iuuu"
#以,進行分割
> str_split(val, ",")
[[1]]
[1] "abc" "123" "234" "iuuu"
#以,進行分割,保留2塊
> str_split(val, ",",2)
[[1]]
[1] "abc" "123,234,iuuu"
#用str_split_fixed()函數分割,結果類型是matrix
> str_split_fixed(val, ",",2)
[,1] [,2]
[1,] "abc" "123,234,iuuu"
10、返回的匹配字符串
str_subset(string, pattern)
參數:
string: 字符串,字符串向量
pattern: 匹配的字符
> val <- c("abc", 123, "cba")
#全文匹配
> str_subset(val, "a")
[1] "abc" "cba"
#開頭匹配
> str_subset(val, "^a")
[1] "abc"
#結尾匹配
> str_subset(val, "a$")
[1] "cba"
11、從文本中提取單詞
word(string, start = 1L, end = start, sep = fixed(" "))
參數:
string: 字符串,字符串向量
start: 開始位置
end: 結束位置
sep: 匹配字符
#默認以空格分割,取第一個位置的字符串
> val <- c("I am Conan.", "http://fens.me, ok")
> word(val, 1)
[1] "I" "http://fens.me,"
> word(val, -1)
[1] "Conan." "ok"
> word(val, 2, -1)
[1] "am Conan." "ok"
#以,分割,取第一個位置的字符串
> val<-'111,222,333,444'
> word(val, 1, sep = fixed(','))
[1] "111"
> word(val, 3, sep = fixed(','))
[1] "333"
12、匹配字符串的字符
str_detect(string, pattern)
參數:
string: 字符串,字符串向量
pattern: 匹配字符
> val <- c("abca4", 123, "cba2")
#檢查字符串向量,是否包括a
> str_detect(val, "a")
[1] TRUE FALSE TRUE
#檢查字符串向量,是否以a為開頭
> str_detect(val, "^a")
[1] TRUE FALSE FALSE
#檢查字符串向量,是否以a為結尾
> str_detect(val, "a$")
[1] FALSE FALSE FALSE
13、從字符串中提取匹配組
str_match(string, pattern)
str_match_all(string, pattern)
參數:
string: 字符串,字符串向量
pattern: 匹配字符
> val <- c("abc", 123, "cba")
#匹配字符a,并返回對應的字符
> str_match(val, "a")
[,1]
[1,] "a"
[2,] NA
[3,] "a"
#匹配字符0-9,限1個,并返回對應的字符
> str_match(val, "[0-9]")
[,1]
[1,] NA
[2,] "1"
[3,] NA
#匹配字符0-9,不限數量,并返回對應的字符
> str_match(val, "[0-9]*")
[,1]
[1,] ""
[2,] "123"
[3,] ""
#從字符串中提取匹配組,以字符串matrix格式返回
> str_match_all(val, "a")
[[1]]
[,1]
[1,] "a"
[[2]]
[,1]
[[3]]
[,1]
[1,] "a"
> str_match_all(val, "[0-9]")
[[1]]
[,1]
[[2]]
[,1]
[1,] "1"
[2,] "2"
[3,] "3"
[[3]]
[,1]
14、字符串替換
str_replace(string, pattern, replacement)
參數:
string: 字符串,字符串向量
pattern: 匹配字符
replacement: 用于替換的字符
> val <- c("abc", 123, "cba")
#把目標字符串第一個出現的a或b,替換為-
> str_replace(val, "[ab]", "-")
[1] "-bc" "123" "c-a"
#把目標字符串所有出現的a或b,替換為-
> str_replace_all(val, "[ab]", "-")
[1] "--c" "123" "c--"
#把目標字符串所有出現的a,替換為被轉義的字符
> str_replace_all(val, "[a]", "\1\1")
[1] "\001\001bc" "123" "cb\001\001"
15、字符串大小寫轉換
str_to_upper(string, locale = "")
str_to_lower(string, locale = "")
str_to_title(string, locale = "")
參數:
string: 字符串
locale:按哪種語言習慣轉換大小寫
> val <- "I am conan. Welcome to my blog! http://fens.me"
> str_to_upper(val)
[1] "I AM CONAN. WELCOME TO MY BLOG! HTTP://FENS.ME"
> str_to_lower(val)
[1] "i am conan. welcome to my blog! http://fens.me"
> str_to_title(val)
[1] "I Am Conan. Welcome To My Blog! Http://Fens.me"








暫無數據