- 取得連結
- X
- 以電子郵件傳送
- 其他應用程式
程式語言:R
官網
官方文件
簡介:基本資料架構
character > complex > numeric > integer > logical
宣告
得值
宣告
得值
宣告
得值
宣告
運用
宣告
得值
建立時若為 vector 長度需一致,若為 matrix row 長度需一致
宣告
得值
Data Types
官網
官方文件
簡介:基本資料架構
基本
- 所有 index 皆是從 1 開始,而不是 0
- class(x)
- 判斷 x 架構,像是 matrix
- mode(x)
- 判斷 x 內含的資料屬性,像是 numeric
- typeof(x)
- 判斷更精確的 x 內含資料屬性,像是 double
- attributes(x)
- 得到 x 的屬性,像是 dim
資料屬性
- character
- 字串
- class("test")
- numeric
- 實數
- R 計算上皆是實數計算,除非特別轉換
- class(10)
- integer
- 整數
- class(1:10)
- class(as.integer(10))
- complex
- 複數
- class(2+2i)
- logical
- TRUE 或 FALSE(R 區分大小寫,須全大寫)
- class(TRUE)
- raw
- bytes
- class(charToRaw('A'))
Assignment
# Four ways to bind the same vector to the name `x`.

# Preferred form: leftward arrow.
x <- c(1, 2, 3)

# The rightward arrow also works (value first, name last).
c(1, 2, 3) -> x

# Functional form: the target name is passed as a string.
assign("x", c(1, 2, 3))

# `=` behaves like `<-` in most situations, but is not recommended.
x = c(1, 2, 3)
Vector
vector 內的元素必定是同一種資料屬性;混用時會自動轉換,依以下強弱順序:character > complex > numeric > integer > logical
宣告
# Different ways to construct a vector. Each statement rebinds `v`.

# With c()
v <- c(1, 3, 5)

# With seq(): 1, 3, 5
v <- seq(1, 5, by = 2)

# With the colon operator: 5, 4, 3, 2, 1
v <- 5:1

# With rep(): five copies of 1
v <- rep(1, times = 5)

# Logical vector from a comparison
v <- c(1, 3, 5) > 3

# Logical vector from is.na()
v <- is.na(c(1:3, NA))

# Character vector; the shorter argument is recycled against 1:5.
# paste0() is the idiomatic form of paste(..., sep = "").
v <- paste0(c("x", "y"), 1:5)
得值
# Build a named numeric vector, then demonstrate the indexing forms.

# Setup: five odd numbers plus one NA, named "A".."F".
v <- c(seq(1, 10, by = 2), NA)
vLen <- length(v)
# LETTERS gives the same names as the raw-byte round trip
# (0x41 is "A") without the as.raw/rawToChar/strsplit/unlist chain.
vNames <- LETTERS[seq_len(vLen)]
names(v) <- vNames
print(v)
#  A  B  C  D  E  F
#  1  3  5  7  9 NA

# Indices start at 1, not 0; out-of-range positions yield NA.
v[1:10]
#    A    B    C    D    E    F <NA> <NA> <NA> <NA>
#    1    3    5    7    9   NA   NA   NA   NA   NA

# Logical mask: non-missing values greater than 5.
v[!is.na(v) & v > 5]
# D E
# 7 9

# Negative indices exclude: drop the first two elements.
v[-(1:2)]
#  C  D  E  F
#  5  7  9 NA

# Index by name.
v[c("A", "D")]
# A D
# 1 7

# Negate every value greater than 3 (NA is left untouched).
index <- !is.na(v) & v > 3
v[index] <- -v[index]
print(v)
#  A  B  C  D  E  F
#  1  3 -5 -7 -9 NA
Array
宣告
# 將 z 轉換為三維陣列,大小為 2x3x4 z <- 1:24 dim(z) <- c(2,3,4) # 初始值為 0 的 2x3 陣列 array(0, c(2,3))
得值
# Create a three-dimensional 2x3x4 array; values fill column-first.
z <- array(1:24, dim = c(2, 3, 4))
# , , 1
#
#      [,1] [,2] [,3]
# [1,]    1    3    5
# [2,]    2    4    6
#
# , , 2
#
#      [,1] [,2] [,3]
# [1,]    7    9   11
# [2,]    8   10   12
#
# , , 3
#
#      [,1] [,2] [,3]
# [1,]   13   15   17
# [2,]   14   16   18
#
# , , 4
#
#      [,1] [,2] [,3]
# [1,]   19   21   23
# [2,]   20   22   24

# The single value at position (1, 2, 3).
z[1, 2, 3]
# [1] 15

# Every value whose first index is 2; the result is 3x4.
z[2, , ]
#      [,1] [,2] [,3] [,4]
# [1,]    2    8   14   20
# [2,]    4   10   16   22
# [3,]    6   12   18   24
Matrix
二維 array 即是 matrix,兩者 class 相同(R 4.0 起為 c("matrix", "array"))
宣告
# A two-dimensional array and a matrix are the same kind of object.
a <- array(0, dim = c(2, 3))
#      [,1] [,2] [,3]
# [1,]    0    0    0
# [2,]    0    0    0
m <- matrix(0, nrow = 2, ncol = 3)
#      [,1] [,2] [,3]
# [1,]    0    0    0
# [2,]    0    0    0

# identical() yields a single TRUE. Since R 4.0 class() of a matrix is
# c("matrix", "array"), so `==` would return a length-2 logical vector.
identical(class(a), class(m))
# [1] TRUE

# Data fills column by column by default.
matrix(1:4, nrow = 2, ncol = 2)
#      [,1] [,2]
# [1,]    1    3
# [2,]    2    4

# Fill row by row instead.
matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE)
#      [,1] [,2]
# [1,]    1    2
# [2,]    3    4

# Row and column names supplied at construction time.
matrix(1:4, nrow = 2, ncol = 2,
       dimnames = list(c("A", "B"), c("a", "b")))
#   a b
# A 1 3
# B 2 4

# Same result, naming the dimensions afterwards.
m2 <- matrix(1:4, nrow = 2, ncol = 2)
rownames(m2) <- c("A", "B")
colnames(m2) <- c("a", "b")
print(m2)
#   a b
# A 1 3
# B 2 4
得值
# A 2x3 matrix filled row by row, with named rows and columns.
m <- matrix(1:6, nrow = 2, ncol = 3,
            dimnames = list(c("A", "B"), c("a", "b", "c")),
            byrow = TRUE)
#   a b c
# A 1 2 3
# B 4 5 6

# Column "a" (the result drops to a named vector).
m[, "a"]
# A B
# 1 4

# Row "A", selected by position.
m[1, ]
# a b c
# 1 2 3

# rbind() appends a row.
rbind(m, C = 7:9)
#   a b c
# A 1 2 3
# B 4 5 6
# C 7 8 9

# cbind() appends a column (unnamed here).
cbind(m, 7:8)
#   a b c
# A 1 2 3 7
# B 4 5 6 8
Factors
資料分類用
宣告
# Unordered factor: categories with no ranking.
survey_vector <- c("M", "F", "F", "M", "M")
factor_survey_vector <- factor(survey_vector)
factor_survey_vector
# [1] M F F M M
# Levels: F M

# Comparison is meaningless without an order: NA plus a warning.
factor_survey_vector[1] > factor_survey_vector[2]
# [1] NA
# Warning message:
# In Ops.factor(factor_survey_vector[1], factor_survey_vector[2]) :
#   ‘>’ not meaningful for factors

# Ordered factor: temperature levels have a natural ranking.
# The argument is spelled `ordered` in full rather than relying on
# partial matching of `order`.
temperature_vector <- c("High", "Low", "High", "Low", "Medium")
factor_temperature_vector <- factor(
  temperature_vector,
  ordered = TRUE,
  levels = c("Low", "Medium", "High")
)
factor_temperature_vector
# [1] High   Low    High   Low    Medium
# Levels: Low < Medium < High

# With an order defined, comparison is meaningful.
factor_temperature_vector[1] > factor_temperature_vector[2]
# [1] TRUE
運用
# Build an unordered factor from survey answers.
survey_vector <- c("M", "F", "F", "M", "M")
factor_survey_vector <- factor(survey_vector)
factor_survey_vector
# [1] M F F M M
# Levels: F M

# Rename the levels; the new names follow the existing level order
# (F, M), so every element is relabelled at once.
levels(factor_survey_vector) <- c("Female", "Male")
factor_survey_vector
# [1] Male   Female Female Male   Male
# Levels: Female Male

# The fifth element (still a factor, levels retained).
factor_survey_vector[5]
# [1] Male
# Levels: Female Male

# Count of observations per level.
summary(factor_survey_vector)
# Female   Male
#      2      3
# tapply(): apply a function to groups defined by a factor.
incomes <- c(
  60, 49, 40, 61, 64, 60, 59, 54, 62, 69,
  70, 42, 56, 61, 61, 61, 58, 51, 48, 65,
  49, 49, 41, 48, 52, 46, 59, 46, 58, 43
)
state <- c(
  "tas", "sa", "qld", "nsw", "nsw", "nt", "wa", "wa", "qld", "vic",
  "nsw", "vic", "qld", "qld", "sa", "tas", "sa", "nt", "wa", "vic",
  "qld", "nsw", "nsw", "wa", "sa", "act", "nsw", "vic", "vic", "act"
)
statef <- factor(state)
#  [1] tas sa  qld nsw nsw nt  wa  wa  qld vic nsw vic qld qld sa  tas sa  nt  wa
# [20] vic qld nsw nsw wa  sa  act nsw vic vic act
# Levels: act nsw nt qld sa tas vic wa

levels(statef)
# [1] "act" "nsw" "nt"  "qld" "sa"  "tas" "vic" "wa"

# Mean income within each state group.
incmeans <- tapply(incomes, statef, mean)
#      act      nsw       nt      qld       sa      tas      vic       wa
# 44.50000 57.33333 55.50000 53.60000 55.00000 60.50000 56.00000 52.25000
Lists
list 可以包含不同資料屬性的資料
宣告
# A list built with name = value pairs keeps those names.
v <- c(1, 2, 3)
x <- list(a = 1, b = TRUE, c = "test", d = v)
# $a
# [1] 1
#
# $b
# [1] TRUE
#
# $c
# [1] "test"
#
# $d
# [1] 1 2 3

# Passing bare variables gives an unnamed list. The variables are
# deliberately not called a/b/c/d so that base::c is not masked.
first <- 1
second <- TRUE
third <- "test"
fourth <- c(1, 2, 3)
x <- list(first, second, third, fourth)
# Elements print as [[1]], [[2]], ... because no names were given.
print(x)
# [[1]]
# [1] 1
#
# [[2]]
# [1] TRUE
#
# [[3]]
# [1] "test"
#
# [[4]]
# [1] 1 2 3

# After naming, the list matches the first one.
names(x) <- c("a", "b", "c", "d")
print(x)

# Building the same list incrementally, one element at a time.
x <- list()
x$a <- 1
x["b"] <- TRUE
x[3] <- "test"        # added by position, so it has no name yet
names(x)[3] <- "c"
x$d <- c(1, 2, 3)
得值
x <- list(a = 1, b = TRUE, c = "test", d = c(1, 2, 3))
# $a
# [1] 1
#
# $b
# [1] TRUE
#
# $c
# [1] "test"
#
# $d
# [1] 1 2 3

# Single brackets return a sub-list, not the element itself.
x["d"]
x[4]
class(x["d"])
# [1] "list"

# So indexing further does not reach the values: x[4] is a list of
# length 1, and its second position does not exist. No error is
# raised; the result is a list holding NULL.
x[4][2]
# $<NA>
# NULL

# Double brackets or $ extract the element itself.
x[["d"]]
x[[4]]
x$d
class(x$d)
# [1] "numeric"

# Now the value can be indexed.
x$d[2]
# [1] 2

# Add a matrix to the list as element m.
m <- matrix(0, nrow = 2, ncol = 2)
x$m <- m
x
# $a
# [1] 1
#
# $b
# [1] TRUE
#
# $c
# [1] "test"
#
# $d
# [1] 1 2 3
#
# $m
#      [,1] [,2]
# [1,]    0    0
# [2,]    0    0
Data frames
data.frame 類似資料表,常當作大量資料集,例如:匯入外部檔或讀取資料庫資料等。
建立時若為 vector 長度需一致,若為 matrix row 長度需一致
宣告
# `name` is shorter than the other columns, so data.frame() fails.
# The error is caught so the rest of the script keeps running.
name <- c("Joe", "Bob")
age <- c("28", "26", "34")
gender <- c("Male", "Male", "Female")
length_error <- tryCatch(
  data.frame(name, age, gender),
  error = function(e) conditionMessage(e)
)
print(length_error)
# [1] "arguments imply differing number of rows: 2, 3"

# Fixed: all three vectors have length 3.
name <- c("Joe", "Bob", "Vicky")
data.frame(name, age, gender)
#    name age gender
# 1   Joe  28   Male
# 2   Bob  26   Male
# 3 Vicky  34 Female

# Matrices with different row counts fail the same way.
row_error <- tryCatch(
  data.frame(array(0, c(4, 3)), array(0, c(3, 2))),
  error = function(e) conditionMessage(e)
)
print(row_error)
# [1] "arguments imply differing number of rows: 4, 3"

# Fixed: both arrays have 3 rows.
data.frame(array(0, c(3, 3)), array(0, c(3, 2)))
#   X1 X2 X3 X1.1 X2.1
# 1  0  0  0    0    0
# 2  0  0  0    0    0
# 3  0  0  0    0    0

# Build the same table column by column, then name the columns.
df <- data.frame(character(3))
df[1] <- c("Joe", "Bob", "Vicky")
df[2] <- c("28", "26", "34")
df[3] <- c("Male", "Male", "Female")
colnames(df) <- c("name", "age", "gender")
# Same data as the first table.
print(df)
#    name age gender
# 1   Joe  28   Male
# 2   Bob  26   Male
# 3 Vicky  34 Female

rownames(df) <- c("第一人", "第二人", "第三人")
print(df)
#         name age gender
# 第一人   Joe  28   Male
# 第二人   Bob  26   Male
# 第三人 Vicky  34 Female

# Basic summary of each column. The columns are character vectors,
# so summary() reports length/class/mode rather than per-level counts.
summary(df)
#      name               age               gender
#  Length:3           Length:3           Length:3
#  Class :character   Class :character   Class :character
#  Mode  :character   Mode  :character   Mode  :character
得值
# Three equal-length vectors become the columns of a data frame.
name <- c("Joe", "Bob", "Vicky")
age <- c("28", "26", "34")
gender <- c("Male", "Male", "Female")
df <- data.frame(name, age, gender)
#    name age gender
# 1   Joe  28   Male
# 2   Bob  26   Male
# 3 Vicky  34 Female

# A single cell: row 1, column 1.
df[1, 1]
# [1] "Joe"

# The whole first row (still a data frame).
df[1, ]
#   name age gender
# 1  Joe  28   Male

# Three equivalent ways to pull out the first column as a vector.
df[, 1]
df[, "name"]
df$name
# [1] "Joe"   "Bob"   "Vicky"
參考
R BasicData Types
留言
張貼留言