- 取得連結
- X
- 以電子郵件傳送
- 其他應用程式
程式語言:R
官網
官方文件
簡介:基本資料架構
character > complex > numeric > integer > logical
宣告
得值
宣告
得值
宣告
得值
宣告
運用
宣告
得值
建立時若為 vector 長度需一致,若為 matrix row 長度需一致
宣告
得值
Data Types
官網
官方文件
簡介:基本資料架構
基本
- 所有 index 皆是從 1 開始,而不是 0
- class(x)
- 判斷 x 架構,像是 matrix
- mode(x)
- 判斷 x 內含的資料屬性,像是 numeric
- typeof(x)
- 判斷更精確的 x 內含資料屬性,像是 double
- attributes(x)
- 得到 x 的屬性,像是 dim
資料屬性
- character
- 字串
- class("test")
- numeric
- 實數
- R 計算上皆是實數計算,除非特別轉換
- class(10)
- integer
- 整數
- class(1:10)
- class(as.integer(10))
- complex
- 複數
- class(2+2i)
- logical
- TRUE 或 FALSE
- class(T)
- raw
- bytes
- class(charToRaw('A'))
Assignment
- # 正常做法
- x <- c(1,2,3)
- # 反著寫也可以
- c(1,2,3) -> x
- # 用 function
- assign("x", c(1,2,3))
- # 大部分情況等同 <-,但不建議使用
- x = c(1,2,3)
Vector
vector 必定是同個資料屬性,會自動轉換,依以下強弱順序:character > complex > numeric > integer > logical
宣告
- # by c()
- v <- c(1, 3, 5)
- # by seq
- v <- seq(1, 5, by=2)
- # by :
- v <- 5:1
- # by rep
- v <- rep(1, times=5)
- # boolean
- v <- c(1, 3, 5) > 3
- # is.na
- v <- is.na(c(1:3,NA))
- # character
- v <- paste(c('x', 'y'), 1:5, sep="")
得值
- # init
- v <- c(seq(1, 10, by=2), NA)
- vLen = length(v)
- vNames <- as.raw(0x41:(0x41+vLen-1))
- vNames <- rawToChar(vNames)
- vNames <- strsplit(vNames, split='')
- vNames <- unlist(vNames)
- names(v) <- vNames
- print(v)
- # A B C D E F
- # 1 3 5 7 9 NA
- # index 從 1 開始,而不是 0
- v[1:10]
- # A B C D E F <NA> <NA> <NA> <NA>
- # 1 3 5 7 9 NA NA NA NA NA
- v[!is.na(v) & v>5]
- # D E
- # 7 9
- # 排除前兩項
- v[-(1:2)]
- # C D E F
- # 5 7 9 NA
- v[c('A', 'D')]
- # A D
- # 1 7
- # 將大於 3 的值設為負的
- index <- !is.na(v) & v>3
- v[index] = -v[index]
- print(v)
- # A B C D E F
- # 1 3 -5 -7 -9 NA
Array
宣告
- # 將 z 轉換為三維陣列,大小為 2x3x4
- z <- 1:24
- dim(z) <- c(2,3,4)
- # 初始值為 0 的 2x3 陣列
- array(0, c(2,3))
得值
- # 建立三維陣列,大小為 2x3x4
- z <- array(1:24, dim=c(2,3,4))
- # , , 1
- #
- # [,1] [,2] [,3]
- # [1,] 1 3 5
- # [2,] 2 4 6
- #
- # , , 2
- #
- # [,1] [,2] [,3]
- # [1,] 7 9 11
- # [2,] 8 10 12
- #
- # , , 3
- #
- # [,1] [,2] [,3]
- # [1,] 13 15 17
- # [2,] 14 16 18
- #
- # , , 4
- #
- # [,1] [,2] [,3]
- # [1,] 19 21 23
- # [2,] 20 22 24
- # 得到位置為 (1,2,3) 的值
- z[1,2,3]
- # [1] 15
- # 得到所有 x 位置為 2 的值,大小為 3x4
- z[2,,]
- # [,1] [,2] [,3] [,4]
- # [1,] 2 8 14 20
- # [2,] 4 10 16 22
- # [3,] 6 12 18 24
Matrix
二維 array 即是 matrix,class 皆為 matrix
宣告
- a <- array(0, c(2,3))
- # [,1] [,2] [,3]
- # [1,] 0 0 0
- # [2,] 0 0 0
- m <- matrix(0,nrow=2, ncol=3)
- # [,1] [,2] [,3]
- # [1,] 0 0 0
- # [2,] 0 0 0
- class(a) == class(m)
- # [1] TRUE
- matrix(c(1:4), nrow=2, ncol=2)
- # [,1] [,2]
- # [1,] 1 3
- # [2,] 2 4
- # 可以更改成按照 row 填入資料
- matrix(c(1:4), nrow=2, ncol=2, byrow=TRUE)
- # [,1] [,2]
- # [1,] 1 2
- # [2,] 3 4
- matrix(c(1:4), nrow=2, ncol=2, dimnames=list(c('A', 'B'),c('a', 'b')))
- # a b
- # A 1 3
- # B 2 4
- # 同上
- m2 <- matrix(c(1:4), nrow=2, ncol=2)
- rownames(m2) <- c('A', 'B')
- colnames(m2) <- c('a', 'b')
- print(m2)
- # a b
- # A 1 3
- # B 2 4
得值
- m = matrix(c(1:6), nrow=2, ncol=3, dimnames=list(c('A', 'B'), c('a', 'b', 'c')), byrow=T)
- # a b c
- # A 1 2 3
- # B 4 5 6
- # 得 a 行
- m[,'a']
- # A B
- # 1 4
- # 得 A 列 (row)
- m[1,]
- # a b c
- # 1 2 3
- # 多加一列 (row)
- rbind(m, C=7:9)
- # a b c
- # A 1 2 3
- # B 4 5 6
- # C 7 8 9
- # 多加一行
- cbind(m, 7:8)
- # a b c
- # A 1 2 3 7
- # B 4 5 6 8
Factors
資料分類用
宣告
- # 無序
- survey_vector <- c("M", "F", "F", "M", "M")
- factor_survey_vector <- factor(survey_vector)
- factor_survey_vector
- # [1] M F F M M
- # Levels: F M
- # 因無序,比較無意義
- factor_survey_vector[1] > factor_survey_vector[2]
- # [1] NA
- # Warning message:
- # In Ops.factor(factor_survey_vector[1], factor_survey_vector[2]) :
- # ‘>’ not meaningful for factors
- # 有序,因溫度有高下之分
- temperature_vector <- c("High", "Low", "High","Low", "Medium")
- factor_temperature_vector <- factor(temperature_vector, order=TRUE, levels=c("Low", "Medium", "High"))
- factor_temperature_vector
- # [1] High Low High Low Medium
- # Levels: Low < Medium < High
- # 因有序,比較才有意義
- factor_temperature_vector[1] > factor_temperature_vector[2]
- # [1] TRUE
運用
- survey_vector <- c("M", "F", "F", "M", "M")
- factor_survey_vector <- factor(survey_vector)
- factor_survey_vector
- # [1] M F F M M
- # Levels: F M
- # 更改值
- levels(factor_survey_vector) <- c("Female", "Male")
- factor_survey_vector
- # [1] Male Female Female Male Male
- # Levels: Female Male
- # 得到第五個值
- factor_survey_vector[5]
- # [1] Male
- # Levels: Female Male
- summary(factor_survey_vector)
- # Female Male
- # 2 3
- # tapply 用法
- incomes <- c(60, 49, 40, 61, 64, 60, 59, 54, 62, 69, 70, 42, 56,
- 61, 61, 61, 58, 51, 48, 65, 49, 49, 41, 48, 52, 46,
- 59, 46, 58, 43)
- state <- c("tas", "sa", "qld", "nsw", "nsw", "nt", "wa", "wa",
- "qld", "vic", "nsw", "vic", "qld", "qld", "sa", "tas",
- "sa", "nt", "wa", "vic", "qld", "nsw", "nsw", "wa",
- "sa", "act", "nsw", "vic", "vic", "act")
- statef <- factor(state)
- # [1] tas sa qld nsw nsw nt wa wa qld vic nsw vic qld qld sa tas sa nt wa
- # [20] vic qld nsw nsw wa sa act nsw vic vic act
- # Levels: act nsw nt qld sa tas vic wa
- levels(statef)
- # [1] "act" "nsw" "nt" "qld" "sa" "tas" "vic" "wa"
- # 算出每個 group 的 mean
- incmeans <- tapply(incomes, statef, mean)
- # act nsw nt qld sa tas vic wa
- # 44.50000 57.33333 55.50000 53.60000 55.00000 60.50000 56.00000 52.25000
Lists
list 可以包含不同資料屬性的資料
宣告
- v = c(1, 2, 3)
- x <- list(a=1, b=TRUE, c="test", d=v)
- # $a
- # [1] 1
- #
- # $b
- # [1] TRUE
- #
- # $c
- # [1] "test"
- #
- # $d
- # [1] 1 2 3
- a=1; b=TRUE; c="test"; d=c(1, 2, 3)
- x <- list(a, b, c, d)
- # 可以看到無名字,例 [[1]],因為不是直接用 = 的方法
- print(x)
- # [[1]]
- # [1] 1
- #
- # [[2]]
- # [1] TRUE
- #
- # [[3]]
- # [1] "test"
- #
- # [[4]]
- # [1] 1 2 3
- # 加入名字後就同最上面的 list
- names(x) <- c('a', 'b', 'c', 'd')
- print(x)
- # 以下建立的 list 同最上面
- x <- list()
- x$a <- 1
- x['b'] <- T
- x[3] <- 'test'
- names(x)[3] = 'c'
- x$d <- c(1, 2, 3)
得值
- x <- list(a=1, b=TRUE, c="test", d=c(1, 2, 3))
- # $a
- # [1] 1
- #
- # $b
- # [1] TRUE
- #
- # $c
- # [1] "test"
- #
- # $d
- # [1] 1 2 3
- # 若單純用 [ ] 指定 index 如下,得到的仍是 list,而非其內含物
- x['d']
- x[4]
- class(x['d']) # [1] "list"
- # 所以無法得到值:x[4] 仍是長度為 1 的 list,取第 2 項不會報錯,但只會得到 NULL
- x[4][2]
- # $<NA>
- # NULL
- # 得到內含物
- x[['d']]
- x[[4]]
- x$d
- class(x$d) # [1] "numeric"
- # 可正確得到值
- x$d[2]
- # [1] 2
- # list 加入 m
- m = matrix(0, nrow=2, ncol=2)
- x$m = m
- x
- # $a
- # [1] 1
- #
- # $b
- # [1] TRUE
- #
- # $c
- # [1] "test"
- #
- # $d
- # [1] 1 2 3
- #
- # $m
- # [,1] [,2]
- # [1,] 0 0
- # [2,] 0 0
Data frames
data.frame 類似資料表,常當作大量資料集,例如:匯入外部檔或讀取資料庫資料等。
建立時若為 vector 長度需一致,若為 matrix row 長度需一致
宣告
- # name 長度與其他不一致,會出錯
- name <- c("Joe", "Bob")
- age <- c("28", "26", "34")
- gender <- c("Male","Male","Female")
- data.frame(name, age, gender)
- # 修正如下
- name <- c("Joe", "Bob", "Vicky")
- data.frame(name, age, gender)
- # name age gender
- # 1 Joe 28 Male
- # 2 Bob 26 Male
- # 3 Vicky 34 Female
- # row size 不一致,會出錯
- data.frame(array(0, c(4,3)), array(0, c(3,2)))
- # 修正如下
- data.frame(array(0, c(3,3)), array(0, c(3,2)))
- # X1 X2 X3 X1.1 X2.1
- # 1 0 0 0 0 0
- # 2 0 0 0 0 0
- # 3 0 0 0 0 0
- df <- data.frame(character(3))
- df[1] <- c("Joe", "Bob", "Vicky")
- df[2] <- c("28", "26", "34")
- df[3] <- c("Male","Male","Female")
- colnames(df) <- c('name', 'age', 'gender')
- # 同最上面的資料
- print(df)
- # name age gender
- # 1 Joe 28 Male
- # 2 Bob 26 Male
- # 3 Vicky 34 Female
- rownames(df) <- c('第一人', '第二人', '第三人')
- print(df)
- # name age gender
- # 第一人 Joe 28 Male
- # 第二人 Bob 26 Male
- # 第三人 Vicky 34 Female
- # 顯示資料基本資訊
- summary(df)
- # name age gender
- # Bob :1 26:1 Female:1
- # Joe :1 28:1 Male :2
- # Vicky:1 34:1
得值
- name <- c("Joe", "Bob", "Vicky")
- age <- c("28", "26", "34")
- gender <- c("Male","Male","Female")
- df <- data.frame(name, age, gender)
- # name age gender
- # 1 Joe 28 Male
- # 2 Bob 26 Male
- # 3 Vicky 34 Female
- df[1,1]
- # [1] "Joe"
- df[1,]
- # name age gender
- # 1 Joe 28 Male
- df[,1]
- df[,'name']
- df$name
- # [1] "Joe" "Bob" "Vicky"
參考
R BasicData Types
留言
張貼留言