- 取得連結
- X
- 以電子郵件傳送
- 其他應用程式
程式語言:R
官網
官方文件
簡介:基本資料架構
character > complex > numeric > integer > logical
宣告
得值
宣告
得值
宣告
得值
宣告
運用
宣告
得值
建立時若為 vector 長度需一致,若為 matrix row 長度需一致
宣告
得值
Data Types
官網
官方文件
簡介:基本資料架構
基本
- 所有 index 皆是從 1 開始,而不是 0
- class(x)
- 判斷 x 架構,像是 matrix
- mode(x)
- 判斷 x 內含的資料屬性,像是 numeric
- typeof(x)
- 判斷更精確的 x 內含資料屬性,像是 double
- attributes(x)
- 得到 x 的屬性,像是 dim
資料屬性
- character
- 字串
- class("test")
- numeric
- 實數
- R 計算上皆是實數計算,除非特別轉換
- class(10)
- integer
- 整數
- class(1:10)
- class(as.integer(10))
- complex
- 複數
- class(2+2i)
- logical
- TRUE 或 FALSE
- class(TRUE)
- raw
- bytes
- class(charToRaw('A'))
Assignment
# Four ways to bind the vector c(1, 2, 3) to the name x.
# The usual way: leftward assignment
x <- c(1,2,3)
# Rightward assignment also works
c(1,2,3) -> x
# Via a function call, passing the variable name as a string
assign("x", c(1,2,3))
# Equivalent to <- in most situations, but not recommended
x = c(1,2,3)
Vector
vector 必定是同個資料屬性,會自動轉換,依以下強弱順序:character > complex > numeric > integer > logical
宣告
# Ways to construct a vector; each statement rebinds v.
# by c(): combine values explicitly
v <- c(1, 3, 5)
# by seq(): from 1 up to 5 in steps of 2
v <- seq(1, 5, by = 2)
# by the colon operator: descending sequence 5, 4, 3, 2, 1
v <- 5:1
# by rep(): repeat a value
v <- rep(1, times = 5)
# logical vector produced by an elementwise comparison
v <- c(1, 3, 5) > 3
# logical vector flagging missing values
v <- is.na(c(1:3, NA))
# character vector: c("x", "y") is recycled against 1:5 and paste0()
# concatenates the pieces without a separator
v <- paste0(c("x", "y"), 1:5)
得值
# Build a numeric vector with a trailing NA to index into.
v <- c(seq(1, 10, by = 2), NA)
vLen <- length(v)
# Name the elements "A", "B", ... using the built-in LETTERS constant.
vNames <- LETTERS[seq_len(vLen)]
names(v) <- vNames
print(v)
#  A  B  C  D  E  F
#  1  3  5  7  9 NA
# Indexing starts at 1, not 0; positions past the end come back as NA
v[1:10]
#    A    B    C    D    E    F <NA> <NA> <NA> <NA>
#    1    3    5    7    9   NA   NA   NA   NA   NA
# Logical index: keep the non-missing values greater than 5
v[!is.na(v) & v > 5]
# D E
# 7 9
# Negative index: drop the first two elements
v[-(1:2)]
#  C  D  E  F
#  5  7  9 NA
# Select elements by name
v[c("A", "D")]
# A D
# 1 7
# Negate every value greater than 3 (the NA is excluded from the mask)
index <- !is.na(v) & v > 3
v[index] <- -v[index]
print(v)
#  A  B  C  D  E  F
#  1  3 -5 -7 -9 NA
Array
宣告
# Turn z into a three-dimensional 2x3x4 array by assigning its dim attribute
z <- 1:24
dim(z) <- c(2, 3, 4)
# A 2x3 array with every element initialised to 0
array(0, c(2, 3))
得值
# Create a three-dimensional array of size 2x3x4; z then holds:
z <- array(1:24, dim = c(2, 3, 4))
# , , 1
#
#      [,1] [,2] [,3]
# [1,]    1    3    5
# [2,]    2    4    6
#
# , , 2
#
#      [,1] [,2] [,3]
# [1,]    7    9   11
# [2,]    8   10   12
#
# , , 3
#
#      [,1] [,2] [,3]
# [1,]   13   15   17
# [2,]   14   16   18
#
# , , 4
#
#      [,1] [,2] [,3]
# [1,]   19   21   23
# [2,]   20   22   24
# Value at position (1, 2, 3)
z[1, 2, 3]
# [1] 15
# Every value whose first index is 2; the result is a 3x4 matrix
z[2, , ]
#      [,1] [,2] [,3] [,4]
# [1,]    2    8   14   20
# [2,]    4   10   16   22
# [3,]    6   12   18   24
Matrix
二維 array 即是 matrix,兩者的 class 相同
宣告
# A two-dimensional array and a matrix are the same kind of object.
a <- array(0, c(2, 3))
#      [,1] [,2] [,3]
# [1,]    0    0    0
# [2,]    0    0    0
m <- matrix(0, nrow = 2, ncol = 3)
#      [,1] [,2] [,3]
# [1,]    0    0    0
# [2,]    0    0    0
# identical() compares the whole class vector at once; `==` would compare
# elementwise and return one result per class entry.
identical(class(a), class(m))
# [1] TRUE
# Values are filled column by column by default
matrix(1:4, nrow = 2, ncol = 2)
#      [,1] [,2]
# [1,]    1    3
# [2,]    2    4
# byrow = TRUE fills row by row instead
matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE)
#      [,1] [,2]
# [1,]    1    2
# [2,]    3    4
# Row and column names can be supplied at construction time
matrix(1:4, nrow = 2, ncol = 2,
       dimnames = list(c("A", "B"), c("a", "b")))
#   a b
# A 1 3
# B 2 4
# Same result, naming the rows and columns after construction
m2 <- matrix(1:4, nrow = 2, ncol = 2)
rownames(m2) <- c("A", "B")
colnames(m2) <- c("a", "b")
print(m2)
#   a b
# A 1 3
# B 2 4
得值
# A 2x3 matrix filled row by row, with named rows and columns
m <- matrix(1:6, nrow = 2, ncol = 3, byrow = TRUE,
            dimnames = list(c("A", "B"), c("a", "b", "c")))
#   a b c
# A 1 2 3
# B 4 5 6
# Extract column "a" by name
m[, "a"]
# A B
# 1 4
# Extract the first row (row "A") by position
m[1, ]
# a b c
# 1 2 3
# Append a row named "C"; returns a new matrix, m itself is unchanged
rbind(m, C = 7:9)
#   a b c
# A 1 2 3
# B 4 5 6
# C 7 8 9
# Append an unnamed column; returns a new matrix, m itself is unchanged
cbind(m, 7:8)
#   a b c
# A 1 2 3 7
# B 4 5 6 8
Factors
資料分類用
宣告
# Unordered factor
survey_vector <- c("M", "F", "F", "M", "M")
factor_survey_vector <- factor(survey_vector)
factor_survey_vector
# [1] M F F M M
# Levels: F M
# The levels have no order, so comparing them is meaningless
factor_survey_vector[1] > factor_survey_vector[2]
# [1] NA
# Warning message:
# In Ops.factor(factor_survey_vector[1], factor_survey_vector[2]) :
# ‘>’ not meaningful for factors
# Ordered factor: temperatures do rank from low to high
temperature_vector <- c("High", "Low", "High", "Low", "Medium")
# `ordered` is spelled out in full rather than relying on partial
# argument matching; `levels` lists the categories from lowest to highest.
factor_temperature_vector <- factor(
  temperature_vector,
  levels = c("Low", "Medium", "High"),
  ordered = TRUE
)
factor_temperature_vector
# [1] High Low High Low Medium
# Levels: Low < Medium < High
# With an ordering defined, the comparison is meaningful
factor_temperature_vector[1] > factor_temperature_vector[2]
# [1] TRUE
運用
# Start from an unordered factor of survey answers
survey_vector <- c("M", "F", "F", "M", "M")
factor_survey_vector <- factor(survey_vector)
factor_survey_vector
# [1] M F F M M
# Levels: F M
# Relabel the levels; the new labels follow the existing level order (F, M)
levels(factor_survey_vector) <- c("Female", "Male")
factor_survey_vector
# [1] Male Female Female Male Male
# Levels: Female Male
# Fetch the fifth element
factor_survey_vector[5]
# [1] Male
# Levels: Female Male
# Count the observations per level
summary(factor_survey_vector)
# Female Male
# 2 3
# tapply example: one income and one state code per observation
incomes <- c(
  60, 49, 40, 61, 64, 60, 59, 54, 62, 69,
  70, 42, 56, 61, 61, 61, 58, 51, 48, 65,
  49, 49, 41, 48, 52, 46, 59, 46, 58, 43
)
state <- c(
  "tas", "sa", "qld", "nsw", "nsw", "nt",
  "wa", "wa", "qld", "vic", "nsw", "vic",
  "qld", "qld", "sa", "tas", "sa", "nt",
  "wa", "vic", "qld", "nsw", "nsw", "wa",
  "sa", "act", "nsw", "vic", "vic", "act"
)
statef <- factor(state)
# [1] tas sa qld nsw nsw nt wa wa qld vic nsw vic qld qld sa tas sa nt wa
# [20] vic qld nsw nsw wa sa act nsw vic vic act
# Levels: act nsw nt qld sa tas vic wa
levels(statef)
# [1] "act" "nsw" "nt" "qld" "sa" "tas" "vic" "wa"
# Mean income within each state group
incmeans <- tapply(X = incomes, INDEX = statef, FUN = mean)
# act nsw nt qld sa tas vic wa
# 44.50000 57.33333 55.50000 53.60000 55.00000 60.50000 56.00000 52.25000
Lists
list 可以包含不同資料屬性的資料
宣告
# Build a list with named elements of different types
v <- c(1, 2, 3)
x <- list(a = 1, b = TRUE, c = "test", d = v)
# $a
# [1] 1
#
# $b
# [1] TRUE
#
# $c
# [1] "test"
#
# $d
# [1] 1 2 3
a <- 1
b <- TRUE
c <- "test"
d <- c(1, 2, 3)
x <- list(a, b, c, d)
# The elements are unnamed (shown as [[1]] etc.) because they were not
# passed in name = value form
print(x)
# [[1]]
# [1] 1
#
# [[2]]
# [1] TRUE
#
# [[3]]
# [1] "test"
#
# [[4]]
# [1] 1 2 3
# After assigning names the list matches the first one
names(x) <- c("a", "b", "c", "d")
print(x)
# Building the same list element by element
x <- list()
x$a <- 1
x["b"] <- TRUE
# Assigning by position leaves the element unnamed, so name it afterwards
x[3] <- "test"
names(x)[3] <- "c"
x$d <- c(1, 2, 3)
得值
# A list mixing numeric, logical, character and vector elements
x <- list(a = 1, b = TRUE, c = "test", d = c(1, 2, 3))
# $a
# [1] 1
#
# $b
# [1] TRUE
#
# $c
# [1] "test"
#
# $d
# [1] 1 2 3
# Single brackets return a one-element list, not the element itself
x["d"]
x[4]
class(x["d"])
# [1] "list"
# so indexing the result again does not reach the stored values
x[4][2]
# $<NA>
# NULL
# Double brackets or $ extract the element itself
x[["d"]]
x[[4]]
x$d
class(x$d)
# [1] "numeric"
# so its values can be indexed directly
x$d[2]
# [1] 2
# Add a matrix m to the list as element "m"
m <- matrix(0, nrow = 2, ncol = 2)
x$m <- m
x
# $a
# [1] 1
#
# $b
# [1] TRUE
#
# $c
# [1] "test"
#
# $d
# [1] 1 2 3
#
# $m
#      [,1] [,2]
# [1,]    0    0
# [2,]    0    0
Data frames
data.frame 類似資料表,常當作大量資料集,例如:匯入外部檔或讀取資料庫資料等。建立時若為 vector 長度需一致,若為 matrix row 長度需一致
宣告
# name is shorter than the other vectors, so data.frame() raises an error.
# try() reports the error without aborting the rest of the script.
name <- c("Joe", "Bob")
age <- c("28", "26", "34")
gender <- c("Male", "Male", "Female")
try(data.frame(name, age, gender))
# Fixed: all three vectors now have length 3
name <- c("Joe", "Bob", "Vicky")
data.frame(name, age, gender)
#    name age gender
# 1   Joe  28   Male
# 2   Bob  26   Male
# 3 Vicky  34 Female
# The two arrays have different numbers of rows, which is also an error
try(data.frame(array(0, c(4, 3)), array(0, c(3, 2))))
# Fixed: both arrays have 3 rows
data.frame(array(0, c(3, 3)), array(0, c(3, 2)))
#   X1 X2 X3 X1.1 X2.1
# 1  0  0  0    0    0
# 2  0  0  0    0    0
# 3  0  0  0    0    0
# Build the same table column by column, starting from a 3-row frame
df <- data.frame(character(3))
df[1] <- c("Joe", "Bob", "Vicky")
df[2] <- c("28", "26", "34")
df[3] <- c("Male", "Male", "Female")
colnames(df) <- c("name", "age", "gender")
# Same data as the first data frame above
print(df)
#    name age gender
# 1   Joe  28   Male
# 2   Bob  26   Male
# 3 Vicky  34 Female
# Row labels can be replaced as well
rownames(df) <- c("第一人", "第二人", "第三人")
print(df)
#         name age gender
# 第一人   Joe  28   Male
# 第二人   Bob  26   Male
# 第三人 Vicky  34 Female
# Basic per-column summary; the columns are character vectors, so only
# length, class and mode are reported
summary(df)
#      name               age               gender
#  Length:3           Length:3           Length:3
#  Class :character   Class :character   Class :character
#  Mode  :character   Mode  :character   Mode  :character
得值
# Three equal-length character vectors become the columns of df
name <- c("Joe", "Bob", "Vicky")
age <- c("28", "26", "34")
gender <- c("Male", "Male", "Female")
df <- data.frame(name, age, gender)
#    name age gender
# 1   Joe  28   Male
# 2   Bob  26   Male
# 3 Vicky  34 Female
# A single cell: row 1, column 1
df[1, 1]
# [1] "Joe"
# The whole first row, returned as a one-row data frame
df[1, ]
#   name age gender
# 1  Joe  28   Male
# Three equivalent ways to pull out the name column as a vector
df[, 1]
df[, "name"]
df$name
# [1] "Joe" "Bob" "Vicky"
參考
R BasicData Types
留言
張貼留言