https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html
http://www.sthda.com/english/wiki/ggcorrplot-visualization-of-a-correlation-matrix-using-ggplot2
Load and install the packages
# Packages used by the correlation analysis and plots in this script.
packages <- c(
  "ggplot2", "dplyr", "grid", "ggthemes", "plotly", "forcats",
  "reshape2", "corrplot", "ggcorrplot", "Hmisc"
)
# Attach each package in turn; a package that cannot be attached is installed
# (together with its dependencies) and then attached.
package.check <- lapply(packages, FUN = function(pkg) {
  if (require(pkg, character.only = TRUE)) {
    return(invisible(NULL))
  }
  install.packages(pkg, dependencies = TRUE)
  library(pkg, character.only = TRUE)
})
Now read the data. We will use a wheat data set with 204 observations of 13 variables: the entry name plus 12 traits.
# Load the data from CSV (the first row holds the column names) and inspect
# the structure of the resulting data frame.
corr <- read.csv(
  file = "~/Box/Postdoc/R_club/data_files/corr.csv",
  header = TRUE
)
str(corr)
## 'data.frame': 204 obs. of 13 variables:
## $ entry: Factor w/ 204 levels "HW_100","HW_102",..: 135 52 67 69 89 105 136 137 138 151 ...
## $ GY : num 1613 1722 1719 1647 1668 ...
## $ TKW : num 47.9 49.2 47.8 49 47.3 ...
## $ GPS : num 39.2 41.8 41.4 39.6 39.3 ...
## $ SPMS : num 206 222 230 224 216 ...
## $ GPMS : num 7137 8284 7489 7782 7988 ...
## $ PH : num 96.5 99 97.6 104.8 102.5 ...
## $ SL : num 9.31 9.4 8.27 9.74 8.18 ...
## $ DFF : num 142 144 144 143 144 ...
## $ FLW : num 1.5 1.63 1.57 1.5 1.55 ...
## $ FLL : num 22.2 22.3 22.7 22 22.5 ...
## $ FLA : num 26.1 30.4 27 28 27.8 ...
## $ ESV : num 0.68 0.6 0.67 0.66 0.64 0.64 0.67 0.62 0.65 0.67 ...
# Tabulate missing vs. non-missing cells over the whole data frame.
table(is.na(corr))
##
## FALSE
## 2652
# Show the first five rows together with all the columns.
head(corr[1:5, ])
## entry GY TKW GPS SPMS GPMS PH SL
## 1 HW_26 1613.190 47.91615 39.23682 205.5131 7136.854 96.48813 9.305438
## 2 HW_169 1721.537 49.21850 41.83785 221.6166 8284.455 99.01476 9.404050
## 3 HW_186 1719.118 47.75041 41.39790 229.9589 7489.420 97.60259 8.270675
## 4 HW_191 1646.953 48.98302 39.64697 223.5354 7781.957 104.84513 9.738373
## 5 HW_214 1667.508 47.32700 39.34576 216.3867 7987.595 102.46325 8.181575
## DFF FLW FLL FLA ESV
## 1 142.3762 1.502133 22.22555 26.10632 0.68
## 2 144.4058 1.632039 22.29936 30.44854 0.60
## 3 144.3703 1.569610 22.65048 26.99655 0.67
## 4 143.1361 1.500000 22.00000 28.00000 0.66
## 5 144.0152 1.546915 22.45863 27.78856 0.64
Correlation methods: “pearson”, “kendall”, “spearman”. Note that Hmisc::rcorr(), which mycorr() below relies on, accepts only “pearson” and “spearman”.
# Helper that computes a correlation matrix with rcorr().
#   data: object that as.matrix() can turn into a matrix of the variables
#   type: correlation type, forwarded to rcorr() as its `type` argument
# Returns the `r` element of the rcorr() result.
mycorr <- function(data, type) {
  rcorr(as.matrix(data), type = type)$r
}
# Correlation coefficients for both methods, obtained through mycorr().
# The first column of `corr` holds the names of the lines, so it is dropped
# before the call; coefficients are rounded to two digits after the point.
corr_mat_pe <- round(mycorr(corr[, -1], type = "pearson"), digits = 2)
corr_mat_sp <- round(mycorr(corr[, -1], type = "spearman"), digits = 2)
head(corr_mat_pe[, 1:4])
## GY TKW GPS SPMS
## GY 1.00 0.27 0.52 0.42
## TKW 0.27 1.00 0.36 0.21
## GPS 0.52 0.36 1.00 0.64
## SPMS 0.42 0.21 0.64 1.00
## GPMS 0.53 0.15 0.08 0.03
## PH 0.01 0.37 0.02 -0.10
# Preview the first four columns of the Spearman coefficient matrix.
head(corr_mat_sp[, 1:4])
## GY TKW GPS SPMS
## GY 1.00 0.27 0.49 0.45
## TKW 0.27 1.00 0.36 0.23
## GPS 0.49 0.36 1.00 0.66
## SPMS 0.45 0.23 0.66 1.00
## GPMS 0.54 0.12 0.04 0.04
## PH 0.04 0.37 -0.02 -0.09
# Matrices of correlation p-values, computed with cor_pmat() from the
# ggcorrplot package. cor_pmat() runs cor.test() on the columns of the object
# it receives, so it must be given the trait data (without the first column,
# which holds the line names) rather than an already computed correlation
# matrix. Extra arguments such as `method` are forwarded to cor.test().
pmat_pe <- cor_pmat(corr[, -1], method = "pearson")
head(pmat_pe[, 1:4])
# `exact = FALSE` asks cor.test() for the asymptotic p-value, which avoids the
# warning it raises for rank correlations when the data contain ties.
pmat_sp <- cor_pmat(corr[, -1], method = "spearman", exact = FALSE)
head(pmat_sp[, 1:4])
# NOTE: the tables previously printed here were computed from the correlation
# matrices instead of the data and showed the Pearson result twice; re-run
# this chunk to regenerate the output.
# Draw a lower-triangle ggcorrplot of a correlation matrix using circles,
# with coefficient labels and the diagonal hidden.
#   cor_cof:   correlation matrix to plot
#   p.mat:     matrix of p-values corresponding to `cor_cof`
#   sig.level: single significance threshold (default 0.05); coefficients
#              whose p-value exceeds it are treated as insignificant and,
#              because insig = "blank", left blank
# Returns the value of the ggcorrplot() call.
myggcorr <- function(cor_cof, p.mat, sig.level = 0.05) {
  # ggcorrplot() compares the p-values with `sig.level` element-wise, so a
  # vector of thresholds would be recycled across cells; require one value.
  stopifnot(is.numeric(sig.level), length(sig.level) == 1L)
  ggcorrplot(
    cor_cof,
    method = "circle",
    type = "lower",
    hc.order = FALSE,
    show.diag = FALSE,
    # Spelled out in full instead of relying on partial argument matching.
    outline.color = "blue",
    lab = TRUE, lab_col = "black", lab_size = 2, digits = 2,
    p.mat = p.mat, sig.level = sig.level, insig = "blank",
    pch = 4, pch.col = "black", pch.cex = 5,
    tl.cex = 10, tl.col = "black", tl.srt = 45
  )
}
# Correlation heatmap for the Pearson coefficients, drawn with myggcorr().
myggcorr(cor_cof = corr_mat_pe, p.mat = pmat_pe)
# Correlation heatmap for the Spearman coefficients, drawn with myggcorr().
myggcorr(cor_cof = corr_mat_sp, p.mat = pmat_sp)
# Draw an upper-triangle corrplot of a correlation matrix using circles.
#   corr_cof: correlation matrix to plot
#   p.mat:    matrix of p-values, passed to corrplot() together with
#             insig = "label_sig" and the 0.001 / 0.01 / 0.05 levels
# Returns the value of the corrplot() call.
mycorrplot <- function(corr_cof, p.mat) {
  corrplot(
    corr_cof,
    method = "circle",
    type = "upper",
    p.mat = p.mat,
    insig = "label_sig",
    sig.level = c(.001, .01, .05),
    pch.cex = .9,
    pch.col = "white",
    tl.col = "black",
    tl.srt = 45
  )
}
# Correlation heat map for the Pearson coefficients, drawn with mycorrplot().
mycorrplot(corr_cof = corr_mat_pe, p.mat = pmat_pe)
# Correlation heat map for the Spearman coefficients, drawn with mycorrplot().
mycorrplot(corr_cof = corr_mat_sp, p.mat = pmat_sp)
# Draw a mixed corrplot: numbers in the lower triangle, circles in the upper.
#   corr_mix: correlation matrix to plot
# Returns the value of the corrplot.mixed() call.
mycorrmix <- function(corr_mix) {
  corrplot.mixed(
    corr_mix,
    upper = "circle",
    lower = "number",
    lower.col = "black",
    number.cex = .7,
    bg = "white"
  )
}
# Mixed correlation heat map for the Pearson coefficients.
mycorrmix(corr_mix = corr_mat_pe)
# Mixed correlation heat map for the Spearman coefficients.
mycorrmix(corr_mix = corr_mat_sp)
## END