ボックスプロットで使ったデータを、確率プロットで表示してみました。凡例には、データ数とメディアンを表示しています。
# CDF PLOT SAMPLE
# =============================================================================
# chart.cdf
# =============================================================================
chart.cdf <- function(tbl, valX = "RAW_VALUE", colBy = "condition",
gTitle = "", xLab = "measured data") {
x <- tbl[, valX]
x <- x[!is.na(x)]
n <- length(x)
x <- sort(x)
y <- (1:n - 0.375) / (n + 0.25)
probs <- c(0.001, 0.01, 0.1, 1, 5, 10, 20, 30, 40, 50,
60, 70, 80, 90, 95, 99, 99.9, 99.99, 99.999) / 100
m <- length(probs)
x_min <- x[1]
x_max <- x[n]
x_pad <- (x_max - x_min) * 0.4
y_min <- qnorm(probs[1])
y_max <- qnorm(probs[m])
y_stp <- (y_max - y_min) * 0.1
plot(c(x_min, x_max + x_pad), c(y_min, y_max),
type = "n", yaxt = "n",
xlab = xLab, ylab = "Cumulative Percent",
main = gTitle, cex.main = 1)
axis(2, qnorm(probs), probs * 100)
abline(h = qnorm(probs), lty = 3, col = "#C0C0C0")
abline(h = 0, lty = 1, col = "#808080")
listBy <- sort(unique(tbl[, colBy]))
k <- length(listBy)
for (i in 1:k) {
valBy <- listBy[i]
x <- tbl[tbl[, colBy] == valBy, valX]
x <- x[!is.na(x)]
n <- length(x)
x <- sort(x)
y <- (1:n - 0.375) / (n + 0.25)
total <- length(x)
x.median <- median(x)
legend_label <- paste(valBy, ':', total, "/", x.median)
points(x, qnorm(y), type='o', col = i, pch = i)
legend_x <- x_max + x_pad * 0.05
legend_y <- y_max - y_stp * (i - 1)
legend_lty = 1
legend(legend_x, legend_y, legend_label, col = i,
lty = legend_lty, pch = i, ncol = 1, cex = 0.7, box.lty = 0)
}
}
# =============================================================================
# open.png
# =============================================================================
open.png <- function(imgName, picWidth = 640, picHeight = 400) {
png(filename = imgName, width = picWidth, height = picHeight,
units = "px", pointsize = 14, bg = "white", res = NA)
}
# -----------------------------------------------------------------------------
# MAIN
# -----------------------------------------------------------------------------
# _____________________________________________________________________________
# READ CSV DATA
file.name <- "./sample_box.csv"
tbl.data <- read.csv(file.name, header = T, as.is = T)
# _____________________________________________________________________________
# generate CDF PLOT
head.y <- names(tbl.data)[1]
head.x <- names(tbl.data)[2]
title.box <- paste("SAMPLE (n = ", length(tbl.data[, head.x]), ")",
sep = "")
label.x <- "Measurement"
chart.cdf(tbl.data, head.x, head.y, title.box, label.x)
# _____________________________________________________________________________
# generate PNG FILE
open.png("sample_cdf.png")
chart.cdf(tbl.data, head.x, head.y, title.box, label.x)
dev.off()
# ---
# END PROGRAM
実行例
Fedora で実行した例を示します。
$ R
R version 2.9.2 (2009-08-24)
Copyright (C) 2009 The R Foundation for Statistical Computing
ISBN 3-900051-07-0
...
(途中省略)
...
'q()'と入力すればRを終了します。
> source("cdf.R")
X Window 上では以下の様に表示されます。
出力された PNG ファイルはこんな感じになります。