# response-time-of-chunks-of-calls.r

library("data.table")		# fread (fast csv reading)

# --- Configuration ---
# CSV file to read.
filename <- "response-time-of-chunks-of-calls.csv"
# Number of leading lines of the CSV that are skipped when reading it.
numFirstValuesToIgnore <- 10000000
# Number of consecutive rows that are aggregated into one chunk.
chunkSize <- 10000

# One colour per aggregate, in the order: max, mean, median, min.
colors <- c("red", "blue", "green", "black")

# --- Output device ---
# All plots go into a PDF whose name carries the current date
# (day, full month name as rendered by the current locale, year).
pdf(
	file = paste0(
		"response-time-of-chunks-of-calls-",
		format(Sys.time(), format = "%d-%B-%Y"),
		".pdf"
	)
)

# fread returns a data.table (by default),
## which enhances/extends a data.frame,
### which in turn is a list of vectors.
#### Each vector in the list holds the values of one column.
csvTable <- fread(filename, skip = numFirstValuesToIgnore)

# Number of rows that were actually read, i.e. after the skipped leading lines.
rowCount <- nrow(csvTable)
cat(sprintf("\nYour input file contains %d lines.\n\n", rowCount))

# Default column names are "V" followed by the column number (this assumes
# the file has no header row). Going by the variable names this script used
# for them: V1 = thread, V2 = time, V3 = memory, V4 = gcActivity.
# Only V2 is needed for the aggregation below, so fail early with a clear
# message if it is missing or if there is nothing to aggregate.
if (rowCount == 0L || !"V2" %in% names(csvTable)) {
	stop(
		"No usable data in '", filename, "': need at least one row and two columns.",
		call. = FALSE
	)
}

# csvTable with an additional column "id" which contains the row numbers
# (necessary for grouping; see below). seq_len() stays correct for any row count.
csvTable[, id := seq_len(.N)]

print(csvTable)

# Aggregate V2 per chunk of chunkSize consecutive rows in a single grouping
# pass: rows 1..chunkSize form chunk 0, the next chunkSize rows chunk 1, etc.
chunkStats <- csvTable[, .(
	maxTime = max(V2),
	meanTime = mean(V2),
	medianTime = median(V2),
	minTime = min(V2)
), by = .(chunk = (id - 1) %/% chunkSize)]

# One vector per aggregate, with one value per chunk (in chunk order).
maxTimes <- chunkStats[["maxTime"]]
meanTimes <- chunkStats[["meanTime"]]
medianTimes <- chunkStats[["medianTime"]]
minTimes <- chunkStats[["minTime"]]

# disable scientific number representation, e.g., 1e+07
options(scipen = 10)

# Combined chart: one line per aggregate on a log-scaled y-axis.
# Both default axes are suppressed (xaxt/yaxt = "n") and drawn manually below
# so that their tick labels can be customized.
ts.plot(
	ts(maxTimes), ts(meanTimes), ts(medianTimes), ts(minTimes),
	gpars = list(yaxt = "n", xaxt = "n"),
	col = colors,
	type = "l",
	log = "y",
	xlab = "Chunk",
	ylab = "Response time (in us) of a chunk"
)

# display x-ticks with "th" as suffix
# The default tick positions can be fractional when there are only a few
# chunks; chunks are counted in whole numbers, so only whole-number positions
# are labelled ("%d" would raise an error on a fractional value).
xTicks <- axTicks(1)
xTicks <- xTicks[abs(xTicks - round(xTicks)) < 1e-8]
if (length(xTicks) > 0) {
	axis(1, at = xTicks, labels = sprintf("%.0fth", xTicks))
}

# display y-ticks in micro seconds (so, we divide the current ticks by 1000)
# NOTE(review): this presumes the raw values are nanoseconds - confirm.
yTicks <- axTicks(2)
axis(2, at = yTicks, labels = yTicks / 1000)

legend(
	"top",
	c("max", "mean", "median", "min"),
	fill = colors,
	horiz = TRUE,
	title = sprintf("Each chunk of %d calls is aggregated via:", chunkSize)
)

# One separate line plot per aggregate, each drawn in the colour at the
# matching position of `colors`. The y-axis title is set explicitly to the
# variable name, which is what plot() derives by default when it is given
# the bare variable.
perStatistic <- list(
	maxTimes = maxTimes,
	meanTimes = meanTimes,
	medianTimes = medianTimes,
	minTimes = minTimes
)
for (k in seq_along(perStatistic)) {
	plot(
		perStatistic[[k]],
		col = colors[k],
		type = "l",
		ylab = names(perStatistic)[k]
	)
}


### experimental code ###
#print("experimental code follows...")

#csvTable = fread(filename, skip=0, nrows=10)
#csvTable = fread(filename, skip=numFirstValuesToIgnore, select=c(1,2))
#csvTable = fread(filename, skip=numFirstValuesToIgnore, select=c("thread","time"))

#print(csvTable)

# increase the width of the plot (margin) due to multiple y-axes
#par(oma = c(0, 2, 0, 2))

# returns the column named "thread" as vector
#csvTable[["thread"]]

# returns the first row as a data.table (use csvTable[[1]] to get column number 1 as a vector)
#csvTable[1]

#numRows = csvTable[, .N]

# default column name: "V" followed by the column number
#x <- seq(1:numRows)

#groupedTime <- csvTable[,c(V2),by=V1]
#print(groupedTime)

# x and y are each a list of values
#plot(x,y)
# y label is "-log_10(p)"
#plot(x,y, ylab=expression(-log[10](italic(p))))

# axis(..): side=2 means left side; side=4 means right side

#labels <- sapply(ticks, function(i) as.expression(bquote(10^ .(i))))
# labels <- c("a", "b", "c")

# write pdf: close the graphics device opened by pdf() near the top of the
# script, which finalizes the PDF file. invisible() keeps the value returned
# by dev.off() from being printed.
invisible(dev.off())