###############################################################
###############################################################
##
# Lines starting with the hash sign (#) are comments
## First steps with R
## Basic arithmetic operations - R as a calculator
1 + 2
3 + 2 * 2 # R respects operator precedence: multiplication before addition
(3 + 2) * 2 # use parentheses if the addition should be carried out first
exp(1) # 'exp()' is the exponential function
log(5) # 'log()' is the natural logarithm
exp(log(5)) # 'exp()' and 'log()' are inverse functions, so this gives 5 again
log(8, base = 2) # logarithm of 8 to base 2
sqrt(9) # square root
3^3 # 3 to the power of 3
9^(1 / 2) # another way to write the square root
1.2 # decimal numbers are written with a dot
# The German notation "1,2" is a syntax error. It is parsed from a string
# inside try() so that the error message is still shown, but the rest of the
# script stays parseable and can be run with source().
try(parse(text = "1,2"))
factorial(3) # 3! = 6
1 == 1 + 1 # '==' compares two values; the answer is TRUE or FALSE
# The other comparison operators:
1 < 1 + 1 # less than
1 > 1 + 1 # greater than
1 != 1 + 1 # not equal
1 <= 1 + 1 # less than or equal
1 >= 1 + 1 # greater than or equal
## These commands show more information about the available functions:
help("log") # opens the manual page of the built-in function "log"
??sin # searches the help system when you do not know the exact function name
# but are looking for information about a topic
help.start() # starts the browser with an HTML page linking to the various manuals
help.search("t-Test") # lists the help entries that have something to do with 't-Test'
#########################
## Now try it yourself:
# 1) Discover, how to calculate the sum and the product of the numbers from 1-100
# without the operator (+ or *)
# hint: check "help(sum)" and have a look at the examples
#
# 2) Compute the third root of 27. (hint: check root laws - how to transform a root into the power of a number)
#
#
# 3) Check, if the logarithm laws are true and calculate e.g. log(4*8,2)==log(4,2)+log(8,2)
#########################
## Associating values to variables
#
a <- 3 # the arrow operator '<-' stores the value 3 in the variable 'a'
a # typing the name of a variable shows its value
b <- 4
b
a+b # variables can be used in calculations like plain numbers
b = 5 # '=' assigns as well; this overwrites the previous value of 'b'
b
4 -> b # the right-pointing arrow also assigns, but it is confusing to read
b
# Exercise:
# 4) Divide 36 by 4 and store the result in a variable called "v"
# create a new variable "w" with the value 12
# divide "v" by 3 and multiply the result by "w"
#########################
## You can combine single values to a so-called "vector"
## use the command "c()"
# c stands for "concatenate" or "combine"
# Numeric vector of whole numbers. Note: a trailing comma inside c(), as in
# c(1, 2, 4, -1, ), is an error because the last argument would be empty.
test_vector <- c(1, 2, 4, -1)
test_vector
x <- c(1, 2.3, pi, 3^6) # constants and expressions can be mixed
x
class(x) # what is the type of my object?
y <- c(c(1, 2), c(3, pi, 4)) # c() can also combine vectors
y
a <- c(1, 2) # also possible with pre-defined vectors ('a' and 'b' are overwritten)
b <- c(3, pi, 4)
d <- c(a, b)
d == y # element-wise comparison: all TRUE, because d and y hold the same values
# Character strings can be combined into a vector as well
z <- c("Es", "können", "auch", "Texte zu einem Vector verbunden werden")
z
class(z) # the type of z is 'character'
# Basic arithmetic operations with a vector
4 * c(1, 2, 4) # is interpreted as (4*1, 4*2, 4*4)
c(1, 2, 4) + c(2, 3, 5) # is interpreted as (1+2, 2+3, 4+5)
# The shorter vector is recycled: c(1,2,1,2) + c(2,3,1,5)
c(1, 2) + c(2, 3, 1, 5)
## creating sequences
1:10 # the whole numbers from 1 to 10
(1:5) * 2 # the same values as c(1,2,3,4,5)*2
rep(3, 5) # the same values as c(3,3,3,3,3); 'rep' stands for 'replicate'
rep(c(2, 7), 3) # the same values as c(2,7,2,7,2,7); replicate c(2,7) three times
seq(from = 0, to = 10, by = 0.1) # create a sequence from 0 to 10 with step width 0.1
# Exercise:
# 5) Compute the multiplication table for 7 up to 7000
# 6) Compute the square root for all natural numbers from 1 to 50
# Exercise 6: sqrt() is vectorised, so one call handles all 50 numbers
sqrt(1:50)
## Comparisons
x <- c(1, 3, 5, 3)
# A comparison is carried out component by component and returns one
# TRUE/FALSE value per element of x.
x == 3 # which elements are equal to 3?
x > 3 # which elements are greater than 3?
x != 5 # which elements differ from 5?
#Exercise:
# 7) Please sort the following expressions by their magnitude:
# natural logarithm of 2, logarithm of 3 to the base 10, and logarithm of 1.5 to the base 2
## Indexing of vectors
x <- 3:6
x
x[4] # the 4th element of vector x
# The 2nd and the 4th element; the form x[2,4] is only valid for matrices
x[c(2, 4)]
# Indexing with a TRUE/FALSE vector: selects the same elements as x[c(2,4)]
x[c(FALSE, TRUE, FALSE, TRUE)]
x[x > 4] # x > 4 yields (FALSE,FALSE,TRUE,TRUE), so the last two elements are kept
x[x >= 4] # greater than or equal to 4
x[x <= 4] # less than or equal to 4
x[x > 5 | x < 4] # greater than 5 OR less than 4
x[x <= 5 & x > 3] # less than or equal to 5 AND greater than 3
# Logical indexing becomes really useful when two vectors belong together:
# 'size' holds ten measurements, 'Location' the place each one comes from.
size <- 1.70 + seq(from = 0.01, to = 0.1, by = 0.01)
Location <- rep(c("Kathmandu", "Bangkok"), times = 5)
size[Location == "Bangkok"] # all sizes measured in Bangkok
size[Location == "Bangkok" & size < 1.75] # ... that are also below 1.75
# Exercise:
# 8) Select all fish coming from Kathmandu with a size of at least 1.75
#########################
## creating plots
x <- seq(0, 1, by = 0.1)
plot(x, col = "red") # the values of x against their index
plot(x, x^2, col = "red") # x^2 against x
plot(x, x^2, col = "red", pch = 16) # pch = point character: 16 draws filled circles
plot(x, x^2, col = "red", type = "l") # plot type "l": lines instead of points
plot(sin, from = -3, to = 3) # plot the sine function between -3 and 3
abline(v = 2) # adds a vertical line through (2,0)
abline(h = 1) # adds a horizontal line through (0,1)
# Exercise:
# 9) Plot the function f(x)=3x^2-2x-4 in the range from -1 to 5
#########################
## Mean, variance and standard deviation of a sample
x <- c(4, 5, 6, 0, -5)
x
length(x) # sample size: 5
sum(x) # 4+5+6+0-5 = 10
mean(x) # mean: sum(x)/length(x)
# (4+5+6+0-5)/5 = 2
var(x) # sample variance: sum( (x-mean(x))^2 ) / (length(x)-1)
# ( (4-2)^2+(5-2)^2+(6-2)^2+(0-2)^2+(-5-2)^2 )/4 = 20.5
sd(x) # standard deviation: sqrt( var(x) )
median(x) # median: the middle value of the sorted sample
# Exercise: #
# 10) Let's compute some simple statistics about the age distribution within this course.
#
# Template for the exercise: enter the participants' ages between the commas
# before running this line. As written, every argument of c() is empty, so R
# stops with an error instead of creating 'Age'.
Age <-c(, , , , , , ,)
# Do a summary statistic
# Calculate the median, the mean, and the standard deviation
# Plot a histogram, a boxplot and pie-chart
#
#
#
#
#
#
# The following paragraph is from the paper of
# Hui Liu,et.al: “Feature Selection Combined with Neural Network Structure Optimization for HIV-1 Protease
# Cleavage Site Prediction,” BioMed Research International, vol. 2015, Article ID 263586, 11 pages, 2015.
# doi:10.1155/2015/263586
### After reading please answer and discuss the questions below.
##
# Acquired immune deficiency syndrome (AIDS) is still a severe disease which mostly causes patient’s death
# during its terminal period. Most patients suffer from this disease because they are infected by HIV-1.
# Although many researches and investigations have been implemented, medicines or methods to entirely
# cure AIDS have not been found. However, there are some methods to relieve patient’s ailment by medicines
# or therapies. HIV-1 protease inhibitor is such a kind of medicine that can be used to treat AIDS.
# HIV-1 protease is an enzyme which plays an important role in the replication progress.
# It cleaves proteins to smaller peptides, and these peptides are used to make up some important proteins
# that are essential for the replication of HIV-1 [1]. Thus inhibition of this protease
# is a reliable method to interfere the virus reproduction.
# HIV-1 protease inhibitor is a small molecule that can tightly bind to HIV-1 protease at the active cleavage
# sites, so that substrates which should normally be cleaved cannot bind to the protease.
# Normally, the protease binds with a protein in octapeptide length and cleaves it at the scissile bond.
# It is quite important to find which amino acid sequences can be cleaved, that is, the specificity of the protease.
# Also a good concept of which residues play more important roles in the cleavage progress is necessary.
#[....] Understanding the specificity of HIV-1 protease can help human beings design effective protease inhibitor to treat AIDS.
# Judging whether a peptide can be cleaved by HIV-1 protease is the key point, and machine learning is an economical solution for
# solving this problem since machine learning methods can be used here to predict whether octapeptides are cleavable
# for the protease.
#
# Exercise 11)
# Why is the task of computational HIV-1 protease cleavage site prediction such a big problem, keeping researchers busy for two decades now?
# Why not start an international initiative (like the Human Genome Project) and simply test all possible combinations of amino acids experimentally?
# Please calculate
# a) The number of possible amino-acid sequences to be tested.
# b) The estimated costs for synthesis of these peptides (calculate with roughly 50US$ per Amino-Acid).
# c) Compare this amount of money with Germany's total gross domestic product (Bruttoinlandsprodukt) in 2015.
#
#
#
# Exercise 12)
# Please reproduce the calculation from the goat problem (Monty Hall problem).
# Compute the empirical probabilities of winning and losing for the two strategies "keeping" and "changing" the door.
# Exercise 13)
# Example of an exploratory analysis
# Please go - row by row - through the following example and try to retrace what was done.
# Understand the commands and add comments to the analysis
# Load the 'birthwt' data set that ships with the MASS package
library(MASS)
data("birthwt")
?birthwt # manual page describing the data set and its columns
str(birthwt) # compact overview of the columns and their types
dim(birthwt) # number of rows and columns

# Mother's age: frequency table, summary statistics and plots
table(birthwt$age)
summary(birthwt$age)
boxplot(birthwt$age)
boxplot(birthwt$age, main = "Boxplot of Mothers Age")
type.freq <- table(birthwt$age) # keep the frequency table for the bar plot
type.freq
barplot(type.freq, main = "Barplot of age frequencys ")
sd(birthwt$age)

# Show the original column names, then replace them with descriptive ones
names(birthwt)
names(birthwt) <- c(
  "birthwt.below.2500", "mother.age",
  "mother.weight", "race",
  "mother.smokes", "previous.prem.labor",
  "hypertension", "uterine.irr",
  "physician.visits", "birthwt.grams"
)
head(birthwt)

# Recode the numeric codes as labelled factors.
# race is coded 1/2/3 and is used directly as an index into the label vector.
birthwt[["race"]] <- factor(c("white", "black", "other")[birthwt[["race"]]])
# The three indicator columns are coded 0/1; adding 1 turns the code into an
# index into c("No", "Yes").
birthwt[c("mother.smokes", "uterine.irr", "hypertension")] <- lapply(
  birthwt[c("mother.smokes", "uterine.irr", "hypertension")],
  function(flag) factor(c("No", "Yes")[flag + 1])
)
summary(birthwt)

# plot() of a factor draws a bar chart of the level counts
plot(birthwt$race)
title(main = "Count of Mother's Race in Springfield MA, 1986")
# plot() of a single numeric vector draws the values against their index
plot(birthwt$mother.age)
title(main = "Mother's Ages in Springfield MA, 1986")
# Scatter plot: birth weight in grams against the mother's age
plot(birthwt$mother.age, birthwt$birthwt.grams)
title(main = "Birth Weight by Mother's Age in Springfield MA, 1986")