###############################################################
###############################################################
##
# Rows starting with the hash sign (#) are comments

## First steps with R

## Basic arithmetic operations - R as a calculator
1 + 2
3 + 2 * 2          # R knows the operator precedence rules
                   # (multiplication/division before addition/subtraction)
(3 + 2) * 2        # If you want to add the first two values first, use brackets
exp(1)             # 'exp()' is the exponential function
log(5)             # 'log()' is the natural logarithm
exp(log(5))        # 'exp()' of a logarithm gives the "original" number again
log(8, base = 2)   # Logarithm of 8 to base 2
sqrt(9)            # SQuare RooT
3^3                # 3 to the power of 3
9^(1 / 2)          # another way to write the square root
1.2                # decimal numbers are written with a dot
# 1,2              # German notation (decimal comma) is a syntax error in R;
                   # it is kept commented out so that the script can still be
                   # sourced as a whole. Type it at the console to see the error.
factorial(3)       # 3!
1 == 1 + 1         # '==' compares values. Gives FALSE or TRUE as an answer
# Also possible:
1 < 1 + 1
1 > 1 + 1
1 != 1 + 1
1 <= 1 + 1
1 >= 1 + 1

## With these commands you will find more information about available functions:
help("log")            # opens the manual page with information on the
                       # built-in function "log"
??sin                  # If you don't know the exact function name, but are
                       # searching for information about a topic
# help.start() opens a browser with an HTML page linking to the manuals.
# It is only meaningful in an interactive session, so it is guarded here.
if (interactive()) {
  help.start()
}
help.search("t-Test")  # Lists all help entries that have something to do
                       # with 't-Test'

#########################
## Now try it yourself:
# 1) Discover how to calculate the sum and the product of the numbers from
#    1 to 100 without the operators (+ or *)
#    hint: check "help(sum)" and have a look at the examples
#
# 2) Compute the third root of 27.
#    (hint: check the root laws - how to transform a root into the power
#    of a number)
#
#
# 3) Check if the logarithm laws are true and calculate e.g.
log(4 * 8, 2) == log(4, 2) + log(8, 2)
# Note: '==' on floating-point results can fail because of rounding;
# all.equal() is the tolerant way to compare such numbers.

#########################
## Associating values to variables
#
a <- 3       # variable 'a' is hereby set to the value 3
a
b <- 4
b
a + b
b = 5        # '=' also assigns a value ('<-' is the preferred style)
b
4 -> b       # right-hand assignment is also possible, but confusing
b

# Exercise:
# 4) Divide 36 by 4 and store the result in a variable called "v",
#    create a new variable "w" with the value 12,
#    divide "v" by 3 and multiply the result by "w"


#########################
## You can combine single values to a so-called "vector"
## use the command "c()"   # c stands for "concatenate" or "combine"

# A trailing comma inside c() (e.g. c(1, 2, 4, -1, )) raises an error,
# so the last element must not be followed by a comma.
test_vector <- c(1, 2, 4, -1)   # vector of whole numbers (class "numeric")
test_vector
x <- c(1, 2.3, pi, 3^6)         # also possible
x
class(x)                        # What is the type of my object?

y <- c(c(1, 2), c(3, pi, 4))    # c() can also combine vectors
y
a <- c(1, 2)                    # also possible with pre-defined vectors
b <- c(3, pi, 4)
d <- c(a, b)
d == y                          # element by element: 'd' holds the same values as 'y'

z <- c("Es", "können", "auch", "Texte zu einem Vector verbunden werden")
z                               # character strings can be combined as well
class(z)                        # Type of z is 'character'

# Basic arithmetic operations with a vector
4 * c(1, 2, 4)                  # is interpreted as (4*1, 4*2, 4*4)
c(1, 2, 4) + c(2, 3, 5)         # is interpreted as (1+2, 2+3, 4+5)
c(1, 2) + c(2, 3, 1, 5)         # the shorter vector is recycled:
                                # interpreted as c(1,2,1,2) + c(2,3,1,5)

## creating sequences
1:10                            # identical to c(1,2,3,4,5,6,7,8,9,10)
(1:5) * 2                       # identical to c(1,2,3,4,5)*2
rep(3, 5)                       # identical to c(3,3,3,3,3); 'rep' stands for 'replicate'
rep(c(2, 7), 3)                 # identical to c(2,7,2,7,2,7); replicate c(2,7) three times
seq(from = 0, to = 10, by = 0.1)  # means "create a sequence from 0 to 10 with step width 0.1"

# Exercise:
# 5) Compute the multiplication table for 7 up to 7000
# 6) Compute the square root for all natural numbers from 1 to 50
sqrt(1:50)

## Comparisons
x <- c(1, 3, 5, 3)
#
x == 3    # This checks - component by component -
          # whether or not the elements of the vector x are equal to 3
x > 3
x != 5

# Exercise:
# 7) Please sort the following expressions by their magnitude:
#    natural logarithm of 2, logarithm of 3 to the base 10, and logarithm of 1.5 to the
#    base 2

## Indexing of vectors
x <- c(3:6)
x
x[4]                              # 4th element of vector x
x[c(2, 4)]                        # 2nd and 4th element;
                                  # x[2,4] is only valid for matrices
x[c(FALSE, TRUE, FALSE, TRUE)]    # equal to x[c(2,4)]
                                  # indexing with a TRUE/FALSE vector
x[x > 4]                          # works since x>4 leads to (FALSE,FALSE,TRUE,TRUE)
x[x >= 4]                         # larger or equal
x[x <= 4]                         # smaller or equal
x[x > 5 | x < 4]                  # larger than 5 or smaller than 4
x[x <= 5 & x > 3]                 # smaller than or equal to 5 and larger than 3

# Indexing with TRUE/FALSE can be very useful, as can be seen in the following example:
size <- 1.70 + seq(from = 0.01, to = 0.1, by = 0.01)
Location <- rep(c("Kathmandu", "Bangkok"), 5)
size[Location == "Bangkok"]
size[Location == "Bangkok" & size < 1.75]

# Exercise:
# 8) Select all fish coming from Kathmandu with a size of at least 1.75


#########################
## creating plots
x <- seq(from = 0, to = 1, by = 0.1)
plot(x, col = "red")
plot(x, x^2, col = "red")
plot(x, x^2, col = "red", pch = 16)     # point character: 16 stands for filled circles
plot(x, x^2, col = "red", type = "l")   # plot type: lines instead of points
plot(sin, from = -3, to = 3)            # plot the sine function between -3 and 3
abline(v = 2)                           # adds a vertical line through (2,0)
abline(h = 1)                           # adds a horizontal line through (0,1)

# Exercise:
# 9) Plot the function f(x)=3x^2-2x-4 in the range from -1 to 5


#########################
## Mean, variance and standard deviation of a sample
x <- c(4:6, 0, -5)
x
length(x)    # number of samples
sum(x)       # 4+5+6+0-5
mean(x)      # mean: sum(x)/length(x)
             # (4+5+6+0-5)/5
var(x)       # variance: sum( (x-mean(x))^2 ) / (length(x)-1)
             # ( (4-2)^2+(5-2)^2+(6-2)^2+(0-2)^2+(-5-2)^2 )/4
sd(x)        # standard deviation: sqrt( var(x) )
median(x)    # median

# Exercise:
#
# 10) Let's make a small statistic about the age distribution within this course.
#     Age <- c( , , , , , , , )
#     Do a summary statistic
#     Calculate the median, the mean, and the standard deviation
#     Plot a histogram, a boxplot and a pie chart
#
#
#
# The following paragraph is from the paper of
# Hui Liu et al.: “Feature Selection Combined with Neural Network Structure Optimization for HIV-1 Protease
# Cleavage Site Prediction,” BioMed Research International, vol. 2015, Article ID 263586, 11 pages, 2015.
# doi:10.1155/2015/263586
### After reading, please answer and discuss the questions below.
##
# Acquired immune deficiency syndrome (AIDS) is still a severe disease which mostly causes patient’s death
# during its terminal period. Most patients suffer from this disease because they are infected by HIV-1.
# Although many researches and investigations have been implemented, medicines or methods to entirely
# cure AIDS have not been found. However, there are some methods to relieve patient’s ailment by medicines
# or therapies. HIV-1 protease inhibitor is such a kind of medicine that can be used to treat AIDS.
# HIV-1 protease is an enzyme which plays an important role in the replication progress.
# It cleaves proteins to smaller peptides, and these peptides are used to make up some important proteins
# that are essential for the replication of HIV-1 [1]. Thus inhibition of this protease
# is a reliable method to interfere the virus reproduction.
# HIV-1 protease inhibitor is a small molecule that can tightly bind to HIV-1 protease at the active cleavage
# sites, so that substrates which should normally be cleaved cannot bind to the protease.
# Normally, the protease binds with a protein in octapeptide length and cleaves it at the scissile bond.
# It is quite important to find which amino acid sequences can be cleaved, that is, the specificity of the protease.
# Also a good concept of which residues play more important roles in the cleavage progress is necessary.
# [....]
# Understanding the specificity of HIV-1 protease can help human beings design effective protease inhibitor to treat AIDS.
# Judging whether a peptide can be cleaved by HIV-1 protease is the key point, and machine learning is an economical solution for
# solving this problem since machine learning methods can be used here to predict whether octapeptides are cleavable
# for the protease.
#
# Exercise 11)
# Why is the task of computational HIV-1 protease cleavage site prediction such a big problem - keeping researchers
# busy for two decades now?
# Why not start an international initiative (like the Human Genome Project) and simply test all possible
# combinations of amino acids experimentally?
# Please calculate
#   a) The number of possible amino-acid sequences to be tested.
#   b) The estimated costs for the synthesis of these peptides (calculate with roughly 50 US$ per amino acid).
#   c) Compare this amount of money with the total German gross domestic product (Bruttoinlandsprodukt) of 2015.
#
#
#
# Exercise 12)
# Please reproduce the calculation from the goat problem (Monty Hall problem).
# Compute the empirical probability of winning and losing for the two strategies "keeping" and "changing" the door.

# Exercise 13)
# Example of an exploratory analysis
# Please go - row by row - through the following example and try to retrace what was done.
# Understand the commands and add comments to the analysis

# Load the MASS package and its 'birthwt' data set, then inspect it.
library(MASS)
data(birthwt)
help("birthwt")     # manual page describing the data set and its columns
str(birthwt)        # structure: column names, types and first values
dim(birthwt)        # number of rows and columns

# Look at the 'age' column: frequencies, summary statistics and plots.
table(birthwt$age)
summary(birthwt$age)
boxplot(birthwt$age)
boxplot(birthwt$age, main = "Boxplot of Mothers Age")

type.freq <- table(birthwt$age)   # frequency of every distinct age value
type.freq
barplot(type.freq, main = "Barplot of age frequencys ")
sd(birthwt$age)                   # standard deviation of 'age'

# Replace the short original column names by more descriptive ones.
# The new names are assigned by position, so their order must match
# the order of the columns printed by the first colnames() call.
colnames(birthwt)
colnames(birthwt) <- c(
  "birthwt.below.2500", "mother.age", "mother.weight", "race",
  "mother.smokes", "previous.prem.labor", "hypertension",
  "uterine.irr", "physician.visits", "birthwt.grams"
)
head(birthwt)

# Recode numerically coded columns as labelled factors.
# 'race' holds the codes 1/2/3, which are used directly as an index into the
# label vector; the 0/1 indicator columns are shifted by +1 so that
# 0 -> "No" and 1 -> "Yes".
birthwt$race <- factor(c("white", "black", "other")[birthwt$race])
birthwt$mother.smokes <- factor(c("No", "Yes")[birthwt$mother.smokes + 1])
birthwt$uterine.irr <- factor(c("No", "Yes")[birthwt$uterine.irr + 1])
birthwt$hypertension <- factor(c("No", "Yes")[birthwt$hypertension + 1])

summary(birthwt)    # summary of all columns after renaming and recoding

# 'race' is a factor now, so plot() draws the counts per level.
plot(birthwt$race)
title(main = "Count of Mother's Race in Springfield MA, 1986")

# A single numeric vector is plotted against its index (row order).
plot(birthwt$mother.age)
title(main = "Mother's Ages in Springfield MA, 1986")

# Scatter plot: birth weight (y axis) against the mother's age (x axis).
plot(birthwt$mother.age, birthwt$birthwt.grams)
title(main = "Birth Weight by Mother's Age in Springfield MA, 1986")