###############################################################
###############################################################
##
# Rows starting with the hash sign (#) are comments

## First steps with R

## Basic arithmetic operations - R as a calculator

1 + 2
3 + 2 * 2         # R knows the operator precedence rules ('Punkt vor Strich':
                  # multiplication and division come before addition)
(3 + 2) * 2       # If you want to add the first two values first, use brackets
exp(1)            # 'exp()' is the exponential function
log(5)            # 'log()' is the natural logarithm
exp(log(5))       # 'exp()' undoes 'log()': you get the "original" number again
log(8, base = 2)  # Logarithm of 8 to base 2
sqrt(9)           # Square root
3^3               # 3 to the power of 3
9^(1 / 2)         # Another way to write the square root
1.2               # Decimal numbers are written with a dot
# 1,2             # German notation (decimal comma) is a syntax error in R.
                  # It is kept commented out so that the script can still be
                  # parsed; type it at the console to see the error message.
factorial(3)      # 3!

1 == 1 + 1        # '==' compares values and gives FALSE or TRUE as an answer
# Also possible:
1 < 1 + 1
1 > 1 + 1
1 != 1 + 1
1 <= 1 + 1
1 >= 1 + 1

## With these commands you will find more information about available
## functions:

help("log")       # Opens the manual page of the built-in function "log"
??sin             # If you don't know the exact function name but are
                  # searching for information about a topic
# help.start() opens the browser with the online manual. It is only useful in
# an interactive session, so it is skipped when the script runs in batch mode.
if (interactive()) help.start()
help.search("t-Test")  # Lists all commands concerning 't-Test'

###########################################################################
# Exercise:
# Open the script "Basics_in_R.R"
# Now try it yourself (or in teams)
# Write the solution to the question directly in the script

# 1) Discover how to calculate a) the sum and b) the product of the numbers
#    from 1-100 without using the operator (+) more than once
#    Hint: check "help(sum)" and have a look at the examples
# a)
# b)

# 2) Compute the third root of 27.
#    (Hint: check root laws - how to transform a root into the power of a
#    number)

# 3) Check if the logarithm laws are true and calculate e.g.
log(4 * 8, 2) == log(4, 2) + log(8, 2)
log((4 / 8), 2) == log(4, 2) - log(8, 2)
log((4^8), 2) == 8 * log(4, 2)
log(8^(1 / 4), 2) == 1 / 4 * log(8, 2)

# '==' compares floating-point numbers exactly, so a law that holds
# mathematically may still be reported as FALSE because of rounding.
# Print both sides with only 2 significant digits to compare them by eye.
# The previous option values are saved and restored afterwards so that the
# reduced precision does not affect the output of the rest of the script.
old_options <- options(digits = 2)
log(8^(1 / 4), 2)
1 / 4 * log(8, 2)
options(old_options)

# The recommended way to compare numeric results is with a tolerance:
isTRUE(all.equal(log(8^(1 / 4), 2), 1 / 4 * log(8, 2)))

###########################################################################
## Associating values to variables
#
a <- 3  # variable 'a' is herewith set to value '3'
a
b <- 4
b
a + b

###########################################################################
# Exercise:
# Divide 36 by 4 and store the result in a variable called "v"
# Create a new variable "w" with the value 12
# Divide "v" by 3 and multiply the result with "w"

###########################################################################
## You can combine single values to a so-called "vector"
## Use the command "c()"  # c stands for "concatenate" or "combine"

test_vector <- c(1, 2, 4, -1, 1)  # vector of whole numbers (stored as double)
test_vector
x <- c(1, 2.3, pi, 3^6)           # also possible
x
class(x)                          # What is the type of my object?
y <- c(c(1, 2), c(2, 3))  # c() can also combine vectors
y
z <- c("You", "can", "also", "combine words")
z
#
class(z)                  # Type of z is 'character'

###########################################################################
# Exercise:
# Please combine the "test_vector" and "x" into a new vector named "cat"

###########################################################################
# Exercise:
# Find the reasons for the error and correct.
# Each of the three lines below contains one deliberate syntax error. They are
# kept commented out because otherwise R could not parse the rest of the
# script. Uncomment ONE line at a time, run it, read the error message and
# fix the line.
# z <- c( "Where" "is" "the mistake")
# z <- c("Where", "is", "the", "second", mistake")
# z <- ("Where", "is", "the", "third", "mistake")
###########################################################################

# Basic arithmetic operations with a vector
4 * c(1, 2, 4)              # is interpreted as (4*1, 4*2, 4*4)
c(1, 2, 4) + c(2, 3, 5)     # is interpreted as (1+2, 2+3, 4+5)
c(1, 2) + c(2, 3, 1, 5)     # is interpreted as c(1,2,1,2) + c(2,3,1,5):
                            # the shorter vector is recycled
#
## Creating sequences
1:10                        # identical to c(1,2,3,4,5,6,7,8,9,10)
(1:5) * 2                   # identical to c(1,2,3,4,5)*2
rep(3, 5)                   # identical to c(3,3,3,3,3);
                            # 'rep' stands for 'replicate'
rep(c(2, 7), 3)             # identical to c(2,7,2,7,2,7);
                            # replicate c(2,7) three times
seq(from = 0, to = 10, by = 0.1)  # means "create a sequence from 0 to 10
                                  # with step width 0.1"

###########################################################################
# Exercise:
# Compute the multiplication table for 7 up to 7000
# Compute the square root for all natural numbers from 1 to 50

###########################################################################
## Comparisons
x <- c(1, 3, 5, 3)
#
# The following commands check - component by component -
# whether or not the elements of the vector x are ...
x == 3  # equal to 3
x > 3   # larger than 3
x != 5  # or unequal to 5

###########################################################################
# Exercise:
# Please sort the following expressions by their magnitude:
# natural logarithm of 2, logarithm of 3 to the base 10, and logarithm of
# 1.5 to the base 2
# Discover how to automatically sort the following values in descending
# order:
# 2, 5, 10, 2, 50, 100, 7, 8, 9

###########################################################################
## Indexing of vectors

x <- 3:30
x
x[4]                # 4th element of vector x
x[c(2, 4)]          # 2nd and 4th element; x[2,4] is only valid for matrices
x[x > 4]            # all elements from the vector larger than 4
x[x >= 4]           # larger or equal
x[x <= 4]           # smaller or equal
x[x > 15 | x < 6]   # larger than 15 or smaller than 6
x[x <= 5 & x > 3]   # smaller than or equal to 5 and larger than 3

# Indexing can be very useful as can be seen in the following example:
# creating a dummy dataset of fish coming from either Kathmandu or Bangkok
size <- 1.70 + seq(from = 0.01, to = 0.1, by = 0.01)  # creating 10 sizes
Location <- rep(c("Kathmandu", "Bangkok"), 5)         # creating locations
Location == "Bangkok"         # logical vector: TRUE where the fish is from Bangkok
size[Location == "Bangkok"]   # sizes of the fish from Bangkok only

###########################################################################
# Exercise:
# Select all fish coming from Kathmandu with a size of at least 1.75

###########################################################################
# Some important commands on vectors
# Assume you collected some ants with the following weights (in mg):
v <- c(13:19, 12, 13, 18, 7, 5, 10, 8, 3, 16, 6, 8, 16)
v
length(v)       # returns the length of the vector v
                # -> the number of all collected ants
rev(v)          # returns the reversed vector
sort(v)         # returns the sorted vector (lightest to heaviest ant)
duplicated(v)   # marks elements that already occurred earlier in the vector
                # (ants with the same weight as a previous ant)
unique(v)       # returns the vector without repeated elements
                # (every weight only once)
which(v > 13)   # returns the INDICES of the elements fulfilling the condition
                # (positions of all ants heavier than 13 mg);
                # use v[v > 13] to get the values themselves
which(v == 13)  # returns the indices of all values fulfilling the condition
                # (positions of all ants with exactly 13 mg)
which.max(v)    # returns the index of the maximum (first such index)
                # (heaviest ant of all)
which.min(v)    # returns the index of the minimum (first such index)
                # (lightest ant of all)

## Example of receiving the indices for elements of interest
# Please create the index vector for the occurrence of "needle" in haystack,
# extract the positions and calculate the total number of needles
haystack <- c(
  1, 2, 4, 3, 4, 5, 7, 10, 34, 2, 4, 6, 7,
  3, 6, 8, 3, 9, 4, 6, 0, 1, 10, 100, 5, 4
)
needle <- 4

##############
# Matrices
#
# Matrices are usually created with the matrix() function, by converting a
# vector into a matrix or by binding vectors together
m <- matrix(data = 1:8, nrow = 4, ncol = 2)
m
matrix(1:8, 4, 2)  # Same as matrix(data = 1:8, nrow = 4, ncol = 2)

## Indexing a matrix
# Remember: indexing is first "row", then "column"
m[3, 2]  # Entry in the third row and second column.
m[2, ]        # Second row (since the field behind the comma is empty (no
              # column specified), the whole row with all columns is taken)
m[, 2]        # Second whole column
m[2:3, 1:2]   # Submatrix

## Creating plots
x <- seq(from = 0, to = 1, by = 0.1)
plot(x)
plot(x, col = "red")
plot(x, x^2, col = "red")
plot(x, x^2, col = "red", pch = 16)    # point character: 16 draws filled circles
plot(x, x^2, col = "red", type = "l")  # plot type: lines instead of points
plot(sin, from = -3, to = 3)  # Plot the sine function between -3 and 3
abline(v = 2)                 # Adds a vertical line through (2, 0)
abline(h = 1)                 # Adds a horizontal line through (0, 1)

###########################################################################
# Exercise:
# Plot the function f(x) = 3x^2 - 2x - 4 in the range from -10 to 10
# Hint: the range is meant with respect to the variable "x"

###########################################################################
###########################################################################
# PART 2
###########################################################################
###########################################################################
###########################################################################
###########################################################################
#
# In the following are some important functions you will need very often in
# the context of statistical analyses

## Mean, variance and standard deviation of a sample

x <- c(4:6, 0, -5)
x
length(x)   # number of samples
sum(x)      # 4+5+6+0-5
mean(x)     # mean: sum(x)/length(x)
var(x)      # sample variance
sd(x)       # standard deviation: sqrt( var(x) )
median(x)   # median
summary(x)

###########################################################################
##
## Creating random numbers
#
# This is a simulation of rolling a die n_rolls times
sides <- 1:6       # the pips of a die
n_rolls <- 100000  # number of simulated rolls
# Draw n_rolls values from 'sides' in random order; replace = TRUE puts each
# drawn value back, so every roll can again show any of the six sides
s <- sample(sides, size = n_rolls, replace = TRUE)
head(s, 20)        # show only the first rolls instead of all 100000 values
hist(s)

# The Gaussian distribution (Normalverteilung, normal distribution) is the
# most important distribution
# dnorm() is the command for the probability-density function
# rnorm() generates random numbers from a normal distribution
v <- rnorm(1000, 3, .25)  # Generates 1000 random numbers from a normal
                          # distribution with mean 3 and sd = .25
plot(v)
summary(v)
plot(density(v))

###########################################################################
# Exercise
# a) Plot the probability-density function in the range of -3 to 3
# b) Generate 1000 random numbers from a Gaussian distribution and plot the
#    values using the plot() command
# c) Sort the values and plot them again
# d) Plot a histogram of the 1000 random numbers
# e) Re-plot the histogram - in orange and with a finer subdivision
#    (intervals of 0.2) and
# f) Generate 1000 numbers from a normal distribution with mean 3 and sd=.25
###########################################################################

###########################################################################
# Exercise
# Take a sample of n=10 Gaussian distributed values and calculate mean,
# median and standard deviation via the summary() function and plot a
# histogram
# Repeat this experiment for 100 and 100000 values. -> What can you observe?

###########################################################################
# Exercise
# I made a small statistic about a random size distribution within a group
# of people.
# 30 people from the last Biomedicine class were asked for height and gender.
# Are the men in this example - statistically - taller than the women?
# Use the t-test [command "t.test(x,y)"] to answer this question.
# Discuss the result: What are t-value and p-value telling you?
# Heights (one value per person) and the matching gender labels
# ("F" or "M"); element i of both vectors belongs to the same person.
height <- c(
  170, 162, 158, 166, 170, 170, 161, 173, 168, 160,
  162, 192, 186, 196, 170, 172, 160, 171, 170, 160,
  192, 172, 187, 168, 160, 175, 174, 186, 170, 192
)
Gender <- c(
  "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
  "F", "M", "M", "M", "F", "F", "F", "F", "F", "F",
  "M", "M", "M", "F", "F", "F", "F", "M", "F", "M"
)
# Logical indexing below silently gives wrong results if the two vectors do
# not line up, so fail early when an entry was lost while editing the data.
stopifnot(length(height) == length(Gender))

Frau <- Gender == "F"         # TRUE for every woman
# define "Mann"
size_Frauen <- height[Frau]   # heights of the women only
# define "size_Maenner"

# Do a summary statistic (calculate the median, the mean, and the standard
# deviation of the heights of women and men).
summary(size_Frauen)
table(Gender)                 # number of persons per gender

# create "size_Maenner"

# Plot histograms and boxplots (use the command hist()). Create one histogram
# for women, one for men and one pooled.

# Perform a t.test to check if the height of men significantly differs from
# the height of women
#

###
# Let's do another example:
# A study was performed to compare the efficiency of a new sleep drug between
# men and women.
# The following cross-table contains answers to the question:
# Did you sleep better after taking these pills?
# Result: men: 13 x yes, 58 x no. Women: 15 x yes, 67 x no

# matrix() fills column by column: first the YES column (13, 15), then the
# NO column (58, 67).
Drug_test <- matrix(c(13, 15, 58, 67), nrow = 2, ncol = 2)  # 2x2 contingency table
dimnames(Drug_test) <- list(c("Men", "Woman"), c("YES", "NO"))
Drug_test

# Do a Fisher's exact test and discuss the result.