###############################################################
###############################################################
##
# Lines starting with the hash sign (#) are comments

## First steps with R
##  Basic  arithmetic operations - R as a calculator 
1 + 2
3 + 2 * 2          # R follows operator precedence: multiplication before addition
(3 + 2) * 2        # use parentheses to force the addition to happen first
exp(1)             # 'exp()' is the exponential function
log(5)             # 'log()' is the natural logarithm
exp(log(5))        # exp() undoes log(): the result is 5 again
log(8, base = 2)   # logarithm of 8 to base 2
sqrt(9)            # square root
3^3                # 3 to the power of 3
9^(1 / 2)          # another way to write the square root
1.2                # decimal numbers are written with a dot
# The German decimal comma is a syntax error in R. The line is kept commented
# out so that the script can still be parsed/sourced as a whole; uncomment it
# and run it to see the error message.
# 1,2
factorial(3)       # 3! = 6


1 == 1 + 1     # '==' tests two values for equality; the answer is TRUE or FALSE
# The other comparison operators work the same way:
1 < 1 + 1
1 > 1 + 1
1 != 1 + 1
1 <= 1 + 1
1 >= 1 + 1

## With these commands you will find more information about available functions:

help("log")    # opens the manual page with information on the built-in function "log"
??sin          # use this if you don't know the exact function name but are searching
               # for information about a topic
sin            # NOTE(review): a bare function name only prints the function object;
               # this line looks like a leftover that split the comment above - confirm
help.start()   # opens the browser with the online manual
help.search("t-Test") # lists all help entries concerning 't-Test'


###########################################################################
# Exercise: 
# Open the script "Basics_in_R.R" 
# Now try it yourself (or in teams)
# write the solution to the question directly in the script 

# 1) Discover, how to calculate a) the sum and b) the product of the numbers from 1-100 
# without using the operator (+) more than once
# hint: check "help(sum)" and have a look at the examples 


# a)

# b)

# 2) Compute the third root of 27.
# (hint: check root laws - how to transform a root into the power of a number)

# 3) Check whether the logarithm laws hold, e.g. log(4*8,2)==log(4,2)+log(8,2)
# Exact comparison with '==' can report FALSE for mathematically equal
# floating-point results that differ only by rounding error.
log(4 * 8, 2) == log(4, 2) + log(8, 2)
log(4 / 8, 2) == log(4, 2) - log(8, 2)
log(4^8, 2) == 8 * log(4, 2)
log(8^(1 / 4), 2) == 1 / 4 * log(8, 2)

# Tolerant comparison: all.equal() treats values as equal when they differ
# only by a tiny numerical tolerance; isTRUE() turns the answer into TRUE/FALSE.
isTRUE(all.equal(log(8^(1 / 4), 2), 1 / 4 * log(8, 2)))

# Print both sides rounded to 2 significant digits. options() returns the
# previous settings, which are restored afterwards so that the rounding does
# not affect the output of the rest of the script.
old_opts <- options(digits = 2)
log(8^(1 / 4), 2)
1 / 4 * log(8, 2)
options(old_opts)

###########################################################################
## Associating values to variables
#
a <- 3    # '<-' assigns the value 3 to the variable 'a'
a         # typing the name prints the stored value
b <- 4
b
a + b     # variables can be used in calculations like plain numbers

#####################################################################################################
#Exercise: 
# Divide 36 by 4 and store the result in a variable called "v"
# create a new variable "w" with the value 12
# divide "v" by 3 and multiply the result by "w" 


####################################################################################################
## You can combine single values to a so-called "vector"
## use the command "c()"
# c stands for"concatenate" or "combine"

test_vector <- c(1, 2, 4, -1, 1)    # numeric vector holding whole numbers
test_vector
x <- c(1, 2.3, pi, 3^6)             # constants and expressions can be mixed
x
class(x)                            # what is the type of my object?

y <- c(c(1, 2), c(2, 3))            # c() can also combine whole vectors
y


z <- c("You", "can", "also", "combine words")
z
class(z)                            # the type of z is 'character'

#####################################################################################################
#Exercise: 
# Please combine the "test_vector" and "x" into a new vector named "cat"


#########################################################################################
# Exercise:
# Find the reason for each error and correct it.
#
# Each of the three statements below contains one deliberate syntax error.
# They are kept commented out so that the rest of the script can still be
# parsed/sourced. Uncomment one statement at a time, run it, read the error
# message and fix the statement.

# z <- c( "Where" "is" "the mistake")

# z <- c("Where", "is", "the", "second", mistake")

# z <- ("Where", "is", "the", "third", "mistake")
#####################################################################################################
# Arithmetic on vectors works element by element

4 * c(1, 2, 4)            # evaluated as (4*1, 4*2, 4*4)
c(1, 2, 4) + c(2, 3, 5)   # evaluated as (1+2, 2+3, 4+5)
c(1, 2) + c(2, 3, 1, 5)   # the shorter vector is recycled: c(1,2,1,2) + c(2,3,1,5)


## Creating sequences
1:10                      # same as c(1,2,3,4,5,6,7,8,9,10)
(1:5) * 2                 # same as c(1,2,3,4,5)*2
rep(3, 5)                 # same as c(3,3,3,3,3); 'rep' stands for 'replicate'
rep(c(2, 7), 3)           # same as c(2,7,2,7,2,7): c(2,7) replicated three times
seq(from = 0, to = 10, by = 0.1)  # "create a sequence from 0 to 10 with step width 0.1"

#####################################################################################################
# Exercise:
# Compute the multiplication table for 7 up to 7000


# Compute the square root for all natural numbers from 1 to 50 

#####################################################################################################

## Comparisons
x <- c(1, 3, 5, 3)
# Each of the following commands checks, element by element,
# whether the entries of the vector x are ...
x == 3              # ... equal to 3
x > 3               # ... larger than 3
x != 5              # ... different from 5

#####################################################################################################
# Exercise:
  # Please sort the following expressions by their magnitude: 
  # natural logarithm of 2, logarithm of 3 to the base 10, and logarithm of 1.5 to the base 2

# Discover how to automatically sort the following values in descending: 
# 2, 5, 10, 2,  50, 100, 7, 8, 9


#####################################################################################################

## Indexing of vectors
x <- 3:30
x
x[4]                   # the 4th element of the vector x
x[c(2, 4)]             # the 2nd and the 4th element; x[2,4] is only valid for matrices
x[x > 4]               # all elements larger than 4
x[x >= 4]              # larger than or equal to 4
x[x <= 4]              # smaller than or equal to 4
x[x > 15 | x < 6]      # larger than 15 or smaller than 6
x[x <= 5 & x > 3]      # smaller than or equal to 5 and larger than 3


# Logical indexing in practice:
# build a dummy dataset of fish that come from either Kathmandu or Bangkok
size <- 1.70 + seq(from = 0.01, to = 0.1, by = 0.01)   # ten sizes
Location <- rep(c("Kathmandu", "Bangkok"), times = 5)  # ten alternating locations
Location == "Bangkok"          # TRUE wherever the fish comes from Bangkok
size[Location == "Bangkok"]    # sizes of the Bangkok fish only

#####################################################################################################
# Exercise: 
# Select all fish coming from Kathmandu with a size of at least 1.75


#####################################################################################################

# Some important commands for vectors
# Assume you collected some ants with the following weights (in mg):
v <- c(13:19, 12, 13, 18, 7, 5, 10, 8, 3, 16, 6, 8, 16)
v
length(v)       # length of the vector v -> number of collected ants
rev(v)          # the vector in reversed order
sort(v)         # the sorted vector (lightest to heaviest ant)
duplicated(v)   # marks elements that already occurred earlier (ants sharing a weight)
unique(v)       # the vector without repeated elements (each weight only once)
which(v > 13)   # indices of the elements fulfilling the condition (ants heavier than 13 mg)
which(v == 13)  # indices of the elements fulfilling the condition (ants of exactly 13 mg)
which.max(v)    # index of the maximum, first such index (heaviest ant)
which.min(v)    # index of the minimum, first such index (lightest ant)

## Example: obtaining the indices of the elements of interest
# Please create the index vector for the occurrences of "needle" in "haystack",
# extract the positions and calculate the total number of needles
haystack <- c(
  1, 2, 4, 3, 4, 5, 7, 10, 34, 2, 4, 6, 7,
  3, 6, 8, 3, 9, 4, 6, 0, 1, 10, 100, 5, 4
)
needle <- 4


##############
# Matrices
#
# Matrices are usually created with matrix(), i.e. by converting a vector into
# a matrix, or by binding vectors together
m <- matrix(data = 1:8, nrow = 4, ncol = 2)
m
matrix(1:8, 4, 2)   # the same call with positional instead of named arguments

## Indexing a matrix
# Remember: the index order is first "row", then "column"

m[3, 2]       # entry in the third row and second column
m[2, ]        # second row: the field behind the comma is empty (no column
              # specified), so the whole row with all columns is taken

m[, 2]        # the whole second column
m[2:3, 1:2]   # a submatrix (rows 2 to 3, columns 1 to 2)


## Creating plots
x <- seq(from = 0, to = 1, by = 0.1)
plot(x)
plot(x, col = "red")

plot(x, x^2, col = "red")
plot(x, x^2, col = "red", pch = 16)     # pch = point character: 16 means filled circles
plot(x, x^2, col = "red", type = "l")   # plot type "l": lines instead of points
plot(sin, from = -3, to = 3)            # plot the sine function between -3 and 3
abline(v = 2)                           # adds a vertical line through (2,0)
abline(h = 1)                           # adds a horizontal line through (0,1)

#####################################################################################################
# Exercise:
# Plot the function f(x)=3x^2-2x-4 in the range from -10 to  10
# Hint: the range is meant with respect to the variable "x"


#################################################################################
#################################################################################

# PART 2

#################################################################################
#################################################################################
#################################################################################
#################################################################################
#
# Below are some important functions that you will need very often in statistical analyses
## Mean, variance and standard deviation of a sample
x <- c(4:6, 0, -5)
x
length(x)    # number of samples
sum(x)       # 4+5+6+0-5
mean(x)      # mean: sum(x)/length(x)
sd(x)        # standard deviation: sqrt( var(x) )
median(x)    # median
summary(x)   # several summary statistics at once


#####################################################################################################
##
## Creating random numbers
#
# This is a simulation of rolling a die 100000 times
sides <- 1:6      # the pips of a die
# Draw 100000 values from 'sides'; replace = TRUE puts each drawn value back,
# so every roll can again show any of the six sides
s <- sample(sides, size = 100000, replace = TRUE)
head(s, 20)       # show only the first 20 rolls instead of printing all 100000 values
table(s)          # how often each side came up
hist(s)

# The Gaussian (normal) distribution is the most important distribution
# dnorm() is the probability-density function
# rnorm() generates random numbers from a normal distribution
v <- rnorm(1000, mean = 3, sd = 0.25)  # 1000 random numbers from a normal with mean 3 and sd 0.25
plot(v)
summary(v)
plot(density(v))
#####################################################################################################

#  Excercise 
# a) Plot the probability-density function in the range of -3 to 3

# b) generate 1000 random numbers from a gaussian distribution and plot the values using the plot() command

# c) sort the values and plot them again

# d) plot a histogram of the 1000 random numbers


# e) re-plot the histogram in orange and with a finer subdivision (intervals of 0.2)

# f) Generates 1000 numbers from a normal distribution with mean 3 and sd=.25


#####################################################################################################


#####################################################################################################
#  Excercise 
# Take a sample from n=10 gaussian distributed values and calculate mean, median and standard deviation
# via the summary() function and plot a histogram
# Repeat this experiment for 100 and 100000 values. -> what can you observe ? 


#####################################################################################################
#  Excercise 
# I made a small statistic about a random size distribution within a group of people.
#  30 people from the last Biomedicine class were asked for height and gender.
# Are the men in this example - statistically - taller than the woman ? 
# Use the t-test [command "t.test(x,y)"] to answer this question.
# discuss the result: What are t-value and p-value telling you ? 

height <- c(
  170, 162, 158, 166, 170, 170, 161, 173, 168, 160,
  162, 192, 186, 196, 170, 172, 160, 171, 170, 160,
  192, 172, 187, 168, 160, 175, 174, 186, 170, 192
)
Gender <- c(
  "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
  "F", "M", "M", "M", "F", "F", "F", "F", "F", "F",
  "M", "M", "M", "F", "F", "F", "F", "M", "F", "M"
)

Frau <- Gender == "F"   # TRUE for every woman ("F") in the sample
#  define "Mann"

size_Frauen <- height[Frau]   # heights of the women only

#  define "size_Maenner"

# Do a summary statistic (calculate the median, the mean, and the standard deviation of the heights of women and men).
summary(size_Frauen)
table(Gender)           # number of persons per gender
#  create "size_Maenner"

# Plot histogram, and boxplots (use the command hist()). Create one histogram for woman, one for man and one pooled)

# Perform a t.test to check if the height of men significantly differs from the height of women


#

###
# Let's do another example:
# A study was performed to compare the efficiency of a new sleep drug between men and women.
# The following cross table contains the answers to the question:
# Did you sleep better after taking these pills?
# Result: men: 13 x yes, 58 x no; women: 15 x yes, 67 x no
Drug_test <- matrix(
  c(13, 15, 58, 67),   # filled column by column: YES column first, then NO column
  nrow = 2,
  ncol = 2,
  dimnames = list(c("Men", "Woman"), c("YES", "NO"))
)                      # a 2x2 contingency table
Drug_test
# Do a Fisher's exact test and discuss the result.