####
# Exercise day 2
# Nr 1)
# The prevalence of HIV infection in Germany is about 0.1 percent (1 out of 1000 people is infected).
# Assume the blood sample of a random person tested positive for HIV (via ELISA with an error rate of 1.5%).
# Can you calculate (via a thought experiment) the chance that this person is NOT infected, i.e. that the test is a false positive?
# Imagine 1.000.000 people doing this test.
# How many people will (statistically) really be infected with HIV?
# How many people will receive a positive test even though they are not infected?
# Compare the Number of false-positives with the number of true positives
#
#
# In the following are some important functions you will need very often in the context of statistical analyses
## Mean, variance and standard deviation of a sample
# Worked example: five values whose mean is (4 + 5 + 6 + 0 - 5) / 5 = 2.
x <- c(4:6, 0, -5)
x
length(x) # number of values in the sample
sum(x)    # 4 + 5 + 6 + 0 - 5 = 10
mean(x)   # arithmetic mean: sum(x) / length(x)
          # (4 + 5 + 6 + 0 - 5) / 5 = 2
var(x)    # sample variance: sum((x - mean(x))^2) / (length(x) - 1)
          # ((4-2)^2 + (5-2)^2 + (6-2)^2 + (0-2)^2 + (-5-2)^2) / 4 = 20.5
sd(x)     # sample standard deviation: sqrt(var(x))
median(x) # median: the middle value of the sorted sample
# Exercise: #
# 2) Let's compile some small statistics about the age distribution within this course.
#
# Course data: one entry per participant, in the same order in every vector.
Age <- c(12, 12, 14, 13, 15, 16, 17, 18)
# `size` is to be filled in during the course (presumably the body height of
# each participant, as needed for the t-test exercise - confirm).
# A call like c(, , ) with empty arguments fails at run time
# ("argument 1 is empty"), so the vector starts out as NA placeholders.
# Replace it with c(<value 1>, <value 2>, ...), one value per participant.
size <- rep(NA_real_, length(Age))
Gender <- c("M", "F", "M", "F", "M", "F", "F", "F")
table(Gender)             # number of participants per gender
Frau <- Gender == "F"     # logical mask: TRUE for every woman
Alter_Frauen <- Age[Frau] # ages of the women only
# Do a summary statistic
# Calculate the median, the mean, and the standard deviation
# Plot a histogram, a boxplot and pie-chart
# Are the men in this course - in general - taller than the women?
# Use the t-test to answer this question.
#
###
# 3)
# Fisher's exact test
# Let's do an example case:
# In a study on the efficacy of a new sleeping drug, the following cross table gives the total counts of the
# answers to the question:
# Did you sleep better after taking these pills?
#
# Builds a 2x2 contingency table; matrix() fills it column by column,
# so column "A" holds 13 and 15 and column "B" holds 58 and 67.
x <- matrix(c(13, 15, 58, 67), nrow = 2, ncol = 2)
dimnames(x) <- list(c("Men", "Woman"), c("A", "B"))
x
# Question: Is there a difference between men and women?
# Run Fisher's exact test and inspect the output
# (p-value, odds ratio estimate and its confidence interval).
resultat <- fisher.test(x)
resultat # print the test result; the assignment alone displays nothing
#########################
#########################
##
## creating random numbers
#
# the Gaussian distribution (Normalverteilung) is the most important distribution
# 4)
# Have a look at it and take random samples
# dnorm() is the command to show the probability-density function
# rnorm() generates random numbers
plot(dnorm, from = -3, to = 3) # probability density function (dnorm defaults: mean 0, sd 1)
x <- rnorm(10) # draw 10 numbers from a Gaussian distribution
x
y <- rnorm(1000) # draw 1000 numbers from a Gaussian distribution
hist(y) # histogram of y with the default bins
# Finer subdivision (bin width 0.2). hist() stops with an error when a value
# lies outside the breaks, and with 1000 draws a value beyond +/-4 does occur
# now and then, so the breaks are widened to cover the actual range of y.
# Without such an outlier the breaks are exactly seq(-4, 4, by = 0.2).
breaks_fine <- seq(from = floor(min(y, -4)), to = ceiling(max(y, 4)), by = 0.2)
hist(y, breaks = breaks_fine, col = "orange")
# This is a simulation of rolling a die 10 times:
z <- 1:6 # the six faces of the die; must exist before sample() is called
s <- sample(z, size = 10, replace = TRUE) # draw 10 values at random, with replacement
s
mean(s) # mean of the sample
sd(s)   # standard deviation of the sample
sample(z, size = 10, replace = TRUE) # another series of 10 rolls
# Exercise 5)
# Generate a random sequence of 50 values in the range of 200 to 1000 without putting them back
## 6)
# Take a sample of n=10 Gaussian-distributed values and calculate the mean, median and standard deviation
# Repeat this experiment several times for
# n=10
# n=100
# and
# n= 100000
# 7) Take 100 values from a gaussian distribution with mean 1.3 and variance of 7.2
# calculate the mean and the standard deviation of this sample.
# plot a histogram in the color "skyblue" and insert median and +/- standard deviation into the histogram.