####
# Exercise day 2

# Nr 1)
# The prevalence of HIV in Germany is about 0.1 percent
# (1 out of 1000 people is infected).
# Assume the blood sample of a random person tested positive for HIV
# (via ELISA with an error rate of 1.5%).
# Can you calculate (via a thought experiment) the chance that this person
# is NOT infected, i.e. that the result is a false positive?
# Imagine 1,000,000 people doing this test.
# How many people will (statistically) really be infected with HIV?
# How many people will receive a positive test even though they are not
# infected?
# Compare the number of false positives with the number of true positives.
#
#
# The following are some important functions you will need very often in
# the context of statistical analyses.

## Mean, variance and standard deviation of a sample
x <- c(4:6, 0, -5)
x
length(x)  # number of samples
sum(x)     # 4 + 5 + 6 + 0 - 5
mean(x)    # mean: sum(x) / length(x)   # (4 + 5 + 6 + 0 - 5) / 5
# Variance: sum((x - mean(x))^2) / (length(x) - 1)
# ((4 - 2)^2 + (5 - 2)^2 + (6 - 2)^2 + (0 - 2)^2 + (-5 - 2)^2) / 4
var(x)
sd(x)      # standard deviation: sqrt(var(x))
median(x)  # median

# Exercise:
#
# 2) Let's make a small statistic about the age distribution within this
#    course.
#
Age <- c(12, 12, 14, 13, 15, 16, 17, 18)
# Placeholder: replace the NAs with one measured value per participant, in
# the same order as Age and Gender (presumably the body height needed for
# the t-test question below). An empty c(, , ...) would stop the script with
# "argument 1 is empty", so NA is used until real values are entered.
size <- rep(NA_real_, length(Age))
Gender <- c("M", "F", "M", "F", "M", "F", "F", "F")
table(Gender)

# Logical mask selecting the female participants, then their ages
Frau <- Gender == "F"
Alter_Frauen <- Age[Frau]

# Do a summary statistic
# Calculate the median, the mean, and the standard deviation
# Plot a histogram, a boxplot and a pie chart
# Are the men in this course - in general - taller than the women?
# Use the t-test to answer this question.
#
###
# 3)
# Fisher's exact test
# Let's do an example case:
# Within a study comparing the efficiency of a new sleep drug, the following
# cross-table gives the total counts of the answers to the question:
# Did you sleep better after taking these pills?
#
x <- matrix(c(13, 15, 58, 67), nrow = 2, ncol = 2)  # builds a 2x2 table
dimnames(x) <- list(c("Men", "Woman"), c("A", "B"))
x
# Question: Is there a difference between men and women?
# Do the Fisher exact test and have an informative look at the output
resultat <- fisher.test(x)
resultat

#########################
#########################
##
## Creating random numbers
#
# The Gaussian (normal) distribution is the most important distribution
# 4)
# Have a look at it and take random samples
# dnorm() is the command to show the probability-density function
# rnorm() generates random numbers

plot(dnorm, from = -3, to = 3)  # probability-density function
x <- rnorm(10)                  # generate 10 numbers from a Gaussian distribution
x
y <- rnorm(1000)                # generate 1000 numbers from a Gaussian distribution
hist(y)                         # histogram of y

# Finer subdivision. hist() stops with an error if any value lies outside
# the breaks, so the default range -4..4 is widened to whole numbers that
# cover the most extreme draws whenever a sample falls beyond it.
y_breaks <- seq(
  from = floor(min(y, -4)),
  to = ceiling(max(y, 4)),
  by = 0.2
)
hist(y, breaks = y_breaks, col = "orange")

# This is a simulation of rolling the dice 10 times:
z <- 1:6                                   # the six faces of a die
s <- sample(z, size = 10, replace = TRUE)  # draw 10 values with replacement
s
mean(s)  # mean of the sample
sd(s)    # standard deviation of the sample
sample(z, size = 10, replace = TRUE)

# Exercise 5)
# Generate a random sequence of 50 values in the range of 200 to 1000
# without putting them back

## 6)
# Take a sample of n = 10 Gaussian-distributed values and calculate mean,
# median and standard deviation
# Repeat this experiment several times for
# n = 10
# n = 100
# and
# n = 100000

# 7) Take 100 values from a Gaussian distribution with mean 1.3 and
#    variance of 7.2
# Calculate the mean and the standard deviation of this sample.
# Plot a histogram in the color "skyblue" and insert median and
# +/- standard deviation into the histogram.