# PRACTICE answers
# NOTE: `iqlead` is not created in this script; it must already be loaded in
# the session before these answers are run.
# 1. Calculate the mean age.
mean(iqlead$AGE)
# 2. Calculate the mean age of the females.
# Females are selected with SEX equal to 2. `%in%` is used rather than `==`
# because `==` returns NA for a missing SEX value, and an NA index would add
# an NA row to the subset (turning the mean into NA).
mean(iqlead$AGE[iqlead$SEX %in% 2])
# 3. Calculate the number of females.
# Summing the logical mask counts the matching rows directly, and rows with a
# missing SEX value are not counted.
sum(iqlead$SEX %in% 2)
# 4. How many observations of the Wind variable are there in the airquality
# dataset?
length(airquality$Wind)
# 5. Calculate the mean of the Wind variable in the airquality dataset (ignore
# NA's if necessary)
mean(airquality$Wind)
# There are no NA's in Wind, so either call works. TRUE is spelled out because
# the shorthand T is an ordinary variable that can be reassigned.
mean(airquality$Wind, na.rm = TRUE)
# 6. Trim 5% of the observations from each end and calculate the trimmed
# mean of the Wind variable.
mean(airquality$Wind, trim = 0.05)
# 7. Generate a histogram for the Wind variable in the airquality dataset.
hist(airquality$Wind)
# 8. Generate a histogram for the Wind variable in the airquality dataset but
# only for the month of June (Month==6).
hist(airquality$Wind[airquality$Month == 6])
# 9. Add the argument freq=F to either of your histograms. What did this change
# on your histogram?
# Answer: the y-axis changes from frequencies to densities. FALSE is written
# out in full because the shorthand F is a plain variable that can be
# reassigned.
hist(airquality$Wind, freq = FALSE)
# 10. Re-run your histogram for the Wind variable in the airquality dataset.
# Does the distribution look approximately normal?
hist(airquality$Wind) # Approximately bell shaped I suppose
# 11. Run qqnorm() and qqline() for the Wind variable in the airquality
# dataset.
# qqnorm() draws the plot; qqline() then adds the reference line to it.
qqnorm(airquality$Wind)
qqline(airquality$Wind)
# 12. Create a vector of values selected randomly from a normal distribution
# that has approximately the same mean and standard deviation as the Wind
# variable in the airquality dataset. Make the sample size the same as well.
# Then generate a histogram and a qqplot with the qqline added for this data.
mean(airquality$Wind) # Mean of about 10
sd(airquality$Wind) # Standard deviation of about 3.5
length(airquality$Wind) # 153 observations
# Pass the computed statistics straight to rnorm() instead of retyping the
# rounded values, so the simulated sample always matches the data it mimics.
# No seed is set, so the drawn values differ from run to run.
mydata <- rnorm(
  length(airquality$Wind),
  mean = mean(airquality$Wind),
  sd = sd(airquality$Wind)
)
hist(mydata)
qqnorm(mydata)
qqline(mydata)
# 13. Run the summary() function on the airquality dataset.
summary(airquality)
# 14. Make boxplots of the Wind variable in the airquality dataset broken out
# by Month.
# Supplying the data frame through `data =` keeps the formula short and keeps
# any labels derived from the formula free of the `airquality$` prefix.
boxplot(Wind ~ Month, data = airquality)
# 15. Run a t-test on the Wind variable for May and July, then one for July and
# August. What do you observe?
# First comparison: May (Month 5) against July (Month 7).
t.test(
  airquality$Wind[airquality$Month == 5],
  airquality$Wind[airquality$Month == 7]
)
# Seems to be a statistically significant difference in means.
# Second comparison: July (Month 7) against August (Month 8).
t.test(
  airquality$Wind[airquality$Month == 7],
  airquality$Wind[airquality$Month == 8]
)
# Does not seem to be a statistically significant difference in means.