# Poli612 (2015), Day 1: introductory R walkthrough.
#
# Sections: basic objects and vectors, subsetting, matrices, loading and
# plotting a CSV dataset, a poll standard error / 95% confidence interval,
# and a simulation illustrating the Central Limit Theorem.
#
# Bare object names on their own line (e.g. `a`) are intentional: when the
# script is run line by line at the console they print the object's value.

# NOTE(review): this path is machine-specific; adjust it to the folder that
# contains "uspresidentialelections.csv" before running. The relative
# read.csv() call further down depends on this working directory.
setwd("~/Dropbox/Teaching/Poli612/2015/Day 1")

# Create some objects in R ----
a <- 5
a
b <- 6
# Storing a value under the name `c` does not break calls to the function
# c(): R looks for a function when a name is used in call position.
c <- a * b
c
d <- 1:10
d

obj1 <- c(1, 5, 10)
obj1
length(obj1)
obj2 <- obj1 * 5

# Vectors can be strings
obj3 <- c("Conservative", "BQ", "Liberal", "NDP", "Greens")
obj3
length(obj3)

# Subsetting ----
obj3[1:2]
obj3[c(1, 5)]
obj3[obj3 == "Greens"]
obj3 == "Greens"

obj4 <- c(obj3, "Greens")
obj4

help(c)
help(rm)
rm(obj1)
#rm(list=ls())
#ls
obj4[2:6]

# Logical subsetting
obj4[obj4 != "Conservative"]

# Matrices ----
obj5 <- c(1, 2, 3)
mymatrix <- cbind(obj2, obj5)
mymatrix
mymatrix[2, 1]
mymatrix[2, ]
colnames(mymatrix)
# Keep only the rows whose first column (obj2) is greater than 5
mymatrix[mymatrix[, 1] > 5, ]

### Load dataset ----
getwd()
us <- read.csv("uspresidentialelections.csv")
summary(us)
# Assumes the CSV has `growth` and `vote` columns -- TODO confirm
hist(us$growth)
boxplot(us$growth)

# Scatterplot
plot(
  x = us$growth,
  y = us$vote,
  main = "Relationship between growth and vote share",
  pch = 19,
  xlim = c(-6, 7),
  ylim = c(40, 65),
  xlab = "Growth Rate",
  ylab = "Vote Share "
)

# Sampling ----
# A poll of 2463 respondents coded 0/1: 1703 zeros and 760 ones
poll <- c(rep(0, 1703), rep(1, 760))
table(poll)
barplot(table(poll))
mean(poll)
sd(poll)
# Standard error of the mean; the sample size is taken from the data so it
# stays correct if the counts above are changed.
poll.se <- sd(poll) / sqrt(length(poll))
poll.se

# Lower bound of 95% CI
mean(poll) - 1.96 * poll.se
# Upper bound of 95% CI
mean(poll) + 1.96 * poll.se

# Central Limit Theorem Demonstration ----
set.seed(1)
population <- c(rep(0, 700000), rep(1, 300000))
table(population)
mypoll <- sample(population, 1000)
mean(mypoll)
mean(population)

# Write a loop and sample new set of 1000 respondents from population each time
# The result vector is preallocated rather than grown with c() on every
# iteration; the sample() calls happen in the same order as before, so the
# simulated means are unchanged for a given seed.
n_polls <- 100
mypolls <- numeric(n_polls)
for (i in seq_len(n_polls)) {
  mypoll <- sample(population, 1000)
  mypolls[i] <- mean(mypoll)
}
hist(mypolls)