library(RCurl)
library(reshape)
library(htmltab)
library(ggplot2)
# Download the rendered HTML of the Wikipedia results template and parse a
# table out of it into a data frame.
# NOTE(review): ssl.verifyPeer=FALSE disables TLS certificate verification, so
# the downloaded page is not authenticated -- confirm this is still required.
theurl <- getURL("https://en.wikipedia.org/wiki/Template:2016USDem", ssl.verifyPeer = FALSE)
table <- htmltab(theurl)

# Keep only the columns the chart needs, selected by position.
# NOTE(review): positions 1, 2, 9 and 12 are tied to the current layout of the
# Wikipedia table -- verify them if the template changes.
df <- table[, c(1, 2, 9, 12)]
names(df) <- c("Date", "State", "Clinton", "Sanders")

# Convert the text cells to dates and numbers. Characters 9-18 of the date
# cell are taken as the date string (presumably an ISO date following a
# sort-key prefix -- TODO confirm). Cells that are not numeric become NA.
df$Date <- as.Date(substr(df$Date, 9, 18))
df$Clinton <- as.numeric(df$Clinton)
df$Sanders <- as.numeric(df$Sanders)

# Drop every row that is missing a date or a delegate count.
df <- na.omit(df)
if (nrow(df) == 0) {
  stop("No usable rows were parsed from the results table", call. = FALSE)
}

# The chart orders its x axis with reorder(State, Date) but computes the
# running total and the final label in row order. Sort the rows into exactly
# that axis order so the cumulative path follows the bars from left to right.
df <- df[order(reorder(df$State, df$Date)), ]
# Fill palette: first entry (gold) is Clinton, second entry (green) is Sanders.
colors <- c("#D4AA00", "#228b22")

# One-row data frame used to label the end of the cumulative path: the last
# row of df plus the final running total of the Clinton - Sanders gap.
number <- tail(df, 1)
number$Label <- local({
  running_gap <- cumsum(df$Clinton - df$Sanders)
  running_gap[length(running_gap)]
})

# Vertical justification values for the text layers and axis labels.
# vtotal  : label at the end of the cumulative path
# vcontests: per-contest label, 1.4 when Sanders is ahead, -0.4 when Clinton
#            is ahead, 0.5 when the contest is tied
# vlabel  : rotated x-axis tick labels
vtotal <- -0.5
vcontests <- with(
  df,
  ifelse(Clinton < Sanders, 1.4, ifelse(Clinton > Sanders, -0.4, 0.5))
)
vlabel <- 0.3
# Build the chart. The signed per-contest gap is Clinton - Sanders, so bars
# above zero are Clinton leads and bars below zero are Sanders leads; the path
# is the running total of that gap in row order.
#
# The bar fill is mapped to a factor labelled by candidate and the palette is
# matched by name rather than by position, so each candidate keeps the
# intended colour and legend label even when only one of them leads in df.
d <- ggplot(df, aes(x = reorder(State, Date))) +
  geom_bar(
    aes(
      y = Clinton - Sanders,
      fill = factor(
        Clinton - Sanders < 0,
        levels = c(FALSE, TRUE),
        labels = c("Clinton", "Sanders")
      )
    ),
    stat = "identity"
  ) +
  scale_fill_manual(values = setNames(colors[1:2], c("Clinton", "Sanders"))) +
  geom_path(aes(y = cumsum(Clinton - Sanders), group = 1)) +
  # Absolute running total printed at the end of the path.
  geom_text(
    data = number,
    aes(x = State, y = Label, label = abs(Label)),
    show.legend = FALSE,
    size = 4,
    hjust = 1,
    vjust = vtotal
  ) +
  # Absolute gap printed on each bar; vcontests holds one vjust per row of df.
  geom_text(
    aes(y = Clinton - Sanders, label = abs(Clinton - Sanders), vjust = vcontests),
    size = 3.5
  ) +
  # Axis labels show magnitudes only; direction is carried by the fill colour.
  scale_y_continuous(
    labels = abs,
    breaks = seq(-1000, 1000, 50),
    minor_breaks = seq(-1000, 1000, 10)
  ) +
  labs(x = "Contest", title = "Pledged delegate gap", y = "Gap", fill = "Candidate") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = vlabel),
    plot.background = element_rect(fill = "transparent", colour = NA),
    legend.background = element_rect(fill = "transparent", colour = NA)
  )
# Write the plot to gap.svg. The width grows with the number of contests so
# the bars stay readable.
svg(
  filename = "gap.svg",
  width = ceiling(nrow(df) / 6) + 1,
  height = 5,
  pointsize = 12,
  bg = "transparent"
)
# A ggplot object is only drawn when it is printed. Top-level autoprinting
# does not happen under source() without echo, so print explicitly, and close
# the device even if rendering fails so it is not left open.
invisible(tryCatch(print(d), finally = dev.off()))