library(twitteR)
library(lattice)

# Rewrite of Jeff Gentry's Rtweets function to make it more flexible.
#
# Builds a search URL from `tag`, `n` (results per page) and `page`, fetches it
# through the given curl handle, parses the JSON reply and converts each entry
# of the first parsed element into a status object via buildStatus().
#
# Arguments:
#   session  curl handle used for the request (a fresh handle by default)
#   tag      search tag; URL-encoded before being placed in the query string
#   n        number of results requested per page (the "rpp" parameter)
#   page     result page to request
#
# Returns a list with one status object per entry in the reply.
#
# NOTE(review): this targets the search.twitter.com JSON endpoint and relies on
# twFromJSON()/buildStatus() being available from twitteR -- confirm that both
# the endpoint and these helpers still exist in the installed version.
gettweets <- function(session = getCurlHandle(), tag = "rstats", n = 20,
                      page = 1) {
  # Encode the tag so characters such as "#", "&" or spaces cannot corrupt the
  # query string; plain alphanumeric tags (e.g. "rstats") are left unchanged.
  uri <- paste0(
    "http://search.twitter.com/search.json?q=&tag=",
    utils::URLencode(tag, reserved = TRUE),
    "&rpp=", n,
    "&page=", page
  )
  out <- getURL(uri, curl = session)
  json_list <- twFromJSON(out)[[1]]
  # lapply() always yields a list, whatever the number of results.
  lapply(json_list, buildStatus)
}

# Now get a bit of data: two pages of 100 results each.
rtw <- gettweets(n = 100, page = 1)
rtw <- append(rtw, gettweets(n = 100, page = 2))
stopifnot(length(rtw) > 0)

# Plot the usernames ----
tweeters <- vapply(rtw, function(x) slot(x, "screenName"), character(1))
tweet_counts <- sort(table(tweeters))  # ascending: most active users last

# How many users?
n_users <- length(tweet_counts)
print(n_users)

# Find the users responsible for 50% of the tweets: walk down from the most
# active user until the cumulative share of tweets reaches one half.
# (quantile() cannot be applied to the character vector of names.)
share_from_top <- cumsum(rev(tweet_counts)) / sum(tweet_counts)
top_k <- which(share_from_top >= 0.5)[1]
# Bars are drawn at y = 1..n_users in ascending order, so the top_k most
# active users occupy the last top_k positions; put the line just below them.
cutoff <- n_users - top_k + 0.5

trellis.par.set(list(fontsize = list(text = 6)))
print(barchart(
  tweet_counts,
  horizontal = TRUE,
  col = "light blue",
  panel = function(...) {
    panel.barchart(...)
    # Drawn inside the panel so the line uses the panel's own coordinates.
    panel.abline(h = cutoff, col = "red", lty = 2)
  }
))

# Look for tweets containing links ----
tweets <- vapply(rtw, function(x) slot(x, "text"), character(1))
has_link <- grepl("http", tweets, fixed = TRUE)
n_with <- sum(has_link)
n_without <- length(tweets) - n_with
pct <- round(n_with / length(tweets) * 100, digits = 2)

# A one-column matrix gives a single stacked bar; barplot() returns its
# midpoint, which is used to centre the labels horizontally.
bar_mid <- barplot(matrix(c(n_with, n_without)), col = c("brown", "beige"))
# Centre each label vertically within its own segment of the stacked bar.
text(bar_mid, n_with / 2, paste("with links:", pct, "%"))
text(bar_mid, n_with + n_without / 2, paste("without links:", 100 - pct, "%"))

# Look for other hashtags in posts ----
tokens <- unlist(strsplit(tweets, " "))
hashtags <- grep("#", tokens, fixed = TRUE, value = TRUE)
if (length(hashtags) > 0) {
  print(barchart(sort(table(hashtags)), horizontal = TRUE, col = "lightgreen"))
} else {
  message("No hashtags found in the retrieved tweets.")
}