mkearney · geotheory · Dec 1, 2019 · Dec 1, 2019 · Dec 6, 2019 · Dec 6, 2019
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,5 @@
+.DS_Store
+.gitignore
 .Rhistory
-.RData
+.Rproj.user/*
 .Rproj.user
-.DS_Store
diff --git a/R/pushshift_io.R b/R/pushshift_io.R
@@ -3,6 +3,14 @@
 #' Reads/parses reddit data from api.pushshift.io
 #'
 #' @param subreddit Name of subreddit from which to get data. Defaults to "all".
+#' @param q Query term for comments and submissions.
+#' @param title Search in title only.
+#' @param selftext Search in selftext (main body) only.
+#' @param author Restrict results to the given author(s) - prefix with "!" to negate, comma-delimited for multiple authors.
+#' @param is_video Boolean - Restrict results based on whether the submission is a video.
+#' @param is_self Boolean - Restrict results based on whether the submission is a self post.
+#' @param is_original_content Boolean - Restrict results based on whether the submission is original content.
+#' @param is_reddit_media_domain Boolean - Restrict results based on whether the submission is hosted on Reddit Media.
 #' @param n Number of submission/posts to return. Defaults to 1000.
 #' @param after Optional, the date-time from which to start the next search.
 #' @param before Optional, the date-time from which to start the next search.
@@ -37,17 +45,27 @@
 #' }
 #'
 #' @export
-get_r_reddit <- function(subreddit = "all", n = 1000, after = NULL) {
+get_r_reddit <- function(subreddit = "all", q = NULL, title = NULL, selftext = NULL, author = NULL,
+                         is_video = NULL, is_self = NULL, is_original_content = NULL, is_reddit_media_domain = NULL,
+                         domain = NULL, link_url = NULL, n = 1000, after = NULL, before = NULL, verbose = FALSE) {
   n <- ceiling(n / 1000)
   x <- vector("list", n)
   for (i in seq_along(x)) {
-    url <- "https://api.pushshift.io/reddit/search/submission/?size=1000"
-    if (!identical(subreddit, "all")) {
-      url <- paste0(url, "&subreddit=", subreddit)
-    }
-    if (!is.null(after)) {
-      url <- paste0(url, "&before=", as.numeric(after))
-    }
+    url <- "https://api.pushshift.io/reddit/submission/search/?size=1000"
+    if (!identical(subreddit, "all")) url <- paste0(url, "&subreddit=", subreddit)
+    if (!is.null(q)) url <- paste0(url, "&q=", urltools::url_encode(q))
+    if (!is.null(title)) url <- paste0(url, "&title=", urltools::url_encode(title))
+    if (!is.null(selftext)) url <- paste0(url, "&selftext=", urltools::url_encode(selftext))
+    if (!is.null(author)) url <- paste0(url, "&author=", author)
+    if (!is.null(is_video)) url <- paste0(url, "&is_video=", tolower(is_video))
+    if (!is.null(is_self)) url <- paste0(url, "&is_self=", tolower(is_self))
+    if (!is.null(is_original_content)) url <- paste0(url, "&is_original_content=", tolower(is_original_content))
+    if (!is.null(is_reddit_media_domain)) url <- paste0(url, "&is_reddit_media_domain=", tolower(is_reddit_media_domain))
+    if (!is.null(domain)) url <- paste0(url, "&domain=", urltools::url_encode(domain))
+    if (!is.null(link_url)) url <- paste0(url, "&url=", urltools::url_encode(link_url))
+    if (!is.null(before)) url <- paste0(url, "&before=", as.numeric(before))
+    if (!is.null(after)) url <- paste0(url, "&after=", as.numeric(after))
+    if(verbose) message(url)
     r <- httr::GET(url)
     j <- httr::content(r, as = "text", encoding = "UTF-8")
     j <- jsonlite::fromJSON(j)
@@ -61,7 +79,7 @@ get_r_reddit <- function(subreddit = "all", n = 1000, after = NULL) {
     )
   }
   tryCatch(docall_rbind(x),
-    error = function(e) x)
+           error = function(e) x)
 }
 
 
@@ -106,24 +124,25 @@ get_r_reddit <- function(subreddit = "all", n = 1000, after = NULL) {
 #' }
 #'
 #' @export
-get_comment_reddit <- function(subreddit = "all", author = NULL, n = 1000, after = NULL) {
+get_comment_reddit <- function(subreddit = "all", n = 1000, after = NULL, before = NULL, verbose = FALSE) {
   n <- ceiling(n / 1000)
   x <- vector("list", n)
   for (i in seq_along(x)) {
     url <- "https://api.pushshift.io/reddit/search/comment/?size=1000"
     if (!identical(subreddit, "all")) {
       url <- paste0(url, "&subreddit=", subreddit)
     }
-    if (!is.null(author)) {
-      url <- paste0(url, "&author=", author)
+    if (!is.null(before)) {
+      url <- paste0(url, "&before=", as.numeric(before))
     }
     if (!is.null(after)) {
-      url <- paste0(url, "&before=", as.numeric(after))
+      url <- paste0(url, "&after=", as.numeric(after))
     }
+    if(verbose) message(url)
     r <- httr::GET(url)
     j <- httr::content(r, as = "text", encoding = "UTF-8")
     j <- jsonlite::fromJSON(j)
-    x[[i]] <- tbltools::as_tbl(non_recs(j$data))
+    x[[i]] <- dplyr::as_tibble(non_recs(j$data))
     if (!"created_utc" %in% names(x[[i]])) break
     x[[i]] <- formate_createds(x[[i]])
     after <- x[[i]]$created_utc[nrow(x[[i]])]

diff --git a/R/reddit_com.R b/R/reddit_com.R
@@ -9,12 +9,12 @@
 #' @param before Optional, the parameter from which to start the next search.
 #' @return A data frame of reddit data.
 #' @export
-get_reddit_com <- function(subreddit = "all", n = 100, after = NULL, before = NULL) {
+get_reddit_com <- function(subreddit = "all", n = 100, after = NULL, before = NULL, verbose = FALSE) {
   n <- ceiling(n / 100)
   r <- vector("list", n)
   count <- 0
   for (i in seq_along(r)) {
-    r[[i]] <- get_subreddit_(subreddit, after = after, before = before, count = count)
+    r[[i]] <- get_subreddit_(subreddit, after = after, before = before, count = count, verbose = verbose)
     r[[i]] <- parse_reddit_com_json(r[[i]])
     count <- count + nrow(r[[i]])
     after <- get_after(r[[i]])
@@ -65,10 +65,11 @@ get_before <- function(x) attr(x, "before")
 
 
 get_subreddit_ <- function(subreddit,
-  before = NULL,
-  after = NULL,
-  count = 100,
-  sort = c("relevance", "hot", "top", "new", "comments")) {
+                           before = NULL,
+                           after = NULL,
+                           count = 100,
+                           sort = c("relevance", "hot", "top", "new", "comments"),
+                           verbose = FALSE) {
   sort <- match.arg(sort)
   sort <- "new"
   url <- paste0(
@@ -85,5 +86,6 @@ get_subreddit_ <- function(subreddit,
   if (!is.null(after)) {
     url <- paste0(url, "&after=", after)
   }
+  if(verbose) message(url)
   jsonlite::fromJSON(url)
 }
diff --git a/man/get_comment_reddit.Rd b/man/get_comment_reddit.Rd
diff --git a/man/get_r_reddit.Rd b/man/get_r_reddit.Rd
diff --git a/man/get_reddit_com.Rd b/man/get_reddit_com.Rd