Skip to content

Commit

Permalink
get_r_reddit upgrade
Browse files Browse the repository at this point in the history
  • Loading branch information
geotheory committed Dec 6, 2019
1 parent 1546fab commit f808980
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 30 deletions.
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.Rhistory
.RData
.Rproj.user
.DS_Store
.gitignore
.Rhistory
.Rproj.user/*
43 changes: 28 additions & 15 deletions R/pushshift_io.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
#' Reads/parses reddit data from api.pushshift.io
#'
#' @param subreddit Name of subreddit from which to get data. Defaults to "all".
#' @param q Query term for comments and submissions.
#' @param title Search in title only.
#' @param selftext Search in selftext (main body) only.
#' @param author Restrict results to author - use "!" to negate, comma delimited for multiples.
#' @param is_video Boolean - Restrict results based on whether the submission is a video.
#' @param is_self Boolean - Restrict results based on whether the submission is a self post.
#' @param is_original_content Boolean - Restrict results based on whether the submission is original content.
#' @param is_reddit_media_domain Boolean - Restrict results based on whether the submission is hosted on Reddit Media.
#' @param n Number of submission/posts to return. Defaults to 1000.
#' @param after Optional, the date-time from which to start the next search.
#' @param before Optional, the date-time up to which to search (sent as the API's `before` bound).
Expand Down Expand Up @@ -37,20 +45,27 @@
#' }
#'
#' @export
get_r_reddit <- function(subreddit = "all", n = 1000, after = NULL, before = NULL) {
get_r_reddit <- function(subreddit = "all", q = NULL, title = NULL, selftext = NULL, author = NULL,
is_video = NULL, is_self = NULL, is_original_content = NULL, is_reddit_media_domain = NULL,
domain = NULL, link_url = NULL, n = 1000, after = NULL, before = NULL, verbose = FALSE) {
n <- ceiling(n / 1000)
x <- vector("list", n)
for (i in seq_along(x)) {
url <- "https://api.pushshift.io/reddit/search/submission/?size=1000"
if (!identical(subreddit, "all")) {
url <- paste0(url, "&subreddit=", subreddit)
}
if (!is.null(before)) {
url <- paste0(url, "&before=", as.numeric(before))
}
if (!is.null(after)) {
url <- paste0(url, "&after=", as.numeric(after))
}
url <- "https://api.pushshift.io/reddit/submission/search/?size=1000"
if (!identical(subreddit, "all")) url <- paste0(url, "&subreddit=", subreddit)
if (!is.null(q)) url <- paste0(url, "&q=", urltools::url_encode(q))
if (!is.null(title)) url <- paste0(url, "&url=", urltools::url_encode(title))
if (!is.null(selftext)) url <- paste0(url, "&url=", urltools::url_encode(selftext))
if (!is.null(author)) url <- paste0(url, "&author=", author)
if (!is.null(is_video)) url <- paste0(url, "&is_video=", tolower(is_video))
if (!is.null(is_self)) url <- paste0(url, "&is_self=", tolower(is_self))
if (!is.null(is_original_content)) url <- paste0(url, "&is_original_content=", tolower(is_original_content))
if (!is.null(is_reddit_media_domain)) url <- paste0(url, "&is_reddit_media_domain=", tolower(is_reddit_media_domain))
if (!is.null(domain)) url <- paste0(url, "&domain=", urltools::url_encode(domain))
if (!is.null(link_url)) url <- paste0(url, "&url=", urltools::url_encode(link_url))
if (!is.null(before)) url <- paste0(url, "&before=", as.numeric(before))
if (!is.null(after)) url <- paste0(url, "&after=", as.numeric(after))
if(verbose) message(url)
r <- httr::GET(url)
j <- httr::content(r, as = "text", encoding = "UTF-8")
j <- jsonlite::fromJSON(j)
Expand Down Expand Up @@ -109,23 +124,21 @@ get_r_reddit <- function(subreddit = "all", n = 1000, after = NULL, before = NUL
#' }
#'
#' @export
get_comment_reddit <- function(subreddit = "all", author = NULL, n = 1000, after = NULL, before = NULL) {
get_comment_reddit <- function(subreddit = "all", n = 1000, after = NULL, before = NULL, verbose = FALSE) {
n <- ceiling(n / 1000)
x <- vector("list", n)
for (i in seq_along(x)) {
url <- "https://api.pushshift.io/reddit/search/comment/?size=1000"
if (!identical(subreddit, "all")) {
url <- paste0(url, "&subreddit=", subreddit)
}
if (!is.null(author)) {
url <- paste0(url, "&author=", author)
}
if (!is.null(before)) {
url <- paste0(url, "&before=", as.numeric(before))
}
if (!is.null(after)) {
url <- paste0(url, "&after=", as.numeric(after))
}
if(verbose) message(url)
r <- httr::GET(url)
j <- httr::content(r, as = "text", encoding = "UTF-8")
j <- jsonlite::fromJSON(j)
Expand Down
14 changes: 8 additions & 6 deletions R/reddit_com.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
#' @param before Optional, the parameter from which to start the next search.
#' @return A data frame of reddit data.
#' @export
get_reddit_com <- function(subreddit = "all", n = 100, after = NULL, before = NULL) {
get_reddit_com <- function(subreddit = "all", n = 100, after = NULL, before = NULL, verbose = FALSE) {
n <- ceiling(n / 100)
r <- vector("list", n)
count <- 0
for (i in seq_along(r)) {
r[[i]] <- get_subreddit_(subreddit, after = after, before = before, count = count)
r[[i]] <- get_subreddit_(subreddit, after = after, before = before, count = count, verbose = verbose)
r[[i]] <- parse_reddit_com_json(r[[i]])
count <- count + nrow(r[[i]])
after <- get_after(r[[i]])
Expand Down Expand Up @@ -65,10 +65,11 @@ get_before <- function(x) attr(x, "before")


get_subreddit_ <- function(subreddit,
before = NULL,
after = NULL,
count = 100,
sort = c("relevance", "hot", "top", "new", "comments")) {
before = NULL,
after = NULL,
count = 100,
sort = c("relevance", "hot", "top", "new", "comments"),
verbose = FALSE) {
sort <- match.arg(sort)
sort <- "new"
url <- paste0(
Expand All @@ -85,5 +86,6 @@ get_subreddit_ <- function(subreddit,
if (!is.null(after)) {
url <- paste0(url, "&after=", after)
}
if(verbose) message(url)
jsonlite::fromJSON(url)
}
8 changes: 4 additions & 4 deletions man/get_comment_reddit.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 21 additions & 1 deletion man/get_r_reddit.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/get_reddit_com.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit f808980

Please sign in to comment.