From 1c8fe9f377c3d2b4a4cf4f932e3b88ba25dac1fd Mon Sep 17 00:00:00 2001 From: dashee87 Date: Mon, 26 Sep 2016 10:28:10 +0100 Subject: [PATCH] Update fetch_atp_ranking.R Handle non-Monday date inputs a bit more elegantly --- R/fetch_atp_ranking.R | 90 ++++++++++++++++++++++++++++++------------- 1 file changed, 63 insertions(+), 27 deletions(-) diff --git a/R/fetch_atp_ranking.R b/R/fetch_atp_ranking.R index be15257..7f3f1bf 100755 --- a/R/fetch_atp_ranking.R +++ b/R/fetch_atp_ranking.R @@ -2,36 +2,72 @@ #' #' This function extracts ATP singles rankings for a particular date and ranking group #' -#' @param date A character date for extracting rankings, YYYY-MM-DD +#' @param date A character date for extracting rankings +#' @param format The format of the supplied data e.g. 2016-05-22 = "%Y-%m-%d" +#' @param round Rankings are released on Mondays. If the date supplied is not a Monday, +#' should the function query the previous (round="prev") or next (round="next") Monday? #' @param min_rank A numeric for the highest ranked player to include #' @param max_rank A numeric for the lowest ranked player to include #' #' @export -fetch_atp_rankings <- function(date, min_rank = 1, max_rank = 100){ - url <- "http://www.atpworldtour.com/en/rankings/singles?rankDate=DATE&rankRange=RANK&countryCode=all" +fetch_atp_rankings <- function(date, format ="%Y-%m-%d", round="prev", min_rank = 1, max_rank = 100){ + + if(!round %in% c("prev","next")) + stop('Incorrect round argument: Must be either "prev" or "next"') + + check_date <- as.Date(date, format = format) + + if(is.na(check_date)) + stop('Incorrect date and/or date format!') + + if(weekdays(check_date)!="Monday"){ + check_date <- check_date + (-6 - as.POSIXlt(check_date)$wday %% -7 ) + } + + if(round=="next") + check_date <- check_date + 7 + + name_index <- NULL + attempt <- 1 + + url <- "http://www.atpworldtour.com/en/rankings/singles?rankDate=DATE&rankRange=RANK&countryCode=all" + + rank <- paste(min_rank, max_rank, sep = "-") + + url <- sub("RANK", rank, url) + + while(length(name_index)==0){ + + if(attempt==3) + stop("No rankings available. Change your date.") + + if(attempt==2) + check_date <- check_date + ifelse(round=="prev",-7,7) + + rankings <- readLines(sub("DATE", check_date, url)) + + name_index <- grep("en/players.*[a-z][0-9]+.*>[A-Z]", rankings) + + attempt <- attempt + 1 + } - rank <- paste(min_rank, max_rank, sep = "-") - - url <- sub("DATE", date, url) - url <- sub("RANK", rank, url) - - rankings <- readLines(url) - - name_index <- grep("en/players.*[a-z][0-9]+.*>[A-Z]", rankings) - name <- sub("(.*en/players.*>)([A-Z].*)(.*)", "\\2", rankings[name_index]) - - age <- sub("([0-9]+)(\t.*)","\\1",rankings[name_index + 2]) - - ranking_points <- sub("(.*rankings.*>)([0-9].*)()([0-9].*)()([A-Z].*)(.*)", "\\2", rankings[name_index]) + + age <- sub("([0-9]+)(\t.*)","\\1",rankings[name_index + 2]) + + country <- gsub("\".*$","",gsub( ".*alt=\"", "", rankings[name_index - 5] )) + + ranking_points <- sub("(.*rankings.*>)([0-9].*)()([0-9].*)(