forked from Jeffalltogether/datasciencecoursera
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrankall.R
87 lines (77 loc) · 4.31 KB
/
rankall.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
## Write a function called rankall that takes two arguments: an outcome name (outcome) and a hospital
# ranking (num). The function reads the outcome-of-care-measures.csv file and returns a 2-column data frame
# containing the hospital in each state that has the ranking specified in num. For example the function call
# rankall("heart attack", "best") would return a data frame containing the names of the hospitals that
# are the best in their respective states for 30-day heart attack death rates. The function should return a value
# for every state (some may be NA). The first column in the data frame is named hospital, which contains
# the hospital name, and the second column is named state, which contains the 2-character abbreviation for
# the state name. Hospitals that do not have data on a particular outcome should be excluded from the set of
# hospitals when deciding the rankings.
# Handling ties. The rankall function should handle ties in the 30-day mortality rates in the same way
# that the rankhospital function handles ties.
# The function should use the following template.
#setwd("C:/Users/jeffthatcher/Cloud Drive/RRepos/DataScience/ProgAssignment3")
#outcome = "heart attack"
#num = "best"
## rankall: for every state, find the hospital holding a given rank on a
## 30-day mortality outcome.
##
## Arguments:
##   outcome  one of "heart attack", "heart failure" or "pneumonia".
##   num      "best", "worst", or a rank between 1 and the number of rows in
##            the data file (given as a number or as a numeric string).
##
## Returns a data frame with one row per state found in the file, sorted by
## state, with columns `hospital` (hospital name, or NA when the state has
## fewer ranked hospitals than `num`) and `state` (state abbreviation).
##
## Stops with "Invalid Outcome" / "Invalid Rank" on bad arguments.
## Reads "outcome-of-care-measures.csv" from the current working directory.
rankall <- function(outcome, num = "best") {
  ## Read outcome data
  data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

  ## Check that outcome and rank are valid
  outcome_columns <- c(
    "heart attack" =
      "Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack",
    "heart failure" =
      "Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure",
    "pneumonia" =
      "Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia"
  )
  if (length(outcome) != 1 || !outcome %in% names(outcome_columns)) {
    stop("Invalid Outcome")
  }
  valid_ranks <- c("best", "worst", seq_len(nrow(data)))
  if (length(num) != 1 || !num %in% valid_ranks) {
    stop("Invalid Rank")
  }

  ## Non-numeric entries in the rate column become NA here; only this
  ## conversion is allowed to warn silently.
  rates <- suppressWarnings(as.numeric(data[[outcome_columns[[outcome]]]]))

  ## Every state in the file must appear in the result, even when none of its
  ## hospitals report the requested outcome.
  states <- sort(unique(data$State))

  ranked <- data.frame(
    hospital = data$Hospital.Name,
    state = data$State,
    rate = rates,
    stringsAsFactors = FALSE
  )
  ## Hospitals without data for this outcome are excluded from the ranking.
  ranked <- ranked[!is.na(ranked$rate), , drop = FALSE]
  ## Best (lowest rate) first within each state; ties are broken
  ## alphabetically by hospital name.
  ranked <- ranked[order(ranked$state, ranked$rate, ranked$hospital), ,
                   drop = FALSE]

  ## Splitting on a factor with explicit levels keeps an (empty) entry for
  ## states that have no ranked hospitals, so results stay aligned to `states`.
  by_state <- split(ranked$hospital, factor(ranked$state, levels = states))

  ## For each state, find the hospital of the given rank
  pick_rank <- function(hospitals) {
    n <- length(hospitals)
    idx <- if (num == "best") {
      1L
    } else if (num == "worst") {
      n  # last *row* of the state's ranking
    } else {
      as.integer(num)
    }
    if (n == 0L || idx > n) NA_character_ else hospitals[[idx]]
  }
  hospital <- vapply(by_state, pick_rank, character(1), USE.NAMES = FALSE)

  data.frame(hospital = hospital, state = states, stringsAsFactors = FALSE)
}