-
Notifications
You must be signed in to change notification settings - Fork 1
/
rproject.R
150 lines (117 loc) · 4.74 KB
/
rproject.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Work from the project folder so the relative dataset path below resolves.
setwd("C:/Users/PC/Documents/Projects/College Assignments/RProject")

# Load the raw CSV: `rdata` keeps the untouched import, `rdata1` is the
# working data frame used by the statements that follow.
rdata <- read.csv(file = "dataset.csv")
rdata1 <- as.data.frame(x = rdata)

# Open the working copy in the data viewer, then evaluate its class
# (the class is shown when top-level results are auto-printed).
View(rdata1)
class(rdata1)
library(zoo)
library(tidyr)
library(dplyr)
#for (i in seq(1,5744))+
# if(rdata1$Air.Quality[i] == "--")
# rdata1$Air.Quality[i]<-TRUE
#for (i in seq(1,5744))
#if(rdata1$O3.Quality[i] == "--")
# rdata1$O3.Quality[i]<-"NA"
#for (i in seq(1,5744))
#if(rdata1$NO2.Quality[i] == "--")
# rdata1$NO2.Quality[i]<-TRUE
#for (i in seq(1,5744))
# if(rdata1$PM10.Quality[i] == "--")
# rdata1$PM10.Quality[i]<-"NA"
# Replacing the "--" placeholder with NA in each of the four quality columns
quality_cols <- c("NO2.Quality", "O3.Quality", "PM10.Quality", "Air.Quality")
rdata1[quality_cols] <- lapply(rdata1[quality_cols], function(quality) {
  quality[quality == "--"] <- NA
  quality
})
print("hi")
# Replacing all the NA values with the previous value (last observation
# carried forward), one column at a time.
#
# Columns 2 and 5 to 13 are filled, each from its own earlier values. The
# previous version wrote the filled copy of columns 5-6 into columns 8-9 and
# 11-12, which overwrote those columns with unrelated data.
#
# na.rm = FALSE keeps leading NAs in place so every column keeps its original
# length; the default (TRUE) drops them, which makes the assignment fail.
locf_cols <- c(2, 5:13)
rdata1[locf_cols] <- lapply(rdata1[locf_cols], na.locf, na.rm = FALSE)
rdata1$Latitude <- as.numeric(rdata1$Latitude)
print("hi")
# Correcting the outliers in the Latitude column: any value above 42 is
# divided by 10000.
# Vectorised over however many rows the data has (no hard-coded row count).
# NA latitudes are left untouched instead of stopping the script, which is
# what `if (NA > 42)` does.
lat_too_large <- !is.na(rdata1$Latitude) & rdata1$Latitude > 42
rdata1$Latitude[lat_too_large] <- rdata1$Latitude[lat_too_large] / 10000
# Normalizing the PM10.Value column to [0, 1] with min-max scaling, rounded
# to 3 decimal places. NA values stay NA.
mx <- max(rdata1$PM10.Value, na.rm = TRUE)
print(mx)
mn <- min(rdata1$PM10.Value, na.rm = TRUE)
print(mn)
# Whole-column arithmetic replaces the element-wise loop, so the step no
# longer depends on the data having exactly 5744 rows.
# NOTE: if every value is identical (mx == mn) the result is NaN.
rdata1$PM10.Value <- round((rdata1$PM10.Value - mn) / (mx - mn), digits = 3)
# Visualisations ----
# Build `final`, the plotting table: the eight columns below, renamed, with
# rows regrouped so that rows 1, 9, 17, ... come first, then rows 2, 10, 18,
# ... and so on through the eighth group.
#
# This replaces eight loops that grew data frames with rbind() one row at a
# time. Those loops
#   * appended the whole Air.Quality column to every row (missing `[i]`),
#   * pushed every value through c(), collapsing all columns to one type,
#   * assumed exactly 5744 rows, and
#   * used a data frame named `c`, shadowing base::c.
# Selecting rows by index keeps each column's own type and works for any
# number of rows. The intermediate tables a..h and the progress prints that
# accompanied the slow loops are no longer needed.
plot_cols <- c(
  "Station", "Latitude", "Longitude", "NO2.Value",
  "O3.Value", "PM10.Value", "Date.Time", "Air.Quality"
)
group_count <- 8
# Position of each row within its run of eight (0..7). order() keeps ties in
# their original order, so rows stay in file order within each group.
group_offset <- (seq_len(nrow(rdata1)) - 1) %% group_count
final <- rdata1[order(group_offset), plot_cols]
colnames(final) <- c("Station", "Lat", "Long", "NO2", "O3", "PM10", "Time", "AQ")
rownames(final) <- NULL
library(plotly)
# 3-D scatter of the plotting table: latitude, time and longitude on the
# axes, markers coloured by NO2.
# `colors` is the plot_ly() argument that sets the palette for a `color`
# mapping; the previous `colorscale` / `showscale` arguments are not plot_ly()
# palette arguments. The second colour was "#0G4B8E", which is not valid
# hexadecimal ("G"), and is corrected to "#0C4B8E".
sp2 <- plot_ly(
  final,
  x = ~Lat, y = ~Time, z = ~Long,
  color = ~NO2, size = 1,
  colors = c("#BF382A", "#0C4B8E")
) %>%
  add_markers() %>%
  layout(scene = list(
    xaxis = list(title = "Latitude"),
    yaxis = list(title = "Time"),
    zaxis = list(title = "Longitude")
  ))
print(sp2)
#print(pie(table(rdata1$Air.Quality), names(table(rdata1$Air.Quality)),main="Air Quality breakdown"))
# Bar chart of the Air.Quality column (same hex-colour correction as above).
print(plot_ly(rdata1, x = ~Air.Quality, type = "bar", colors = c("#BF382A", "#0C4B8E")))
View(rdata1)