library(readr) library(ggplot2) # Read in Sense dat over 3 calendar years Sense <- read_csv("~/Documents/PostSmartplug/1-hour data from Jan 01, 2018 to Jan 01, 2019.csv", col_types = cols_only(DateTime = col_datetime(format = "%Y-%m-%d %H:%M:%S"), Name = col_character(), kWh = col_double()), locale = locale(tz = "US/Pacific"), skip = 1) Sense19 <- read_csv("~/Documents/PostSmartplug/1-hour data from Jan 01, 2019 to Jan 01, 2020.csv", col_types = cols_only(DateTime = col_datetime(format = "%Y-%m-%d %H:%M:%S"), Name = col_character(), kWh = col_double()), locale = locale(tz = "US/Pacific"), skip = 1) Sense17 <- read_csv("~/Documents/PostSmartplug/1-hour data from Jan 01, 2017 to Jan 01, 2018.csv", col_types = cols_only(DateTime = col_datetime(format = "%Y-%m-%d %H:%M:%S"), Name = col_character(), kWh = col_double()), locale = locale(tz = "US/Pacific"), skip = 1) Sense <- rbind(Sense17, Sense, Sense19) attr(Sense$DateTime, "tzone") <- "US/Pacific" # Clean up Sense artifacts - duplicated 1st of the year, and extra 1am DST transistion hour dupes <- sum(duplicated (Sense)) print (paste("Getting rid of ", dupes, " Duplicates !")) Sense <- unique (Sense) # Do a completeness check on raw exported data # Generate a sequence of hourly dates to check Sense completeness against check <- seq(from=as.POSIXct(Sense$DateTime[1]), by="hour", to=as.POSIXct(Sense$DateTime[nrow(Sense)])) MissingSenseHours <- as.data.frame(check[!check %in% as.POSIXct(Sense$DateTime[Sense$Name == 'Total Usage'])]) # Add NAs for missing Total Usage days MissingSenseHours$kWh <- NA MissingSenseHours$Name <- 'Total Usage' colnames(MissingSenseHours)[1] <- 'DateTime' Sense <- rbind(Sense, MissingSenseHours) # Put everything back in DateTime order after adding the missing time periods Sense <- Sense[order(Sense$DateTime),] # Add an IsConnected boolean to help with plotting Sense$IsConnected <- !is.na(Sense$kWh) # Rename Solar Production so it's easy to use the word Total to select Usage and Solar Sense$Name[Sense$Name == "Solar Production"] <- "Total Solar" # Break out Time separately as string for easier graphing Sense$Time <- substr(Sense$DateTime,12,21) # Break out Month and Date separately Sense$Month <- substr(Sense$DateTime,1,7) Sense$Date <- as.Date(Sense$DateTime) # Aggregate by Date SenseDays <- aggregate(kWh ~ Date + Name, data = Sense, sum) SenseDays$Month <- substr(SenseDays$Date,1,7) SenseDays$MonthNumber <- substr(SenseDays$Date,6,7) # Aggregate by Month SenseMonths <- aggregate(kWh ~ Month + Name, data = SenseDays, sum) SenseMonths$MonthNumber <- substr(SenseMonths$Month,6,7) # Plot Total Usage and Solar to makes sure things make sense ggplot(Sense[grepl('Total', Sense$Name),], aes(x=Time, y=abs(kWh),color=Name)) + geom_boxplot() + ggtitle("Energy Usage", subtitle = "") + xlab("Time of Day") + ylab("kWh for each Hour") + theme_bw() ggplot(SenseDays[grepl('Total', SenseDays$Name),], aes(x=MonthNumber, y=abs(kWh),color=Name)) + geom_boxplot() + ggtitle("Energy Usage", subtitle = "") + xlab("Month Number") + ylab("kWh for each Day") + theme_bw() ggplot(SenseMonths[grepl('Total', SenseMonths$Name),], aes(x=MonthNumber, y=abs(kWh),color=Name)) + geom_point() + ggtitle("Energy Usage", subtitle = "") + xlab("Month Number") + ylab("kWh for each Day") + theme_bw()