# Read in a bunch of Ecobee monthly files and compute AC, Fan and Furnace on-events, then compute runtimes
library(readr)
library(reshape2)
library(reshape)
library(ggplot2)

# Collect hourly data from Sense and 5-minute data from my two Ecobees via web app export

# Set up an empty EcoPeriod accumulation dataframe; rbind() below adopts the
# column names and types of the first monthly file read in
EcoPeriod.df <- data.frame()
rooms <- c("Up", "Down")

for (j in 1:length(rooms)) {
  # Monthly Ecobee web-app exports to read for this thermostat
  MonthList <- list("Apr19", "May19", "Jun19", "Jul19", "Aug19", "Sep19", "Oct19")
  for (HeatingMonth in MonthList) {
    # Read in the Ecobee CSV file that has been renamed .csv; skip extraneous columns
    EcoMonth <- read_csv(paste("~/Documents/Ecobee/DataMonths/", rooms[j], HeatingMonth, ".csv", sep = ""),
                         col_types = cols_only(Date = col_character(),
                                               Time = col_character(),
                                               `System Mode` = col_character(),
                                               `Heat Stage 1 (sec)` = col_integer(),
                                               `Cool Stage 1 (sec)` = col_integer(),
                                               `Fan (sec)` = col_integer()),
                         skip = 5)
    EcoMonth$Loc <- rooms[j]
    # Accumulate the months into the full analysis period
    EcoPeriod.df <- rbind(EcoPeriod.df, EcoMonth)
  }
  # Check for any 5 minute dropouts (one possible check is sketched after the
  # cleanup below) and report progress for this thermostat
  print(paste("Finished reading", rooms[j]))
}

# Clean up Ecobee dataframe data
EcoPeriod.df$DateTime <- as.POSIXct(paste(EcoPeriod.df$Date, EcoPeriod.df$Time))
attr(EcoPeriod.df$DateTime, "tzone") <- "US/Pacific"
# Rename by position to short names (assumes the column order of the web-app export)
colnames(EcoPeriod.df) <- c("Date", "Time", "Mode", "Cool", "Heat", "Fan", "Loc", "DateTime")
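# --- Sketch: 5-minute dropout check (not part of the original script) -------
# A minimal, assumed helper to back up the "check for any 5 minute dropouts"
# note above: for each thermostat it rebuilds timestamps from the Date/Time
# columns and counts gaps longer than the expected 5-minute reporting interval.
# The name dropout_check and the 5-minute threshold are assumptions here.
dropout_check <- function(df) {
  by(df, df$Loc, function(loc_df) {
    ts <- sort(as.POSIXct(paste(loc_df$Date, loc_df$Time), tz = "US/Pacific"))
    gaps <- as.numeric(diff(ts), units = "mins")
    sum(gaps > 5)   # number of intervals longer than 5 minutes
  })
}
# Example usage: dropout_check(EcoPeriod.df)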
# Aggregate the assembled Ecobee data into hourly totals per thermostat
EcoPeriod.df$DateTime <- paste(EcoPeriod.df$Date, substr(EcoPeriod.df$Time, 1, 3))
mEcoHourly <- reshape2::melt(EcoPeriod.df[, c(4:8)], id = c("DateTime", "Loc"), na.rm = FALSE)  # columns 4:8 = Cool, Heat, Fan, Loc, DateTime
EcoHourly <- dcast(mEcoHourly, DateTime ~ Loc + variable, sum, fill = 0)
# EcoHourly <- aggregate(cbind(Heat, Cool, Fan) ~ DateTime+Loc, EcoPeriod.df, sum)
EcoHourly$DateTime <- as.POSIXct(gsub(":", ":00:00", EcoHourly$DateTime), tz = "US/Pacific")

# Useful lists for filtering
HVAClist <- c('AC', 'AC2', 'AC3', 'FurnaceUp', 'FurnaceDown')
HVAClist2 <- c('AC', 'AC 2', 'AC 3', 'Furnace Up', 'Furnace Down')
HVACDetections <- c('AC', 'AC2', 'AC3')

# Read in the Sense data, then filter just the HVAC detections
Sense <- read_csv("~/Documents/Ecobee/1-hour data from Jan 01, 2019 to Jan 01, 2020.csv",
                  col_types = cols(DateTime = col_datetime(format = "%Y-%m-%d %H:%M:%S")),
                  locale = locale(tz = "US/Pacific"), skip = 1)
attr(Sense$DateTime, "tzone") <- "US/Pacific"
Sense <- Sense[Sense$Name %in% HVAClist2, ]

# Convert Sense data to wide format:
# create a faked "molten" dataframe, then cast DateTime vs. device
mSense <- Sense[, c(1, 3, 9)]   # columns 1, 3 and 9 of the export: timestamp, device name, hourly usage
colnames(mSense) <- c("DateTime", "variable", "value")
mSense$value <- as.numeric(mSense$value)
Sense <- cast(mSense, DateTime ~ variable, sum, fill = 0)
# Get rid of spaces in detection names for easier charting
colnames(Sense) <- gsub(' ', '', colnames(Sense))

# Merge all the hourly data based on timestamp
Sense$DateTime <- as.POSIXct(Sense$DateTime, tz = "US/Pacific")
HVACComp <- merge(Sense, EcoHourly, by = 'DateTime')

# Fix NAs introduced by the merge and earlier steps
HVACComp$Down_Cool[is.na(HVACComp$Down_Cool)] <- 0

# Add delineation periods for correlation analysis
HVACComp$Month <- as.factor(substr(as.character(HVACComp$DateTime), 1, 7))
HVACComp$Week <- as.factor(substr(as.Date(cut(HVACComp$DateTime, "week")), 6, 10))

# Perform a Pearson correlation between the Sense detection energy usage and the
# Ecobee cooling runtimes on a weekly basis, then convert the result from a list
# into an easily graphable dataframe
UpWeek.ls <- by(HVACComp, HVACComp$Week,
                function(df) {cor(df[HVAClist], df['Up_Cool'], use = "pairwise.complete.obs")})
UpWeek.df <- as.data.frame(t(sapply(UpWeek.ls, function(df) {cbind(df)})))
UpWeek.df$Week <- rownames(UpWeek.df)
colnames(UpWeek.df) <- c(rownames(UpWeek.ls[[1]]), 'Week')
UpWeek.df <- melt(UpWeek.df)
colnames(UpWeek.df)[2:3] <- c('Detection', 'Correlation')

DownWeek.ls <- by(HVACComp, HVACComp$Week,
                  function(df) {cor(df[HVAClist], df['Down_Cool'], use = "pairwise.complete.obs")})
DownWeek.df <- as.data.frame(t(sapply(DownWeek.ls, function(df) {cbind(df)})))
DownWeek.df$Week <- rownames(DownWeek.df)
colnames(DownWeek.df) <- c(rownames(DownWeek.ls[[1]]), 'Week')
DownWeek.df <- melt(DownWeek.df)
colnames(DownWeek.df)[2:3] <- c('Detection', 'Correlation')

# Graph the upstairs and downstairs weekly correlations
ggplot(data = UpWeek.df[UpWeek.df$Detection %in% HVACDetections, ],
       aes(x = Week, y = Correlation, fill = Detection)) +
  geom_col(position = 'dodge') + theme_bw() +
  ggtitle('Pearson Correlation vs Detection Weeks') +
  scale_y_continuous(limits = c(-0.4, 1.2))

ggplot(data = DownWeek.df[DownWeek.df$Detection %in% HVACDetections, ],
       aes(x = Week, y = Correlation, fill = Detection)) +
  geom_col(position = 'dodge') + theme_bw() +
  ggtitle('Pearson Correlation vs Detection Weeks') +
  scale_y_continuous(limits = c(-0.4, 1.2))
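# --- Sketch: monthly correlations (not part of the original analysis) -------
# HVACComp$Month is created above for delineation but never used; the same
# weekly recipe could be applied per month. UpMonth.ls / UpMonth.df are names
# introduced here for illustration only.
UpMonth.ls <- by(HVACComp, HVACComp$Month,
                 function(df) {cor(df[HVAClist], df['Up_Cool'], use = "pairwise.complete.obs")})
UpMonth.df <- as.data.frame(t(sapply(UpMonth.ls, function(df) {cbind(df)})))
UpMonth.df$Month <- rownames(UpMonth.df)
colnames(UpMonth.df) <- c(rownames(UpMonth.ls[[1]]), 'Month')
UpMonth.df <- melt(UpMonth.df)
colnames(UpMonth.df)[2:3] <- c('Detection', 'Correlation')

ggplot(data = UpMonth.df[UpMonth.df$Detection %in% HVACDetections, ],
       aes(x = Month, y = Correlation, fill = Detection)) +
  geom_col(position = 'dodge') + theme_bw() +
  ggtitle('Pearson Correlation vs Detection Months')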