@AndyB,
If you are looking for my R/RSelenium code that scrapes the web app, here it is. A few notes:
- As a scraper, it is still sensitive to the underlying web app's response times and occasionally breaks. The most typical failure mode is when it accesses the changing carousel of bar graphs and sees an additional day of bars (4 days/96 hours instead of 3 days/72 hours). I haven't put in a mechanism to trap and repair that; instead I've tried to extend the waits in the program to avoid it (a sketch of one possible trap follows these notes).
- I have had success running it for 180 days of scraping. I haven't fixed the DST issue, so I patched the missing hour on March 11th by hand.
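For what it's worth, here's a rough sketch of the kind of trap I have in mind. It's untested, readBars is my own hypothetical helper (not anything the script below uses), and it assumes a settled day view exposes exactly 72 bar-graph__bar elements, per the 3 days/72 hours note above:
# Hedged sketch of a carousel guard (untested; assumes a settled day view
# exposes exactly 72 "bar-graph__bar" elements)
readBars <- function(remDr, expected = 72, retries = 3) {
  for (attempt in 1:retries) {
    bars <- remDr$findElements(using = "class", "bar-graph__bar")
    if (length(bars) == expected) return(bars)
    Sys.sleep(10)  # carousel not settled (e.g. 96 bars); wait and re-read
  }
  stop(paste("Carousel returned", length(bars), "bars, expected", expected))
}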
To use:
- Add your email and password where indicated
- Set NumDays to the number of days you want to go back in time. The code stops short of scraping the last couple of days of data to avoid running off the end of the carousel…
# Semi automatic scraping
library(RSelenium)
# Run via Selenium Server
rD <- rsDriver(port = 4445L)
remDr <- rD$client
# Login to the website
remDr$navigate("https://home.sense.com/login")
Sys.sleep(4)
webElem <- remDr$findElement(using = "xpath", '//*[@id="application__main"]/div/div[1]/form/div[1]/input')
webElem$sendKeysToElement(list("youremail\n"))
Sys.sleep(1)
webElem <- remDr$findElement(using = "xpath", '//*[@id="application__main"]/div/div[1]/form/div[2]/input')
webElem$sendKeysToElement(list("yourpassword\n"))
Sys.sleep(4)
# Go to usage page for current month.
webElem <- remDr$findElement(using = 'xpath', value = '//*[@id="navbar"]/div/a[2]/div')
webElem$clickElement()
Sys.sleep(1)
# Navigate to Days and click
webElem <- remDr$findElement(using = 'xpath', value = '//*[@id="trends-viewer"]/div[1]/div/span[1]/label')
webElem$clickElement()
Sys.setenv(TZ="America/Los_Angeles")
# Backtrack to start of reporting period plus 1
NumDays <- 180
FirstDay <- Sys.Date()-(NumDays-1)
# Navigate to left arrow and click
for (i in 1:(NumDays+1)) {
  webElem <- remDr$findElement(using = 'xpath', value = '//*[@id="trends-viewer"]/div[2]/div[1]/div/i[1]')
  webElem$clickElement()
  Sys.sleep(0.5)
}
# Initialize data storage lists as string lists
date.list <- list()
energy.list <- list()
senergy.list <- list()
# Move forward sampling data
for (i in 1:((NumDays-2)%/%3)) {
  # Click forward 3 days
  for (j in 1:3) {
    webElem <- remDr$findElement(using = 'xpath', value = '//*[@id="trends-viewer"]/div[2]/div[1]/div/i[2]')
    webElem$clickElement()
  }
  # Pull out the center date: strip the HTML tags and embedded newlines,
  # leaving just the date text
  webElem <- remDr$findElement(using = 'class', value = 'nav-arrows__title')
  centerdate <- gsub('<[^>]*>', '', webElem$getElementAttribute("outerHTML"))
  centerdate <- gsub('\n', '', centerdate)
  print(centerdate)
  date.list <- c(date.list, rep(centerdate, 72))
  # Only the first pass sets the start of the scraped range; later passes
  # would clobber it with later dates
  if (i == 1) FirstDay <- as.Date(centerdate, format="%a %b %d, %Y")-1
  # Navigate back to Usage and click
  webElem <- remDr$findElement(using = 'xpath', value = '//*[@id="navbar"]/div/a[2]/div')
  webElem$clickElement()
  Sys.sleep(10.0)
  # Extract the kWh data from the Usage page
  # Data will include current month (to date), plus previous month
  webElem <- remDr$findElements(using = "class", "bar-graph__bar")
  energy.list <- c(energy.list, sapply(webElem, function(x){x$getElementAttribute("outerHTML")}))
  # Navigate to Solar and click
  webElem <- remDr$findElement(using = 'xpath', value = '//*[@id="application__main"]/div/div[1]/div[2]')
  webElem$clickElement()
  Sys.sleep(1)
  # Extract the solar kWh data on that page...
  # Data will include current month (to date), plus previous month
  webElem <- remDr$findElements(using = "class", "bar-graph__bar")
  senergy.list <- c(senergy.list, sapply(webElem, function(x){x$getElementAttribute("outerHTML")}))
}
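On the "extend the waits" point above: a polling wait is one way to make the fixed Sys.sleep() calls less brittle. A minimal sketch, assuming it's fine to simply retry findElement until the element appears (waitForElement is my own hypothetical helper, not an RSelenium API):
# Hedged helper: retry findElement until it succeeds or a timeout expires,
# instead of guessing at fixed sleep lengths
waitForElement <- function(remDr, using, value, timeout = 15, interval = 0.5) {
  deadline <- Sys.time() + timeout
  while (Sys.time() < deadline) {
    found <- tryCatch(remDr$findElement(using = using, value = value),
                      error = function(e) NULL)
    if (!is.null(found)) return(found)
    Sys.sleep(interval)
  }
  stop(paste("Timed out waiting for", value))
}
# e.g. webElem <- waitForElement(remDr, 'class', 'nav-arrows__title')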
# Get rid of all the extra HTML gleet (smarter grep minds could probably do in a single line):
# trim everything from "kWh<" onward, then everything up to the last remaining ">",
# leaving just the number
energy.list <- gsub('kWh<.*', "", energy.list)
energy.list <- gsub('.*>', "", energy.list)
# Same cleanup for the solar readings
senergy.list <- gsub('kWh<.*', "", senergy.list)
senergy.list <- gsub('.*>', "", senergy.list)
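On the "single line" aside: one possible collapse of the two passes, assuming the reading sits in the markup as ">NUMBER kWh<" (my assumption about the outerHTML; this would run on the raw scraped strings in place of the cleanup above):
# One-line alternative: the greedy .*> eats everything up to the ">" just
# before the number, and the back-reference keeps only the captured value
energy.list <- sub('.*>\\s*([0-9.]+)\\s*kWh<.*', '\\1', energy.list)
senergy.list <- sub('.*>\\s*([0-9.]+)\\s*kWh<.*', '\\1', senergy.list)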
# Convert lists to a dataframe
energy.df <- data.frame(CenterDate = unlist(date.list), stringsAsFactors = FALSE)
energy.df$Energy <- as.numeric(energy.list)
# Add solar to dataframe
energy.df$Solar <- as.numeric(senergy.list)
# Calculate hourly net power
energy.df$Net <- energy.df$Energy - energy.df$Solar
# Synthesize datetime for hourly data
energy.df$DateTime <- seq(as.POSIXct(paste(FirstDay, "00:00:00")), by="hour", length.out=nrow(energy.df))
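Related to the March 11th hand-fix, a sanity check one could add here (my addition, untested): seq(by="hour") steps in fixed 3600-second increments, so after a DST change the synthesized labels drift off midnight. Each 72-row carousel block should start at 00:00; anything else marks where the missing (or doubled) hour needs the hand repair.
# Hedged DST check: flag the first carousel block whose synthesized start
# time is no longer midnight
starts <- seq(1, nrow(energy.df), by = 72)
drifted <- starts[format(energy.df$DateTime[starts], "%H") != "00"]
if (length(drifted) > 0) warning(paste("DST drift begins near row", drifted[1]))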
energy.df <- energy.df[,c("DateTime", "Energy", "Solar", "Net", "CenterDate")]
write.csv(energy.df, paste("./", as.character(FirstDay), "Energy.csv", sep=""))