Sie müssen zunächst herausfinden, wie Sie Selenium auf Ihrem System zum Laufen bringen und wie der Aufruf von remoteDr(...)
bei Ihnen funktioniert. Danach sollte Ihnen der folgende Code den Einstieg erleichtern:
library(seleniumPipes)
library(rvest)
library(dplyr)
library(stringi)
library(purrr)
# Open a Selenium session. `remoteDr(...)` is a placeholder: replace `...`
# with the connection arguments for your own Selenium server.
remDr <- remoteDr(...)

# Load the disclaimer page and click its submit button to reach the search form.
remDr %>% go("http://www.wspdp2c.org/Summary_Disclaimer.aspx")
submit <- remDr %>% findElement("xpath", ".//input[@type='submit']")
submit %>% elementClick()

# Fill in the "from" date. Each date field is looked up before it is used;
# the trailing escape key clears the popup calendar.
from_date <- remDr %>% findElement("xpath", ".//input[@name='MasterPage$mainContent$txtDateFrom2']")
from_date %>% elementClear()
from_date %>% elementSendKeys("12/22/2016", selKeys$escape) # esc clears the popup calendar

# Fill in the "to" date the same way.
to_date <- remDr %>% findElement("xpath", ".//input[@name='MasterPage$mainContent$txtDateTo2']")
to_date %>% elementClear()
to_date %>% elementSendKeys("12/23/2016", selKeys$escape)

# Click the search icon to run the query.
search <- remDr %>% findElement("class name", "ui-icon-search")
search %>% elementClick()
# Fetch the page source of the current page and select the results table.
pg <- getPageSource(remDr)
tab <- html_nodes(pg, "table.DataGridText")

# Column 2: parse the cell text as a date-time. Only the first nine matching
# cells are used for every column.
occurred <- html_nodes(tab, xpath=".//td[2]")[1:9] %>%
  html_text() %>%
  as.POSIXct(format="%m/%d/%Y %H:%M")

# Column 3: record type, kept as plain text.
incident_or_arrest <- html_text(html_nodes(tab, xpath=".//td[3]")[1:9])

# Column 4: free-text details, trimmed of surrounding whitespace.
case_or_arrestee <- stri_trim_both(html_text(html_nodes(tab, xpath=".//td[4]")[1:9]))

# Four alternatives, one capture group each. stri_match_all_regex() returns
# one matrix per input string: column 1 is the full match and columns 2:5 are
# the capture groups, with NA for the alternatives that did not match.
detail_pattern <- paste0(c("Case #: ([[:digit:]]+)",
                           "Primary Offense: ([[:print:]]+)",
                           "Arrestee: ([[:print:]]+)",
                           "Charge: ([[:print:]]+)"), collapse="|")

case_or_arrestee <- stri_match_all_regex(case_or_arrestee, detail_pattern) %>%
  # Per match row, drop the NA capture groups so only the captured value remains.
  map(function(m) apply(m[,2:5], 1, discard, is.na)) %>%
  # One data-frame row per record. If the first captured value contains a
  # letter it is labelled as arrestee/charge, otherwise as
  # case_number/primary_offense.
  map_df(function(x) {
    x <- as.list(x)
    has_letters <- stri_detect_regex(x[[1]], "[[:alpha:]]")
    field_names <- if (has_letters) {
      c("arrestee", "charge")
    } else {
      c("case_number", "primary_offense")
    }
    setNames(x, field_names)
  })

# Column 5: location text.
location <- html_text(html_nodes(tab, xpath=".//td[5]")[1:9])

# Combine the simple columns with the parsed detail columns and show the result.
data_frame(occurred, incident_or_arrest, location) %>%
  bind_cols(case_or_arrestee) %>%
  glimpse()
## Observations: 9
## Variables: 7
## $ occurred <dttm> 2016-12-22 00:00:00, 2016-12-22 00:00:00, 2016-12-22 00:0...
## $ incident_or_arrest <chr> "Incident", "Incident", "Arrest", "Incident", "Incident", ...
## $ location <chr> "2600-BLK TODDLER PLACE DR", "300-BLK ALSPAUGH DR", ...
## $ case_number <chr> "1667276", "1667273", NA, "1667249", "1667248", NA, NA, "1...
## $ primary_offense <chr> "BREAKING & ENTERING WITH FORCE", "MALICIOUS INJURY TO PRO...
## $ arrestee <chr> NA, NA, "THOMAS, KERRY MARTIN", NA, NA, "LOZANO, MIGUEL AR...
## $ charge <chr> NA, NA, "PANHANDLING W/ NO PRIVLEDGE LICENSE", NA, NA, "AN...
Es ist eine Sharepoint-basierte Website. Verwenden Sie einfach RSelenium oder seleniumPipes. – hrbrmstr