我想定期从下面的站点中提取一张表。
单击楼栋(BLOK)名称时,价格表会随之变化(BLOK 16 A、BLOK 16 B、BLOK 16 C……)。URL 保持不变,页面内容是通过触发如下 JavaScript 回发(postback)来切换的:
javascript:__doPostBack('ctl00$ContentPlaceHolder1$DataList2$ctl04$lnk_blok','')
在搜索了 Google 和 Stack Overflow 之后,我尝试了 3 种方法。
失败的尝试 1:它不会触发 doPostBack 事件。
# Attempt 1: POST the link's control id as an ordinary form field.
# Reported by the author not to trigger the __doPostBack event.
postForm(
  uri = "http://www.kentkonut.com.tr/tr/modul/projeler/daire_fiyatlari.aspx?id=44",
  ctl00_ContentPlaceHolder1_DataList2_ctl03_lnk_blok =
    "ctl00$ContentPlaceHolder1$DataList2$ctl03$lnk_blok"
)
失败的尝试 2:Selenium 服务器似乎已在 http://localhost:4444/ 上正常运行,但 remoteDriver 无法导航,并返回如下错误:
(Error in checkError(res) : Undefined error in httr call. httr output: length(url) == 1 is not TRUE)
library(RSelenium)

# Attempt 2: drive a real browser through a local Selenium server.
# NOTE(review): startServer() belongs to older RSelenium releases -- confirm it
# is still available in the installed version.
startServer()

# Build the client once, pointing at the local server. A separate default
# remoteDriver() object is unnecessary because it would be overwritten here.
remDr <- remoteDriver(
  remoteServerAddr = "localhost",
  port = 4444L,
  browserName = "firefox"
)

remDr$open()
remDr$getStatus()
remDr$navigate("http://www.kentkonut.com.tr/tr/modul/projeler/daire_fiyatlari.aspx?id=44")
失败的尝试 3:这是触发 doPostBack 事件的另一种方法,但它同样无法导航。
# Attempt 3: emulate the ASP.NET __doPostBack by scraping the hidden state
# fields out of the page and POSTing them back with the event target.
# getURL() and postForm() are assumed to come from RCurl; no library() call is
# visible in this snippet.

# Each assignment is its own statement; a trailing comma after a top-level
# assignment is a parse error in R.
base.url <- "http://www.kentkonut.com.tr/tr/modul/projeler/"
event.target <- 'ctl00$ContentPlaceHolder1$DataList2$ctl03$lnk_blok'
action <- "daire_fiyatlari.aspx?id=44"
ftarget <- paste0(base.url, action)

# Download the raw HTML of the page as a single string.
dum <- getURL(ftarget)

# Cut out the __EVENTVALIDATION value: take the text after the opening marker,
# then the text before the closing marker. The closing marker depends on the
# exact line breaks in the served HTML.
event.val <- unlist(strsplit(dum, "__EVENTVALIDATION\" value=\""))[2]
event.val <- unlist(strsplit(event.val, "\" />\r\n\r\n<script"))[1]

# Same extraction for __VIEWSTATE (note the different closing marker).
view.state <- unlist(strsplit(dum, "id=\"__VIEWSTATE\" value=\""))[2]
view.state <- unlist(strsplit(view.state, "\" />\r\n\r\n\r\n<script"))[1]

# POST the scraped state back to the same URL together with the event target.
web.data <- postForm(
  ftarget,
  "form name" = "ctl00_ContentPlaceHolder1_DataList2_ctl03_lnk_blok",
  "method" = "POST",
  "action" = action,
  "id" = "ctl00_ContentPlaceHolder1_DataList2_ctl03_lnk_blok",
  "__EVENTTARGET" = event.target,
  "__EVENTVALIDATION" = event.val,
  "__VIEWSTATE" = view.state
)
谢谢你的帮助。
最佳答案
library(rvest)

# Fetch the price page and extract the table rendered inside DataList1.
# NOTE(review): html_session() is the older rvest name for session() -- confirm
# against the installed rvest version.
url <- "http://www.kentkonut.com.tr/tr/modul/projeler/daire_fiyatlari.aspx?id=44"
pgsession <- html_session(url)
price_nodes <- html_nodes(
  read_html(pgsession),
  css = "#ctl00_ContentPlaceHolder1_DataList1"
)
t <- html_table(price_nodes, fill = TRUE)[[1]]

# Keep only the even-numbered rows, then drop the first of those.
# Logical and negative indexing stay valid when the table has fewer than two
# rows, whereas seq(2, n, 2) raises an error and 2:n counts backwards there.
t <- t[seq_len(nrow(t)) %% 2 == 0, ]
t <- t[-1, ]
编辑后的代码:
library(rvest)

url <- "http://www.kentkonut.com.tr/tr/modul/projeler/daire_fiyatlari.aspx?id=44"

# Control that the simulated __doPostBack targets. Change it to e.g.
# "ctl00$ContentPlaceHolder1$DataList2$ctl02$lnk_blok" to get a different table.
event_target <- "ctl00$ContentPlaceHolder1$DataList2$ctl01$lnk_blok"

# Open a session and read the page's first form, which carries the hidden
# ASP.NET state fields that must be echoed back in the POST.
pgsession <- html_session(url)
pgform <- html_form(pgsession)[[1]]

# Re-submit the form to the same URL with the chosen event target.
# NOTE(review): request_POST is an unexported rvest internal reached via `:::`;
# it may be missing in other rvest versions -- confirm before upgrading.
page <- rvest:::request_POST(
  pgsession,
  url,
  body = list(
    `__VIEWSTATE` = pgform$fields$`__VIEWSTATE`$value,
    `__EVENTTARGET` = event_target,
    `__EVENTARGUMENT` = "",
    `__VIEWSTATEGENERATOR` = pgform$fields$`__VIEWSTATEGENERATOR`$value,
    `__VIEWSTATEENCRYPTED` = pgform$fields$`__VIEWSTATEENCRYPTED`$value,
    `__EVENTVALIDATION` = pgform$fields$`__EVENTVALIDATION`$value
  ),
  encode = "form"
)

# Extract the table rendered inside DataList1 from the response.
price_nodes <- html_nodes(
  read_html(page),
  css = "#ctl00_ContentPlaceHolder1_DataList1"
)
t <- html_table(price_nodes, fill = TRUE)[[1]]

# Keep only the even-numbered rows, then drop the first of those.
# Logical and negative indexing stay valid when the table has fewer than two
# rows, whereas seq(2, n, 2) raises an error and 2:n counts backwards there.
t <- t[seq_len(nrow(t)) %% 2 == 0, ]
t <- t[-1, ]