嗨,各位开发者,我又带着一个问题回来了。
我正在尝试从这个网站 https://www.iamsterdam.com/nl/uit-in-amsterdam/uit/agenda 获取一些数据。起初我是直接抓取网页的,但在抓取过程中发现他们有一个 API,用它会快得多。于是我尝试从这个 API 获取数据,下面是我的尝试:
get-website.js:
// PhantomJS script: POST a JSON-encoded query to the agenda API and print the
// content of the resulting page.
var webPage = require('webpage');
var page = webPage.create();

// Options for page.open(): HTTP method, body encoding, request headers and body.
var settings = {
  operation: "POST",
  encoding: "utf8",
  headers: {
    "Content-Type": "application/json"
  },
  data: JSON.stringify({
    // Must be a string: a bare 03112016 is a legacy octal literal, so it would
    // be serialized as 824334 (and is a syntax error in strict mode).
    DateFilter: "03112016",
    PageId: "3418a37d-b907-4c80-9d67-9fec68d96568",
    Take: 2,
    Skip: 12,
    ViewMode: 1
  })
};

// Send the request, dump whatever came back, then quit PhantomJS.
page.open('https://www.iamsterdam.com/api/AgendaApi/', settings, function(status) {
  console.log(page.content);
  phantom.exit();
});
get-website.php
// Run the PhantomJS script and print everything it wrote to stdout.
$phantom_script = 'get-website.js';
// exec() returns only the LAST line of the command's output, so collect all
// lines through its second argument and join them back together.
$output = array();
exec('phantomjs ' . escapeshellarg($phantom_script), $output);
$response = implode("\n", $output);
echo $response;
但是返回的结果看起来不对:
Message":"An error has occurred.","ExceptionMessage":"Page could not be found","ExceptionType":"System.ApplicationException","StackTrace":" at Axendo.SC.AM.Iamsterdam.Controllers.Api.AgendaApiController.GetResultsInternal(RequestModel requestModel)\r\n at lambda_method(Closure , Object , Object[] )\r\n
etc.
这是 Firebug 的截图:
希望有人能帮帮我。
有趣的问题。令我惊讶的是,该网站在浏览器甚至 cURL 中都能接受这个 AJAX 请求,在 PhantomJS 中却不行。在这种情况下,您必须非常仔细地研究并复现原始请求,因为任何一个小细节都可能极大地影响服务器的响应。
结果发现,关键在于一个 cookie 和表单的内容类型(Content-Type),两者都必须正确设置。
// Load the 'webpage' module and create the page object that later issues the request.
var webPage = require('webpage');
var page = webPage.create();
// courtesy of http://stackoverflow.com/a/1714899/2715393
/**
 * Encode an object's own enumerable properties as a URL-encoded
 * "key=value&key=value" string, usable as a form request body.
 *
 * @param {Object} obj - Flat map of field names to values; names and values
 *   are each passed through encodeURIComponent.
 * @returns {string} The encoded pairs joined with "&"; "" when there are none.
 */
var serialize = function(obj) {
  var pairs = [];
  for (var key in obj) {
    // Borrow hasOwnProperty from Object.prototype so objects that lack the
    // method (Object.create(null)) or shadow it with a data field still work.
    if (Object.prototype.hasOwnProperty.call(obj, key)) {
      pairs.push(encodeURIComponent(key) + "=" + encodeURIComponent(obj[key]));
    }
  }
  return pairs.join("&");
};
// Options for page.open(): a form-encoded POST that mimics the AJAX request.
// The form content type and the language cookie are the two settings the
// accompanying answer identifies as required by the server.
var settings = {
  operation: "POST",
  encoding: "utf8",
  headers: {
    "accept-encoding" : "identity", // https://github.com/ariya/phantomjs/issues/10930#issuecomment-81541618
    "x-requested-with" : "XMLHttpRequest",
    "accept-language" : "en;q=0.8,en-US;q=0.6",
    "authority" : "www.iamsterdam.com",
    "accept":"application/json, text/javascript, */*; q=0.01",
    "content-type" : "application/x-www-form-urlencoded; charset=UTF-8",
    "cookie" : "website#lang=nl"
  },
  // Empty filters are still sent as empty form fields.
  data: serialize({
    Genre: '',
    DateFilter: '03112016',
    DayPart: '',
    SearchTerm: '',
    Neighbourhoud: '',
    DayRange: '',
    ViewMode: 1,
    LastMinuteTickets : '',
    PageId: '3418a37d-b907-4c80-9d67-9fec68d96568',
    Skip: 0,
    Take: 12
  })
};

// Send the request; print the page content on success, then quit PhantomJS.
page.open('https://www.iamsterdam.com/api/AgendaApi/', settings, function(status) {
  // Without this check a failed load would print an empty page and exit 0.
  if (status !== 'success') {
    console.error('Request to AgendaApi failed: ' + status);
    phantom.exit(1);
    return;
  }
  console.log(page.content);
  phantom.exit();
});
本文收集自互联网,转载请注明来源。
如有侵权,请联系 [email protected] 删除。
我来说两句