我被编码为php机器人。现在我要发送800万个请求。是的,例如:
我使用multi_curl *但有问题。
我使用linux并行库问题。
也许您有什么建议吗?
PHP 7.1.1,Linux Ubuntu 16.0.1
myfnc(){
i=1264609
echo "$(($1+i))";
response=$(curl --write-out %{http_code} --silent --output /dev/null http://localhost/botum/index.php?i=$(($1+i)))
echo $response
}
export -f myfnc
seq 100 | parallel -j0 myfnc
多卷曲问题:
set_time_limit(0);
ini_set("max_execution_time",-1);
$nodes = array();
for($i =1366295;$i<1396296;$i++){
array_push($nodes,"http://165.227.152.138/botum2/index.php?i=$i");
}
$node_count = count($nodes);
$curl_arr = array();
$master = curl_multi_init();
for($i = 0; $i < $node_count; $i++)
{
$url =$nodes[$i];
//problems code start
$curl_arr[$i] = curl_init($url);
curl_setopt($curl_arr[$i], CURLOPT_RETURNTRANSFER, true);
//problems code end
curl_multi_add_handle($master, $curl_arr[$i]);
}
do {
curl_multi_exec($master,$running);
} while($running > 0);
for($i = 0; $i < $node_count; $i++)
{
$results[] = curl_multi_getcontent ( $curl_arr[$i] );
}
print_r($results);
最佳答案
您的curl代码试图同时启动130万个curl句柄,并且显然会耗尽资源(但是如果执行了类似那么您会注意到它)
此外,您在这里使用忙循环
do {
curl_multi_exec($master,$running);
} while($running > 0);
这意味着您将在执行句柄时使用100%cpu,这毫无疑问,而您应该一直在使用curl_multi_select等待。
这是curl_multi的工作,但您只是在错误地使用它。我的建议是稍微修改Which performs faster, headless browser or Curl?中的代码,
这将处理800万个请求,在完成响应时打印响应,并且永远不会同时使用500个以上的连接,并使用异步select()方法在等待网络IO时不使用任何CPU,
curl_multi_fetch_and_print("http://165.227.152.138/botum2/index.php?i=",8000000,500,10000,true,true);
function curl_multi_fetch_and_print(string $base_url, int $count, int $max_connections, int $timeout_ms = 10000, bool $consider_http_300_redirect_as_error = true, bool $print_fault_reason): void
{
if ($max_connections < 1) {
throw new InvalidArgumentException("max_connections MUST be >=1");
}
if ($count < 1) {
throw new InvalidArgumentException("count MUST be >=1");
}
$mh = curl_multi_init();
$workers = array();
$work = function () use (&$workers, &$mh, &$print_fault_reason) {
// > If an added handle fails very quickly, it may never be counted as a running_handle
while (1) {
curl_multi_exec($mh, $still_running);
if ($still_running < count($workers)) {
break;
}
$cms = curl_multi_select($mh, 10);
//var_dump('sr: ' . $still_running . " c: " . count($workers)." cms: ".$cms);
}
while (false !== ($info = curl_multi_info_read($mh))) {
//echo "NOT FALSE!";
//var_dump($info);
{
if ($info['msg'] !== CURLMSG_DONE) {
continue;
}
if ($info['result'] !== CURLM_OK) {
if ($print_fault_reason) {
echo "request #" . ($workers[(int)$info['handle']]) . " error: " . print_r(array(false, $info['result'], "curl_exec error " . $info['result'] . ": " . curl_strerror($info['result'])), true) . PHP_EOL;
}
} elseif (CURLE_OK !== ($err = curl_errno($info['handle']))) {
if ($print_fault_reason) {
echo "request #" . ($workers[(int)$info['handle']]) . " error: " . print_r(array(false, $err, "curl error " . $err . ": " . curl_strerror($err)), true) . PHP_EOL;
}
} else {
$code = (string)curl_getinfo($info['handle'], CURLINFO_HTTP_CODE);
if ($code[0] === "3") {
if ($consider_http_300_redirect_as_error) {
if ($print_fault_reason) {
echo "request #" . ($workers[(int)$info['handle']]) . " error: " . print_r(array(false, -1, "got a http " . $code . " redirect, which is considered an error"), true) . PHP_EOL;
}
} else {
//if ($print_fault_reason) {
// echo "request #" . ($workers[(int)$info['handle']]) . " success: " . print_r(array(true, 0, "got a http " . $code . " redirect, which is considered a success"), true).PHP_EOL;
//} else {
// ... got a http redirect, which is not considered an errror,
echo "request #" . ($workers[(int)$info['handle']]) . " success: (http {$code} redirect)\n";
//}
}
} elseif ($code[0] === "2") {
if ($print_fault_reason) {
echo "request #" . ($workers[(int)$info['handle']]) . " success: http {$code}: " . curl_multi_getcontent($info['handle']) . PHP_EOL;
} else {
echo "request #" . ($workers[(int)$info['handle']]) . ": " . curl_multi_getcontent($info['handle']) . PHP_EOL;
}
} else {
// all non-2xx and non-3xx are always considered errors (500 internal server error, 400 client error, 404 not found, etcetc)
if ($print_fault_reason) {
echo "request #" . ($workers[(int)$info['handle']]) . " error: " . print_r(array(false, -1, "got a http " . $code . " code, which is considered an error"), true) . PHP_EOL;
}
}
}
curl_multi_remove_handle($mh, $info['handle']);
assert(isset($workers[(int)$info['handle']]));
unset($workers[(int)$info['handle']]);
curl_close($info['handle']);
}
}
//echo "NO MORE INFO!";
};
for ($i = 0; $i < $count; ++$i) {
$url = $base_url . $i;
while (count($workers) >= $max_connections) {
//echo "TOO MANY WORKERS!\n";
$work();
}
$neww = curl_init($url);
if (!$neww) {
trigger_error("curl_init() failed! probably means that max_connections is too high and you ran out of resources", E_USER_WARNING);
if ($print_fault_reason) {
echo "request #{$i} error: curl_init() failed!" . PHP_EOL;
}
continue;
}
$workers[(int)$neww] = $url;
curl_setopt_array($neww, array(
//CURLOPT_NOBODY => 1,
CURLOPT_RETURNTRANSFER=>1,
CURLOPT_SSL_VERIFYHOST => 0,
CURLOPT_SSL_VERIFYPEER => 0,
CURLOPT_TIMEOUT_MS => $timeout_ms
));
curl_multi_add_handle($mh, $neww);
//curl_multi_exec($mh, $unused_here); LIKELY TO BE MUCH SLOWER IF DONE IN THIS LOOP: TOO MANY SYSCALLS
}
while (count($workers) > 0) {
//echo "WAITING FOR WORKERS TO BECOME 0!";
//var_dump(count($workers));
$work();
}
curl_multi_close($mh);
return;
}