问题描述
我想对一个大型数组进行复杂的 CPU 密集型计算。理想情况下,我想把这个数组传递给子进程去处理。
// Attempt 1 from the question: hand the data to the child as a command-line
// argument. This is the version that the text below reports as failing.
var spawn = require('child_process').spawn;
// dataAsNumbers is a large 2D array
var child = spawn(process.execPath, ['/child_process_scripts/getStatistics', dataAsNumbers]);
// Echo whatever the child prints on its stdout.
child.stdout.on('data', function (data) {
  console.log('from child:', data.toString());
});
但是当我这样做时,node会给出错误:
后来我找到了一篇相关的文章。
所以管道数据到子进程似乎是要走的路。我的代码现在是:
// Attempt 2 from the question: open an extra pipe to the child (stdio index 3)
// and write the data into it.
var spawn = require('child_process').spawn;
console.log('创建孩子........................');
// Request a pipe at stdio index 3; indexes 0-2 are passed as null.
var options = { stdio: [null, null, null, 'pipe'] };
var args = ['/getStatistics'];
var child = spawn(process.execPath, args, options);
// Parent-side end of the pipe at stdio index 3.
var pipe = child.stdio[3];
// Buffer.from replaces the deprecated Buffer(string) call.
// NOTE(review): the data goes to index 3, while getStatistics.js listens on
// process.stdin; that mismatch is the behavior the question is asking about.
pipe.write(Buffer.from('awesome'));
// Echo whatever the child prints on its stdout.
child.stdout.on('data', function (data) {
  console.log('from child:', data.toString());
});
然后在getStatistics.js中:
// getStatistics.js (child): log a banner, then print the first chunk that
// arrives on stdin and exit.
console.log('im inside child');
process.stdin.on('data',function(data){
console.log('data is',data);
// Exit with status 0 after the first 'data' event.
process.exit(0);
});
然而,process.stdin.on 中的回调
始终没有被触发。我该如何在子脚本中接收这个流?
编辑
我不得不放弃缓冲方法。现在我将数组作为消息发送:
// Attempt 3 from the question: fork the child and send the whole array as a
// single IPC message.
var cp = require('child_process');
var child = cp.fork('/getStatistics.js');
// The array travels as the dataAsNumbers property of the message object.
child.send({
  dataAsNumbers: dataAsNumbers
});
但这只适用于dataAsNumbers的长度低于约20,000,否则会超时。
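对于如此大量的数据,我会考虑使用共享内存,而不是把数据复制到子进程中(使用管道或传递消息时发生的正是复制)。这样可以节省内存,减少父进程占用的 CPU 时间,也不太可能碰到某些限制。shm-typed-array 是一个非常简单的模块,似乎适合您的应用程序。示例: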
parent.js
"use strict";
// parent.js: allocate a shared-memory typed array, fill it, and hand only its
// key to a forked child, which replies with the result of its computation.
const shm = require('shm-typed-array');
const fork = require('child_process').fork;

// Create shared memory (SIZE is a count of elements, not bytes).
const SIZE = 20000000;
const data = shm.create(SIZE, 'Float64Array');
if (!data) {
  // Fail early instead of crashing later on `data.key`.
  throw new Error('shm.create did not return a shared memory array');
}

// Fill with dummy data.
data.fill(1);

// Spawn child, set up communication, and give it the shared memory key.
const child = fork('child.js');
child.on('message', (sum) => {
  console.log(`得到回答:${sum}`);
  // Demo only; ideally you'd re-use the same child.
  child.kill();
});
child.send(data.key);
child.js
"use strict";
// child.js: receive a shared-memory key from the parent, sum the array that
// lives behind it, and send the sum back.
const shm = require('shm-typed-array');

process.on('message', (key) => {
  // Get access to the shared memory created by the parent.
  const data = shm.get(key, 'Float64Array');
  if (!data) {
    // Fail with a clear message instead of a TypeError inside reduce.
    throw new Error(`shm.get found no shared memory for key ${key}`);
  }

  // Perform processing.
  const sum = data.reduce((a, b) => a + b, 0);

  // Return processed data.
  process.send(sum);
});
请注意,我们只是通过IPC从父进程向子进程发送一个小密钥,不是整个数据。因此,我们节省了大量的内存和时间。
当然,您可以把 'Float64Array'(即 64 位双精度浮点数 double)换成您的应用程序所需的任何类型化数组。请注意,这个库只处理一维类型化数组;但这应该只是一个小障碍,例如可以把二维数组按行展平后再存入。
I have complex CPU intensive work I want to do on a large array. Ideally, I'd like to pass this to the child process.
// Attempt 1: hand the data to the child as a command-line argument.
// NOTE(review): the array is placed directly in spawn()'s argument list; this
// is the call that the text below reports as failing. Presumably the size of
// the argument is the cause; confirm against the actual error.
var spawn = require('child_process').spawn;
// dataAsNumbers is a large 2D array
var child = spawn(process.execPath, ['/child_process_scripts/getStatistics', dataAsNumbers]);
// Echo whatever the child prints on its stdout.
child.stdout.on('data', function(data){
console.log('from child: ', data.toString());
});
But when I do, node gives the error:
I came across this article
So piping the data to the child process seems to be the way to go. My code is now:
// Attempt 2: open an extra pipe to the child (stdio index 3) and write the
// data into it.
var spawn = require('child_process').spawn;
console.log('creating child........................');
// Request a pipe at stdio index 3; indexes 0-2 are passed as null.
var options = { stdio: [null, null, null, 'pipe'] };
var args = ['/getStatistics'];
var child = spawn(process.execPath, args, options);
// Parent-side end of the pipe at stdio index 3.
var pipe = child.stdio[3];
// Buffer.from replaces the deprecated Buffer(string) call.
// NOTE(review): the data goes to index 3, while getStatistics.js listens on
// process.stdin; that mismatch is the behavior the question is asking about.
pipe.write(Buffer.from('awesome'));
// Echo whatever the child prints on its stdout.
child.stdout.on('data', function (data) {
  console.log('from child: ', data.toString());
});
And then in getStatistics.js:
// getStatistics.js (child): log a banner, then print the first chunk that
// arrives on stdin and exit.
console.log('im inside child');
process.stdin.on('data', function(data) {
console.log('data is ', data);
// Exit with status 0 after the first 'data' event.
process.exit(0);
});
However the callback in process.stdin.on
isn't reached. How can I receive a stream in my child script?
EDIT
I had to abandon the buffer approach. Now I'm sending the array as a message:
// Attempt 3: fork the child and send the whole array as a single message.
var cp = require('child_process');
var child = cp.fork('/getStatistics.js');
// The array travels as the dataAsNumbers property of the message object.
child.send({
dataAsNumbers: dataAsNumbers
});
But this only works when the length of dataAsNumbers is below about 20,000, otherwise it times out.
With such a massive amount of data, I would look into using shared memory rather than copying the data into the child process (which is what is happening when you use a pipe or pass messages). This will save memory, take less CPU time for the parent process, and be unlikely to bump into some limit.
shm-typed-array
is a very simple module that seems suited to your application. Example:
parent.js
"use strict";
// parent.js: allocate a shared-memory typed array, fill it, and hand only its
// key to a forked child, which replies with the result of its computation.
const shm = require('shm-typed-array');
const fork = require('child_process').fork;

// Create shared memory (SIZE is a count of elements, not bytes).
const SIZE = 20000000;
const data = shm.create(SIZE, 'Float64Array');
if (!data) {
  // Fail early instead of crashing later on `data.key`.
  throw new Error('shm.create did not return a shared memory array');
}

// Fill with dummy data.
data.fill(1);

// Spawn child, set up communication, and give it the shared memory key.
const child = fork("child.js");
child.on('message', (sum) => {
  console.log(`Got answer: ${sum}`);
  // Demo only; ideally you'd re-use the same child.
  child.kill();
});
child.send(data.key);
child.js
"use strict";
// child.js: receive a shared-memory key from the parent, sum the array that
// lives behind it, and send the sum back.
const shm = require('shm-typed-array');

process.on('message', (key) => {
  // Get access to the shared memory created by the parent.
  const data = shm.get(key, 'Float64Array');
  if (!data) {
    // Fail with a clear message instead of a TypeError inside reduce.
    throw new Error(`shm.get found no shared memory for key ${key}`);
  }

  // Perform processing.
  const sum = data.reduce((a, b) => a + b, 0);

  // Return processed data.
  process.send(sum);
});
Note that we are only sending a small "key" from the parent to the child process through IPC, not the whole data. Thus, we save a ton of memory and time.
Of course, you can change 'Float64Array' (i.e. 64-bit doubles) to whatever typed array your application requires. Note that this library in particular only handles single-dimensional typed arrays, but that should only be a minor obstacle: a 2D array can be flattened row by row and indexed as row * width + column.
这篇关于将大数组传递给 Node.js 子进程的文章就介绍到这里,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!