我想创建一个node.js应用程序,对一些站点进行web抓取,将数据保存在postgresql数据库中,然后在网页上显示这些数据的可视化(在d3.js中)。
我考虑了拆分前端部分(创建和显示可视化)和后端部分(进行web抓取和更新db)。
这两个应用程序的框架(有两个是因为我将任务分为两个应用程序)如下所示。
后端应用程序(scraper
):
连接到数据库
如果表不存在,则创建它们
抓取数据
在数据库中保存数据
断开与数据库的连接。
这个后端应用程序一年只能启动几次(为此,如果使用的是unix,我可以配置cron文件)。
前端应用程序(viz
):
连接到数据库
启动等待端口3000的服务器(我需要它进行可视化)
每次用户刷新页面(onLoad()
)时,应用程序都会进行查询(SELECT
),从数据库中获取数据。这样,数据总是更新的。
这个应用程序只由程序员启动一次(理想情况下)。
我创建了这种类型的文件夹结构(我使用了npm init
和Express
):
project
|_ scraper
|_ helpers // contains some useful .js files
|_ elaborateJson.js
|_ saveOnDb.js
|_ utilFunc.js
|_ node_modules // modules installed using `npm install moduleName --save`
|_ routes // contains the files that make scraping
|_ downloaderHome.js
|_ downloaderWork.js
|_ services // contains a files concerning the db
|_ postgreSQLlib.js
|_ app.js
|_ package.json
|_ package-lock.json
|_ viz
|_ helpers // // contains some useful .js files
|_ utilFunc.js
|_ node_modules // modules installed using `npm install moduleName --save`
|_ public // contains files for visualizations
|_ index.handlebars
|_ script.js
|_ style.css
|_ services // contains a file concerning the db
|_ postgreSQLlib.js
|_ app.js
|_ package.json
|_ package-lock.json
有了这个结构,我已经有两个问题不知道如何解决:
1.
postgreSQLlib.js
文件(以及utilFunc.js
)在scraper
和viz
中都是相同的。如何避免代码重复?2.我必须在
scraper
和viz
文件夹中安装一些模块(例如express-handlebars
和express
)两次。这是
project/scraper/app.js
:const downloaderHome = require('./routes/downloaderHome.js');
const downloaderWork = require('./routes/downloaderWork.js');
const postgreSQLlib = require('./services/postgreSQLlib.js');
const saveOnDb = require('./helpers/saveOnDb.js');
const utilFunc = require('./helpers/utilFunc.js');
const express = require('express');
const exphbs = require('express-handlebars');
var app = express();
start();
/**
 * Runs one complete scraper pass.
 *
 * Steps: connect to the database, create the `home` and `work` tables if
 * needed, refresh each table that `utilFunc.isTableUpdated` reports as not
 * up to date (download, then save), and disconnect.
 *
 * The disconnect lives in a `finally` block so that a failure in any step
 * after the connection was opened does not leave the connection dangling.
 *
 * @returns {Promise<void>} Resolves when the pass is finished. Rejects with
 *   the error of the failing step (the disconnect is still attempted first).
 */
async function start() {
  console.log('\n Connect to db');
  await postgreSQLlib.connect();

  try {
    console.log('\n Create tables if they do not exist');
    await postgreSQLlib.createHomeTable();
    await postgreSQLlib.createWorkTable();

    console.log('\n Check if table \'home\' is updated or not');
    // NOTE(review): 6418 (the original trailing comment mentions 6308) is
    // presumably the expected row count for `home` — confirm in utilFunc.
    const homeIsCurrent = await utilFunc.isTableUpdated('home', 6418);
    if (!homeIsCurrent) {
      console.log('\n Download data for home');
      await downloaderHome.download();
      console.log('\n Saving data for home on db');
      await saveOnDb.saveHome();
    }

    console.log('\n Check if table \'work\' is updated or not');
    // NOTE(review): 6804 is presumably the expected row count for `work` — confirm.
    const workIsCurrent = await utilFunc.isTableUpdated('work', 6804);
    if (!workIsCurrent) {
      console.log('\n Download data for work');
      await downloaderWork.download();
      console.log('\n Saving data for work on db');
      await saveOnDb.saveWork();
    }
  } finally {
    // Always release the connection, even when a step above failed.
    console.log('\n Disconnect from db');
    await postgreSQLlib.disconnect();
  }
}
这是
project/viz/app.js
:const postgreSQLlib = require('./services/postgreSQLlib.js');
const utilFunc = require('./helpers/utilFunc.js');
const express = require('express');
const exphbs = require('express-handlebars');
const http = require('http');
var app = express();
var response;
var callback;
start();
/**
 * Asker's first attempt at the viz entry point: connects to the database and
 * then issues an HTTP GET against localhost:3000.
 *
 * NOTE(review): nothing in this function starts a server (no `app.listen`),
 * so the GET below targets a port this snippet never opens.
 */
async function start() {
console.log('\n Connect to db');
await postgreSQLlib.connect();
// how do I check when page is refreshed?!
// NOTE(review): `callback` is still unassigned at this point (it is only
// declared with `var` above and assigned after this call), so http.get
// receives `undefined` as its response handler.
http.get({
hostname: 'localhost',
port: 3000,
path: '/',
agent: false
}, callback);
// Assigned too late to be used by the http.get call above.
callback = function(res) {
response = res;
console.log(response); // here response will return an object
console.log('refresh callback');
}
// Runs immediately after http.get is issued; `response` has not been
// assigned by any callback yet at this point.
console.log(response);
console.log('refresh');
///////////////////////////////////////////////
// How do I check the disconnection from the db?
// If I disconnect now, the visualizations are no longer work.
// So when do I get disconnected?
// Create problems leaving the connection to the active db?
///////////////////////////////////////////////
//console.log('\n Disconnect from db');
//await postgreSQLlib.disconnect();
}
第一个应用程序(
project/scraper/app.js
)工作得很好。第二个应用程序(
project/viz/app.js
)却不行。我希望它这样做:连接到数据库[完成。它起作用]
启动一个等待端口3000的服务器(我需要它进行可视化)[我该怎么做?向下看(*)]
每次用户刷新页面(
onLoad()
)时,应用程序都会进行查询(SELECT
),从数据库中获取数据[我该怎么做?](*)我想到了这样的事情:
/**
 * Connects to the database, reads both tables once, and hands the rows to
 * `pageLoad`, which sets up and starts the web server.
 *
 * The tables are read a single time here, not per request.
 *
 * @returns {Promise<void>}
 */
async function start() {
  console.log('\n Connect to db');
  await postgreSQLlib.connect();

  console.log('\n Get data from db');
  const homeRows = await postgreSQLlib.getTableHome();
  const workRows = await postgreSQLlib.getTableWork();

  pageLoad(homeRows, workRows);
}
/**
 * Configures express-handlebars with two helpers that expose the given data
 * as JSON strings, registers the `/` route, and starts listening on 3000.
 *
 * @param {*} dataHome - Value serialised by the `getDataHome` helper.
 * @param {*} dataWork - Value serialised by the `getDataWork` helper.
 */
function pageLoad(dataHome, dataWork) {
  // The helpers close over the arguments, so they always serialise the
  // values passed to this call.
  const helpers = {
    getDataHome: () => JSON.stringify(dataHome),
    getDataWork: () => JSON.stringify(dataWork),
  };
  const engineInstance = exphbs.create({ helpers });

  app.engine('handlebars', engineInstance.engine);
  app.set('view engine', 'handlebars');

  const renderIndex = (req, res, next) => {
    // 'index' is the template file name.
    res.render('index', { showTitle: true });
  };
  app.get('/', renderIndex);

  console.log('Go to http://localhost:3000/ to see visualizations');
  app.listen(3000);
}
其中
dataHome
和dataWork
是两个对象,它们包含使用SELECT
查询从数据库下载的数据。但通过这种方式,数据只被获取一次,而不是每次用户刷新页面时都获取。
我们将非常感谢您的帮助。谢谢您!
编辑
你能更精确些吗?我试着这么做,但没用:
项目/VIZ/app.js:
const postgreSQLlib = require('../shared_libs/postgreSQLlib.js');
const express = require('express');
var app = express();
start();
/**
 * Connects to the database and registers `fetchFreshData` for GET `/`.
 *
 * NOTE(review): the visible snippet never configures a view engine and
 * never calls `app.listen`, so no port is opened by this code.
 */
async function start() {
console.log('Connect to db');
await postgreSQLlib.connect();
app.get('/', fetchFreshData);
}
/**
 * Route handler: reads both tables and renders the `index` view with them.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response; its `render` method is called once.
 * @returns {Promise<void>}
 */
async function fetchFreshData(req, res) {
  const homeRows = await postgreSQLlib.getTableHome();
  const workRows = await postgreSQLlib.getTableWork();

  // The view receives the rows under the keys `dataHome` and `dataWork`.
  res.render('index', { dataHome: homeRows, dataWork: workRows });
}
project\viz\view\index.handlebars项目:
<!DOCTYPE html>
<html lang='en'>
<head>
<meta charset='utf-8'>
<title>Map</title>
<script src='https://d3js.org/d3.v5.js' charset='utf-8'></script>
<link rel='stylesheet' type='text/css' href='/style.css' media='screen'/>
</head>
<body>
<div id='example'></div>
</body>
<script src='/script.js'></script>
</html>
project\viz\view\script.js:
console.log('viewData:', viewData);
我错在哪里了?
编辑2
好的,我再次修改
viz/app.js
代码:const postgreSQLlib = require('../shared_libs/postgreSQLlib.js');
const express = require('express');
const exphbs = require('express-handlebars');
var app = express();
start();
/**
 * Asker's second attempt: connects to the database, tries to register a
 * `json` helper, configures the view engine, and starts listening on 3000.
 *
 * NOTE(review): `Handlebars` is not declared anywhere in the visible snippet
 * (only `exphbs` is required), so the registerHelper line is expected to
 * throw before `app.listen` is reached — confirm against the full file.
 */
async function start() {
await postgreSQLlib.connect();
// NOTE(review): the return value of registerHelper is used as an engine
// object below; presumably it has no `.engine` property — confirm.
var hbs = Handlebars.registerHelper('json', function(context) {
return JSON.stringify(context);
});
app.engine('handlebars', hbs.engine);
app.set('view engine', 'handlebars');
app.get('/', fetchFreshData);
console.log('Go to http://localhost:3000/ to see data');
app.listen(3000);
}
/**
 * Route handler: reads both tables and renders `index` with a timestamp and
 * the `home` rows as `entries`.
 *
 * The `work` rows are fetched but not passed to the view.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response; its `render` method is called once.
 * @returns {Promise<void>}
 */
async function fetchFreshData(req, res) {
  const homeRows = await postgreSQLlib.getTableHome();
  // Still queried to keep the same database calls; the result is unused.
  await postgreSQLlib.getTableWork();

  res.render('index', {
    timestamp: Date.now(),
    entries: homeRows,
  });
}
当我运行应用程序时,没有错误,但如果我连接到http://localhost:3000/,浏览器会告诉我无法访问该站点。我觉得有点傻…
编辑3
如果我正确地理解了你的代码,你的代码中有一个(分散注意力的)错误。
在
returnOBJ()
而不是res.render('index', viewData);
中,它应该是res.render('obj', viewData);
(与obj.hbs
文件相关)。对吗?我以这种方式更改index.hbs文件:
<html lang='en'>
<head>
<meta charset='utf-8'>
<title>Index</title>
<script src='https://d3js.org/d3.v5.js' charset='utf-8'></script>
<link rel='stylesheet' type='text/css' href='/style.css' media='screen'/>
</head>
<body>
<h1>INDEX<small>{{timestamp}}</small></h1>
</body>
<script>
// add global variables in the .hbs file
window.viewData_dataWork = {{ json entries }}
console.log(window.viewData);
</script>
<script src='/script.js'></script>
</html>
但我得到:
(node:207156) UnhandledPromiseRejectionWarning: Error: callback function required
at Function.engine (C:\...\node_modules\express\lib\application.js:295:11)
at start (C:\...\viz\app.js:20:6)
at <anonymous>
at process._tickCallback (internal/process/next_tick.js:182:7)
(node:207156) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). (rejection id: 1)
(node:207156) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
我也不明白这段代码。
app.set('view engine', 'hbs');
app.engine('hbs', hbs.__express);
hbs.registerHelper('json', function(context) {
return JSON.stringify(context);
});
app.engine('handlebars', hbs.engine);
app.set('view engine', 'handlebars');
为什么用不同的值调用
app.set('view engine', ...)
两次?编辑4
我进一步简化了代码:
/VIZ/应用程序js:
const postgreSQLlib = require(__dirname + './../shared_libs/services/postgreSQLlib.js');
const express = require('express');
const hbs = require('hbs');
var app = express();
// Server initiator
/**
 * Asker's fourth attempt: connects to the database, configures the `hbs`
 * view engine, registers a `json` helper and the `/` route, then listens
 * on port 3000.
 *
 * NOTE(review): the view engine is configured twice with different names
 * (`hbs`, then `handlebars`); the second pair of calls overrides the first.
 */
async function start() {
await postgreSQLlib.connect();
// hbs
app.set('views', '' + __dirname + '/views');
app.set('view engine', 'hbs');
app.engine('hbs', hbs.__express);
hbs.registerHelper('json', function(context) {
return JSON.stringify(context);
});
// NOTE(review): `hbs.engine` is presumably undefined for the `hbs` module
// (the engine was registered through `hbs.__express` above); if so this call
// throws and `app.listen` below is never reached — confirm.
app.engine('handlebars', hbs.engine);
app.set('view engine', 'handlebars');
// router
app.get('/', testMe);
console.log('Go to http://localhost:3000/ to see data');
app.listen(3000);
}
// Your section with fresh data has been populated properly
/**
 * Minimal route handler used to check the rendering pipeline: renders the
 * `test` view with a fixed string under the `data` key.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response; its `render` method is called once.
 * @returns {Promise<void>}
 */
async function testMe(req, res) {
  console.log('testMe');
  res.render('test', { data: 'this string' });
}
// start the server
start();
/VIZ/视图/测试.hbs:
<html>
<head>
<title>Server test</title>
</head>
<body>
{{data}}
</body>
</html>
然后在prompt命令中转到
project/viz
并键入node app.js
+enter。进程启动并等待:没有错误。
但当我转到
http://localhost:3000/
时,连接失败。我快疯了。
编辑5
问题不在于
connect
或者选择的函数,所以我稍微简化了代码。现在,它几乎可以工作了!
这是代码。
VIZ/应用程序js:
const postgreSQLlib = require(__dirname + './../shared_libs/services/postgreSQLlib.js');
const express = require('express');
var app = express()
const hbs = require('hbs');
const webapp_opts = {"port":3000};
Initialize();
//.: Setup & Start Server
/**
 * Connects to the database, configures the hbs view engine and its `json`
 * helper, registers the `/` route, and starts listening on
 * `webapp_opts.port`.
 *
 * @returns {Promise<void>}
 */
async function Initialize() {
  await postgreSQLlib.connect();
  console.log("[~] starting ...");

  // View engine: hbs, with templates read from ./views next to this file.
  app.set('view engine', 'hbs');
  app.engine('hbs', hbs.__express);
  app.set('views', `${__dirname}/views`);

  // Template helper that serialises a value to JSON.
  hbs.registerHelper('json', (context) => JSON.stringify(context));

  // Routes.
  app.get("/", IndexPathFunction);
  // app.get("/script.js", scriptFile); <-- for script.js file

  const announceReady = () => {
    console.log("[i] ready & listening", `\n http://localhost:${webapp_opts.port}/`);
  };
  app.listen(webapp_opts.port, announceReady);
}
/*async function scriptFile(req, res) { <-- for script.js file
console.log('\nscriptFile');
var viewData = {};
viewData.number = 50;
console.log('viewData:', viewData);
res.render('script.js', viewData);
}*/
//.: Router Function : "/"
/**
 * Route handler for `/`: renders the `index` view with sample data (a
 * timestamp, an array of objects and a plain string).
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response; its `render` method is called once.
 * @returns {Promise<void>}
 */
async function IndexPathFunction(req, res) {
  const sampleRows = [
    { color: 'red', year: '1955' },
    { color: 'blue', year: '2000' },
    { color: 'yellow', year: '2013' },
  ];
  const viewData = {
    timestamp: Date.now(),
    exJson: sampleRows,
    exString: 'example of string',
  };

  console.log('viewData:', viewData);
  res.render('index', viewData);
}
VIZ/视图/索引.hbs:
<html lang='en'>
<head>
<meta charset='utf-8'>
<title>Index</title>
<script src='https://d3js.org/d3.v5.js' charset='utf-8'></script>
<link rel='stylesheet' type='text/css' href='/style.css' media='screen'/>
</head>
<body>
<h1>INDEX timestamp: <small>{{timestamp}}</small></h1>
</body>
<script>
viewData = {};
console.log('viewData:', viewData);
viewData.exJson = JSON.parse('{{ json exJson }}'.replace(/&quot;/g, '"').replace(/&lt;/, ''));
viewData.timestamp = {{timestamp}}; // doesn't work
viewData.exString = {{ exString }}; // doesn't work
console.log('viewData.exJson:', viewData.exJson);
console.log('viewData.timestamp:', viewData.timestamp);
console.log('viewData.exString:', viewData.exString);
</script>
<!--<script src='/script.js'></script>-->
</html>
问题是获取的数据类型不是json。例如,当我试图打印时间戳和exstring时,它会给我错误。为什么?
另外,我想清理一下代码,并将javascript部分放入一个
script.js
文件中,该文件由index.hbs
使用<script src='/script.js'></script>
调用。编辑6
我发现这对我很有用。
我通过添加一个css文件、一个图像和一个脚本(它只包含一个
console.log('here');
,但其思想是将viewData
变量放入script.js中)来编辑index.hbs
文件。project/viz/views/index.hbs:
<html lang='en'>
<head>
<meta charset='utf-8'>
<title>Index</title>
<script src='https://d3js.org/d3.v5.js' charset='utf-8'></script>
<link href="/css/style.css" rel="stylesheet">
</head>
<body>
<img src="/images/logo.png"/>
<h1>timestamp: <small>{{timestamp}}</small></h1>
<h2>Welcome in index.hbs</h2>
</body>
<script>
viewData = {};
console.log('viewData:', viewData);
viewData.exJson = JSON.parse('{{json exJson }}'.replace(/&quot;/g, '"').replace(/&lt;/, ''));
viewData.timestamp = {{timestamp}};
viewData.exString = '{{exString}}';
console.log('viewData.exJson:', viewData.exJson);
console.log('viewData.timestamp:', viewData.timestamp);
console.log('viewData.exString:', viewData.exString);
</script>
<link href='/script/script.js' rel='script'>
</html>
我的文件结构是:
project
|_ node_modules
|_ scraper
|_ shared_libs
|_ viz
|_ app.js
|_ public
|_ css
|_ style.css
|_ images
|_ logo.png
|_ script
|_ script.js
|_ views
|_ index.hbs
现在我看到了图片,使用了css。但脚本似乎不起作用,因为这里没有打印字符串。
我在互联网上搜索如何将变量从脚本标记传递到外部js文件,但似乎没有找到任何适合我的东西。
我读过Handlebars的API文档,但没有找到有用的内容。
最佳答案
共享(自定义)代码的解决方案:
有了这个结构,我已经有两个问题,我不知道怎么解决
要解决:
scraper和viz中的postgreSQLlib.js
文件(以及utilFunc.js
)都是相同的。如何避免代码重复?
您已经在使用require
,因此请从以下位置移动文件:
project/scraper/services/postgreSQLlib.js
project/viz/services/postgreSQLlib.js
到项目根目录中新创建的目录
project
|_ shared_libs
|_ scraper
|_ viz
(在本例中,我使用了
shared_libs
,您可以随意命名)project/shared_libs/postgreSQLlib.js
然后,从代码中:
const postgreSQLlib = require(__dirname+"/../shared_libs/postgreSQLlib.js");
这样您就不需要在两个位置的两个不同文件中维护代码
共享节点模块解决方案:
这同样适用于
node_modules
简单地说,合并当前的两个(节点模块目录)
project/scraper/node_modules
project/viz/node_modules
进入
project
目录根目录中的文件夹project/node_modules
我的建议是:
删除
project/scraper
和project/viz
里面旧的node_modules。从现在起,在project
目录中使用npm i <module> --save
。project/scraper
和project/viz
都将使用project/node_modules
,不需要复制整个库…
project/viz/app.js
的解决方案如果要在每个
GET
请求上从db获取数据然后,您必须在请求中包含从db逻辑获取数据:
app.get('/',FetchFreshData)
该函数将包含获取并构造hbs的viewdata,以便使用.hbs标记中引用的新数据呈现。
/**
 * Skeleton route handler: renders `index` with a view-data object that is
 * meant to be filled from a database query.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response; its `render` method is called once.
 */
function FetchFreshData(req, res) {
  /* add your SELECT here */
  // Fill this object with the query results before rendering.
  const templateData = {};
  res.render('index', templateData);
}
因此,从逻辑上讲,每次执行
GET
到“/”路由时,您都将运行查询并接收带有新数据的“可视化”。编辑:展开答案
首先,我建议你更深入地了解Handlebars。
如果要在脚本中使用数据,则必须在服务器端注册一个助手,该助手根据需要呈现数据。
// Server-side `json` helper: serialises the value passed from the template.
Handlebars.registerHelper('json', (context) => JSON.stringify(context));
我需要为您的情况做一个示例,因此假设viewdata如下:
/**
 * Example route handler: renders `index` with the current timestamp and the
 * `dataHome` value as `entries`.
 *
 * NOTE(review): `dataHome` is not defined in this snippet; presumably it
 * stands for the result of the SELECT mentioned below — confirm.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response; its `render` method is called once.
 */
function FetchFreshData(req, res) {
  /* add your SELECT here */
  // Fill this object using the query results.
  const templateData = {
    timestamp: Date.now(),
    entries: dataHome,
  };
  res.render('index', templateData);
}
给我们举个例子:
{
"timestamp":"1525182734",
"entries":[
{"name":"Entry 1"},
{"name":"Entry 2"},
{"name":"Entry 3"}
]
}
现在我将重点介绍您的模板(.hbs):
{{!--
  index view. Expects view data with:
    timestamp - value shown next to the heading
    entries   - array of objects with a `name` property (may be empty/missing)
  Requires a `json` helper registered on the server that returns
  JSON.stringify(value).
--}}
<html lang='en'>
<head>
<meta charset='utf-8'>
<title>Map</title>
<script src='https://d3js.org/d3.v5.js' charset='utf-8'></script>
<link rel='stylesheet' type='text/css' href='/style.css' media='screen'/>
</head><body>
<h1>MAP<small>{{timestamp}}</small></h1>
<div id='example'>
{{#if entries}}
<ul>
{{#each entries}}
{{!-- Mustaches are required; a bare "this.name" would be printed literally. --}}
<li>{{this.name}}</li>
{{/each}}
</ul>
{{else}} No content... {{/if}}
</div>
</body>
<script>
//Add global variables in the .hbs file
{{!--
  Triple mustaches disable HTML escaping; with double mustaches the quotes in
  the JSON become &quot; and the script no longer parses.
  NOTE(review): unescaped output is only safe for trusted data; a string
  containing "</script>" would break out of this block.
--}}
window.viewData_entries = {{{ json entries }}};
</script>
<script src='/script.js'></script>
</html>
它将使用json对象(viewdata)的“键”
要访问数据并呈现视图…
编辑2:尝试一些简单的事情
使用模块:
hbs
并尝试使用我的两个返回时间戳和对象的简单示例,我还在某些部分修复了您的代码,以便您继续改进它以满足您的更多需求。 const postgreSQLlib = require('../shared_libs/postgreSQLlib.js');
const express = require('express');
const hbs = require('hbs')
var app = express();
//:Server Initiator
/**
 * Server bootstrap: connects to the database, configures the hbs view
 * engine and its `json` helper, registers the three example routes, and
 * listens on port 3000.
 *
 * @returns {Promise<void>}
 */
async function start() {
  await postgreSQLlib.connect();

  // View engine setup: templates are read from ./views next to this file.
  app.set('views', `${__dirname}/views`);
  app.set('view engine', 'hbs');
  app.engine('hbs', hbs.__express);
  hbs.registerHelper('json', (context) => JSON.stringify(context));

  // Route table, registered in declaration order.
  const routes = [
    ['/', fetchFreshData],
    ['/timestamp', returnTimestamp],
    ['/obj', returnOBJ],
  ];
  for (const [path, handler] of routes) {
    app.get(path, handler);
  }

  console.log('Go to http://localhost:3000/ to see data');
  app.listen(3000);
}
//:Your section with fresh data has been populated properly
/**
 * Route handler for `/`: queries both tables on every request and renders
 * the `index` view with the fresh rows and the current timestamp.
 *
 * Query failures are reported instead of being left as an unhandled promise
 * rejection (which would leave the request without a response): the error is
 * forwarded to `next` when it is available, otherwise a 500 response is sent.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response used to render the view or send the error.
 * @param {Function} [next] - Express `next` callback; receives the error.
 * @returns {Promise<void>}
 */
async function fetchFreshData(req, res, next) {
  try {
    const viewData = {};
    viewData.timestamp = Date.now();
    viewData.dataWork = await postgreSQLlib.getTableWork();
    viewData.dataHome = await postgreSQLlib.getTableHome();
    // pass data to view
    res.render('index', viewData);
  } catch (err) {
    console.error('fetchFreshData failed:', err);
    if (typeof next === 'function') {
      next(err);
      return;
    }
    res.status(500).send('Internal Server Error');
  }
}
/*
The index.hbs I posted earlier wont work anymore, since the "entries" key doesnt exist now... Try to understand the template engine with the following 2 examples below
*/
//:Simple Timestamp EXAMPLE
/**
 * Example handler: renders `timestamp_example` with the current time in
 * milliseconds under the `timestamp` key.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response; its `render` method is called once.
 */
function returnTimestamp(req, res) {
  res.render('timestamp_example', { timestamp: Date.now() });
}
/* This would be timestamp_example.hbs :
<html><head><title>Server Timestamp</title></head><body>{{timestamp}}</body></html>
*/
//:Simple JSON EXAMPLE
/**
 * Example handler: renders `json_example` with a small object under the
 * `OBJ` key.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response; its `render` method is called once.
 */
function returnOBJ(req, res) {
  const sample = { key: 'value' };
  res.render('json_example', { OBJ: sample });
}
/* This would be json_example.hbs :
<html><head><title>Server Object</title></head><body>Page will alert "{{OBJ.key}}"<script>var OBJ = {{json OBJ}}; alert(OBJ.key);</script></body></html>
*/
//start the server :
start()
记住,您需要为
/timestamp
和/obj
路径添加新视图,我在每个服务器函数下面都添加了一个注释示例。----------
编辑3:带有回调的简单演示
const express = require('express'); var app = express()
const hbs = require('hbs')
const webapp_opts = {"port":3000}
//.: Setup & Start Server
/**
 * Configures the hbs view engine and its `json` helper, registers the `/`
 * and `/debug` routes, and starts listening on `webapp_opts.port`.
 */
function Initialize() {
  console.log("[~] starting ...");

  // View engine: hbs, with templates read from ./views next to this file.
  app.set('view engine', 'hbs');
  app.engine('hbs', hbs.__express);
  app.set('views', `${__dirname}/views`);

  // Template helper that serialises a value to JSON.
  hbs.registerHelper('json', (context) => JSON.stringify(context));

  // Routes.
  app.get("/", IndexPathFunction);
  const handleDebug = (req, res) => {
    console.log("[GET]:/debug");
    res.send("ok");
  };
  app.get("/debug", handleDebug);

  const announceReady = () => {
    console.log("[i] ready & listening", `\n http://localhost:${webapp_opts.port}/`);
  };
  app.listen(webapp_opts.port, announceReady);
}
//.: Router Function : "/"
/**
 * Route handler for `/`: runs the database query and renders the `index`
 * view with the results under the `data` key.
 *
 * When the query fails, the error is logged and a 500 response is sent so
 * the request does not stay open without an answer.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response used to render the view or send the error.
 */
function IndexPathFunction(req, res) {
  DBQuery((query_error, query_results) => {
    if (query_error) {
      console.log("[!] DBQuery @ path : '/'\n", query_error.stack);
      res.status(500).send("Internal Server Error");
      return;
    }
    console.log("[+] DBResults :", query_results);
    res.render("index", { data: query_results });
  });
}
//:[DB]:Example Query using callback method
/**
 * Example query using the callback style: borrows a client from the pool,
 * runs the SELECT, releases the client, and reports through the callback.
 *
 * A failure to obtain a client is passed to the callback as well; throwing
 * from inside the pool callback could not be caught by the caller.
 *
 * @param {Function} callback_function - Called as `(err, result)`; `err` is
 *   the connection or query error, if any.
 */
function DBQuery(callback_function) {
  console.log("[>] DBQuery");
  pool.connect((connectError, client) => {
    if (connectError) {
      callback_function(connectError);
      return;
    }
    client.query('SELECT * FROM whatever', (queryError, result) => {
      // Return the client to the pool before handing the result over.
      client.release();
      callback_function(queryError, result);
    });
  });
}
//------------------------------------
/* We can Initialize() the webapp
once we know the DB is accesible : */
const pg = require('pg');

// Connection settings for the example; replace with real values.
const db_opts = {
  "user": "dbuser", "password": "secretpassword",
  "host": "database.server.com", "port": 3211,
  "database": "mydb"
};

// Shared connection pool used by DBQuery.
const pool = new pg.Pool(db_opts);

// An error on an idle client is treated as fatal: log it and exit.
pool.on('error', (err, client) => {
  console.error('Unexpected error on idle client', err);
  process.exit(-1);
});

// Start the web app only after one connection attempt has succeeded.
pool.connect((err, client) => {
  if (err) {
    console.log("[!] DB Connection Error", err);
    return;
  }
  console.log("[+] DB Connected");
  client.release();
  Initialize();
});
记住,要理解出问题的关键是要控制从小到大的流程和构建。
你应该读一读:
https://node-postgres.com/,如果您想使用Express with async/await
不管怎样,这个新编辑的部分应该为您提供一些见解,使最低限度的工作版本。;)
关于node.js - Node.js:表达路线,从数据库查询数据并在template.hbs View 中呈现,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/50079276/