package com.originalityTest;

import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue; import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Query;
import org.yqm.nlp.cn.seg.ISegTagger;
import org.yqm.nlp.cn.seg.impl.CharNgramSegTagger; import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientOptions;
import com.mongodb.ServerAddress;
import com.originalityTest.Test.Consumer;
import com.originalityTest.Test.Producer; import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
import us.codecraft.background.entity.KeywordDetailed;
import us.codecraft.background.solr.SolrService;
import us.codecraft.background.solr.VSMTextSimilarity;
import us.codecraft.webmagic.main.CollectInterface;
import us.codecraft.webmagic.main.testMain;
import us.codecraft.webmagic.model.samples.iask.IaskQuestionModel;
import us.codecraft.webmagic.utils.HttpUtils;
import us.codecraft.webmagic.utils.MongoUtils; /**
*
*-----------------------------------------------------------------------------
* <br>Copyright (c) 2018 深圳问我时代科技有限公司
*
* <p>跑360采集数据跑SEO质量得分 </p>
*
* @project name : webmagic-samples
* @package name : com.originalityTest
* @file name : RunJob.java
* @author : flm
* @date : 2018年10月29日 <br>
*
*-----------------------------------------------------------------------------
*/
public class RunJob { protected static String host = "192.168.1.90"; //192.168.9.40:30000 protected static int port = 30000;
protected static String dbname = "5118baiduzhidao";
protected static DB mongoDB = null;
protected static DB mongoDBURL = null;
protected static boolean isTestFlag = false;
protected static int initDate = 1000*60*10; private int queueSize = 10000000;
private ArrayBlockingQueue<BasicDBObject> queue = new ArrayBlockingQueue<BasicDBObject>(queueSize); static {
MongoClientOptions.Builder buide = new MongoClientOptions.Builder();
buide.connectionsPerHost(100);// 与目标数据库可以建立的最大链接数
buide.connectTimeout(1000 * 60 * 20);// 与数据库建立链接的超时时间
buide.maxWaitTime(100 * 60 * 5);// 一个线程成功获取到一个可用数据库之前的最大等待时间
buide.threadsAllowedToBlockForConnectionMultiplier(100);
buide.maxConnectionIdleTime(0);
buide.maxConnectionLifeTime(0);
buide.socketTimeout(0);
buide.socketKeepAlive(true);
MongoClientOptions myOptions = buide.build();
try {
MongoClient mongoClient = new MongoClient(new ServerAddress(host, port), myOptions);
mongoDB = mongoClient.getDB(dbname);
mongoDBURL = mongoClient.getDB("seo_keyword");
} catch (UnknownHostException e) {
e.printStackTrace();
System.exit(0);
} }
/**
* 手动执行方法
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
RunJob runJob = new RunJob();
Producer producer = runJob.new Producer();
Consumer consumer1 = runJob.new Consumer(1);
Consumer consumer2 = runJob.new Consumer(2);
Consumer consumer3 = runJob.new Consumer(3);
Consumer consumer4 = runJob.new Consumer(4);
Consumer consumer5 = runJob.new Consumer(5);
Consumer consumer6 = runJob.new Consumer(6);
Consumer consumer7 = runJob.new Consumer(7);
Consumer consumer8 = runJob.new Consumer(8);
Consumer consumer9 = runJob.new Consumer(9);
Consumer consumer10 = runJob.new Consumer(10);
// 生产数据
producer.start(); // 跑数据原创度
consumer1.start();
consumer2.start();
consumer3.start();
consumer4.start();
consumer5.start();
consumer6.start();
consumer7.start();
consumer8.start();
consumer9.start();
consumer10.start();
} class Consumer extends Thread{
int i;
public Consumer(int i){
this.i = i;
}
@Override
public void run() {
consume();
} private void consume() {
while(true){
try {
System.out.println("队列获取 队列i:"+i);
DBCollection collQuestion = mongoDB.getCollection("soQA");
DBCollection collection = mongoDBURL.getCollection("domain"); BasicDBObject d = queue.take(); List<DBObject> answers= (List<DBObject>) d.get("answers");
String questionTxt = d.getString("title") + d.getString("quest"); String questionTitle = "";
if(d.getString("title")!=null&&d.getString("title")!=""){
questionTitle = d.getString("title");
}else{
questionTitle = d.getString("quest");
} float score = 0F; float answerLenOriginality = OriginalityUtitls.getAnswerLenOriginality(answers);
float answerSizeOriginality = OriginalityUtitls.getAnswerrSizeOriginality(answers);
float goodOriginality = OriginalityUtitls.getGoodOriginality(answers);
float questionOriginality = OriginalityUtitls.getQuestionOriginality(questionTxt, answers);
float titleOriginality = OriginalityUtitls.getTitleOriginality(questionTitle);
float wenwoOriginality = OriginalityUtitls.getWenwoOriginality(questionTitle, collection); score += Float.valueOf(questionOriginality*0.3+"");
score += Float.valueOf(answerLenOriginality*0.2+"");
score += Float.valueOf(answerSizeOriginality*0.1+"");
score += Float.valueOf(goodOriginality*0.05+"");
score += Float.valueOf(titleOriginality*0.2+"");
score += Float.valueOf(titleOriginality*0.2+"");
score += Float.valueOf(wenwoOriginality*0.1+"");
System.out.println("score :"+score); d.put("originality", score);
d.put("run", 0);
collQuestion.save(d);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
} class Producer extends Thread{
@Override
public void run() {
produce();
} private void produce() {
try {
DBCollection collQuestion = mongoDB.getCollection("soQA"); DBObject idQuery = new BasicDBObject();
idQuery.put("state",1);
DBCursor lists = collQuestion.find(idQuery);
lists.addOption(com.mongodb.Bytes.QUERYOPTION_NOTIMEOUT);
int i = 0;
while(lists.hasNext()){
BasicDBObject d = (BasicDBObject)lists.next();
queue.put(d);
System.out.println((++i)+"条记录 ,向队列取中插入一个元素,队列剩余空间:"+(queueSize-queue.size())); try {
Thread.sleep(500); // 控制生产速度,防止队列满
} catch (Exception e) {
System.err.println("Thread.sleep.....");
}
}
} catch (InterruptedException e) {
e.printStackTrace();
}
}
} }
05-11 16:53