本文介绍了“怎么能在这段代码中获得最快的速度?”这一问题的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们可以跟随小编一起学习!
问题描述
#include "stdafx.h"
#include <conio.h>
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <Windows.h>
#include <sstream>
#include <algorithm>
#include "hash_map"
#include <stdio.h>
using namespace std;
// Opening and closing document tags held as string objects; START_DOC_LEN and
// END_DOC_LEN below expand to calls of their length() members.
string b1 = "<BODY>", b2 = "</BODY>";
// File that _tmain reads and tokenises.
#define FINPUT "input.txt"
// File that _tmain writes the finished index to.
#define FOUTPUT "output.txt"
// File whose whitespace-separated tokens are loaded into vec_stop.
#define STOPWORDS "stopword.txt"
// Tag searched for inside a token to detect the start of a document.
#define START_DOC "<BODY>"
// Tag searched for inside a token to detect the end of a document.
#define END_DOC "</BODY>"
// Lengths of the two tags; each use expands to a length() call on b1 / b2.
#define START_DOC_LEN b1.length()
#define END_DOC_LEN b2.length()
// Occurrence record of one word under one document number.
// The index maps every word to a vector of these records.
class CountOfWrd
{
public:
    int DocNum;                  // document number the record belongs to
    int Repeat;                  // how many times the word was counted in that document
    std::vector<int> Positions;  // token position of every counted occurrence
};
// Stop words read from STOPWORDS; is_stop() compares a word against each entry.
vector<string> vec_stop;
// Every distinct word inserted into hmap; sorted before the index is written out.
vector<string> WordsForSort;
// The index: word -> list of CountOfWrd records (a record is appended when the
// word is seen under a document number different from the last record's).
hash_map <string, vector<CountOfWrd>> hmap;
// Shared iterator reused for every lookup into hmap.
hash_map <string, vector<CountOfWrd>> :: iterator hmap_AcIter;
// Element type passed to hmap.insert().
typedef pair <string, vector<CountOfWrd>> Word_Pair;
// Removes punctuation and digit characters from word, in place.
void syntax(string &word);
// Returns a copy of word with a trailing "ing" (either letter case) removed.
inline string stem(string word);
// Converts upper-case characters of word to lower case, in place.
inline void caps(string &word);
// Returns true when word is equal to an entry of vec_stop.
bool is_stop(string& word);
int _tmain(int argc, _TCHAR* argv[])
{
ifstream file_of_stopwords(STOPWORDS);
string str_stop;
while( file_of_stopwords >> str_stop )
vec_stop.push_back(str_stop);
CountOfWrd TempCw;
ifstream in(FINPUT);
string word;
int Doc = 0;
int Pos=0;
int startPos;
vector<CountOfWrd> TmpPushPos;
while(in >> word)
if(word.find(START_DOC) != string::npos){
if(word.length() > START_DOC_LEN){
startPos = word.find(START_DOC);
word = word.substr (startPos + START_DOC_LEN );
Pos = 0;
do{
Pos ++;
if(is_stop(word) == true)
continue;
syntax(word);
if(word.length() > 2)
stem(word);
caps(word);
if(is_stop(word) == true)
continue;
hmap_AcIter = hmap.find(word);
if(hmap_AcIter != hmap.end())
{if(hmap_AcIter->second[hmap_AcIter->second.size()-1].DocNum == Doc)
{ (hmap_AcIter)->second[hmap_AcIter->second.size()-1].Repeat++;
(hmap_AcIter->second[hmap_AcIter->second.size()-1].Positions).push_back(Pos);}
else{
TempCw.Repeat = 1;
TempCw.DocNum = Doc;
TempCw.Positions.clear();
TempCw.Positions.push_back(Pos);
((hmap_AcIter)->second).push_back(TempCw);}}
else
{
TempCw.DocNum = Doc;
TempCw.Positions.clear();
TempCw.Positions.push_back(Pos);
TempCw.Repeat = 1;
TmpPushPos.clear();
TmpPushPos.push_back(TempCw);
hmap.insert(Word_Pair(word,TmpPushPos));
WordsForSort.push_back(word);
}
}while(in >> word && word.find(END_DOC) == string::npos);
}
make_heap(WordsForSort.begin(), WordsForSort.end());
sort_heap(WordsForSort.begin(), WordsForSort.end());
int start_of_index=0;
ofstream out;
out.open(FOUTPUT);
for(unsigned int i = start_of_index; i < WordsForSort.size(); i++){
hmap_AcIter = hmap.find(WordsForSort[i]);
out << hmap_AcIter->first << "\t\t";
for(unsigned int j = 0; j < hmap_AcIter->second.size(); j++){
out << "[" <<hmap_AcIter->second[j].DocNum << "," << hmap_AcIter->second[j].Repeat << "(";
for(unsigned int k = 0; k < hmap_AcIter->second[j].Positions.size(); k++){
out << hmap_AcIter->second[j].Positions[k];
if(k !=hmap_AcIter->second[j].Positions.size()-1)
out << ",";}
out << ")" << "]" << "\t";}
out<< "\n";
}
out.close();
_getch();
return 0;}
// Returns a copy of word with a trailing "ing" removed.
// Each of the three letters is matched in either case ("ING", "Ing", ...).
// Words shorter than three characters are returned unchanged.
inline string stem(string word){
    const string::size_type len = word.length();
    // Without this guard len - 3 wraps around for short words and the
    // subscripts below read far outside the string.
    if (len < 3)
        return word;

    const char c1 = word[len - 3];
    const char c2 = word[len - 2];
    const char c3 = word[len - 1];
    if ((c1 == 'i' || c1 == 'I') && (c2 == 'n' || c2 == 'N') && (c3 == 'g' || c3 == 'G'))
        word.erase(len - 3, 3);
    return word;
}
// Converts every upper-case character of word to lower case, in place.
// Other characters are left untouched.
inline void caps(string &word){
    for (string::size_type i = 0; i < word.size(); ++i) {
        // isupper/tolower require a value representable as unsigned char;
        // passing a negative char (non-ASCII byte) is undefined behaviour.
        const unsigned char ch = static_cast<unsigned char>(word[i]);
        if (isupper(ch))
            word[i] = static_cast<char>(tolower(ch));
    }
}
// Reports whether word is one of the stop words held in vec_stop.
// Returns true on an exact (case-sensitive) match, false otherwise.
bool is_stop(string& word)
{
    return find(vec_stop.begin(), vec_stop.end(), word) != vec_stop.end();
}
// Removes punctuation and digits from word, in place.
// Dropped characters: . # & , ; ) ( : - " / ? ' and the digits 0-9.
// All other characters keep their relative order.
void syntax(string &word){
    // Single-pass compaction: kept characters are copied down to the front and
    // the tail is cut off once, instead of erasing inside the loop (which
    // shifts the rest of the string on every removal).
    string::size_type kept = 0;
    for (string::size_type i = 0; i < word.size(); ++i) {
        const char ch = word[i];
        bool drop;
        switch (ch) {
        case '.': case '#': case '&': case ',': case ';':
        case ')': case '(': case ':': case '-': case '"':
        case '/': case '?': case '\'':
            drop = true;
            break;
        default:
            drop = (ch >= '0' && ch <= '9');
            break;
        }
        if (!drop)
            word[kept++] = ch;
    }
    word.erase(kept);
}
推荐答案
// Returns a copy of word with a trailing "ing" removed.
// Each of the three letters is matched in either case ("ING", "Ing", ...).
// Words shorter than three characters are returned unchanged.
inline string stem(string word){
    const string::size_type len = word.length();
    // Without this guard len - 3 wraps around for short words and the
    // subscripts below read far outside the string.
    if (len < 3)
        return word;

    const char c1 = word[len - 3];
    const char c2 = word[len - 2];
    const char c3 = word[len - 1];
    if ((c1 == 'i' || c1 == 'I') && (c2 == 'n' || c2 == 'N') && (c3 == 'g' || c3 == 'G'))
        word.erase(len - 3, 3);
    return word;
}
既然运行时库已经提供了经过优化的、不区分大小写的字符串比较函数,为什么还要使用这样的代码?请阅读文档,无论是 string 类的文档,还是 strXXX 函数的文档。类似地,isXXX 函数可以判断字符串是否包含控制字符、是否全为数字等。
Why are you using code like this when the run-time libraries already provide optimised functions for comparing strings without regard to case? Read the documentation, either for the string class or for the strXXX functions. Similarly, the isXXX functions will determine whether a string contains control characters, is all digits, etc.
这篇关于“怎么能在这段代码中获得最快的速度?”的文章就介绍到这里,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!