可以通过查询语句的组合进行检索,VS2015.
main函数:读取存有数据的文件并进行检索。提供两种入口:按单词查询,以及按表达式查询。
1 #include <iostream> 2 #include <fstream> 3 4 #include "TextSearch.h" 5 #include "TextQueryI.h" 6 7 8 using namespace std; 9 10 11 int main() { 12 13 ifstream is; 14 is.open("c:/tmp/data.txt", ios::in); 15 16 // Search word 17 // TextSearch::run(is); 18 19 // Search word with Query 20 TextQueryI q = TextQueryI("good") | TextQueryI("nice") & TextQueryI("day"); 21 TextSearch::run(is, q); 22 23 return 0; 24 }
封装检索功能的入口
TextSearch.h
1 #pragma once 2 3 #include <fstream> 4 5 #include "TextQueryI.h" 6 7 using std::ifstream; 8 9 // 搜索功能入口 10 class TextSearch 11 { 12 public: 13 TextSearch(); 14 ~TextSearch(); 15 public: 16 static int run(ifstream &infile); 17 static int run(ifstream &infile, TextQueryI &query); 18 };
TextSearch.cpp
1 #include "TextSearch.h" 2 3 #include <iostream> 4 #include <string> 5 6 #include "TextQuery.h" 7 #include "TextQueryI.h" 8 9 using std::cin; using std::cout; using std::endl; 10 using std::string; 11 12 int TextSearch::run(ifstream & infile) 13 { 14 // 读取、分析数据 15 TextQuery tq(infile); 16 17 // serach query 18 while (true) { 19 cout << "enter search word, or [q] to quit:" << endl; 20 string s; 21 if (!(cin >> s) || s == "q") break; 22 print(cout, tq.query(s)) << endl; 23 } 24 return 0; 25 } 26 27 int TextSearch::run(ifstream &infile, TextQueryI &query) { 28 cout << "start process query: "; 29 TextQuery tq(infile); 30 print(cout, query.eval(tq)) << endl; 31 return 0; 32 }
TextQuery.h 文本内容分析,保存原始数据的分词分析结果和行号信息。并提供单个词检索的功能。
#pragma once

#include <fstream>
#include <string>
#include <map>
#include <set>
#include <vector>
#include <memory>
#include <utility>

#include <iostream>

// Kept at header scope: TextQuery.cpp uses these names unqualified.
using std::string;
using std::vector;
using std::endl;

class QueryResult;

/// Splits the input text into words and answers single-word lookups.
class TextQuery
{
public:
    /// Index of a line inside the stored text.
    using LineNo = vector<string>::size_type;

    /// Reads every line of the stream and records, for each
    /// whitespace-separated word, the lines it appears on.
    TextQuery(std::ifstream&);

    /// Looks up one word.
    /// @return a QueryResult holding the word, the lines containing it
    ///         (empty if the word is unknown) and the stored text.
    QueryResult query(const string&) const;

private:
    // The input text, one element per line.
    std::shared_ptr<vector<string> > data;
    // Analysis result: key is a word, value is the set of line indices
    // containing that word.
    std::map<string, std::shared_ptr<std::set<LineNo>> > wm;
};

/// Result of a query: the query text, the matching line indices and a shared
/// handle to the text those indices refer to.
class QueryResult
{
    friend std::ostream& print(std::ostream &, const QueryResult &);
public:
    using LineNo = TextQuery::LineNo;

    /// @param word    textual form of the query that produced this result.
    /// @param pLineNo ordered set of matching line indices.
    /// @param data    the text the line indices refer to.
    QueryResult(std::string word,
        std::shared_ptr<std::set<LineNo> > pLineNo,
        std::shared_ptr<std::vector<std::string> > data) :
        query_word(std::move(word)), lines(std::move(pLineNo)), files(std::move(data)) {}

    // The accessors are const so they can be called on a const QueryResult;
    // none of them modifies the result.

    /// Shared handle to the text this result refers to.
    auto getFiles() const { return files; }
    /// Iterator to the first matching line index.
    auto begin() const { return lines->begin(); }
    /// Iterator one past the last matching line index.
    auto end() const { return lines->end(); }

private:
    // Query text.
    std::string query_word;
    // Matching line indices, ordered.
    std::shared_ptr<std::set<LineNo> > lines;
    // Shared reference to the original text.
    std::shared_ptr<std::vector<std::string> > files;
};

/// Writes a formatted report of `qr` to `os` and returns `os`.
/// Declared at namespace scope as well, because a friend declaration alone
/// makes the name findable only through argument-dependent lookup.
std::ostream& print(std::ostream &, const QueryResult &);
TextQuery.cpp
1 #include "TextQuery.h" 2 3 #include <sstream> 4 5 using std::getline; 6 using std::istringstream; 7 8 TextQuery::TextQuery(std::ifstream &is): data(new vector<std::string>) 9 { 10 string text; 11 while (getline(is, text)) { 12 // 读取一行并记录数据,方便给出查询结果 13 data->push_back(text); 14 LineNo line_no = data->size() - 1; 15 // 单词分解 16 istringstream line(text); 17 string word; 18 // 单词查询结果记录 19 while (line >> word) { 20 auto &lines = wm[word]; // 获取智能指针,如果map没有会自动创建 21 if (!lines) { 22 lines.reset(new std::set<LineNo>); // 配置智能指针的对象 23 } 24 lines->insert(line_no); // 添加行号,如果重复什么都不做 25 } 26 } 27 } 28 29 QueryResult TextQuery::query(const string & word) const 30 { 31 static std::shared_ptr<std::set<LineNo> > p_no_result(new std::set<LineNo>); 32 auto loc = wm.find(word); 33 if (loc == wm.end()) { 34 return QueryResult(word, p_no_result, data); 35 } 36 else { 37 return QueryResult(word, loc->second, data); 38 } 39 } 40 41 // 格式化打印结果,类似于提供tostring 42 std::ostream& print(std::ostream &os, const QueryResult &qr) { 43 os << qr.query_word << " occours " << qr.lines->size() << 44 (qr.lines->size() > 1 ? "times" : "time") << endl; 45 for (auto num : *qr.lines) { 46 os << "at line:" << num + 1 << " > "; 47 os << *(qr.files->begin() + num) << endl; // 尽量使用迭代器,不使用下标,获得较好的扩展性 48 } 49 return os; 50 }
TextQueryI.h 使用表达式进行查询的接口,用于屏蔽下层。
1 #pragma once 2 #include <vector> 3 #include <string> 4 #include <memory> 5 6 #include "TextQuery.h" 7 #include "TextQueryBase.h" 8 #include "TextQueryWordQuery.h" 9 10 // TextQuery接口类 11 class TextQueryI { 12 // 有一个私有的构造函数,需要运算符是友元 13 friend TextQueryI operator~(const TextQueryI &); 14 friend TextQueryI operator&(const TextQueryI &, const TextQueryI &); 15 friend TextQueryI operator|(const TextQueryI &, const TextQueryI &); 16 17 public: 18 using LineNo = std::vector<std::string>::size_type; 19 TextQueryI(const std::string &s): q(new TextQueryWordQuery(s)) {} 20 // 作为TextQueryBase的唯一接口,自己实现对应的方法来屏蔽TextQueryBase的行为 21 QueryResult eval(const TextQuery &tq) const 22 { return q->eval(tq); } 23 std::string rep() const 24 { return q->rep(); } 25 private: 26 TextQueryI(std::shared_ptr<TextQueryBase> query): q(query) {} 27 std::shared_ptr<TextQueryBase> q; 28 }; 29 30 std::ostream & 31 operator<<(std::ostream &os, const TextQueryI &tq); 32 33 TextQueryI operator~(const TextQueryI &); 34 TextQueryI operator&(const TextQueryI &, const TextQueryI &); 35 TextQueryI operator|(const TextQueryI &, const TextQueryI &);
TextQueryI.cpp 实现输出运算符 operator<< 的重载;定义放在 .cpp 中,避免头文件被多处包含时出现重复定义。
1 #include "TextQueryI.h" 2 3 std::ostream & 4 operator<<(std::ostream &os, const TextQueryI &tq) { 5 return os << tq.rep(); 6 }
TextQueryBase.h 利用虚函数实现表达式功能实现的抽象类。
1 #pragma once 2 3 #include <string> 4 5 #include "TextQuery.h" 6 7 class TextQueryBase 8 { 9 // 用户不会使用TextQueryBase类,所有使用都通过TextQueryI完成 10 friend class TextQueryI; 11 protected: 12 using LineNo = TextQuery::LineNo; 13 virtual ~TextQueryBase() = default; 14 private: 15 // 执行查询 16 virtual QueryResult eval(const TextQuery &) const = 0; 17 // 获得查询对应的string形式表示,类似toString 18 virtual std::string rep() const = 0; 19 };
TextQueryNot.h 实现非逻辑的对象。完成对非逻辑表达式的string表示、完成对分词结果的Not分析。
1 #pragma once 2 3 #include <memory> 4 5 #include "TextQueryI.h" 6 #include "TextQueryBase.h" 7 8 class TextQueryNot : public TextQueryBase { 9 friend TextQueryI operator~(const TextQueryI &); 10 private: 11 TextQueryNot(const TextQueryI &q) : query(q) {} 12 virtual QueryResult eval(const TextQuery &) const override; 13 // 获得查询的string表示? 14 virtual std::string rep() const override { 15 return "~(" + query.rep() + ")"; 16 } 17 private: 18 TextQueryI query; 19 }; 20 21 inline TextQueryI operator~(const TextQueryI &operand) { 22 return std::shared_ptr<TextQueryBase>(new TextQueryNot(operand)); 23 }
TextQueryNot.cpp 实现非逻辑的代码。完成对分词结果进行非逻辑的加工。
1 #include "TextQueryNot.h" 2 3 QueryResult 4 TextQueryNot::eval(const TextQuery &tq) const { 5 auto result = query.eval(tq); 6 auto ret = std::make_shared<std::set<LineNo> >(); 7 auto beg = result.begin(), end = result.end(); 8 auto sz = result.getFiles()->size(); 9 for (size_t n = 0; n != sz; n++) { 10 // 考察结果中的每一行 11 if (beg == end || *beg != n) { 12 ret->insert(n); 13 } 14 else if (beg != end) { 15 ++beg; 16 } 17 } 18 return QueryResult(rep(), ret, result.getFiles()); 19 }
TextQueryBinary.h 二元运算的共同基类,同时定义了And和Or运算
1 #pragma once 2 3 #include "TextQueryI.h" 4 #include "TextQueryBase.h" 5 6 class TextQueryBinary : public TextQueryBase { 7 protected: 8 TextQueryBinary(const TextQueryI &left, const TextQueryI &right, std::string s): 9 lhs(left), rhs(right), opSymbol(s) {} 10 // 只提供打印方法,实际操作还是虚函数 11 std::string rep() const override { 12 return "(" + lhs.rep() + " " + opSymbol + " " + rhs.rep() + ")"; 13 } 14 15 protected: 16 TextQueryI lhs, rhs; // 操作对象 17 std::string opSymbol; // 操作符 18 }; 19 20 class TextQueryAnd : public TextQueryBinary { 21 friend TextQueryI operator&(const TextQueryI &, const TextQueryI &); 22 private: 23 TextQueryAnd(const TextQueryI &lhs, const TextQueryI &rhs): TextQueryBinary(lhs, rhs, "&") {} 24 QueryResult eval(const TextQuery &) const override; 25 }; 26 27 class TextQueryOr : public TextQueryBinary { 28 friend TextQueryI operator|(const TextQueryI &, const TextQueryI &); 29 private: 30 TextQueryOr(const TextQueryI &lhs, const TextQueryI &rhs) : TextQueryBinary(lhs, rhs, "|") {} 31 QueryResult eval(const TextQuery &) const override; 32 };
TextQueryBinary.cpp
1 #include "TextQueryBinary.h" 2 3 #include <set> 4 #include <algorithm> 5 #include <iterator> 6 #include <memory> 7 8 QueryResult 9 TextQueryOr::eval(const TextQuery &tq) const { 10 auto right = rhs.eval(tq), left = lhs.eval(tq); 11 auto ret = std::make_shared<std::set<LineNo> >(left.begin(), left.end()); 12 ret->insert(right.begin(), right.end()); 13 return QueryResult(rep(), ret, left.getFiles()); 14 } 15 16 QueryResult 17 TextQueryAnd::eval(const TextQuery &tq) const { 18 auto right = rhs.eval(tq), left = lhs.eval(tq); 19 auto ret = std::make_shared<std::set<LineNo> >(); 20 std::set_intersection(left.begin(), left.end(), right.begin(), right.end(), std::inserter(*ret, ret->begin())); 21 ret->insert(right.begin(), right.end()); 22 return QueryResult(rep(), ret, left.getFiles()); 23 } 24 25 TextQueryI operator&(const TextQueryI &lhs, const TextQueryI &rhs) { 26 return std::shared_ptr<TextQueryBase>(new TextQueryAnd(lhs, rhs)); 27 } 28 29 TextQueryI operator|(const TextQueryI &lhs, const TextQueryI &rhs) { 30 return std::shared_ptr<TextQueryBase>(new TextQueryOr(lhs, rhs)); 31 }
TextQueryWordQuery.h 表达式查询的叶子节点,表示对某个词进行查询,相当于表达式体系中对单个词查询的基础功能调用。
1 #pragma once 2 3 #include <string> 4 5 #include "TextQuery.h" 6 #include "TextQueryBase.h" 7 8 // 对象树的叶子节点 9 class TextQueryWordQuery : public TextQueryBase 10 { 11 friend class TextQueryI; 12 private: 13 TextQueryWordQuery(const std::string &s) : query_word(s) {} 14 virtual QueryResult eval(const TextQuery &tq) const override { 15 return tq.query(query_word); 16 } 17 virtual std::string rep() const override { 18 return query_word; 19 } 20 private: 21 std::string query_word; 22 };
标签:std,include,const,string,TextQuery,15.9,TextQueryI,C++,Primer From: https://www.cnblogs.com/terrencestark/p/16800156.html