实现概要:
- 将文件内容分块读入 buf,在其中查找与目标字符串首字符匹配的位置;然后使用 seekg() 移动文件指针,并用 peek() 逐个查看后续字符是否也匹配。
- 如果后续字符全部匹配,就把该字符串在文件中的位置 push 进一个 vector<int> 中,然后继续查找文件的剩余内容。
// str2.cpp -- capacity() and reserve()
#include <iostream>
#include <fstream>
#include <string>
#include <cstring>
#include <malloc.h>
#include <windows.h>
#include <memory>
#include <iomanip>
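// peek() can only look at the next character at the current file position,
// so a custom peek that first moves to a given position is needed (see PeekInFile).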
#include <vector>
using namespace std;
// Size, in bytes, of the chunk buffer used when reading the input file:
// FindInFile reserves and reads this many bytes per iteration, and main()
// copies the selected file ranges in pieces of this size.
int bufvolum = 512;
/**
 * @brief Reads up to @p peekNum bytes starting at absolute offset @p pos
 *        without disturbing the caller's view of the stream.
 *
 * The stream's read position and its error state (eofbit/failbit/badbit) are
 * both saved on entry and restored before returning, so the call behaves like
 * a positional "peek".
 *
 * @param file     Input file stream to inspect. If it is not open, an empty
 *                 string is returned.
 * @param pos      Absolute byte offset at which to start reading.
 * @param peekNum  Maximum number of bytes to return.
 * @return The bytes found at [pos, pos + peekNum). The result is shorter than
 *         @p peekNum when the range extends past the end of the file, and
 *         empty when seeking to @p pos fails.
 */
auto PeekInFile(std::ifstream& file, unsigned pos, unsigned peekNum)
{
    std::string PeekedStr;
    if (!file.is_open() || peekNum == 0)
    {
        return PeekedStr;
    }

    // A stream that already hit EOF (or failed) refuses tellg()/seekg(), which
    // would make every peek return garbage. Remember the state, work on a
    // clean stream, and put the state back at the end.
    const std::ios_base::iostate savedState = file.rdstate();
    file.clear();
    const std::ifstream::pos_type originPtr = file.tellg();

    file.seekg(static_cast<std::streamoff>(pos));
    if (file.fail())
    {
        std::cout << "seekg error!" << std::endl;
    }
    else
    {
        // One bulk read instead of one seek + peek per character.
        PeekedStr.resize(peekNum);
        file.read(PeekedStr.data(), static_cast<std::streamsize>(peekNum));
        // Keep only what was really read; a short read means EOF was reached.
        PeekedStr.resize(static_cast<std::size_t>(file.gcount()));
    }

    // Reading up to EOF sets eofbit/failbit; clear them so the seek back works.
    file.clear();
    if (originPtr != std::ifstream::pos_type(-1))
    {
        file.seekg(originPtr);
    }
    file.clear(savedState);
    return PeekedStr;
}
/**
 * @brief Finds every occurrence of @p substr in @p file, starting the search
 *        at byte offset @p pos.
 *
 * The file is scanned in fixed-size chunks. The last `substr.size() - 1` bytes
 * of each chunk are carried over to the next one so that a match straddling a
 * chunk boundary is still found. Overlapping occurrences are all reported.
 *
 * @param file    Open input file stream. For the offsets to be usable with
 *                seekg() the stream should be opened in binary mode.
 * @param substr  Byte sequence to look for. An empty string yields no matches.
 * @param pos     Absolute byte offset at which to start searching (default 0).
 * @return Absolute byte offsets (from the start of the file) of every match,
 *         in ascending order. Empty if the file is not open, @p substr is
 *         empty, or @p pos cannot be seeked to. Offsets are stored as int, so
 *         matches beyond INT_MAX bytes are not representable.
 *
 * @note On return the stream's error flags are cleared and its read position
 *       is left at the end of the scanned data.
 */
decltype(auto) FindInFile(std::ifstream& file, std::string substr, int pos = 0)
{
    std::vector<int> arrIndex;
    if (!file.is_open() || substr.empty() || pos < 0)
    {
        return arrIndex;
    }

    file.clear();
    file.seekg(pos);
    if (file.fail())
    {
        file.clear();
        return arrIndex;
    }

    // Chunk size only affects I/O granularity, never the result.
    constexpr std::size_t kChunkSize = 4096;
    const std::size_t overlap = substr.size() - 1;

    std::string chunk(kChunkSize, '\0'); // sized (not merely reserved) read buffer
    std::string window;                  // carried-over tail + current chunk
    long long windowStart = pos;         // absolute file offset of window[0]

    while (file)
    {
        file.read(chunk.data(), static_cast<std::streamsize>(chunk.size()));
        const std::size_t got = static_cast<std::size_t>(file.gcount());
        if (got == 0)
        {
            break;
        }
        // Append by explicit length: file data may contain NUL bytes.
        window.append(chunk, 0, got);

        // Advance by one after each hit so overlapping matches are reported too.
        for (std::size_t hit = window.find(substr);
             hit != std::string::npos;
             hit = window.find(substr, hit + 1))
        {
            arrIndex.push_back(static_cast<int>(windowStart + static_cast<long long>(hit)));
        }

        // Keep just enough bytes for a match that begins in this chunk and ends
        // in the next. The kept tail is shorter than substr, so no match can be
        // reported twice.
        if (window.size() > overlap)
        {
            const std::size_t drop = window.size() - overlap;
            window.erase(0, drop);
            windowStart += static_cast<long long>(drop);
        }
    }

    // The final short read sets eofbit/failbit; leave the stream reusable.
    file.clear();
    return arrIndex;
}
int main()
{
SetConsoleOutputCP(65001);
ifstream fxml("C:\\Users\\34625\\Downloads\\cnblogs_blog_ComputerTech.20240111164025\\cnblogs.xml", ios_base::in); // create fis and associate with jamjar.txt
if (!fxml.is_open())
{
cout << "\n open error!\n";
}
vector indexArr(move(FindInFile(fxml, "[TOC]"))) ;
ofstream fwrite("xxxxx.xml");
if (!fwrite.is_open())
{
fxml.close();
return 0;
}
string buf;
buf.reserve(bufvolum);
for (auto i = indexArr.begin(); i < indexArr.end() ; ++i)
{
int need2write {0};
if (i + 1 == indexArr.end())
{
auto originPos = fxml.tellg();
fxml.seekg(0, ios_base::end);
auto filesize = fxml.tellg();
fxml.seekg(originPos);
need2write = filesize - *i;
}
else
{
need2write = *(i + 1) - *i ;
}
int residual = need2write % bufvolum;
unsigned time = need2write / bufvolum ;
fxml.seekg(*i);
if (!fxml.is_open() || fxml.fail() || fxml.bad())
{
cout << endl << "fxml error" << endl;
system("pause");
}
while (time--)
{
if (fxml.fail() || fxml.bad())
{
cout << "failllllllllllllll" << endl;
system("pause");
break;
}
fxml.get(buf.data(), bufvolum + 1, EOF);
int getNum = fxml.gcount();
buf.append(buf.data());
fwrite.write(buf.data(), fxml.gcount());//写入fwrite
cout << buf.data();//
buf.clear();
}
if (residual)
{
buf.clear();
fxml.get(buf.data(), residual + 1, EOF );
buf.append(buf.data());
fwrite.write(buf.data(), residual);
cout << buf.c_str();
}
}
fwrite.close();
fxml.close();
system("pause");
return 0;
}
标签:arrIndex,auto,C++,bufvolum,查找,file,字符串,include,buf
From: https://www.cnblogs.com/ComputerTech/p/17962969