// File: iso_extractor.h
#pragma once
#include <cstdint>
#include <fstream>
#include <string>
#include <vector>
// Windows-style path separator (a single backslash), as a parenthesized string literal.
#define PATH_SEPARATOR_WIN ("\\")
// Unix-style path separator (a single forward slash), as a parenthesized string literal.
// Paths are normalized to this separator before being split or compared.
#define PATH_SEPARATOR_UNIX ("/")
// Forward declarations so the extractor interface can refer to the on-disk
// primary volume descriptor without exposing its layout in this header.
typedef struct _iso_primary_vol_desc iso_primary_vol_desc, * piso_primary_vol_desc;
typedef struct _iso_dir_info iso_dir_info, * piso_dir_info;
// List of child entries of a directory node.
typedef std::vector<_iso_dir_info> iso_dir_list;

// C4201 (nameless struct/union) is silenced only for this definition so the
// suppression does not leak into every file that includes this header.
#pragma warning(push)
#pragma warning(disable: 4201)
// One node of the parsed directory tree: a file, or a directory with children.
typedef struct _iso_dir_info
{
    std::string name;       // Entry name
    uint64_t dir_offset;    // Byte offset of the describing record inside the image
    uint64_t data_offset;   // Byte offset of the entry's data inside the image
    uint64_t data_size;     // Data size in bytes
    union
    {
        uint8_t data;       // All flag bits as one raw byte
        struct
        {
            uint8_t hidden:1;       // Hidden
            uint8_t directory :1;   // Directory
            uint8_t associated:1;
            uint8_t record:1;
            uint8_t protection:1;
            uint8_t reserved:2;
            uint8_t multi_extent:1;
        };
    }attr;
    iso_dir_list list;      // Child entries

    // Creates an empty node: zero offsets and size, no flags, no children.
    _iso_dir_info() :
        dir_offset(0),
        data_offset(0),
        data_size(0)
    {
        // Every flag bit aliases `data`, so clearing the byte clears them all.
        attr.data = 0;
    }

    // Resets the node to the same state as a freshly constructed one.
    void clear()
    {
        list.clear();
        name.clear();
        dir_offset = 0;
        data_offset = 0;
        data_size = 0;
        attr.data = 0;
    }
}iso_dir_info, * piso_dir_info;
#pragma warning(pop)
// ISO 9660 extractor
// FlameCyclone 2024.11.13
// Reference: https://wiki.osdev.org/ISO_9660
class iso_extractor
{
public:
    iso_extractor();
    ~iso_extractor();

    // Opens the image file at `path`.
    bool open(const std::string& path);

    // Closes the image file.
    void close();

    // Parses the directory structure of the image, recursing at most `depth`
    // levels, and returns the root node held by this object.
    iso_dir_info& parse_dir_info(int depth = 260);

    // Looks up the entry at `path`. With `root` set, the search starts at the
    // root node itself; otherwise it starts at the root's children.
    const iso_dir_info& get_dir_info(const std::string& path, bool root = false);

    // Extracts the entry called `name` into the directory `path`.
    bool extract_file(const std::string& name, const std::string& path, bool root = false, int depth = 260);

    // Extracts the image contents into the directory `path`.
    bool extract(const std::string& path, bool root = false, int depth = 260);

    // Prints the directory structure; `root` selects whether `path_info`
    // itself is printed or only its children.
    void print_iso_path_info(const iso_dir_info& path_info, bool root = false);

private:
    // Reads `size` bytes at position `pos` of the image into `data_ptr`.
    std::streamsize _read_file_data(void* data_ptr, std::streamsize size, std::streampos pos);

    // Parses the directory located at `dir_pos` into `path_info`.
    void _parse_dir_info(iso_dir_info& path_info, piso_primary_vol_desc primary_ptr, std::streampos dir_pos, int depth);

    // Extracts one entry (file or directory) below `path`.
    bool _extract_file(const iso_dir_info& path_info, const std::string& path, int depth);

    // Extracts every entry of `path_list` below `path`.
    bool _extract_list(const iso_dir_list& path_list, const std::string& path, int depth);

    // Creates the directory `path`, including missing parents.
    bool _create_dir(const std::string& path);

    // Searches `path_info` and its descendants for `find_path`.
    const iso_dir_info& _get_dir_info(const iso_dir_info& path_info, const std::string& cur_path, const std::string& find_path);

    // Searches every entry of `path_list` for `find_path`.
    const iso_dir_info& _get_dir_list(const iso_dir_list& path_list, const std::string& find_path);

    // Splits `str` at `delim`.
    std::vector<std::string> _split_string(const std::string& str, const std::string& delim);

    // Replaces every occurrence of `find` in `src` with `replace`.
    std::string& _replace_string(std::string& src, const std::string& find, const std::string& replace);

    // Prints one entry and its children, indented by `tab`.
    void _print_dir_info(const iso_dir_info& path_info, int tab);

    // Prints every entry of `path_list`, indented by `tab`.
    void _print_dir_list(const iso_dir_list& path_list, int tab);

    iso_dir_info m_path_info;       // Parsed directory structure
    std::ifstream m_input_file;     // Input image file
    std::streamsize m_file_size;    // Size of the image file
};
// File: iso_extractor.cpp
#include "iso_extractor.h"
#include <direct.h>
#include <io.h>
#include <string.h>
#pragma pack(1)
// Date and time as stored inside a directory record: seven one-byte fields.
struct _iso_date_time
{
    uint8_t year;
    uint8_t month;
    uint8_t day;
    uint8_t hour;
    uint8_t minute;
    uint8_t second;
    uint8_t timezone_offset;
};
using iso_date_time = _iso_date_time;
using piso_date_time = _iso_date_time*;
// File flags of a directory record, packed into a single byte.
struct _iso_file_flag
{
    uint8_t hidden:1;       // Hidden
    uint8_t directory :1;   // Directory
    uint8_t associated:1;
    uint8_t record:1;
    uint8_t protection:1;
    uint8_t reserved:2;
    uint8_t multi_extent:1;
};
using iso_file_flag = _iso_file_flag;
using piso_file_flag = _iso_file_flag*;
// 16-bit value stored twice, once in each byte order.
struct _iso_int16
{
    uint16_t little;    // Little-endian copy
    uint16_t big;       // Big-endian copy
};
using iso_data_16 = _iso_int16;
// 32-bit value stored twice, once in each byte order.
struct _iso_int32
{
    uint32_t little;    // Little-endian copy
    uint32_t big;       // Big-endian copy
};
using iso_data_32 = _iso_int32;
// Volume descriptor type codes.
enum iso_vol_descriptor_type
{
    boot_record = 0,    // Boot record
    primary = 1,        // Primary volume descriptor
    supplementary = 2,  // Supplementary volume descriptor
    partition = 3,      // Volume partition descriptor
    terminator = 255    // Volume descriptor set terminator
};
// 路径表
typedef struct _iso_directory
{
uint8_t directory_record_length;
uint8_t extended_attribute_record_length;
iso_data_32 location_of_extent;
iso_data_32 data_length;
iso_date_time recording_date_time;
iso_file_flag file_flags;
uint8_t file_unit_size;
uint8_t inter_leave_gap_size;
iso_data_16 vol_sequence_number;
uint8_t file_id_length;
char file_id[1];
}iso_directory, *piso_directory;
// Header shared by the volume descriptors: type code, identifier, version.
struct _iso_header
{
    uint8_t type;
    uint8_t id[5];
    uint8_t version;
};
using iso_header = _iso_header;
// Boot record volume descriptor.
struct _iso_boot_record
{
    iso_header header;
    char boot_system_id[32];
    char boot_id[32];
    uint8_t boot_system_use[1977];
};
using iso_boot_record = _iso_boot_record;
using piso_boot_record = _iso_boot_record*;
// 主卷描述符
typedef struct _iso_primary_vol_desc
{
iso_header header;
uint8_t Unused1;
char system_id[32];
char vol_id[32];
uint8_t unused_0[8];
iso_data_32 vol_space_size;
uint8_t unused_2[32];
iso_data_16 vol_set_size;
iso_data_16 vol_sequence_number;
iso_data_16 logical_block_size;
iso_data_32 path_table_size;
uint32_t location_of_path_table_little;
uint32_t location_of_optional_path_table_little;
uint32_t location_of_path_table_big;
uint32_t location_of_optional_path_table_big;
iso_directory directory;
char vol_set_id[128];
char publisher_id[128];
char data_preparer_id[128];
char application_id[128];
char copyright_file_id[37];
char abstract_file_id[37];
char bibliographic_file_id[37];
uint8_t vol_creation_date_time[17];
uint8_t vol_modification_date_time[17];
uint8_t vol_expiration_date_time[17];
uint8_t vol_effective_date_time[17];
uint8_t file_structure_version;
uint8_t unused_3;
uint8_t application_used[512];
uint8_t reserved[653];
}iso_primary_vol_desc, *piso_primary_vol_desc;
// Volume descriptor set terminator.
struct _iso_vol_desc_set_terminator
{
    iso_header header;
    uint8_t reserved[2041];
};
using iso_vol_desc_set_terminator = _iso_vol_desc_set_terminator;
using piso_vol_desc_set_terminator = _iso_vol_desc_set_terminator*;
#pragma pack()
// Empty directory info; the lookup helpers return it when nothing matches,
// and callers detect that case through its empty name.
static const iso_dir_info g_result_empty{};
// Creates an extractor with no image attached and a recorded file size of 0.
iso_extractor::iso_extractor()
    : m_path_info()
    , m_input_file()
    , m_file_size(0)
{
}
// Closes the image file when the extractor goes away.
iso_extractor::~iso_extractor()
{
    this->close();
}
void iso_extractor::_print_dir_list(const iso_dir_list& path_list, int tab)
{
for (const auto& item : path_list)
{
_print_dir_info(item, tab);
}
}
// Prints one line for `path_info` (name, record offset in hex, size in
// Bytes/KB/MB, and a [DIR] marker for directories), then prints its children
// one indentation level deeper.
void iso_extractor::_print_dir_info(const iso_dir_info& path_info, int tab)
{
    // One space per indentation level.
    int level = 0;
    while (level < tab)
    {
        printf(" ");
        ++level;
    }
    printf("%s", path_info.name.c_str());

    // Widened explicitly so the arguments match the %ll conversions.
    const unsigned long long record_offset = path_info.dir_offset;
    const unsigned long long byte_count = path_info.data_size;
    const double kilo = 1024.0;

    if (byte_count >= 1024 * 1024)
    {
        printf(" %.8llX %.2lfMB", record_offset, static_cast<double>(byte_count) / (kilo * kilo));
    }
    else if (byte_count >= 1024)
    {
        printf(" %.8llX %.2lf KB", record_offset, static_cast<double>(byte_count) / kilo);
    }
    else
    {
        printf(" %.8llX %llu Bytes", record_offset, byte_count);
    }

    if (path_info.attr.directory)
    {
        printf(" [DIR]");
    }
    printf("\n");

    _print_dir_list(path_info.list, tab + 1);
}
// Splits `src` at every occurrence of the substring `delim` and returns the
// non-empty pieces in order.
//
// An empty `delim` yields `src` as the only element. Consecutive, leading and
// trailing delimiters produce no empty pieces. The delimiter is always matched
// as a whole substring, so a multi-character delimiter is never treated as a
// set of individual characters.
std::vector<std::string> iso_extractor::_split_string(const std::string& src, const std::string& delim)
{
    std::vector<std::string> result;
    if (delim.empty())
    {
        result.push_back(src);
        return result;
    }

    size_t start = 0;
    while (start < src.size())
    {
        size_t end = src.find(delim, start);
        if (std::string::npos == end)
        {
            end = src.size();
        }
        // Skip the empty piece between two adjacent delimiters.
        if (end > start)
        {
            result.push_back(src.substr(start, end - start));
        }
        start = end + delim.size();
    }
    return result;
}
// Replaces every occurrence of `find` in `src` with `replace` and returns
// `src`. An empty `find` leaves `src` unchanged. Replaced text is not scanned
// again, so a `replace` that contains `find` does not loop.
std::string& iso_extractor::_replace_string(std::string& src, const std::string& find, const std::string& replace)
{
    if (find.empty())
    {
        return src;
    }

    // Work on a copy so `find`/`replace` stay intact even if they alias `src`.
    std::string work(src);
    for (size_t at = work.find(find); at != std::string::npos; at = work.find(find, at + replace.size()))
    {
        work.replace(at, find.size(), replace);
    }
    src = work;
    return src;
}
// Depth-first search for `find_path` in `path_info` and its descendants.
//
// `cur_path` is the path of the parent node ("" at the top). The path of a
// node is "<cur_path>/<name>" and is compared case-insensitively. Returns the
// matching node, or g_result_empty when nothing matches; a result with an
// empty name is treated as "not found".
const iso_dir_info& iso_extractor::_get_dir_info(const iso_dir_info& path_info, const std::string& cur_path, const std::string& find_path)
{
    const std::string node_path = cur_path.empty()
        ? path_info.name
        : cur_path + PATH_SEPARATOR_UNIX + path_info.name;

    // Path comparison
    if (_stricmp(node_path.c_str(), find_path.c_str()) == 0)
    {
        return path_info;
    }

    for (const iso_dir_info& child : path_info.list)
    {
        const iso_dir_info& match = _get_dir_info(child, node_path, find_path);
        if (match.name.empty())
        {
            continue;
        }
        return match;
    }
    return g_result_empty;
}
// Searches every entry of `path_list`, and each entry's descendants, for
// `find_path` (case-insensitive). Paths are relative to the list, so a
// top-level entry matches on its bare name. Returns the first match, or
// g_result_empty when nothing matches.
const iso_dir_info& iso_extractor::_get_dir_list(const iso_dir_list& path_list, const std::string& find_path)
{
    for (const auto& item : path_list)
    {
        // Path comparison
        if (0 == _stricmp(item.name.c_str(), find_path.c_str()))
        {
            return item;
        }
        const iso_dir_info& result = _get_dir_info(item, "", find_path);
        if (!result.name.empty())
        {
            return result;
        }
    }
    return g_result_empty;
}
// Reads up to `size` bytes starting at byte position `pos` of the image into
// `data_ptr`.
//
// Returns the number of bytes actually read. That is 0 when the arguments are
// invalid, when `pos` lies outside the file, or when seeking fails, and less
// than `size` when the file ends first.
std::streamsize iso_extractor::_read_file_data(void* data_ptr, std::streamsize size, std::streampos pos)
{
    if (nullptr == data_ptr || size <= 0)
    {
        return 0;
    }

    const std::streamoff offset = static_cast<std::streamoff>(pos);
    if (offset < 0 || offset >= m_file_size)
    {
        return 0;
    }

    // A previous short read leaves failbit/eofbit set, and seekg is a no-op on
    // a failed stream. Reset the state so one bad read does not poison the rest.
    m_input_file.clear();
    m_input_file.seekg(pos, std::ios::beg);
    if (!m_input_file)
    {
        return 0;
    }

    m_input_file.read(reinterpret_cast<char*>(data_ptr), size);
    return m_input_file.gcount();
}
// Parses the directory whose extent starts at byte position `dir_pos` and
// appends its entries to `path_info.list`, recursing into sub-directories.
//
// path_info   - node that receives the entries; its data_size grows by the
//               size of everything found below it.
// primary_ptr - primary volume descriptor (supplies the logical block size).
// dir_pos     - byte offset of the directory extent inside the image.
// depth       - remaining recursion budget; nothing is parsed once it is < 0.
//
// The first two records of a directory (the directory itself and its parent)
// are not added as entries. The first record is still read, because its
// data_length gives the size of the directory extent.
void iso_extractor::_parse_dir_info(iso_dir_info& path_info, piso_primary_vol_desc primary_ptr, std::streampos dir_pos, int depth)
{
    if (depth < 0 || nullptr == primary_ptr)
    {
        return;
    }

    const uint64_t block_size = primary_ptr->logical_block_size.little;
    if (0 == block_size)
    {
        return;
    }

    // Offset of the identifier bytes inside a record (fixed part of the record).
    const size_t id_offset = sizeof(iso_directory) - 1;

    const uint64_t dir_begin = static_cast<uint64_t>(static_cast<std::streamoff>(dir_pos));
    // Until the first record tells the real extent size, assume one block.
    uint64_t dir_end = dir_begin + block_size;
    uint64_t cur_pos = dir_begin;

    uint8_t data_buffer[256] = { 0 };
    piso_directory dir_ptr = reinterpret_cast<piso_directory>(data_buffer);
    int index = 0;

    while (cur_pos < dir_end)
    {
        // Read the record length byte.
        if (1 != _read_file_data(data_buffer, 1, std::streampos(static_cast<std::streamoff>(cur_pos))))
        {
            break;
        }

        const size_t record_length = data_buffer[0];
        if (0 == record_length)
        {
            // Records never straddle a logical block: a zero length byte is
            // padding up to the next block, not the end of the directory.
            cur_pos = dir_begin + ((cur_pos - dir_begin) / block_size + 1) * block_size;
            continue;
        }

        // Shorter than the fixed part plus one identifier byte: corrupt data.
        if (record_length < sizeof(iso_directory))
        {
            break;
        }

        // Read the remainder of the record (the stream is positioned right
        // after the length byte).
        const std::streamsize rest_size = static_cast<std::streamsize>(record_length - 1);
        m_input_file.read(reinterpret_cast<char*>(&data_buffer[1]), rest_size);
        if (rest_size != m_input_file.gcount())
        {
            break;
        }

        if (0 == index)
        {
            // Record 0 describes the directory itself; its data length is the
            // byte size of this directory extent.
            const uint64_t extent_size = dir_ptr->data_length.little;
            if (extent_size > block_size)
            {
                dir_end = dir_begin + extent_size;
            }
        }
        else if (index >= 2)
        {
            // The identifier must fit inside the record that was read.
            size_t id_length = dir_ptr->file_id_length;
            const size_t id_capacity = record_length - id_offset;
            if (id_length > id_capacity)
            {
                id_length = id_capacity;
            }

            std::string name(reinterpret_cast<const char*>(&data_buffer[id_offset]), id_length);
            const size_t nul_pos = name.find('\0');
            if (std::string::npos != nul_pos)
            {
                name.resize(nul_pos);
            }

            // File identifiers end in ";<version>"; drop that suffix.
            if (0 == dir_ptr->file_flags.directory)
            {
                const size_t version_pos = name.rfind(';');
                if (std::string::npos != version_pos
                    && version_pos + 1 < name.size()
                    && std::string::npos == name.find_first_not_of("0123456789", version_pos + 1))
                {
                    name.resize(version_pos);
                }
            }

            // Build the entry in place. Recursion only touches info.list, so
            // the reference stays valid and no subtree is ever copied.
            path_info.list.push_back(iso_dir_info());
            iso_dir_info& info = path_info.list.back();
            info.attr.data = *reinterpret_cast<const uint8_t*>(&dir_ptr->file_flags);
            info.name = name;
            info.data_size = dir_ptr->data_length.little;
            info.dir_offset = cur_pos;
            info.data_offset = static_cast<uint64_t>(dir_ptr->location_of_extent.little) * block_size;

            // Recurse into directories; their size is the sum of their content.
            if (dir_ptr->file_flags.directory)
            {
                info.data_size = 0;
                // Use the 64-bit offset: a 32-bit product wraps beyond 4 GiB.
                _parse_dir_info(info, primary_ptr, std::streampos(static_cast<std::streamoff>(info.data_offset)), depth - 1);
            }

            path_info.data_size += info.data_size;
        }

        // Advance to the next record.
        cur_pos += record_length;
        ++index;
    }
}
// Creates the directory `path` together with any missing parent directories.
//
// Backslashes are treated as separators. Leading separators are preserved, so
// a rooted path stays rooted instead of silently becoming relative. Returns
// true when every component exists afterwards (an empty path counts as
// success), false as soon as one component cannot be created.
// NOTE(review): UNC-style prefixes ("//server/share") get no special handling.
bool iso_extractor::_create_dir(const std::string& path)
{
    std::string dest_path = path;
    // Normalize path separators.
    _replace_string(dest_path, PATH_SEPARATOR_WIN, PATH_SEPARATOR_UNIX);
    if (dest_path.empty())
    {
        return true;
    }

    // A path made only of separators names the root: nothing to create.
    const size_t first_name_pos = dest_path.find_first_not_of(PATH_SEPARATOR_UNIX);
    if (std::string::npos == first_name_pos)
    {
        return true;
    }

    // Start from the leading separators ("" for a relative path).
    std::string dir_path = dest_path.substr(0, first_name_pos);
    bool first_component = true;

    const std::vector<std::string> components = _split_string(dest_path, PATH_SEPARATOR_UNIX);
    for (const auto& item : components)
    {
        if (!first_component)
        {
            dir_path += PATH_SEPARATOR_UNIX;
        }
        dir_path += item;
        first_component = false;

        // Create the directory when it does not exist yet.
        if (0 != _access(dir_path.c_str(), 0))
        {
            // A failed _mkdir is only an error if the directory is still
            // missing; it may have been created by someone else meanwhile.
            if (0 != _mkdir(dir_path.c_str()) && 0 != _access(dir_path.c_str(), 0))
            {
                return false;
            }
        }
    }
    return true;
}
// Extracts `path_info` below the directory `path`.
//
// A directory entry is created (if missing) and its children are extracted
// into it with `depth - 1`; a file entry is copied from the image in chunks
// of at most 1 MiB. Nothing is done once `depth` is negative.
//
// Returns false when the entry name is unsafe, when a directory or output
// file cannot be created, when the image ends before the file data does, when
// writing fails, or when any child failed. Siblings of a failed child are
// still extracted.
bool iso_extractor::_extract_file(const iso_dir_info& path_info , const std::string& path, int depth)
{
    if (depth < 0)
    {
        return true;
    }

    // Names come from the image. Refuse anything that would make the output
    // path leave `path`.
    const std::string& name = path_info.name;
    if ("." == name || ".." == name || std::string::npos != name.find_first_of("/\\"))
    {
        return false;
    }

    // Build the output path.
    std::string out_path = name;
    if (!path.empty())
    {
        out_path = path + PATH_SEPARATOR_UNIX + name;
    }

    if (path_info.attr.directory)
    {
        // Create the directory when it does not exist yet.
        if (0 != _access(out_path.c_str(), 0))
        {
            if (0 != _mkdir(out_path.c_str()))
            {
                return false;
            }
        }

        // Extract the children recursively, remembering any failure.
        bool all_ok = true;
        for (const auto& item : path_info.list)
        {
            if (!_extract_file(item, out_path, depth - 1))
            {
                all_ok = false;
            }
        }
        return all_ok;
    }

    // Create the output file.
    std::ofstream out_file(out_path, std::ios::out | std::ios::binary);
    if (!out_file.is_open())
    {
        return false;
    }

    const std::streamsize chunk_limit = 1024 * 1024;
    std::streamsize remaining = static_cast<std::streamsize>(path_info.data_size);
    const std::streamsize chunk_size = remaining < chunk_limit ? remaining : chunk_limit;
    std::vector<char> buffer(static_cast<size_t>(chunk_size));

    // An earlier short read leaves the stream failed; reset it before seeking.
    m_input_file.clear();
    m_input_file.seekg(static_cast<std::streamoff>(path_info.data_offset), std::ios::beg);

    while (remaining > 0)
    {
        const std::streamsize want = remaining < chunk_size ? remaining : chunk_size;

        m_input_file.read(buffer.data(), want);
        if (want != m_input_file.gcount())
        {
            // The image ends before the file data does.
            return false;
        }

        out_file.write(buffer.data(), want);
        if (!out_file)
        {
            return false;
        }
        remaining -= want;
    }

    // close() reports buffered write errors through failbit.
    out_file.close();
    return !out_file.fail();
}
// Extracts every entry of `path_list` below `path`. All entries are
// attempted; returns true only if each of them was extracted successfully.
bool iso_extractor::_extract_list(const iso_dir_list& path_list, const std::string& path, int depth)
{
    bool all_ok = true;
    for (const auto& item : path_list)
    {
        if (!_extract_file(item, path, depth))
        {
            all_ok = false;
        }
    }
    return all_ok;
}
bool iso_extractor::open(const std::string& path)
{
m_input_file.open(path, std::ios::in | std::ios::binary);
if (m_input_file.is_open())
{
m_input_file.seekg(0, std::ios::end);
m_file_size = m_input_file.tellg();
m_input_file.seekg(0, std::ios::beg);
}
return m_input_file.is_open();
}
// Closes the image file; does nothing when no file is open.
void iso_extractor::close()
{
    if (!m_input_file.is_open())
    {
        return;
    }
    m_input_file.close();
}
// Parses the directory structure of the opened image and returns the root
// node held by this object.
//
// The volume descriptors are scanned from sector 16 until the primary volume
// descriptor is found. The root node is named after the volume identifier
// (trailing spaces removed) and filled by _parse_dir_info with at most
// `depth` levels of recursion.
//
// On any failure (short read, missing "CD001" signature, no primary
// descriptor, root directory outside the file) the returned node is cleared,
// i.e. it has an empty name.
iso_dir_info& iso_extractor::parse_dir_info(int depth/* = 260*/)
{
    const uint64_t sector_size = 2048;
    const uint64_t first_descriptor_sector = 0x10;

    m_path_info.clear();

    // The descriptor lives on the stack: nothing to free on any return path.
    iso_primary_vol_desc volume_desc = {};
    uint64_t descriptor_offset = 0;
    bool found = false;
    for (uint64_t sector = first_descriptor_sector; !found; ++sector)
    {
        descriptor_offset = sector * sector_size;

        // Read one volume descriptor.
        const std::streamsize read_size = _read_file_data(&volume_desc, sizeof(volume_desc),
            std::streampos(static_cast<std::streamoff>(descriptor_offset)));
        if (static_cast<std::streamsize>(sizeof(volume_desc)) != read_size)
        {
            return m_path_info;
        }

        // Every descriptor carries the standard identifier.
        if (0 != memcmp(volume_desc.header.id, "CD001", sizeof(volume_desc.header.id)))
        {
            return m_path_info;
        }

        // The set terminator ends the list: there is no primary descriptor.
        if (iso_vol_descriptor_type::terminator == volume_desc.header.type)
        {
            return m_path_info;
        }

        found = (iso_vol_descriptor_type::primary == volume_desc.header.type);
    }

    // Continue only when the root directory lies inside the file.
    const uint64_t block_size = volume_desc.logical_block_size.little;
    const uint64_t root_offset = static_cast<uint64_t>(volume_desc.directory.location_of_extent.little) * block_size;
    if (0 == block_size || m_file_size <= 0 || root_offset >= static_cast<uint64_t>(m_file_size))
    {
        return m_path_info;
    }

    // The volume identifier is a fixed 32-byte field without terminator.
    std::string label(volume_desc.vol_id, sizeof(volume_desc.vol_id));
    const size_t nul_pos = label.find('\0');
    if (std::string::npos != nul_pos)
    {
        label.resize(nul_pos);
    }
    // Remove the trailing spaces of the volume name.
    const size_t last_char_pos = label.find_last_not_of(' ');
    label.resize(std::string::npos == last_char_pos ? 0 : last_char_pos + 1);
    if (label.empty())
    {
        // An empty root name is what the rest of the class reads as "not
        // parsed", so an unlabeled volume gets a placeholder name.
        label = "CDROM";
    }

    m_path_info.name = label;
    m_path_info.attr.directory = 0x01;
    m_path_info.dir_offset = descriptor_offset;
    m_path_info.data_offset = root_offset;

    // Parse the directory structure.
    _parse_dir_info(m_path_info, &volume_desc, std::streampos(static_cast<std::streamoff>(root_offset)), depth);
    return m_path_info;
}
// Returns the entry at `find_path`.
//
// Backslashes in `find_path` are treated as separators. The directory
// structure is parsed first if that has not happened yet. An empty
// `find_path` returns the root node. With `root` set the search includes the
// root node itself (so paths start with the root's name); otherwise it starts
// at the root's children. A result with an empty name means "not found".
const iso_dir_info& iso_extractor::get_dir_info(const std::string& find_path, bool root/* = false*/)
{
    // Parse the directory structure on first use.
    if (m_path_info.name.empty())
    {
        parse_dir_info();
    }

    // No path requested: hand back the root node.
    if (find_path.empty())
    {
        return m_path_info;
    }

    std::string normalized(find_path);
    _replace_string(normalized, PATH_SEPARATOR_WIN, PATH_SEPARATOR_UNIX);

    // Look up the requested path.
    return root
        ? _get_dir_info(m_path_info, "", normalized)
        : _get_dir_list(m_path_info.list, normalized);
}
bool iso_extractor::extract_file(const std::string& name, const std::string& path, bool root/* = false*/, int depth/* = 260*/)
{
// 解析目录结构
if (m_path_info.name.empty())
{
parse_dir_info(depth);
}
// 未解析到内容则返回
if (m_path_info.name.empty())
{
return false;
}
// 获取提取项
iso_dir_info target = get_dir_info(name);
if (target.name.empty())
{
return false;
}
// 创建存放文件夹路径
if (!_create_dir(path))
{
return false;
}
// 提取文件
bool result = false;
if (root)
{
result = _extract_file(target, path, depth);
}
else
{
result = _extract_list(target.list, path, depth);
}
return result;
}
// Extracts the image contents into the directory `path` by calling
// extract_file with an empty entry name; `root` and `depth` are passed on
// unchanged.
bool iso_extractor::extract(const std::string& path, bool root, int depth/* = 260*/)
{
    const std::string whole_image;
    return extract_file(whole_image, path, root, depth);
}
// Prints the directory structure below `path_info`. With `root` set the node
// itself is printed as the first line; otherwise only its children are.
void iso_extractor::print_iso_path_info(const iso_dir_info& path_info, bool root/* = false*/)
{
    if (!root)
    {
        _print_dir_list(path_info.list, 0);
        return;
    }
    _print_dir_info(path_info, 0);
}
// File: main.cpp
#include "iso_extractor.h"
// Demo driver: parses the image named by argv[1], prints its directory
// structure, extracts it into "test2" with an extraction depth of 2, and
// reports how long parsing and extraction took.
//
// The work is repeated after every "pause" until the process is terminated;
// the inner scope destroys the extractor before each pause. Returns 1 when no
// image is given or the image cannot be opened.
int main(int argc, char* argv[])
{
    if (argc < 2)
    {
        printf("usage: %s <image.iso>\n", argc > 0 ? argv[0] : "iso_extractor");
        return 1;
    }

    while (true)
    {
        {
            iso_extractor obj;
            if (!obj.open(argv[1]))
            {
                printf("failed to open: %s\n", argv[1]);
                return 1;
            }

            // Time the parse itself.
            clock_t timeBegin = ::clock();
            const iso_dir_info& info = obj.parse_dir_info(260);
            clock_t timeEnd = ::clock();

            obj.print_iso_path_info(info, false);
            // clock() counts ticks; convert to milliseconds explicitly.
            printf("parse cost time: %.0fms\n", static_cast<double>(timeEnd - timeBegin) * 1000.0 / CLOCKS_PER_SEC);

            timeBegin = ::clock();
            const bool extracted = obj.extract_file("", "test2", false, 2);
            timeEnd = ::clock();
            printf("extract cost time: %.0fms\n", static_cast<double>(timeEnd - timeBegin) * 1000.0 / CLOCKS_PER_SEC);
            if (!extracted)
            {
                printf("extract failed\n");
            }
        }
        system("pause");
    }
    return 0;
}
// Tags: info, std, extraction, ISO, iso, 9660, path, dir, size. From: https://blog.csdn.net/Flame_Cyclone/article/details/143784954