On-disk filesystem layout
All filesystems build on the same basic on-disk layout:
e.g. minix, suited to small-capacity storage;
e.g. ext2, which extends the layout for large-capacity storage.
Analyzing the filesystem on a device
Take the simplest case, minix, as an example.
Formatting
root@ubuntu:~# mkfs.minix /dev/sdb
704 inodes
2048 blocks
Firstdatazone=26 (26)
Zonesize=1024
Maxsize=268966912
In total, 2048 logical blocks were created. Blocks 0-25 hold the boot block, super_block, inode bitmap (i-bmap), zone bitmap (d-bmap), and the inode table inode[704]; each logical block is 1 KB, and the maximum size of a single file is 268966912 bytes. The 26 reserved blocks break down as: boot (1) + super_block (1) + inode bitmap (1) + zone bitmap (1) + inode table (704 × 32 B = 22 blocks).
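As a cross-check, both numbers reported by mkfs.minix can be reproduced from the layout. A minimal user-space sketch (the 32-byte on-disk inode and the 7 direct / 1 indirect / 1 double-indirect zone split are properties of the minix v1 format shown below):
#include <stdio.h>

int main(void)
{
    /* minix v1: 1 KB logical blocks, 32-byte on-disk inodes */
    long block = 1024, inodes = 704;
    long inode_blocks = inodes * 32 / block;           /* 22 blocks */
    /* boot + super_block + inode bitmap + zone bitmap + inode table */
    long firstdatazone = 1 + 1 + 1 + 1 + inode_blocks; /* = 26 */
    /* 7 direct zones, 1 indirect, 1 double-indirect; a 1 KB block
     * holds 512 16-bit zone numbers */
    long maxsize = (7 + 512 + 512L * 512) * block;     /* = 268966912 */
    printf("firstdatazone=%ld maxsize=%ld\n", firstdatazone, maxsize);
    return 0;
}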
Dump the disk contents (super_block at offset 0x400, inode bitmap at 0x800, zone bitmap at 0xc00, inode table at 0x1000, first data zone at 26 × 1024 = 0x6800):
root@ubuntu:~# hexdump -C /dev/sdb > wlt/minix.origin
root@ubuntu:~# cat wlt/minix.origin
00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000400 c0 02 00 08 01 00 01 00 1a 00 00 00 00 1c 08 10 |................|
00000410 8f 13 01 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000420 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000800 03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000810 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000850 00 00 00 00 00 00 00 00 fe ff ff ff ff ff ff ff |................|
00000860 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
*
00000c00 03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000c10 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000cf0 00 00 00 00 00 00 00 00 00 00 00 00 80 ff ff ff |................|
00000d00 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
*
00001000 ed 41 00 00 40 00 00 00 30 c5 75 63 00 02 1a 00 |.A..@...0.uc....|
00001010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00006800 01 00 2e 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00006810 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00006820 01 00 2e 2e 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00006830 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00006840 00 00 2e 62 61 64 62 6c 6f 63 6b 73 00 00 00 00 |...badblocks....|
00006850 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00200000
A filesystem must define its xxx_inode, xxx_super_block, and xxx_dir_entry. For minix, the definitions below are enough to decode a minix-formatted disk.
include/uapi/linux/minix_fs.h
/*
* This is the original minix inode layout on disk.
* Note the 8-bit gid and atime and ctime.
*/
struct minix_inode {
__u16 i_mode; // file type and access permissions
__u16 i_uid; // user id
__u32 i_size; // size in bytes
__u32 i_time;
__u8 i_gid; // group id
__u8 i_nlinks; // hard-link count
__u16 i_zone[9]; // data zones: 7 direct, 1 indirect, 1 double-indirect
};
/*
* minix super-block data on disk
*/
struct minix_super_block {
__u16 s_ninodes; // number of inodes
__u16 s_nzones; // number of logical blocks (zones)
__u16 s_imap_blocks; // blocks used by the inode bitmap
__u16 s_zmap_blocks; // blocks used by the zone (data) bitmap
__u16 s_firstdatazone; // block number of the first data zone
__u16 s_log_zone_size; // zone size, as log2 of blocks per zone
__u32 s_max_size; // maximum supported file size
__u16 s_magic;
__u16 s_state;
__u32 s_zones;
};
struct minix_dir_entry {
__u16 inode;
char name[0];
};
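With these definitions, the superblock dumped above at offset 0x400 can be decoded directly. A minimal sketch (the struct is re-declared with fixed-width types instead of including kernel headers; /dev/sdb and a little-endian host, as in the session above, are assumptions):
#include <stdio.h>
#include <stdint.h>

/* copy of struct minix_super_block, fixed-width types, no padding */
struct minix_super_block {
    uint16_t s_ninodes;
    uint16_t s_nzones;
    uint16_t s_imap_blocks;
    uint16_t s_zmap_blocks;
    uint16_t s_firstdatazone;
    uint16_t s_log_zone_size;
    uint32_t s_max_size;
    uint16_t s_magic;
    uint16_t s_state;
    uint32_t s_zones;
};

int main(void)
{
    struct minix_super_block sb;
    FILE *f = fopen("/dev/sdb", "rb");        /* assumed device/image path */
    /* the superblock lives in block 1, i.e. at byte offset 1024 */
    if (!f || fseek(f, 1024, SEEK_SET) || fread(&sb, sizeof(sb), 1, f) != 1) {
        perror("read superblock");
        return 1;
    }
    fclose(f);
    printf("inodes=%u zones=%u imap=%u zmap=%u firstdatazone=%u magic=0x%x\n",
           sb.s_ninodes, sb.s_nzones, sb.s_imap_blocks, sb.s_zmap_blocks,
           sb.s_firstdatazone, sb.s_magic);
    return 0;
}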
Changes to the filesystem after creating directories and files
root@ubuntu:~# mount /dev/sdb /mnt/
root@ubuntu:~# cd /mnt/
root@ubuntu:/mnt# mkdir dir0
root@ubuntu:/mnt# echo "world" > file0
root@ubuntu:/mnt# cd dir0/
root@ubuntu:/mnt/dir0# echo "hello" > file1
root@ubuntu:/mnt/dir0# cd /root/
root@ubuntu:~# umount /mnt
root@ubuntu:~# hexdump -C /dev/sdb > wlt/minix.data
The filesystem contents are now:
root@ubuntu:~# cat wlt/minix.data
00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000400 c0 02 00 08 01 00 01 00 1a 00 00 00 00 1c 08 10 |................|
00000410 8f 13 01 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000420 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000800 1f 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000810 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000850 00 00 00 00 00 00 00 00 fe ff ff ff ff ff ff ff |................|
00000860 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
*
00000c00 1f 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000c10 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000cf0 00 00 00 00 00 00 00 00 00 00 00 00 80 ff ff ff |................|
00000d00 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
*
00001000 ed 41 00 00 80 00 00 00 f3 df 75 63 00 03 1a 00 |.A........uc....|
00001010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00001020 ed 41 00 00 60 00 00 00 f9 df 75 63 00 02 1b 00 |.A..`.....uc....|
00001030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00001040 a4 81 00 00 06 00 00 00 08 e0 75 63 00 01 1d 00 |..........uc....|
00001050 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00001060 a4 81 00 00 06 00 00 00 ff df 75 63 00 01 1c 00 |..........uc....|
00001070 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00006800 01 00 2e 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00006810 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00006820 01 00 2e 2e 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00006830 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00006840 02 00 64 69 72 30 00 00 00 00 00 00 00 00 00 00 |..dir0..........|
00006850 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00006860 03 00 66 69 6c 65 30 00 00 00 00 00 00 00 00 00 |..file0.........|
00006870 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00006c00 02 00 2e 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00006c10 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00006c20 01 00 2e 2e 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00006c30 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00006c40 04 00 66 69 6c 65 31 00 00 00 00 00 00 00 00 00 |..file1.........|
00006c50 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00007000 68 65 6c 6c 6f 0a 00 00 00 00 00 00 00 00 00 00 |hello...........|
00007010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00007400 77 6f 72 6c 64 0a 00 00 00 00 00 00 00 00 00 00 |world...........|
00007410 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00200000
Analysis: comparing the two dumps, the inode bitmap (0x800) and zone bitmap (0xc00) both change from 0x03 to 0x1f, marking inodes 1-4 and data zones 26-29 as in use (bit 0 of each bitmap is reserved). The inode table at 0x1000 now holds four inodes: inode 1 is the root directory (mode 0x41ed = drwxr-xr-x, size 128 = 4 entries, nlinks 3, zone 26); inode 2 is dir0 (size 96 = 3 entries, nlinks 2, zone 27); inodes 3 and 4 are file0 and file1 (mode 0x81a4 = -rw-r--r--, size 6, zones 29 and 28 respectively). Zone 26 (offset 0x6800) holds the root entries . .. dir0 file0, zone 27 (0x6c00) holds dir0's entries . .. file1, and the data "world\n" and "hello\n" sit in zones 29 (0x7400) and 28 (0x7000).
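The directory entries can be walked the same way. A sketch that lists the root directory from zone 26 (the 32-byte entry size follows from the 0x138f magic, which means 30-character names; /dev/sdb is again an assumed path):
#include <stdio.h>
#include <stdint.h>

#define BLOCK   1024
#define DIRSIZE 32   /* 2-byte inode + 30-char name, per magic 0x138f */

int main(void)
{
    unsigned char blk[BLOCK];
    FILE *f = fopen("/dev/sdb", "rb");        /* assumed device/image path */
    /* the root inode's zone[0] = 26, as seen in the dump at 0x1000 */
    if (!f || fseek(f, 26 * BLOCK, SEEK_SET) || fread(blk, BLOCK, 1, f) != 1) {
        perror("read root dir");
        return 1;
    }
    fclose(f);
    for (int off = 0; off < BLOCK; off += DIRSIZE) {
        uint16_t ino = (uint16_t)(blk[off] | blk[off + 1] << 8);
        if (ino)                              /* inode 0 = unused slot */
            printf("%5u %.30s\n", ino, (char *)&blk[off + 2]);
    }
    return 0;
}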
VFS
Everything in Linux is a file, and the key to that is the VFS: it lets applications reach the kernel through a single set of file-operation interfaces.
To achieve this, each concrete filesystem implements its own callback methods, and the VFS late-binds them,
e.g. through file->f_op.
For minix:
const struct file_operations minix_file_operations = {
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
};
while for character devices:
const struct file_operations def_chr_fops = {
.open = chrdev_open,
.llseek = noop_llseek,
};
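The mechanism itself is ordinary function-pointer dispatch. A self-contained user-space analogy (toy types only, not the kernel's): the generic call site reads the same either way; only the bound table differs.
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

/* toy late binding: one generic interface, per-filesystem op tables */
struct file;
struct file_operations {
    ssize_t (*read)(struct file *, char *, size_t);
};
struct file {
    const struct file_operations *f_op;   /* bound at open time */
    const char *backing;                  /* toy backing data */
};

static ssize_t toy_read(struct file *f, char *buf, size_t n)
{
    size_t len = strlen(f->backing);
    if (len > n)
        len = n;
    memcpy(buf, f->backing, len);
    return (ssize_t)len;
}

static const struct file_operations toy_fops = { .read = toy_read };

int main(void)
{
    struct file f = { .f_op = &toy_fops, .backing = "hello\n" };
    char buf[16];
    ssize_t n = f.f_op->read(&f, buf, sizeof(buf)); /* generic call site */
    fwrite(buf, 1, (size_t)n, stdout);
    return 0;
}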
The core data structures that get late-bound include:
VFS super_block
VFS inode
VFS file
VFS dentry
The VFS also has a few other important types:
file_system_type
vfsmount
Filesystem registration
A filesystem module registers itself at module init time and unregisters at exit.
Using minix as an example:
static struct file_system_type minix_fs_type = {
.owner = THIS_MODULE,
.name = "minix",
.mount = minix_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("minix");
static int __init init_minix_fs(void)
{
int err = init_inodecache();
if (err)
goto out1;
err = register_filesystem(&minix_fs_type);
if (err)
goto out;
return 0;
out:
destroy_inodecache();
out1:
return err;
}
static void __exit exit_minix_fs(void)
{
unregister_filesystem(&minix_fs_type);
destroy_inodecache();
}
module_init(init_minix_fs)
module_exit(exit_minix_fs)
file_system_type represents a filesystem:
struct file_system_type {
const char *name; // filesystem name
int fs_flags; // flags describing backing-store requirements
#define FS_REQUIRES_DEV 1 /* requires a backing block device (disk) */
#define FS_BINARY_MOUNTDATA 2 /* binary mount data, e.g. network filesystems */
#define FS_HAS_SUBTYPE 4
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
#define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
struct dentry *(*mount) (struct file_system_type *, int,
const char *, void *); // method used to mount the filesystem
void (*kill_sb) (struct super_block *); // teardown at unmount
struct module *owner;
struct file_system_type * next; // registered filesystems form a singly linked list
struct hlist_head fs_supers;
struct lock_class_key s_lock_key;
struct lock_class_key s_umount_key;
struct lock_class_key s_vfs_rename_key;
struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];
struct lock_class_key i_lock_key;
struct lock_class_key i_mutex_key;
struct lock_class_key invalidate_lock_key;
struct lock_class_key i_mutex_dir_key;
};
The registration operation itself:
static struct file_system_type **find_filesystem(const char *name, unsigned len)
{
struct file_system_type **p;
for (p = &file_systems; *p; p = &(*p)->next)
if (strncmp((*p)->name, name, len) == 0 &&
!(*p)->name[len])
break;
return p;
}
int register_filesystem(struct file_system_type * fs)
{
int res = 0;
struct file_system_type ** p;
if (fs->parameters &&
!fs_validate_description(fs->name, fs->parameters))
return -EINVAL;
BUG_ON(strchr(fs->name, '.'));
if (fs->next)
return -EBUSY;
write_lock(&file_systems_lock);
p = find_filesystem(fs->name, strlen(fs->name)); // look for an existing registration; if none, p points at the tail's next pointer
if (*p)
res = -EBUSY;
else
*p = fs;
write_unlock(&file_systems_lock);
return res;
}
After registration, the kernel therefore keeps every filesystem on a singly linked list hanging off the global file_systems pointer, each node's next pointing at the following type.
The registered filesystems can be listed with:
root@ubuntu:~/wlt/build/linux-5.16.2# cat /proc/filesystems
nodev sysfs
nodev rootfs
nodev ramfs
nodev bdev
nodev proc
nodev cpuset
Device mounting
Types you need to know to follow the mounting code:
struct super_block {
struct list_head s_list; /* links the super_blocks of all mounted filesystems */
dev_t s_dev; /* search index; _not_ kdev_t */
unsigned char s_blocksize_bits;
unsigned long s_blocksize;
loff_t s_maxbytes; /* Max file size */
struct file_system_type *s_type; // the filesystem type this super_block belongs to
const struct super_operations *s_op; // callback interface
const struct dquot_operations *dq_op;
const struct quotactl_ops *s_qcop;
const struct export_operations *s_export_op;
unsigned long s_flags;
unsigned long s_iflags; /* internal SB_I_* flags */
unsigned long s_magic;
struct dentry *s_root; // root directory of this filesystem
struct rw_semaphore s_umount;
int s_count;
...
};
struct dentry {
...
struct hlist_bl_node d_hash; /* lookup hash list */
struct dentry *d_parent; /* parent directory */
struct qstr d_name; /* file name */
struct inode *d_inode; /* Where the name belongs to - NULL is
* negative */
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
const struct dentry_operations *d_op;
struct super_block *d_sb; /* The root of the dentry tree */
...
} __randomize_layout;
struct inode {
umode_t i_mode;
unsigned short i_opflags;
kuid_t i_uid;
kgid_t i_gid;
unsigned int i_flags;
const struct inode_operations *i_op; // callback interface
struct super_block *i_sb;
struct address_space *i_mapping;
....
};
These types are the VFS's abstraction over any concrete filesystem. When the kernel mounts a device, it reads the on-disk filesystem and creates objects of these types, and the objects are linked to one another.
First the relationships between mount, vfsmount, dentry, inode, and super_block are set up.
Once that completes, the objects are wired into one structure: the mount points at the super_block, the super_block at its root dentry, and each dentry at its inode.
In summary: mount_hashtable leads to the device's super_block, and through the super_block the inode of the device's root directory can be loaded, which is what makes the mounted device reachable.
The key into mount_hashtable is computed from a struct path:
struct path {
struct vfsmount *mnt; // carries the mounted super_block
struct dentry *dentry; // the mount-point directory
} __randomize_layout;
A dentry alone cannot determine the corresponding inode, because another device may be mounted on that dentry; the mnt is needed as well to disambiguate.
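This is visible in the kernel's hash lookup, which keys on the (mnt, dentry) pair (from fs/namespace.c in the 5.x era, quoted approximately):
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
    struct hlist_head *head = m_hash(mnt, dentry); /* hash both fields */
    struct mount *p;

    hlist_for_each_entry_rcu(p, head, mnt_hash)
        if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
            return p;   /* a mount covers this (mnt, dentry) */
    return NULL;
}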
If a child device is mounted underneath, a nested version of the same structure is formed.
File open and create
Related object relationships
Reading a file
Design overview
Note address_space: data blocks are stored non-contiguously on the device, while upper layers want to read and write data as a contiguous stream. The address space exists to present those discontiguous blocks as contiguous data; address_space manages its pages with a radix tree.
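A user-space sketch of the idea: a file offset is reduced to a page index, and a sparse index structure (the kernel uses a radix tree, an xarray in newer kernels) maps each index to a cached page, so callers see linear offsets no matter where the pages or their backing blocks actually live. The toy below stands a plain array in for the tree:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096

/* toy "address_space": a sparse array standing in for the radix tree */
struct toy_mapping {
    char *pages[1024];   /* page index -> cached page (or NULL) */
};

/* return the cached page for a file offset, allocating on first touch
 * (the kernel would instead read it in through the block layer) */
static char *find_or_create_page(struct toy_mapping *m, long off)
{
    long idx = off / PAGE_SIZE;   /* offset -> page index */
    if (!m->pages[idx])
        m->pages[idx] = calloc(1, PAGE_SIZE);
    return m->pages[idx];
}

int main(void)
{
    struct toy_mapping m = { { 0 } };
    /* pages 0 and 5 live wherever the allocator puts them, yet the
     * offsets remain linear for the caller */
    strcpy(find_or_create_page(&m, 0), "start");
    strcpy(find_or_create_page(&m, 5 * PAGE_SIZE), "later");
    printf("%s / %s\n", find_or_create_page(&m, 0),
           find_or_create_page(&m, 5 * PAGE_SIZE));
    return 0;
}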
Code analysis
Device files
Device files live under /dev and include character devices and block devices:
yangxr@vexpress:/ # ls /dev/ -l
total 0
crw-rw---- 1 0 0 5, 1 Jan 1 00:00 console
crw-rw---- 1 0 0 10, 127 Jan 1 00:00 cpu_dma_latency
crw-rw---- 1 0 0 1, 7 Jan 1 00:00 full
crw-rw---- 1 0 0 10, 183 Jan 1 00:00 hwrng
drwxr-xr-x 2 0 0 80 Jan 1 00:00 input
crw-rw---- 1 0 0 1, 11 Jan 1 00:00 kmsg
crw-rw---- 1 0 0 1, 1 Jan 1 00:00 mem
brw-rw---- 1 0 0 179, 0 Jan 1 00:00 mmcblk0
crw-rw---- 1 0 0 90, 0 Jan 1 00:00 mtd0
Now look at the filesystem mounted on /dev: it is tmpfs.
yangxr@vexpress:/ # mount
192.168.5.129:/root/wlt/rootfs on / type nfs (rw,relatime,vers=2,rsize=4096,wsize=4096,namlen=255,hard,nolock,proto=tcp,timeo=600,retrans=2,sec=sys,mountaddr=192.168.5.129,mountvers=1,mountproto=tcp,local_lock=all,addr=192.168.5.129)
proc on /proc type proc (rw,relatime)
tmpfs on /tmp type tmpfs (rw,relatime)
sysfs on /sys type sysfs (rw,relatime)
var on /dev type tmpfs (rw,relatime)
devpts on /dev/pts type devpts (rw,relatime,mode=600,ptmxmode=000)
And tmpfs is memory-based (listed as nodev):
yangxr@vexpress:/ # cat /proc/filesystems
nodev sysfs
nodev tmpfs
nodev bdev
nodev proc
nodev cgroup
nodev cgroup2
nodev cpuset
nodev devtmpfs
nodev tracefs
nodev sockfs
nodev pipefs
nodev ramfs
nodev rpc_pipefs
nodev devpts
ext3
ext4
ext2
cramfs
squashfs
vfat
nodev nfs
nodev jffs2
nodev 9p
nodev ubifs
So device files differ from ordinary files:
- a device file's inode is not recorded on the storage device; it is created in memory after the kernel starts running
- the most important piece of information in a device file is the device number
Device files are created at three points:
- pre-created during kernel initialization
- created by udev/mdev based on /sys/class
- created by a user calling mknod (see the sketch below)
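For the mknod case, a minimal sketch (the path /dev/mydev and the 42/0 major/minor pair are made-up examples; creating device nodes requires root):
#include <stdio.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>   /* makedev() */

int main(void)
{
    /* create a character device node: type + permissions + device number */
    if (mknod("/dev/mydev", S_IFCHR | 0600, makedev(42, 0)) != 0) {
        perror("mknod");
        return 1;
    }
    return 0;
}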
The tmpfs filesystem
// tmpfs implements no mount operation; mount exists to read a device's super_block and build the in-memory super_block and mount objects, which a purely in-memory filesystem does not need
static struct file_system_type shmem_fs_type = {
.owner = THIS_MODULE,
.name = "tmpfs",
.init_fs_context = shmem_init_fs_context,
#ifdef CONFIG_TMPFS
.parameters = shmem_fs_parameters,
#endif
.kill_sb = kill_litter_super,
.fs_flags = FS_USERNS_MOUNT,
};
int __init shmem_init(void)
{
int error;
shmem_init_inodecache();
error = register_filesystem(&shmem_fs_type);
if (error) {
pr_err("Could not register tmpfs\n");
goto out2;
}
shm_mnt = kern_mount(&shmem_fs_type);
if (IS_ERR(shm_mnt)) {
error = PTR_ERR(shm_mnt);
pr_err("Could not kern_mount tmpfs\n");
goto out1;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY)
SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
else
shmem_huge = SHMEM_HUGE_NEVER; /* just in case it was patched */
#endif
return 0;
out1:
unregister_filesystem(&shmem_fs_type);
out2:
shmem_destroy_inodecache();
shm_mnt = ERR_PTR(error);
return error;
}
Creating device files
Device files are created with mknod; the main work is creating the inode, recording the device number, and binding the default ops.
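That last step, binding the default ops, is done by init_special_inode() (abridged here from fs/inode.c; the FIFO and socket branches are left out):
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
    inode->i_mode = mode;
    if (S_ISCHR(mode)) {
        inode->i_fop = &def_chr_fops; /* driver fops bound later in chrdev_open() */
        inode->i_rdev = rdev;
    } else if (S_ISBLK(mode)) {
        inode->i_fop = &def_blk_fops;
        inode->i_rdev = rdev;
    }
    /* ... S_ISFIFO / S_ISSOCK cases omitted ... */
}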
Opening device files
The root filesystem
The root filesystem is required for the kernel to hand over to user processes, and it has a specific directory structure:
root@ubuntu:~/wlt/build/linux-5.16.2# ls ../../rootfs
bin dev etc lib linuxrc proc root sbin sys tmp usr var
It must contain the essential programs, e.g. linuxrc -> bin/busybox,
plus configuration for initializing the system, e.g.:
root@ubuntu:~/wlt/build/linux-5.16.2# ls ../../rootfs/etc/
fstab init.d inittab profile
fstab: directs which filesystems get mounted at initialization
inittab: scripts or programs to run on special events such as system startup and shutdown
init.d: a directory holding assorted scripts
profile: shell environment and prompt settings
Mounting the root filesystem
There are three approaches:
1) Build it on a device; boot parameters must then tell the kernel the root device and filesystem type
2) initrd: the root-filesystem data is built into a separate image, and mounting happens twice
3) initramfs: at kernel link time a special section is created to hold the root-filesystem data, which ends up as part of the kernel binary
rootfs
Mounting a root filesystem requires a directory to already exist, and that directory must itself belong to some filesystem. This filesystem, created even earlier than the root filesystem and dependent on no other filesystem, is called rootfs.
rootfs is implemented on top of tmpfs and is created during kernel initialization.
asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
{
...
vfs_caches_init(); // creates rootfs and its root directory
...
arch_call_rest_init(); // goes on to mount the real root filesystem
}
Code analysis
After rootfs is mounted
Once rootfs is mounted, the structure described above is created, and the process's root directory and current working directory paths are both set to rootfs's root directory.
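This happens at the end of init_mount_tree() (abridged; the mount-namespace setup is elided, and the exact shape varies across kernel versions):
static void __init init_mount_tree(void)
{
    struct vfsmount *mnt;
    struct path root;

    mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
    if (IS_ERR(mnt))
        panic("Can't create rootfs");
    /* ... mount namespace setup elided ... */
    root.mnt = mnt;
    root.dentry = mnt->mnt_root;
    set_fs_pwd(current->fs, &root);   /* current directory = rootfs root */
    set_fs_root(current->fs, &root);  /* root directory = rootfs root */
}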
Mounting the real root filesystem
The boot parameters I use:
setenv bootargs 'root=/dev/nfs rw noinitrd \
nfsroot=192.168.5.129:/root/wlt/rootfs \
ip=192.168.5.127 \
init=/linuxrc console=ttyAMA0';
Driven by these parameters, the kernel mounts the filesystem onto rootfs's /root directory, then moves the mount point to rootfs's /, and finally updates task_struct->fs so that the process's root directory is the NFS root.
initrd
CPIO format
Configure the kernel to support the chosen compression format (gzip here).
Create the initrd image:
find . | cpio -o -H newc | gzip -c > initrd.tgz
U-Boot boot parameters:
#define CONFIG_BOOTCOMMAND \
"tftp 0x60010000 uImage; tftp 0x60500000 vexpress-v2p-ca9.dtb; \
tftp 0x62000000 initrd.tgz; \
setenv bootargs 'initrd=0x62000000,4M root=/dev/ram0 rw \
rdinit=/linuxrc console=ttyAMA0'; \
bootm 0x60010000 - 0x60500000;"
After booting, the mount output confirms that the root filesystem is rootfs:
yangxr@vexpress:/ # mount
rootfs on / type rootfs (rw)
proc on /proc type proc (rw,relatime)
tmpfs on /tmp type tmpfs (rw,relatime)
sysfs on /sys type sysfs (rw,relatime)
var on /dev type tmpfs (rw,relatime)
devpts on /dev/pts type devpts (rw,relatime,mode=600,ptmxmode=000)
initramfs
Configure the initramfs source directory in the kernel configuration (CONFIG_INITRAMFS_SOURCE);
kbuild then packs the directory into cpio format and links it into the Image.
Note that this makes the uImage large, so U-Boot's maximum image size limit usually has to be raised.
With initramfs, the default init program is /init, so either rename the program accordingly or point the kernel at it with rdinit=.
Code analysis
With the CPIO format, the kernel decompresses and unpacks the in-memory archive into the rootfs filesystem, takes rootfs as the root filesystem, and runs the /init program.
Automatic filesystem mounting
Once init runs, it mounts filesystems automatically according to /etc/fstab,
e.g. (fields: device, mount point, type, options, dump, fsck pass):
proc /proc proc defaults 0 0
tmpfs /tmp tmpfs defaults 0 0
sysfs /sys sysfs defaults 0 0
var /dev tmpfs defaults 0 0
ramfs /dev tmpfs defaults 0 0
Both tmpfs and ramfs are memory-based filesystems. The difference: tmpfs is an improved ramfs that frees memory when files are deleted and, under memory pressure, can swap file pages out to swap space.