首页 > 系统相关 >linux内存管理(三)进程地址空间(上)v5.0

linux内存管理(三)进程地址空间(上)v5.0

时间:2024-01-28 12:35:59浏览次数:30  
标签:__ struct mm vm long vma 内存 linux v5.0

每个进程都有自己的虚拟地址空间。在task_struct数据结构中有一个mm_struct指针(mm),专门用来描述进程的虚拟地址空间。

/* Excerpt of struct task_struct; every field except mm is elided by the "..." lines. */
struct task_struct {
...
    struct mm_struct  *mm;    /* describes this task's virtual address space */
...
}
/*
 * Memory descriptor of a process: the VMA list and rbtree, the layout of the
 * address space (code, data, heap, mmap area, stack), the page-table base,
 * reference counts, locks and accounting counters.
 */
struct mm_struct {
    struct {
        struct vm_area_struct *mmap;        /* list of VMAs */
        struct rb_root mm_rb;               /* rbtree of the same VMAs, linked through vm_rb */
        u64 vmacache_seqnum;                   /* per-thread vmacache */
#ifdef CONFIG_MMU
        unsigned long (*get_unmapped_area) (struct file *filp,
                unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags);
#endif
        unsigned long mmap_base;    /* base of mmap area */
        unsigned long mmap_legacy_base;    /* base of mmap area in bottom-up allocations */
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
        /* Base addresses for compatible mmap() */
        unsigned long mmap_compat_base;
        unsigned long mmap_compat_legacy_base;
#endif
        unsigned long task_size;    /* size of task vm space */
        unsigned long highest_vm_end;    /* highest vma end address */
        pgd_t * pgd;    /* base of this address space's page tables */

        /**
         * @mm_users: The number of users including userspace.
         *
         * Use mmget()/mmget_not_zero()/mmput() to modify. When this
         * drops to 0 (i.e. when the task exits and there are no other
         * temporary reference holders), we also release a reference on
         * @mm_count (which may then free the &struct mm_struct if
         * @mm_count also drops to 0).
         */
        atomic_t mm_users;

        /**
         * @mm_count: The number of references to &struct mm_struct
         * (@mm_users count as 1).
         *
         * Use mmgrab()/mmdrop() to modify. When this drops to 0, the
         * &struct mm_struct is freed.
         */
        atomic_t mm_count;

#ifdef CONFIG_MMU
        atomic_long_t pgtables_bytes;    /* PTE page table pages */
#endif
        int map_count;            /* number of VMAs */

        spinlock_t page_table_lock; /* Protects page tables and some
                         * counters
                         */
        struct rw_semaphore mmap_sem;

        struct list_head mmlist; /* List of maybe swapped mm's.    These
                      * are globally strung together off
                      * init_mm.mmlist, and are protected
                      * by mmlist_lock
                      */


        unsigned long hiwater_rss; /* High-watermark of RSS usage */
        unsigned long hiwater_vm;  /* High-water virtual memory usage */

        unsigned long total_vm;       /* Total pages mapped */
        unsigned long locked_vm;   /* Pages that have PG_mlocked set */
        unsigned long pinned_vm;   /* Refcount permanently increased */
        unsigned long data_vm;       /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
        unsigned long exec_vm;       /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
        unsigned long stack_vm;       /* VM_STACK */
        unsigned long def_flags;

        spinlock_t arg_lock; /* protect the below fields */
        /* Bounds of the code and data segments. */
        unsigned long start_code, end_code, start_data, end_data;
        /* Heap start and current break, and the start of the stack. */
        unsigned long start_brk, brk, start_stack;
        /* NOTE(review): presumably the argv and envp string areas - confirm. */
        unsigned long arg_start, arg_end, env_start, env_end;

        unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */

        /*
         * Special counters, in some configurations protected by the
         * page_table_lock, in other configurations by being atomic.
         */
        struct mm_rss_stat rss_stat;

        struct linux_binfmt *binfmt;

        /* Architecture-specific MM context */
        mm_context_t context;

        unsigned long flags; /* Must use atomic bitops to access */

        struct core_state *core_state; /* coredumping support */
#ifdef CONFIG_MEMBARRIER
        atomic_t membarrier_state;
#endif
#ifdef CONFIG_AIO
        spinlock_t            ioctx_lock;
        struct kioctx_table __rcu    *ioctx_table;
#endif
#ifdef CONFIG_MEMCG
        /*
         * "owner" points to a task that is regarded as the canonical
         * user/owner of this mm. All of the following must be true in
         * order for it to be changed:
         *
         * current == mm->owner
         * current->mm != mm
         * new_owner->mm == mm
         * new_owner->alloc_lock is held
         */
        struct task_struct __rcu *owner;
#endif
        struct user_namespace *user_ns;

        /* store ref to file /proc/<pid>/exe symlink points to */
        struct file __rcu *exe_file;
#ifdef CONFIG_MMU_NOTIFIER
        struct mmu_notifier_mm *mmu_notifier_mm;
#endif
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
        pgtable_t pmd_huge_pte; /* protected by page_table_lock */
#endif
#ifdef CONFIG_NUMA_BALANCING
        /*
         * numa_next_scan is the next time that the PTEs will be marked
         * pte_numa. NUMA hinting faults will gather statistics and
         * migrate pages to new nodes if necessary.
         */
        unsigned long numa_next_scan;

        /* Restart point for scanning and setting pte_numa */
        unsigned long numa_scan_offset;

        /* numa_scan_seq prevents two threads setting pte_numa */
        int numa_scan_seq;
#endif
        /*
         * An operation with batched TLB flushing is going on. Anything
         * that can move process memory needs to flush the TLB when
         * moving a PROT_NONE or PROT_NUMA mapped page.
         */
        atomic_t tlb_flush_pending;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
        /* See flush_tlb_batched_pending() */
        bool tlb_flush_batched;
#endif
        struct uprobes_state uprobes_state;
#ifdef CONFIG_HUGETLB_PAGE
        atomic_long_t hugetlb_usage;
#endif
        struct work_struct async_put_work;

#if IS_ENABLED(CONFIG_HMM)
        /* HMM needs to track a few things per mm */
        struct hmm *hmm;
#endif
    } __randomize_layout;

    /*
     * The mm_cpumask needs to be at the end of mm_struct, because it
     * is dynamically sized based on nr_cpu_ids.
     */
    unsigned long cpu_bitmap[];
};

mm_struct包含了运行一个进程所需的所有内存方面的描述。启动进程需要的可执行文件由exe_file表示;程序的代码段和数据段要放到内存中,分别由start_code、end_code和start_data、end_data表示;进程运行时使用的内存中,malloc用的堆由start_brk、brk表示,mmap空间由mmap_base表示,栈由start_stack表示。这样就构成了进程地址空间的视图。访问内存需要页表,页表的基地址放在pgd中。所有进程的mm_struct会通过mmlist链接起来。与栈空间不同,描述mmap空间有一个专门的数据结构vm_area_struct(简称VMA),它代表mmap空间的一段区域,也用来描述代码段、数据段。mm_struct中的mmap字段指向进程的VMA链表,VMA内有vm_prev和vm_next指针,可以方便地遍历所有VMA。所有VMA又通过rbtree(mm_rb)组织起来,便于按地址快速查找。

 图片来自《奔跑吧linux内核卷一》

/*
 * Describes one contiguous region [vm_start, vm_end) of a process address
 * space, together with its protection, flags and backing-store information.
 * Each VMA is linked both into a sorted list and into an rbtree.
 */
struct vm_area_struct {
    /* The first cache line has the info for VMA tree walking. */

    unsigned long vm_start;        /* Our start address within vm_mm. */
    unsigned long vm_end;        /* The first byte after our end address
                       within vm_mm. */

    /* linked list of VM areas per task, sorted by address */
    struct vm_area_struct *vm_next, *vm_prev;

    struct rb_node vm_rb;        /* node in the rbtree rooted at mm_struct.mm_rb */

    /*
     * Largest free memory gap in bytes to the left of this VMA.
     * Either between this VMA and vma->vm_prev, or between one of the
     * VMAs below us in the VMA rbtree and its ->vm_prev. This helps
     * get_unmapped_area find a free area of the right size.
     */
    unsigned long rb_subtree_gap;

    /* Second cache line starts here. */

    struct mm_struct *vm_mm;    /* The address space we belong to. */
    pgprot_t vm_page_prot;        /* Access permissions of this VMA. */
    unsigned long vm_flags;        /* Flags, see mm.h. */

    /*
     * For areas with an address space and backing store,
     * linkage into the address_space->i_mmap interval tree.
     */
    struct {
        struct rb_node rb;
        unsigned long rb_subtree_last;
    } shared;

    /*
     * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
     * list, after a COW of one of the file pages.    A MAP_SHARED vma
     * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
     * or brk vma (with NULL file) can only be in an anon_vma list.
     */
    struct list_head anon_vma_chain; /* Serialized by mmap_sem &
                      * page_table_lock */
    struct anon_vma *anon_vma;    /* Serialized by page_table_lock */

    /* Function pointers to deal with this struct. */
    const struct vm_operations_struct *vm_ops;

    /* Information about our backing store: */
    unsigned long vm_pgoff;        /* Offset (within vm_file) in PAGE_SIZE
                       units */
    struct file * vm_file;        /* File we map to (can be NULL). */
    void * vm_private_data;        /* was vm_pte (shared mem) */

    atomic_long_t swap_readahead_info;
#ifndef CONFIG_MMU
    struct vm_region *vm_region;    /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
    struct mempolicy *vm_policy;    /* NUMA policy for the VMA */
#endif
    struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
} __randomize_layout;

vm_area_struct描述一个VMA区域的起始地址(vm_start)和结束地址(vm_end);区域的属性由vm_flags表示。下面是属性列表:

/* Bit values stored in vm_area_struct.vm_flags (partial list). */
#define VM_NONE         0x00000000

#define VM_READ         0x00000001      //readable
#define VM_WRITE        0x00000002     //writable
#define VM_EXEC         0x00000004      //executable
#define VM_SHARED       0x00000008    //shared between processes

/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
#define VM_MAYREAD      0x00000010      /* limits for mprotect() etc */
#define VM_MAYWRITE     0x00000020
#define VM_MAYEXEC      0x00000040
#define VM_MAYSHARE     0x00000080

#define VM_GROWSDOWN    0x00000100      /* general info on the segment */
#ifdef CONFIG_MMU
#define VM_UFFD_MISSING 0x00000200      /* missing pages tracking */
#else /* CONFIG_MMU */
#define VM_MAYOVERLAY   0x00000200      /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
#define VM_UFFD_MISSING 0
#endif /* CONFIG_MMU */
#define VM_PFNMAP       0x00000400      /* Page-ranges managed without "struct page", just pure PFN */
#define VM_UFFD_WP      0x00001000      /* wrprotect pages tracking */

#define VM_LOCKED       0x00002000
#define VM_IO           0x00004000      /* Memory mapped I/O or similar */
。。。

vm_flags只是从caller得到的属性需求,要想落实这些属性,必须转换成pte可以识别的格式。VMA还有一个vm_page_prot字段,用来表示pte的属性,它是一个pgprot_t类型的变量,而pgprot_t本质是pteval_t。函数vm_get_page_prot用来完成从vm_flags到vm_page_prot的转换。

/*
 * Translate VMA flags into page protection bits.
 *
 * The read/write/exec/shared bits of @vm_flags select an entry of
 * protection_map[]; that entry is OR-ed with the value returned by
 * arch_vm_get_page_prot(), and the combination is passed through
 * arch_filter_pgprot() before being returned.
 */
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
    unsigned long slot = vm_flags & (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED);
    pgprot_t generic_prot = protection_map[slot];
    pgprot_t arch_prot = arch_vm_get_page_prot(vm_flags);
    pgprot_t combined = __pgprot(pgprot_val(generic_prot) | pgprot_val(arch_prot));

    return arch_filter_pgprot(combined);
}
/*
 * Lookup table used by vm_get_page_prot(). The index is
 * vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED), so entries 0-7 (__Pxxx)
 * have VM_SHARED clear and entries 8-15 (__Sxxx) have it set. The three
 * digits in each name are the exec, write and read bits, in that order.
 */
pgprot_t protection_map[16] __ro_after_init = {
    __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
    __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

/*
 * Entries without VM_SHARED. None of them maps to a writable protection,
 * even when the write bit is set.
 * NOTE(review): presumably so that the first write faults and can be
 * handled as copy-on-write - confirm against the architecture code.
 */
#define __P000  PAGE_NONE
#define __P001  PAGE_READONLY
#define __P010  PAGE_READONLY
#define __P011  PAGE_READONLY
#define __P100  PAGE_EXECONLY
#define __P101  PAGE_READONLY_EXEC
#define __P110  PAGE_READONLY_EXEC
#define __P111  PAGE_READONLY_EXEC

/* Entries with VM_SHARED: a set write bit maps to PAGE_SHARED or PAGE_SHARED_EXEC. */
#define __S000  PAGE_NONE
#define __S001  PAGE_READONLY
#define __S010  PAGE_SHARED
#define __S011  PAGE_SHARED
#define __S100  PAGE_EXECONLY
#define __S101  PAGE_READONLY_EXEC
#define __S110  PAGE_SHARED_EXEC
#define __S111  PAGE_SHARED_EXEC

 看一下有关VMA的操作。

find_vma

/*
 * Return the lowest VMA whose vm_end lies above addr, or NULL if there is
 * no such VMA. The returned VMA does not necessarily contain addr: addr may
 * fall in the gap below vm_start.
 */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
    struct vm_area_struct *found;
    struct rb_node *node;

    /* Fast path: the vmacache may already hold the answer. */
    found = vmacache_find(mm, addr);
    if (likely(found))
        return found;

    /* Slow path: descend the rbtree from its root; found is NULL here. */
    for (node = mm->mm_rb.rb_node; node; ) {
        struct vm_area_struct *cur;

        cur = rb_entry(node, struct vm_area_struct, vm_rb);

        /* This VMA ends at or below addr: only the right subtree can match. */
        if (cur->vm_end <= addr) {
            node = node->rb_right;
            continue;
        }

        /* Candidate; stop if it contains addr, else look for a lower one. */
        found = cur;
        if (cur->vm_start <= addr)
            break;
        node = node->rb_left;
    }

    /* Record a successful lookup in the cache. */
    if (found)
        vmacache_update(addr, found);
    return found;
}

VMA可能已经存在于vmacache中,这个缓存保存在当前进程task_struct的vmacache字段里。

/* Excerpt of struct task_struct; every field except vmacache is elided by the "..." lines. */
struct task_struct {
...
        /* Per-thread vma caching: */
    struct vmacache            vmacache;
...
}

/* Small cache of VMAs, embedded in struct task_struct. */
struct vmacache {
    /* NOTE(review): presumably compared with mm_struct.vmacache_seqnum to detect stale entries - confirm. */
    u64 seqnum;
    struct vm_area_struct *vmas[VMACACHE_SIZE];    /* the cached VMA pointers */
};

/* The vmacache holds 1 << VMACACHE_BITS == 4 entries. */
#define VMACACHE_BITS 2
#define VMACACHE_SIZE (1U << VMACACHE_BITS)

VMACACHE_SIZE为4,即每个task_struct(线程)最多可以缓存4个VMA。

如果vma缓存中没有找到,则去VMA的rbtree中找。找到后还要更新一下vmacache,这利用了时间局部性:最近访问的数据放入缓存中,稍后很可能还会用到。

插入VMA,insert_vm_struct.

/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap_rwsem is taken here.
 */
int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
    struct vm_area_struct *prev;
    struct rb_node **rb_link, *rb_parent;

// 在rbtree中找到要插入的位置 if (find_vma_links(mm, vma->vm_start, vma->vm_end, &prev, &rb_link, &rb_parent)) return -ENOMEM; /* * The vm_pgoff of a purely anonymous vma should be irrelevant * until its first write fault, when page's anon_vma and index * are set. But now set the vm_pgoff it will almost certainly * end up with (unless mremap moves it elsewhere before that * first wfault), so /proc/pid/maps tells a consistent story. * * By setting it to reflect the virtual start address of the * vma, merges and splits can happen in a seamless way, just * using the existing file pgoff checks and manipulations. * Similarly in do_mmap_pgoff and in do_brk. */
//没明白 if (vma_is_anonymous(vma)) { BUG_ON(vma->anon_vma); vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; } //将vma插入vma 链表和rbtree vma_link(mm, vma, prev, rb_link, rb_parent); return 0; }

 

标签:__,struct,mm,vm,long,vma,内存,linux,v5.0
From: https://www.cnblogs.com/banshanjushi/p/17991727

相关文章

  • 2024新版Windows 11要来了!16GB内存需求引热议 只是推荐配置
    最近,TrendForce集邦咨询的一份报告指出,微软已经将AIPC的内存基线设置为16GB。有媒体表示,这也意味着,新版Windows11的AI功能需要至少16GB内存才能运行。消息曝光后引发热议。对此,WC报道称,微软尚未就上述内存需求发表官方评论。16GB内存很可能只是微软的推荐配置,而非最低配置要求。W......
  • C语言代码实现:一个整数存储在内存中的二进制中的1的个数
    e.g.代码实现:一个整数存储在内存中的二进制中的1的个数#define_CRT_SECURE_NO_WARNINGS1#include<stdio.h>intmain(){ intnum=0; intcount=0; printf("统计num的补码中有几个1,请输入num:>"); scanf("%d",&num); //统计num的补码中有几个1 //法一 //while(nu......
  • linux之自旋锁(二千字长文)
    linux之自旋锁常见的各种锁悲观锁:在每次取数据时,总是担心数据会被其他线程修改,所以会在取数据前先加锁(读锁,写锁,行锁等),当其他线程想要访问数据时,被阻塞挂起。总是先行认为数据一定会被修改!所以要先加锁!保证没有人能够访问它!==我们学的同步互斥机制!其实都是属于悲观锁的范畴!==......
  • 《PalWorld/幻兽帕鲁》旧电脑linux搭建服务器
    关键词:PalWorld,Linux,Natapp,内网穿透,幻兽帕鲁 注意文章时效性 最近幻兽帕鲁爆火,steam一上线好友列表一串正在游玩哈哈哈,自己也是蹭一波热度,顺便试用一下natapp以及联系一下写文章的能力,嘿嘿 首先说明下环境Linux:ubuntu22.04.1Natapp:2.4.0#我用......
  • 查看、清空Linux日志【系统日志、软件运行日志】
    一、各种系统日志文件位置123456789101112131415/var/log/messages:记录Linux内核消息及各种应用程序的公共日志信息 /var/log/cron:      记录crond计划任务产生的事件信息 /var/log/dmesg:     记录Linux操作系统在引导过程......
  • 第十四天:Linux启动流程
    一、CentOS6之前版本的启动流程1.1硬件启动POSTPOST:Power-On-Self-Test,加电自检,是BIOS功能的一个主要部分。负责完成对CPU、主板、内存、硬盘子系统、显示子系统、串并行接口、键盘等硬件情况的检测主板的ROM:BIOS,BasicInputandOutputSystem,保存着有关计算机系统最重要......
  • 最近在学Linux系统记录一下
    Linux系统维基百科介绍CentOS(CommunityEnterpriseOperatingSystem)是Linux发行版之一,它是来自于RedHatEnterpriseLinux(RHEL)依照开放源代码规定发布的源代码所编译而成。由于出自同样的源代码,因此有些要求高度稳定性的服务器以CentOS替代商业版的RedHatEnterpriseLinux使......
  • VMMap内存分析工具
    VMMap是一个运行在windows上的内存Profier工具。按F5刷新数据。 虚拟内存PrivateBytes:进程PrivateCommitted(不含共享库部分)的虚拟内存字节数   对应VMMap的Private、windows任务管理器中的【提交大小】,资源管理器中的【提交】Committed(VMMap):进程PrivateCommitted的......
  • delphi 增加32位应用程序内存地址空间,解决“Out of memory”问题
    增加32位应用程序内存地址空间,解决“Outofmemory”问题使32位应用程序能够在64位Windows访问最大4GB内存空间在应用程序的.dpr文件中添加编译指令{$SetPEFlagsIMAGE_FILE_LARGE_ADDRESS_AWARE}。usesWinapi.Windows;{$SETPEFLAGSIMAGE_FILE_LARGE_ADDRESS_AWARE......
  • Python获取内存、CPU利用率,CPU温度
    最近一个项目的需求,在软件上加入硬件实时信息,大概搜索一下,没太好的结果,所以决定自己写一篇我的代码不难发现,都使用了psutil包,为什么呢,别的包我没太关注,纯粹是这台机的环境上有这个包如果大家没有这个包,使用pip工具安装一下即可,目前我还没发现兼容性问题python获取内存利用率......