From bfaeb8db2feff86ed33ad9737add56b36ea44aa5 Mon Sep 17 00:00:00 2001 From: prifri Date: Sat, 21 May 2022 22:00:44 +0900 Subject: [PATCH] 52th --- arch/arm64/kernel/sys.c | 37 +++ include/asm-generic/tlb.h | 5 + include/linux/mm.h | 13 + include/linux/mm_types.h | 12 + include/linux/mman.h | 22 ++ include/linux/sched/signal.h | 4 + include/uapi/asm-generic/mman-common.h | 15 ++ include/uapi/asm-generic/resource.h | 4 + mm/internal.h | 5 + mm/memory.c | 44 +++- mm/mempolicy.c | 16 ++ mm/mmap.c | 337 ++++++++++++++++++++++++- mm/util.c | 4 + security/integrity/ima/ima_main.c | 5 + security/security.c | 10 + 15 files changed, 531 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index d5ffaaab31a7d1..ed17c7ac20e74f 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -18,6 +18,43 @@ #include #include +/* + * IAMROOT, 2022.05.21: + * - arm64 sys_mmap + * + * ex) user에서 malloc을 사용했을때 strace 사용결과 + * sh) strace ./test + * == strace + * mmap(NULL, 1000001536, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb4af7ee000 + * + * == smaps log(heap 측 부분) + * 7fb4af7ee000-7fb4eb19b000 rw-p 00000000 00:00 0 + * Size: 976564 kB + * KernelPageSize: 4 kB + * MMUPageSize: 4 kB + * Rss: 976564 kB + * Pss: 976564 kB + * Shared_Clean: 0 kB + * Shared_Dirty: 0 kB + * Private_Clean: 0 kB + * Private_Dirty: 976564 kB + * Referenced: 976564 kB + * Anonymous: 976564 kB + * LazyFree: 0 kB + * AnonHugePages: 0 kB + * ShmemPmdMapped: 0 kB + * FilePmdMapped: 0 kB + * Shared_Hugetlb: 0 kB + * Private_Hugetlb: 0 kB + * Swap: 0 kB + * SwapPss: 0 kB + * Locked: 0 kB + * THPeligible: 0 + * VmFlags: rd wr mr(may read) mw(may write) me(may execute) ac sd + * + * read, write 권한이 있는 vma라는것을 알수있다. + * malloc이 mmap으로 메모리를 얻는 경우(위 예제처럼 큰 크기 요청)에는 위와같은 prot, flag를 사용한다. 작은 크기 요청은 brk로 heap을 확장하므로 mmap이 호출되지 않을 수 있다. 
+ */ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, off) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 2c68a545ffa7d2..c7eb4a0d855bd6 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -498,6 +498,11 @@ static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct * #endif #ifndef tlb_end_vma + +/* + * IAMROOT, 2022.05.21: + * - tlb flush + */ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm) diff --git a/include/linux/mm.h b/include/linux/mm.h index 771445153c27e8..a782ddc7723bc2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2842,6 +2842,11 @@ extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned lon * Returns: The first VMA within the provided range, %NULL otherwise. Assumes * start_addr < end_addr. */ + +/* + * IAMROOT, 2022.05.21: + * - 겹치는 vma를 찾아온다. + */ static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, unsigned long start_addr, @@ -2872,6 +2877,10 @@ struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) return vma; } +/* + * IAMROOT, 2022.05.21: + * - @vma가 아래로 내려가는 stack인 경우 보정. + */ static inline unsigned long vm_start_gap(struct vm_area_struct *vma) { unsigned long vm_start = vma->vm_start; @@ -2884,6 +2893,10 @@ static inline unsigned long vm_start_gap(struct vm_area_struct *vma) return vm_start; } +/* + * IAMROOT, 2022.05.21: + * - @vma가 위로 올라가는 stack인 경우 보정. + */ static inline unsigned long vm_end_gap(struct vm_area_struct *vma) { unsigned long vm_end = vma->vm_end; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6b18fa566f15d4..685cdb6268f182 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -434,6 +434,13 @@ struct vm_area_struct { * VMAs below us in the VMA rbtree and its ->vm_prev. 
This helps * get_unmapped_area find a free area of the right size. */ +/* + * IAMROOT, 2022.05.21: + * - 이 VMA의 왼쪽에 있는 최대 사용 가능한 메모리 간격(바이트)입니다. + * 이 VMA와 vma->vm_prev 사이 또는 VMArbtree에서 아래 VMA 중 하나와 + * 해당 ->vm_prev 사이입니다. 이렇게 하면 get_unmapped_area가 올바른 + * 크기의 빈 영역을 찾을 수 있습니다. + */ unsigned long rb_subtree_gap; /* Second cache line starts here. */ @@ -594,6 +601,11 @@ struct mm_struct { unsigned long total_vm; /* Total pages mapped */ unsigned long locked_vm; /* Pages that have PG_mlocked set */ atomic64_t pinned_vm; /* Refcount permanently increased */ +/* + * IAMROOT, 2022.05.21: + * - data공간이라 하면 주석과 같이 not stack, not shared를 의미한다. + * (is_data_mapping() 참고) + */ unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ unsigned long stack_vm; /* VM_STACK */ diff --git a/include/linux/mman.h b/include/linux/mman.h index b66e91b8176cdc..3837d952a6b10e 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -130,6 +130,10 @@ static inline bool arch_validate_flags(unsigned long flags) * but this version is faster. * ("bit1" and "bit2" must be single bits) */ +/* + * IAMROOT, 2022.05.21: + * - bit1이 있으면 bit2 return 이라는뜻. + */ #define _calc_vm_trans(x, bit1, bit2) \ ((!(bit1) || !(bit2)) ? 0 : \ ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \ @@ -138,6 +142,16 @@ static inline bool arch_validate_flags(unsigned long flags) /* * Combine the mmap "prot" argument into "vm_flags" used internally. */ +/* + * IAMROOT, 2022.05.21: + * - prot flag를 vm flag로 변환하고 arch 에 따른 flag 추가가 있으면 + * 추가 한다. + * - PROT_READ -> VM_READ + * PROT_WRITE -> VM_WRITE + * PROT_EXEC -> VM_EXEC + * PROT_BTI -> VM_ARM64_BTI + * PROT_MTE -> VM_MTE + */ static inline unsigned long calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { @@ -150,6 +164,14 @@ calc_vm_prot_bits(unsigned long prot, unsigned long pkey) /* * Combine the mmap "flags" argument into "vm_flags" used internally. 
*/ +/* + * IAMROOT, 2022.05.21: + * - map flags를 vm flags로 변환한다. + * - MAP_GROWSDOWN -> VM_GROWSDOWN + * MAP_LOCKED -> VM_LOCKED + * MAP_SYNC -> VM_SYNC + * MAP_ANONYMOUS -> VM_MTE_ALLOWED + */ static inline unsigned long calc_vm_flag_bits(unsigned long flags) { diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index b21041b8a2926f..79fd1b9a2ca5c6 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -751,6 +751,10 @@ static inline unsigned long task_rlimit_max(const struct task_struct *task, return READ_ONCE(task->signal->rlim[limit].rlim_max); } +/* + * IAMROOT, 2022.05.21: + * - current task의 @limit에 해당하는 제한 값을 읽어온다. + */ static inline unsigned long rlimit(unsigned int limit) { return task_rlimit(current, limit); diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 1567a3294c3dbf..7964669e1fa539 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -19,12 +19,27 @@ /* 0x01 - 0x03 are defined in linux/mman.h */ #define MAP_TYPE 0x0f /* Mask for type of mapping */ + +/* + * IAMROOT, 2022.05.21: + * - 가상공간의 특정영역에 고정시키고 싶을때 사용한다. + */ #define MAP_FIXED 0x10 /* Interpret addr exactly */ #define MAP_ANONYMOUS 0x20 /* don't use a file */ /* 0x0100 - 0x4000 flags are defined in asm-generic/mman.h */ + +/* + * IAMROOT, 2022.05.21: + * - mapping을 바로 하고싶을때 사용한다. + */ #define MAP_POPULATE 0x008000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x010000 /* do not block on IO */ + +/* + * IAMROOT, 2022.05.21: + * - user thread stack을 만들때(ex. pthread) MAP_STACK을 사용해서 sys_mmap을 호출하게 될것이다. (brk는 stack이 아니라 heap 확장에 사용된다.) 
+ */ #define MAP_STACK 0x020000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x040000 /* create a huge page mapping */ #define MAP_SYNC 0x080000 /* perform synchronous page faults for the mapping */ diff --git a/include/uapi/asm-generic/resource.h b/include/uapi/asm-generic/resource.h index f12db7a0da643b..e854ec0bb9e7a2 100644 --- a/include/uapi/asm-generic/resource.h +++ b/include/uapi/asm-generic/resource.h @@ -13,6 +13,10 @@ * then it defines them prior including asm-generic/resource.h. ) */ +/* + * IAMROOT, 2022.05.21: + * - resource 제한에 대한것들. + */ #define RLIMIT_CPU 0 /* CPU time in sec */ #define RLIMIT_FSIZE 1 /* Maximum filesize */ #define RLIMIT_DATA 2 /* max data size */ diff --git a/mm/internal.h b/mm/internal.h index 41a80aa78fdb52..50d666dccaee04 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -467,6 +467,11 @@ static inline bool is_stack_mapping(vm_flags_t flags) /* * Data area - private, writable, not stack */ +/* + * IAMROOT, 2022.05.21: + * stack을 제외한 data 공간. 
+ * Data area - private, writable, not stack + */ static inline bool is_data_mapping(vm_flags_t flags) { return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE; diff --git a/mm/memory.c b/mm/memory.c index a894b8b935006d..362aee2c067e7b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1539,6 +1539,10 @@ static inline unsigned long zap_p4d_range(struct mmu_gather *tlb, return addr; } +/* + * IAMROOT, 2022.05.21: + * - 해당 영역의 정규 mapping을 unmap + */ void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, @@ -1556,10 +1560,17 @@ void unmap_page_range(struct mmu_gather *tlb, continue; next = zap_p4d_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); +/* + * IAMROOT, 2022.05.21: + * - @vma에 대한 tlb flush + */ tlb_end_vma(tlb, vma); } - +/* + * IAMROOT, 2022.05.21: + * - @vma unmap + */ static void unmap_single_vma(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr, @@ -1621,6 +1632,33 @@ static void unmap_single_vma(struct mmu_gather *tlb, * ensure that any thus-far unmapped pages are flushed before unmap_vmas() * drops the lock and schedules. */ + +/* + * IAMROOT, 2022.05.21: + * + * ^ ---- vm_end + * | <- vma4 + * | --- end + * | ^ <- vma3->next = vma4 + * v ---- vm_start | + * | + * ... .. + * + * ^ ---- vm_end | + * | | + * | | <- vma2->next = vma3 + * v ---- vm_start | + * | + * ... .. + * | + * ^ ---- vm_end | + * | v <- vma->next = vma2 + * | --- start + * | <-- prev + * v ---- vm_start + * + * vma부터 ~ vma4까지 unmap될것이다.(unmap_vmas()) + */ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr) @@ -3717,6 +3755,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_lock still held, but pte unmapped and unlocked. 
*/ +/* + * IAMROOT, 2022.05.21: + * - anon page fault시 이 함수로 진입하게 될것이다. + */ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index f6374420b6044a..d729ac325cf46b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1840,6 +1840,10 @@ nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy) * policy_node() is always coupled with policy_nodemask(), which * secures the nodemask limit for 'bind' and 'prefer-many' policy. */ +/* + * IAMROOT, 2022.05.21: + * - @policy->mode 가 MPOL_PREFERRED인 경우 first node를 return한다. + */ static int policy_node(gfp_t gfp, struct mempolicy *policy, int nd) { if (policy->mode == MPOL_PREFERRED) { @@ -2193,6 +2197,14 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order, * * Return: The page on success or NULL if allocation fails. */ +/* + * IAMROOT, 2022.05.21: + * - numa Policy는 주로 app을 위한 정책이다. policy에 따라 alloc_pages를 호출한다. + * + * --- + * - kernel같은 경우은 local이 preferred 인 개념이되고(나머지는 cost순), + * app같은 경우엔 사용자가 설정한 node policy에 따르게 된다. + */ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr, int node, bool hugepage) { @@ -2218,6 +2230,10 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, goto out; } +/* + * IAMROOT, 2022.05.21: + * - PASS + */ if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) { int hpage_node = node; diff --git a/mm/mmap.c b/mm/mmap.c index 88dcc5c252255f..d9607777c28ff4 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -175,6 +175,10 @@ void unlink_file_vma(struct vm_area_struct *vma) /* * Close a vm structure and free it, returning the next. 
*/ +/* + * IAMROOT, 2022.05.21: + * - @vma를 해제하고 return next vma + */ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) { struct vm_area_struct *next = vma->vm_next; @@ -526,6 +530,38 @@ anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma) anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root); } +/* + * IAMROOT, 2022.05.21: + * @pprev[out] 제일 마지막에 right 이동했을때의 parent + * @rb_link[out] vma가 link될 leaf node가 연결될 parent의 left or right pointer. + * @rb_parent[out] rb_link의 parent + * + * - @addr ~ end까지 겹치는 vma가 있다면 error. 그렇지 않다면 비어있는 + * out 인자를 완성하고 return 0. + * + * --- ()를 rb_link로 찾았다고 했을때. + * 1) + * + * A + * / \ + * B C + * / \ + * D () + * () -> rb_link + * B -> rb_parent, pprev + * + * 2) + * + * A + * / \ + * B C + * / /\ + * D () .. + * + * () -> rb_link + * C -> rb_parent + * A -> pprev + */ static int find_vma_links(struct mm_struct *mm, unsigned long addr, unsigned long end, struct vm_area_struct **pprev, struct rb_node ***rb_link, struct rb_node **rb_parent) @@ -542,6 +578,10 @@ static int find_vma_links(struct mm_struct *mm, unsigned long addr, __rb_parent = *__rb_link; vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb); +/* + * IAMROOT, 2022.05.21: + * - 요청 영역과 겹치는 vma가 있으면 error. + */ if (vma_tmp->vm_end > addr) { /* Fail if an existing vma overlaps the area */ if (vma_tmp->vm_start < end) @@ -593,12 +633,21 @@ static inline struct vm_area_struct *vma_next(struct mm_struct *mm, * * Returns: -ENOMEM on munmap failure or 0 on success. */ +/* + * IAMROOT, 2022.05.21: + * - start에서 len만큼의 공간에 대한 vma를 할당할수있는지 확인한다. + * 영역에 vma가 존재하면 모두 unmap후 제거한다. + */ static inline int munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len, struct vm_area_struct **pprev, struct rb_node ***link, struct rb_node **parent, struct list_head *uf) { - +/* + * IAMROOT, 2022.05.21: + * - vma가 겹친경우 do_munmap을 시도한다. 겹친 vma가 여러개일 경우 여러번 + * 시도될수있다. 
+ */ while (find_vma_links(mm, start, start + len, pprev, link, parent)) if (do_munmap(mm, start, len, uf)) return -ENOMEM; @@ -1467,6 +1516,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr, * to. we assume access permissions have been handled by the open * of the memory object, so we don't do any here. */ +/* + * IAMROOT, 2022.05.21: + * - prot, flags를 vm_flags로 변환하여 추가한다. + */ vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; @@ -1547,6 +1600,11 @@ unsigned long do_mmap(struct file *file, unsigned long addr, pgoff = 0; vm_flags |= VM_SHARED | VM_MAYSHARE; break; +/* + * IAMROOT, 2022.05.21: + * - anon page + * MAP_PRIVATE|MAP_ANONYMOUS + */ case MAP_PRIVATE: /* * Set pgoff according to addr for anon_vma. @@ -1580,6 +1638,12 @@ unsigned long do_mmap(struct file *file, unsigned long addr, return addr; } + +/* + * IAMROOT, 2022.05.21: + * - user에서 호출한 malloc인 경우 if문 해당사항이 없어(file이 아닌경우) + * vm_mmap_pgoff를 바로 호출할것이다. + */ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) @@ -1738,6 +1802,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr, return -ENOMEM; } +/* + * IAMROOT, 2022.05.21: + * - 영역내에 있는 vma들을 unmap후 해제 한다. + */ /* Clear old maps, set up prev, rb_link, rb_parent, and uf */ if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf)) return -ENOMEM; @@ -1889,6 +1957,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr, return error; } +/* + * IAMROOT, 2022.05.21: + * @return 찾은 빈공간의 align적용된 시작 주소. or error + * - 이진탐색으로 low_limit ~ high_limit 범위내에 vma들 사이에서 + * gap(prev vma ~ current vma 사이의 공간)을 찾는다. 
+ */ static unsigned long unmapped_area(struct vm_unmapped_area_info *info) { /* @@ -1904,6 +1978,12 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info) unsigned long length, low_limit, high_limit, gap_start, gap_end; /* Adjust search length to account for worst case alignment overhead */ + +/* + * IAMROOT, 2022.05.21: + * - info->align_mask + * length보다 크게(align에 맞춰서) 할당하겠다는것. + */ length = info->length + info->align_mask; if (length < info->length) return -ENOMEM; @@ -1924,9 +2004,31 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info) if (vma->rb_subtree_gap < length) goto check_highest; +/* + * IAMROOT, 2022.05.21: + * + * A + * / \ + * B C + * / \ / \ + * D E F G + * + * 1. B,F의 rb_subtree_gap < length, 나머지는 true + * 0x00 ULONG_MAX + * D B E A F C G + * 1 2 + * ^found + * A -> B subtree false -> 1(E_A check) -> C -> F subtree false -> 2(F_C check) + */ while (true) { /* Visit left subtree if it looks promising */ gap_end = vm_start_gap(vma); +/* + * IAMROOT, 2022.05.21: + * - left subtree_gap의 공간이 있으면 left로 계속 내려간다. + * 단 low_limit 밑으로 내려가지 못한다. + * + */ if (gap_end >= low_limit && vma->vm_rb.rb_left) { struct vm_area_struct *left = rb_entry(vma->vm_rb.rb_left, @@ -1942,11 +2044,21 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info) /* Check if current node has a suitable gap */ if (gap_start > high_limit) return -ENOMEM; +/* + * IAMROOT, 2022.05.21: + * - current vma와 prev vma 사이에 gap이 충분한지 검사한다. + * 공간이 충분하면 found. + */ if (gap_end >= low_limit && gap_end > gap_start && gap_end - gap_start >= length) goto found; /* Visit right subtree if it looks promising */ +/* + * IAMROOT, 2022.05.21: + * - left에 gap이 없었다는것은 right tree쪽에 gap이 있을수도있다. + * right 이동이 가능하면 right로 이동한다. 
+ */ if (vma->vm_rb.rb_right) { struct vm_area_struct *right = rb_entry(vma->vm_rb.rb_right, @@ -1957,6 +2069,11 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info) } } +/* + * IAMROOT, 2022.05.21: + * - left, right에서 gap을 못찾앗으므로 parent로 올라가고 + * check_current로 goto한다. 올라간 parent 기준으로 검사를 수행한다. + */ /* Go back up the rbtree to find next candidate node */ while (true) { struct rb_node *prev = &vma->vm_rb; @@ -1974,8 +2091,17 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info) check_highest: /* Check highest gap, which does not precede any rbtree node */ +/* + * IAMROOT, 2022.05.21: + * - 범위내에서 못찾았다면 highest로 cache되잇는걸 start로 잡는다. + * 가장 높은 주소에 있는 빈공간이니 end도 끝점이될것이다. + */ gap_start = mm->highest_vm_end; gap_end = ULONG_MAX; /* Only for VM_BUG_ON below */ +/* + * IAMROOT, 2022.05.21: + * - 가장 끝에 있는데 high_limit을 넘어서면 범위초과이므로 fail + */ if (gap_start > high_limit) return -ENOMEM; @@ -2100,6 +2226,11 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) * - is at least the desired size. * - satisfies (begin_addr & align_mask) == (align_offset & align_mask) */ +/* + * IAMROOT, 2022.05.21: + * @info에서 요청한 범위에서 vma를 할당할수있는 빈 공간 시작주소를 + * 얻어온다. + */ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info) { unsigned long addr; @@ -2133,6 +2264,10 @@ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info) * This function "knows" that -ENOMEM has the bits set. */ #ifndef HAVE_ARCH_UNMAPPED_AREA +/* + * IAMROOT, 2022.05.21: + * - 요청 인자에 따라 vma를 할당할 addr를 구해온다. + */ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) @@ -2145,12 +2280,46 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, if (len > mmap_end - mmap_min_addr) return -ENOMEM; +/* + * IAMROOT, 2022.05.21: + * - MAP_FIXED면 검색도 안하고 그냥 들어온 인자로 결정된다. 
+ */ if (flags & MAP_FIXED) return addr; if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma_prev(mm, addr, &prev); +/* + * IAMROOT, 2022.05.21: + * - find_vma_prev를 통해서 즉시 vma가 할당이 안된 빈 공간을 찾을수있는 + * 지 확인한다. + * - addr은 유효범위 이지만 vma를 못찾앗거나 찾아온 vma가 vm범위를 넘고. + * prev가 없거나 addr이 vm_end_gap 이상이면 return addr. + * + * --- + * + * addr(vma안에 있을수있고, prev ~ vma 사이에 있을수도있음) + * v + * +-------+ + * v v + * | prev | | vma | + * ^----^ + * 이 공간안에 len이 충분한지 확인한다. + * + * --- + * - mmap_end - len >= addr && addr >= mmap_min_addr + * mmap공간내에 addr이 유효한지만 판단. + * + * - prev, vma가 둘다 있다고 가정하면, prev, vma 사이에 gap이 충분한지 + * 확인한다. + * + * - prev == NULL, vma == NULL + * memory에 vma에 하나도 없는경우 이므로 어디에나 할당가능 + * + * - prev != NULL, vma == NULL + * addr측에 존재하는 vma가 없다 + */ if (mmap_end - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vm_start_gap(vma)) && (!prev || addr >= vm_end_gap(prev))) @@ -2225,6 +2394,10 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, } #endif +/* + * IAMROOT, 2022.05.21: + * - 인자에 따라 vma를 할당할 주소를 return한다. + */ unsigned long get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) @@ -2240,6 +2413,13 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, if (len > TASK_SIZE) return -ENOMEM; +/* + * IAMROOT, 2022.05.21: + * - file인 경우 f_op에 되있는걸 사용하고, shared인경우엔 shmem_get_unmapped_area를 + * 사용한다. 그 외의 경우엔 current->mm->get_unmapped_area를 사용하며, + * 아마 함수는 arch_get_unmapped_area일 것이다. + * (setup_new_exec(), arch_pick_mmap_layout() 참고) + */ get_area = current->mm->get_unmapped_area; if (file) { if (file->f_op->get_unmapped_area) @@ -2270,6 +2450,26 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, EXPORT_SYMBOL(get_unmapped_area); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ +/* + * IAMROOT, 2022.05.21: + * @return 1. cache에서 바로 찾아지면 경우 + * 2. rbtree에서 찾아진경우 + * 3. 
rbtree에서 못찾은 경우 + * 3-1 addr보다 큰 vm_end를 가진것들중에서 가장 addr에 가까운 vm_end값을 + * 가진 vma + * 3-2 addr보다 큰 vm_end를 가진 vma가 없으면 NULL + * + * - cache에서 먼저 찾아보고 아니면 rb에서 이진탐색한다. + * + * --- + * A + * / \ + * B C + * + * .. | B | .. | A | .. | C | + * ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ + * B B B A A A A C C C C NULL <-return + */ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { struct rb_node *rb_node; @@ -2289,7 +2489,16 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb); if (tmp->vm_end > addr) { +/* + * IAMROOT, 2022.05.21: + * - addr보다 큰 vm_end를 가진 것들중에서 가장 addr에 가까운 vm_end값을 가진 + * vma를 고른다. + */ vma = tmp; +/* + * IAMROOT, 2022.05.21: + * - start <= addr < end 이경우. 즉 addr이 vma에 포함되서 찾아짐. + */ if (tmp->vm_start <= addr) break; rb_node = rb_node->rb_left; @@ -2307,6 +2516,25 @@ EXPORT_SYMBOL(find_vma); /* * Same as find_vma, but also return a pointer to the previous VMA in *pprev. */ +/* + * IAMROOT, 2022.05.21: + * - 1. vma가 찾아진경우 + * return vma (찾아진 vma) + * prev = vma의 prev. + * 2. vma가 못찾아진경우 + * return vma == NULL + * prev = last vma or NULL + * + * --- + * A + * / \ + * B C + * + * .. | B | .. | A | .. | C | + * ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ + * B B B A A A A C C C C NULL <- vma + * N N N B B B B A A A A C <- pprev + */ struct vm_area_struct * find_vma_prev(struct mm_struct *mm, unsigned long addr, struct vm_area_struct **pprev) @@ -2315,10 +2543,19 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr, vma = find_vma(mm, addr); if (vma) { +/* + * IAMROOT, 2022.05.21: + * - vma prev를 저장. + */ *pprev = vma->vm_prev; } else { struct rb_node *rb_node = rb_last(&mm->mm_rb); +/* + * IAMROOT, 2022.05.21: + * - last가 있으면 prev는 last를 가져온다. return은 NULL이 된다. + */ + *pprev = rb_node ? 
rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL; } return vma; @@ -2540,6 +2777,10 @@ int expand_downwards(struct vm_area_struct *vma, } /* enforced gap between the expanding stack and other mappings. */ +/* + * IAMROOT, 2022.05.21: + * - 1MB + */ unsigned long stack_guard_gap = 256UL< */ + +/* + * IAMROOT, 2022.05.21: + * - @start에서 len만큼의 영역에 있는 vma들을 해제한다.(split, unmap, detach, free) + */ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, bool downgrade) { @@ -2847,10 +3093,89 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len, /* Does it split the last one? */ last = find_vma(mm, end); if (last && end > last->vm_start) { + +/* + * IAMROOT, 2022.05.21: + * + * 1) vma 한개에 영역이 다 있는 경우 + * + * ----------- vm_end + * ^ + * | ^ ---- end + * | | + * |vma | + * | v ---- start + * v + * ----------- vm_start + * + * 이런경우 split가 두번되야될것이다. + * + * 2) 영역에 vma가 여러 개있는 경우 + * + * ^ ---- vm_end + * | + * | --- end + * | ^ + * v ---- vm_start | + * | + * ... .. + * + * ^ ---- vm_end | + * | | + * | | + * v ---- vm_start | + * | + * ... .. + * | + * ^ ---- vm_end | + * | v + * | --- start + * | + * v ---- vm_start + * + * 이경우 start와 겹치는 vma와 end와 겹치는 vma가 각각 splite 될것이다. + */ int error = __split_vma(mm, last, end, 1); if (error) return error; } + +/* + * IAMROOT, 2022.05.21: + * + * ----------- vm_end + * ^ + * | ^ ---- end + * | | | + * |vma | | <-- vma + * | v ---- start + * v | <-- prev + * ----------- vm_start + * + * ---- + * ^ ---- vm_end + * | <- vma4 + * | --- end + * | ^ <- vma3->next = vma4 + * v ---- vm_start | + * | + * ... .. + * + * ^ ---- vm_end | + * | | + * | | <- vma2->next = vma3 + * v ---- vm_start | + * | + * ... .. 
+ * | + * ^ ---- vm_end | + * | v <- vma->next = vma2 + * | --- start + * | <-- prev + * v ---- vm_start + * + * vma부터 ~ vma4까지 unmap될것이다.(unmap_vmas()) + */ vma = vma_next(mm, prev); if (unlikely(uf)) { @@ -2874,6 +3199,10 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len, if (mm->locked_vm) unlock_range(vma, end); +/* + * IAMROOT, 2022.05.21: + * - 영역 내에 있는 vma만 list에서 제외한다. + */ /* Detach vmas from rbtree */ if (!detach_vmas_to_be_unmapped(mm, vma, prev, end)) downgrade = false; @@ -3305,6 +3634,12 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, * Return true if the calling process may expand its vm space by the passed * number of pages */ +/* + * IAMROOT, 2022.05.21: + * - 가상공간이나 data limit을 초과한 경우 false. + * - 단 data limit의 경우 , valgrind를 사용중이거나 ignore 되있는 경우 그냥 + * true로 한다. + */ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages) { if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT) diff --git a/mm/util.c b/mm/util.c index 463719f072b468..4e11aca574cb2b 100644 --- a/mm/util.c +++ b/mm/util.c @@ -520,6 +520,10 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, &uf); mmap_write_unlock(mm); userfaultfd_unmap_complete(mm, &uf); +/* + * IAMROOT, 2022.05.21: + * - populate값이 있으면 mapping을 한다. + */ if (populate) mm_populate(ret, populate); } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 465865412100bf..6827fdf6c1234d 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -403,6 +403,11 @@ static int process_measurement(struct file *file, const struct cred *cred, * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ + +/* + * IAMROOT, 2022.05.21: + * - sys_mmap은 mmap이나 malloc을 통해서 들어온다. 
+ */ int ima_file_mmap(struct file *file, unsigned long prot) { u32 secid; diff --git a/security/security.c b/security/security.c index 9ffa9e9c5c554a..9c7ee2a55478ee 100644 --- a/security/security.c +++ b/security/security.c @@ -731,6 +731,11 @@ static int lsm_superblock_alloc(struct super_block *sb) P->hook.FUNC(__VA_ARGS__); \ } while (0) + +/* + * IAMROOT, 2022.05.21: + * - 등록된 security hook이 있으면 호출한다. + */ #define call_int_hook(FUNC, IRC, ...) ({ \ int RC = IRC; \ do { \ @@ -1574,6 +1579,11 @@ static inline unsigned long mmap_prot(struct file *file, unsigned long prot) return prot; } +/* + * IAMROOT, 2022.05.21: + * LSM(Linux Security Modules) + * - memory 할당시 security callback. + */ int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) {