BSD amd64 内存管理分析 (freebsd9.0) by chishanmingshenhttp://chishanmingshen.blog.chinaunix.net第一部分 基本流程elf64_exec(struct preloaded_file *fp)第一次设置页表:2M*512=1G空间的映射.__exec((void *)VTOP(amd64_tramp), modulep, kernend);amd64_tramp:设置cr3.打开分页机制.此时是32bit模式跳到64bit模式.(之前的entry_hi/entry_lo即btext地址)ljmp$0x8, $VTOP(longmode)locore.Scallhammer_time(其中会调用getmemsize(kmdp, physfree)->pmap_bootstrap()->create_pagetable().)callmi_startup(module init)0.pmap_bootstrap(vm_paddr_t *firstaddr)create_pagetables(firstaddr)virtual_avail = (vm_offset_t) KERNBASE + *firstaddr;#defineKERNBASE KVADDR(KPML4I, KPDPI, 0, 0)/(511,510,0,0)最后1G空间virtual_end = VM_MAX_KERNEL_ADDRESS;#defineVM_MAX_KERNEL_ADDRESSKVADDR(KPML4I, NPDPEPG-1, NKPDE-1, NPTEPG-1)/*511,510,511*/最后留了2M/*kernel_pmap记录PML4表基址的虚拟地址,从物理地址KPML4.*/kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + KPML4phys);1. SYS_INIT's vm_mem_init()vm_set_page_sizevirtual_avail = vm_page_startup(virtual_avail);/*初始化各个物理页面,然后加入到freelist中*/遍历phys_avail[],得到段数:nblocks,总的空间大小:total.vm_pageq_init()/*page queue*/扣去umaslb得到new_end,将umaslb调pmap_map和uma_startup.(支持Dmap,所以不递增vaddr.#definePHYS_TO_DMAP(x)((x) | DMAP_MIN_ADDRESS)#defineDMAP_MIN_ADDRESSKVADDR(DMPML4I, 0, 0, 0)/*510*/计算可用物理页面总数为page_range个.npages = (total - (page_range * sizeof(struct vm_page)) - (end - new_end)) / PAGE_SIZE;vm_page_array指向pmap_map()映射后的vm_page[]空间,共npages个页面phys_avail[biggestone + 1] = new_end;最后一段内存更正为到new_end结束,扣除了vm_page[].vm_page_array_size = page_range;vm_phys_init();/*初始化物理内存分配器*/对所有段调用vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],VM_FREELIST_DEFAULT);更新到vm_phys_segs[]中.vm_phys_free_queues[vm_nfreelists][VM_NFREEPOOL]/*static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;*/遍历phys_avail[],对所有物理页调用vm_phys_add_page(pa).vm_phys_add_page(pa/*vm_paddr 物理地址*/):初始化一个物理页面,同时将它加到free list中.m = vm_phys_paddr_to_vm_page(vm_paddr_t pa):/*找到给定物理地址对应的vm_page*/遍历vm_phys_segs[],找到对应的vm_page结构指针,并返回该指针.return &(seg->first_page[atop(pa - seg->start)]);pmap_page_init(m);vm_phys_free_pages(m, 
0);/*加到freelist中*/return (vaddr);/*最后将可以用的虚拟地址返回,其中vm_page[]的空间已经加进去了.返回的virtual_avail,由外面使用,即普通物理页面空间*/3.vm_object_init();3.1kernel_object_store (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)3.2kmem_object_store (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)3.31个vm_object的zone: obj_zone 4.vm_map_startup();创建1个vm_map的zone:mapzone called by vm_map_create()创建2个vm_map_entry的zone :kmapentzone和mapentzone 根据vm_map生成vm_map_entry时,由vm_map的system_map决定.5.kmem_init(virtual_avail, virtual_end);vm_map_t m;根据给定的物理地址范围,如kernel_pmap, 在虚拟空间min和max内, 生成一个vm_map m./*vm kernel_pmap:-2G->-4M*/m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS/*-2G*/, end/*VM_MAX_KERNEL_ADDRESS*/);vm_map_t result = uma_zalloc(mapzone, M_WAITOK);/*从mapzone中分一个vm_map*/_vm_map_init(result, min, max);给vm_map赋值各个字段(例如,result->pmap = pmap)kernel_map = m;/*内核总空间*/(void) vm_map_insert(m, NULL, (vm_ooffset_t) 0, VM_MIN_KERNEL_ADDRESS, start, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);#defineVM_MAX_KERNEL_ADDRESSKVADDR(KPML4I, NPDPEPG-1, NKPDE-1, NPTEPG-1) -2M#defineVM_MIN_KERNEL_ADDRESSKVADDR(KPML4I, KPDPI, 0, 0) -2G6.pmap_init();需要虚拟内存的模块可以调用了建立一个管理255个页面(放置1M个页表项)的对象kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);对每个vm_page初始化pv_list//m = &vm_page_array[i];每个vm_page都有一个struct pv_entry.这些pv_entry由pvinit带头.pvinit = (struct pv_entry *) kmem_alloc(kernel_map,initial_pvs * sizeof (struct pv_entry));7.vm_pager_init()初始化已知的页pagertab[]第二部分 初始化SYSINIT's KMEM module first kmeminit()vm_kmem_size = 2*物理内存kmem_map = kmem_suballoc(kernel_map, &kmembase, &kmemlimit,vm_kmem_size);申请kmem_map空间 kmem_suballoc(parent/*kernel_map*/, min/*output*/, max/*output*/, size/*要分得的sub映射空间*/): *min = (vm_offset_t) vm_map_min(parent); *max = *min + size; result = vm_map_create(vm_map_pmap(parent), *min, *max); return result;/*将新分配的vm_map返回.即是kmem_map空间.*/kmem_map->system_map = 1;mt_zone = uma_zcreate("mt_zone", sizeof(struct 
malloc_type_internal),静态数组kmemzones[]处理:根据kmemzones[indx].kz_zone申请uma_zone结构体(2^4,...,2^12).uma_zcreate->uma_zalloc_internal返回zone uma_zone_slab->slab_alloc->uma_small_alloc->kmem_malloc SYSINIT's KMEM module second malloc_init(void *data)struct malloc_type_internal *mtip = uma_zalloc(mt_zone, M_WAITOK | M_ZERO);第三部分 申请内存void *malloc(unsigned long size, struct malloc_type *mtp, int flags)1.小内存indx = kmemsize[size >> KMEM_ZSHIFT];/*根据申请内存大小得到index*/zone = kmemzones[indx].kz_zone;/*由index得到对应的zone*/va = uma_zalloc(zone, flags);2.大内存uma_large_malloc(size, flags)slab = uma_zalloc_internal(slabzone, NULL, wait);mem = page_alloc(NULL, size, &flags, wait);slab->us_data = mem;static uma_slab_tslab_zalloc(uma_zone_t zone, int wait)mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait); m = vm_page_alloc()得到vm_page. vm_phys_alloc_pages(pool, order); f1 = vm_phys_free_queues[flind][pool]; m = TAILQ_FIRST(&fl[oind].pl);TAILQ_REMOVE(&fl[oind].pl, m, pageq); vm_phys_split_pages(m, oind, fl, order); buddy算法,找比order稍大的vm_page 1.如果是对象非空, vm_page_insert(m, object, pindex);将vm_map加入到vm 2.如果空对象, 则m->pindex = pindex; return m;pa = m->phys_addr;dump_add_page(pa);va = (void *)PHYS_TO_DMAP(pa);/*内核申请的故放到DMAP*/ /*#ifdef UMA_MD_SMALL_ALLOCkeg->uk_allocf = uma_small_alloc;/************?#elsekeg->uk_allocf = page_alloc;#endif*/ void *uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)ret = ((void *)kmem_malloc(kmem_map, bytes, M_NOWAIT));从指定的vm_map,即kmem_map中申请内存,大小为bytes.return ret;第四部分 页表相关static voidcreate_pagetables(vm_paddr_t *firstaddr){int i;/* Allocate pages */KPTphys = allocpages(firstaddr, NKPT);KPML4phys = allocpages(firstaddr, 1);KPDPphys = allocpages(firstaddr, NKPML4E);KPDphys = allocpages(firstaddr, NKPDPE);ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT;if (ndmpdp < 4) ndmpdp = 4;DMPDPphys = allocpages(firstaddr, NDMPML4E);DMPDphys = allocpages(firstaddr, ndmpdp);dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;/* Fill in the underlying page table pages *//* Read-only 
from zero to physfree *//* XXX not fully used, underneath 2M pages */
for (i = 0; (i << PAGE_SHIFT) < *firstaddr; i++) {/*1->PT表*/
((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT;
((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V | PG_G | PG_U;
}
/* Now map the page tables at their location within PTmap */
for (i = 0; i < NKPT; i++) {/*1->PD表*/
((pd_entry_t *)KPDphys)[i] = KPTphys + (i << PAGE_SHIFT);
((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_U;
}
/* Map from zero to end of allocations under 2M pages *//* This replaces some of the KPTphys entries above */
for (i = 0; (i << PDRSHIFT) < *firstaddr; i++) {/*1->PD表 2M 直接跳过1*/
((pd_entry_t *)KPDphys)[i] = i << PDRSHIFT;
((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G | PG_U;
}
/* And connect up the PD to the PDP */
for (i = 0; i < NKPDPE; i++) {/*1->PDP表*/
((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys + (i << PAGE_SHIFT);
((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U;
}
/*2->DMPD表 2M 直接跳过1*//* Now set up the direct map space using 2MB pages */
for (i = 0; i < NPDEPG * ndmpdp; i++) {
((pd_entry_t *)DMPDphys)[i] = (vm_paddr_t)i << PDRSHIFT;
((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G | PG_U;
}
/*3->DMPDP表*//* And the direct map space's PDP */
for (i = 0; i < ndmpdp; i++) {
((pdp_entry_t *)DMPDPphys)[i] = DMPDphys + (i << PAGE_SHIFT);
((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U;
}
/* And recursively map PML4 to itself in order to get PTmap *//*4->PML4表*/((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys;/*256 递归*/((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U;/* Connect the Direct Map slot up to the PML4 */((pdp_entry_t *)KPML4phys)[DMPML4I] = DMPDPphys;/*510 */((pdp_entry_t *)KPML4phys)[DMPML4I] |= PG_RW | PG_V | PG_U;/* Connect the KVA slot up to the PML4 */((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys;/*511 */((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U;}uma调用和page_alloc:都实际从kmem_map子空间中分配空间.而不是kernel_map.static void *page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait){void *p;/* Returned page */*pflag = UMA_SLAB_KMEM;p = (void *) kmem_malloc(kmem_map, bytes, wait);return (p);}vm_offset_tkmem_malloc(map/*从哪个空间,比如kmem_map*/, size/*申请内存的大小*/, 
flags)/*被uma_small_alloc()和page_alloc()调用.作用就是从kmem_map子空间中申请空间,大小为size.*/addr = vm_map_findspace()来找出addr这个起始虚拟地址.offset = addr - VM_MIN_KERNEL_ADDRESS;vm_object_reference(kmem_object);/*插入新的vm_map_entry_t,代表空间size大小*/vm_map_insert(map, kmem_object, offset, addr, addr + size,VM_PROT_ALL, VM_PROT_ALL, 0);/*逐页调用vm_page_alloc()来为每页生成vm_page结构体*/vm_page_t m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i), pflags);/**************NOW, call pmap_enter!!!***************/pmap_enter(kernel_pmap, addr + i, m, VM_PROT_ALL, 1);return addr;#define OFF_TO_IDX(off) ((vm_pindex_t)(((vm_ooffset_t)(off)) >> PAGE_SHIFT))_vm_object_allocate分配2个对象:kmem_object,kernel_object.空间是一样大的:(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)struct pmap {struct mtxpm_mtx;pml4_entry_t*pm_pml4;/* KVA of level 4 page table */TAILQ_HEAD(,pv_chunk)pm_pvchunk;/* list of mappings in pmap */u_intpm_active;/* active on cpus *//* spare u_int here due to padding */struct pmap_statisticspm_stats;/* pmap statistics */};voidpmap_growkernel(vm_offset_t addr)/* * Address of current and alternate address space page table maps * and directories. * XXX it might be saner to just direct map all of physical memory * into the kernel using 2MB pages. We have enough space to do * it (2^47 bits of KVM, while current max physical addressability * is 2^40 physical bits). Then we can get rid of the evil hole * in the page tables and the evil overlapping. */ 内核可以有空间2^48/2,即内核和userland各一半. 
目前内核仅仅用了2个表项,即1024G,#ifdef _KERNEL#defineaddr_PTmap(KVADDR(PML4PML4I, 0, 0, 0))#defineaddr_PDmap(KVADDR(PML4PML4I, PML4PML4I, 0, 0))#defineaddr_PDPmap(KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, 0))#defineaddr_PML4map(KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I))#defineaddr_PML4pml4e(addr_PML4map + (PML4PML4I * sizeof(pml4_entry_t)))#definePTmap((pt_entry_t *)(addr_PTmap))#definePDmap((pd_entry_t *)(addr_PDmap))#definePDPmap((pd_entry_t *)(addr_PDPmap))#definePML4map((pd_entry_t *)(addr_PML4map))#definePML4pml4e((pd_entry_t *)(addr_PML4pml4e))extern u_int64_t KPML4phys;/* physical address of kernel level 4 */#endif在内核访问va的方法是通过PTmapPMAP_INLINE pt_entry_t *vtopte(vm_offset_t va){u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);return (PTmap + ((va >> PAGE_SHIFT) & mask));}第五部分待续(by chishanmingshen)。。。 09-11 00:00