/* * We also sanity check to catch abuse of atomic reserves being used by * callers that are not in atomic context. */ ///检查__GFP_ATOMIC,用在中断上下文,优先级较高,检查是否滥用,这两个标签是无法同时使用的,如果同时使用,则发出警告并去除GFP_ATOMIC if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) == (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM))) gfp_mask &= ~__GFP_ATOMIC;
/* * The fast path uses conservative alloc_flags to succeed only until * kswapd needs to be woken up, and to avoid the cost of setting up * alloc_flags precisely. So we do that now. */ ///因为低水位分配失败,才走到这里,修改标志为最低水位线,后面重新尝试分配 alloc_flags = gfp_to_alloc_flags(gfp_mask);
/* * We need to recalculate the starting point for the zonelist iterator * because we might have used different nodemask in the fast path, or * there was a cpuset modification and we are retrying - otherwise we * could end up iterating over non-eligible zones endlessly. */ ///重新选择优先zone ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, ac->highest_zoneidx, ac->nodemask); if (!ac->preferred_zoneref->zone) goto nopage;
///唤醒kswapd内核线程,回收页面 if (alloc_flags & ALLOC_KSWAPD) wake_all_kswapds(order, gfp_mask, ac);
/* * The adjusted alloc_flags might result in immediate success, so try * that first */ ///kswapd回收后,再次尝试直接内存分配 page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac); if (page) goto got_pg;
/* * For costly allocations, try direct compaction first, as it's likely * that we have enough base pages and don't need to reclaim. For non- * movable high-order allocations, do that as well, as compaction will * try prevent permanent fragmentation by migrating from blocks of the * same migratetype. * Don't try this for allocations that are allowed to ignore * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen. */ if (can_direct_reclaim && (costly_order || (order > 0 && ac->migratetype != MIGRATE_MOVABLE)) && !gfp_pfmemalloc_allowed(gfp_mask)) { ///进行页面规整,异步模式,触发条件: ///允许直接回收 ///高成本,大order页分配需求 ///不能访问系统预留内存 page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac, INIT_COMPACT_PRIORITY, &compact_result); if (page) goto got_pg;
/* * Checks for costly allocations with __GFP_NORETRY, which * includes some THP page fault allocations */ if (costly_order && (gfp_mask & __GFP_NORETRY)) { /* * If allocating entire pageblock(s) and compaction * failed because all zones are below low watermarks * or is prohibited because it recently failed at this * order, fail immediately unless the allocator has * requested compaction and reclaim retry. * * Reclaim is * - potentially very expensive because zones are far * below their low watermarks or this is part of very * bursty high order allocations, * - not guaranteed to help because isolate_freepages() * may not iterate over freed pages as part of its * linear scan, and * - unlikely to make entire pageblocks free on its * own. */ if (compact_result == COMPACT_SKIPPED || compact_result == COMPACT_DEFERRED) goto nopage;
/* * Looks like reclaim/compaction is worth trying, but * sync compaction could be very expensive, so keep * using async compaction. */ compact_priority = INIT_COMPACT_PRIORITY; } }
retry: /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */ if (alloc_flags & ALLOC_KSWAPD) ///唤醒kswapd内核线程 wake_all_kswapds(order, gfp_mask, ac);
///修改分配属性,可以分配系统预留内存 reserve_flags = __gfp_pfmemalloc_flags(gfp_mask); if (reserve_flags) alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, reserve_flags);
/* * Reset the nodemask and zonelist iterators if memory policies can be * ignored. These allocations are high priority and system rather than * user oriented. */ if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) { ac->nodemask = NULL; ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, ac->highest_zoneidx, ac->nodemask); }
///kswapd工作后,再尝试分配 /* Attempt with potentially adjusted zonelist and alloc_flags */ page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac); if (page) goto got_pg;
/* Caller is not willing to reclaim, we can't balance anything */ if (!can_direct_reclaim) goto nopage;
/* Avoid recursion of direct reclaim */ if (current->flags & PF_MEMALLOC) goto nopage;
///触发直接内存回收 /* Try direct reclaim and then allocating */ page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac, &did_some_progress); if (page) goto got_pg;
///再次尝试页面规整,然后重新分配内存 /* Try direct compaction and then allocating */ page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac, compact_priority, &compact_result); if (page) goto got_pg;
/* Do not loop if specifically requested */ if (gfp_mask & __GFP_NORETRY) goto nopage;
/* * Do not retry costly high order allocations unless they are * __GFP_RETRY_MAYFAIL */ if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) goto nopage;
/* * It doesn't make any sense to retry for the compaction if the order-0 * reclaim is not able to make any progress because the current * implementation of the compaction depends on the sufficient amount * of free memory (see __compaction_suitable) */ ///是否重新内存规整 if (did_some_progress > 0 && should_compact_retry(ac, order, alloc_flags, compact_result, &compact_priority, &compaction_retries)) goto retry;
/* Deal with possible cpuset update races before we start OOM killing */ if (check_retry_cpuset(cpuset_mems_cookie, ac)) goto retry_cpuset;
///回收,页面规整都失败,尝试启动oom /* Reclaim has failed us, start killing things */ page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress); if (page) goto got_pg;
/* Avoid allocations with no watermarks from looping endlessly */ if (tsk_is_oom_victim(current) && (alloc_flags & ALLOC_OOM || (gfp_mask & __GFP_NOMEMALLOC))) goto nopage;
/* Retry as long as the OOM killer is making progress */ if (did_some_progress) { no_progress_loops = 0; goto retry; }
nopage: /* Deal with possible cpuset update races before we fail */ if (check_retry_cpuset(cpuset_mems_cookie, ac)) goto retry_cpuset;
/* * Make sure that __GFP_NOFAIL request doesn't leak out and make sure * we always retry */ if (gfp_mask & __GFP_NOFAIL) { /* * All existing users of the __GFP_NOFAIL are blockable, so warn * of any new users that actually require GFP_NOWAIT */ if (WARN_ON_ONCE(!can_direct_reclaim)) goto fail;
/* * PF_MEMALLOC request from this context is rather bizarre * because we cannot reclaim anything and only can loop waiting * for somebody to do a work for us */ WARN_ON_ONCE(current->flags & PF_MEMALLOC);
/* * non failing costly orders are a hard requirement which we * are not prepared for much so let's warn about these users * so that we can identify them and convert them to something * else. */ WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
/* * Help non-failing allocations by giving them access to memory * reserves but do not use ALLOC_NO_WATERMARKS because this * could deplete whole memory reserves which would just make * the situation worse */ page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac); if (page) goto got_pg;