Android kswapd-->lowmemorykiller启动和扫描过程
16lz
2021-01-23
注册kswapd module,kernel启动时调用@kernel/mm:
/*
 * NOTE(review): excerpt quoted from kernel mm/vmscan.c for this article.
 * "..." marks source elided by the author. Reformatted for readability
 * only; code tokens are unchanged.
 */
module_init(kswapd_init)

/* Boot-time init: start one kswapd kthread for every node with memory. */
static int __init kswapd_init(void)
{
	int nid;

	swap_setup();
	for_each_node_state(nid, N_MEMORY)
		kswapd_run(nid);
	if (kswapd_cpu_mask == NULL)
		hotcpu_notifier(cpu_callback, 0);
	return 0;
}

/*
 * This kswapd start function will be called by init and node-hot-add.
 * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
 */
int kswapd_run(int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	int ret = 0;

	/* Already running for this node — nothing to do. */
	if (pgdat->kswapd)
		return 0;

	pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
	if (IS_ERR(pgdat->kswapd)) {
		/* failure at boot is fatal */
		BUG_ON(system_state == SYSTEM_BOOTING);
		pr_err("Failed to start kswapd on node %d\n", nid);
		ret = PTR_ERR(pgdat->kswapd);
		pgdat->kswapd = NULL;
	} else if (kswapd_cpu_mask) {
		if (set_kswapd_cpu_mask(pgdat))
			pr_warn("error setting kswapd cpu affinity mask\n");
	}
	return ret;
}

/*
 * The background pageout daemon, started as a kernel thread
 * from the init process.
 *
 * This basically trickles out pages so that we have _some_
 * free memory available even if there is no other activity
 * that frees anything up. This is needed for things like routing
 * etc, where we otherwise might have all activity going on in
 * asynchronous contexts that cannot page things out.
 *
 * If there are applications that are active memory-allocators
 * (most normal use), this basically shouldn't matter.
 */
static int kswapd(void *p)
{
...
		/*
		 * We can speed up thawing tasks if we don't call balance_pgdat
		 * after returning from the refrigerator
		 */
		if (!ret) {
			trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
			balanced_classzone_idx = classzone_idx;
			/* Main reclaim work: balance all zones of this node. */
			balanced_order = balance_pgdat(pgdat, order,
						&balanced_classzone_idx);
		}
	}

	tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
	current->reclaim_state = NULL;
	lockdep_clear_current_reclaim_state();

	return 0;
}

/*
 * For kswapd, balance_pgdat() will work across all this node's zones until
 * they are all at high_wmark_pages(zone).
 *
 * Returns the final order kswapd was reclaiming at
 *
 * There is special handling here for zones which are full of pinned pages.
 * This can happen if the pages are all mlocked, or if they are all used by
 * device drivers (say, ZONE_DMA). Or if they are all in use by hugetlb.
 * What we do is to detect the case where all pages in the zone have been
 * scanned twice and there has been zero successful reclaim. Mark the zone as
 * dead and from now on, only perform a short scan. Basically we're polling
 * the zone for when the problem goes away.
 *
 * kswapd scans the zones in the highmem->normal->dma direction. It skips
 * zones which have free_pages > high_wmark_pages(zone), but once a zone is
 * found to have free_pages <= high_wmark_pages(zone), we scan that zone and the
 * lower zones regardless of the number of free pages in the lower zones. This
 * interoperates with the page allocator fallback scheme to ensure that aging
 * of pages is balanced across the zones.
 */
static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
					int *classzone_idx)
{
...
			/*
			 * There should be no need to raise the scanning
			 * priority if enough pages are already being scanned
			 * that that high watermark would be met at 100%
			 * efficiency.
			 */
			if (kswapd_shrink_zone(zone, end_zone, &sc,
					lru_pages, &nr_attempted))
}

/*
 * kswapd shrinks the zone by the number of pages required to reach
 * the high watermark.
 *
 * Returns true if kswapd scanned at least the requested number of pages to
 * reclaim or if the lack of progress was due to pages under writeback.
 * This is used to determine if the scanning priority needs to be raised.
 */
static bool kswapd_shrink_zone(struct zone *zone,
			       int classzone_idx,
			       struct scan_control *sc,
			       unsigned long lru_pages,
			       unsigned long *nr_attempted)
{
...
	/* This is where zone reclaim hands off to the slab shrinkers. */
	reclaim_state->reclaimed_slab = 0;
	shrink_slab(&shrink, sc->nr_scanned, lru_pages);
	sc->nr_reclaimed += reclaim_state->reclaimed_slab;
...
}

/*
 * Call the shrink functions to age shrinkable caches
 *
 * Here we assume it costs one seek to replace a lru page and that it also
 * takes a seek to recreate a cache object. With this in mind we age equal
 * percentages of the lru and ageable caches. This should balance the seeks
 * generated by these structures.
 *
 * If the vm encountered mapped pages on the LRU it increase the pressure on
 * slab to avoid swapping.
 *
 * We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
 *
 * `lru_pages' represents the number of on-LRU pages in all the zones which
 * are eligible for the caller's allocation attempt. It is used for balancing
 * slab reclaim versus page reclaim.
 *
 * Returns the number of slab objects which we shrunk.
 */
unsigned long shrink_slab(struct shrink_control *shrinkctl,
			  unsigned long nr_pages_scanned,
			  unsigned long lru_pages)
{
	struct shrinker *shrinker;
...
		/* Walk every online node this shrink_control targets. */
		for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
			if (node_online(shrinkctl->nid))
				freed += shrink_slab_node(shrinkctl, shrinker,
						nr_pages_scanned, lru_pages);
		}
...
}

static unsigned long
shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
		 unsigned long nr_pages_scanned, unsigned long lru_pages)
{
...
	/* Ask the shrinker how many objects it could free right now. */
	freeable = shrinker->count_objects(shrinker, shrinkctl);
	if (freeable == 0)
		return 0;
...
	/*
	 * Normally, we should not scan less than batch_size objects in one
	 * pass to avoid too frequent shrinker calls, but if the slab has less
	 * than batch_size objects in total and we are really tight on memory,
	 * we will try to reclaim all available objects, otherwise we can end
	 * up failing allocations although there are plenty of reclaimable
	 * objects spread over several slabs with usage less than the
	 * batch_size.
	 *
	 * We detect the "tight on memory" situations by looking at the total
	 * number of objects we want to scan (total_scan). If it is greater
	 * than the total number of objects on slab (freeable), we must be
	 * scanning at high prio and therefore should try to reclaim as much as
	 * possible.
	 */
	while (total_scan > min_cache_size || total_scan >= freeable) {
		unsigned long ret;
		unsigned long nr_to_scan = min(batch_size, total_scan);

		shrinkctl->nr_to_scan = nr_to_scan;
		/* Invoke the shrinker callback (e.g. lowmem_scan below). */
		ret = shrinker->scan_objects(shrinker, shrinkctl);
		if (ret == SHRINK_STOP)
			break;
		freed += ret;

		count_vm_events(SLABS_SCANNED, nr_to_scan);
		total_scan -= nr_to_scan;

		cond_resched();
	}
此处的 shrinker 回调就对应 lowmemorykiller 注册的 shrinker(位于 @kernel/drivers/staging/):
/*
 * NOTE(review): excerpt quoted from the lowmemorykiller driver; "..." marks
 * source elided by the author. Reformatted for readability only.
 */

/* Shrinker scan callback: pick a victim task by oom adj and kill it. */
static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
{
...
	/* A victim task has been selected — mark it and send SIGKILL. */
	set_tsk_thread_flag(selected, TIF_MEMDIE);
	send_sig(SIGKILL, selected, 0);
...
}

/* Shrinker count callback: report all anon+file LRU pages as "freeable". */
static unsigned long lowmem_count(struct shrinker *s,
				  struct shrink_control *sc)
{
	return global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE) +
		global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE);
}

static struct shrinker lowmem_shrinker = {
	.scan_objects = lowmem_scan,
	.count_objects = lowmem_count,
	.seeks = DEFAULT_SEEKS * 16
};

/* Module init: hook into the shrinker chain walked by shrink_slab(). */
static int __init lowmem_init(void)
{
	register_shrinker(&lowmem_shrinker);
	vmpressure_notifier_register(&lmk_vmpr_nb);
	return 0;
}
所以一般在 Android 里遇到 kswapd 占用 CPU 资源较多时,可以优化 lowmemorykiller 里的 scan 过程,尽量让系统保有更多的空闲 memory,减少 kswapd 回收时反复 scan 的开销。
AMS在更新oom adj时则是通过下面的流程@frameworks/base/services/core/java/com/android/server/am:
//通过localsocket 通到native的lmkd daemon private static void writeLmkd(ByteBuffer buf) { for (int i = 0; i < 3; i++) { if (sLmkdSocket == null) { if (openLmkdSocket() == false) { try { Thread.sleep(1000); } catch (InterruptedException ie) { } continue; } } try { sLmkdOutputStream.write(buf.array(), 0, buf.position()); return; } catch (IOException ex) { Slog.w(TAG, "Error writing to lowmemorykiller socket"); try { sLmkdSocket.close(); } catch (IOException ex2) { } sLmkdSocket = null; } } } private static boolean openLmkdSocket() { try { sLmkdSocket = new LocalSocket(LocalSocket.SOCKET_SEQPACKET); sLmkdSocket.connect( new LocalSocketAddress("lmkd", LocalSocketAddress.Namespace.RESERVED)); sLmkdOutputStream = sLmkdSocket.getOutputStream(); } catch (IOException ex) { Slog.w(TAG, "lowmemorykiller daemon socket open failed"); sLmkdSocket = null; return false; } return true; }
native lmkd daemon:
@system/core/lmkd
/*
 * lmkd init (excerpt; "..." marks elided source): obtain the "lmkd"
 * control socket that init created, so AMS can connect to it.
 */
static int init(void) {
...
    ctrl_lfd = android_get_control_socket("lmkd");
    if (ctrl_lfd < 0) {
        ALOGE("get lmkd control socket failed");
        return -1;
    }
...
}
数据处理回调:
/* epoll event callback for the AMS control connection. */
static void ctrl_data_handler(uint32_t events) {
    if (events & EPOLLHUP) {
        /* Peer (ActivityManager) went away; drop the data fd. */
        ALOGI("ActivityManager disconnected");
        if (!ctrl_dfd_reopened)
            ctrl_data_close();
    } else if (events & EPOLLIN) {
        ctrl_command_handler();
    }
}

/*
 * Read and dispatch one command packet from AMS. Packet layout is
 * network-order ints: [cmd, arg0, arg1, ...]; each command validates
 * its own argument count before dispatch.
 */
static void ctrl_command_handler(void) {
    int ibuf[CTRL_PACKET_MAX / sizeof(int)];
    int len;
    int cmd = -1;
    int nargs;
    int targets;

    len = ctrl_data_read((char *)ibuf, CTRL_PACKET_MAX);
    if (len <= 0)
        return;

    /* Everything after the command word counts as an argument. */
    nargs = len / sizeof(int) - 1;
    if (nargs < 0)
        goto wronglen;

    cmd = ntohl(ibuf[0]);
    switch(cmd) {
    case LMK_TARGET:
        /* Arguments come in (minfree, adj) pairs. */
        targets = nargs / 2;
        if (nargs & 0x1 || targets > (int)ARRAY_SIZE(lowmem_adj))
            goto wronglen;
        cmd_target(targets, &ibuf[1]);
        break;
    case LMK_PROCPRIO:
        if (nargs != 3)
            goto wronglen;
        cmd_procprio(ntohl(ibuf[1]), ntohl(ibuf[2]), ntohl(ibuf[3]));
        break;
    case LMK_PROCREMOVE:
        if (nargs != 1)
            goto wronglen;
        cmd_procremove(ntohl(ibuf[1]));
        break;
    default:
        ALOGE("Received unknown command code %d", cmd);
        return;
    }

    return;

wronglen:
    ALOGE("Wrong control socket read length cmd=%d len=%d", cmd, len);
}
最终由 lmkd 写到内核对应的 /sys、/proc 节点:
#define INKERNEL_MINFREE_PATH "/sys/module/lowmemorykiller/parameters/minfree"#define INKERNEL_ADJ_PATH "/sys/module/lowmemorykiller/parameters/adj""/proc/%d/oom_score_adj"
更多相关文章
- Android 4.0 gallery2 生成video thumbnail的过程
- Android应用程序绑定服务(bindService)的过程源代码分析(3)
- Android应用程序组件Content Provider的启动过程源代码分析(3)
- Android应用程序组件Content Provider的启动过程源代码分析(5)
- android 9.0 startService启动Service的过程分析
- Android一键锁屏开发全过程【源码】【附图】
- Android Camera调用过程分析
- android系统开发小问题-启动过程中android字符没有显示出来