From 63ff0738faf0de2b2310c7bd1152d3178357ca8b Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Mon, 22 Jul 2013 15:18:19 +0900
Subject: [PATCH 0825/1302] vrange: Purging vrange-anon pages from shrinker

This patch supports purging anon-vrange pages from the slab shrinker.
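
For reviewers, the intended flow is roughly the following. This is an
illustrative sketch only, not part of the patch: the function name is
hypothetical, the real shrink_vrange() body lives outside these hunks,
and reference handling of the isolated vrange is elided.

	static int shrink_vrange_sketch(struct shrinker *s,
					struct shrink_control *sc)
	{
		struct vrange *range;

		/* Pick one victim range off the global vrange_list. */
		range = isolate_vrange();
		if (range)
			/*
			 * Walk the owning mm (reached via the new
			 * vroot->object back-pointer) and purge the anon
			 * pages backing [node.start, node.last].
			 */
			discard_vrange(range);
		return 0;
	}

isolate_vrange() and discard_vrange() are the functions this patch wires
up; the vroot->object field added to vrange_types.h is what lets
discard_vrange() reach the owning mm_struct.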

XXX: This is super complicated code, and has a very short commit message
XXX: Sanity check function names.
XXX: Possibly split out vrange_root object addition into its own patch?

Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 include/linux/vrange.h       |   7 +-
 include/linux/vrange_types.h |   1 +
 mm/vrange.c                  | 202 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 205 insertions(+), 5 deletions(-)

diff --git a/include/linux/vrange.h b/include/linux/vrange.h
index b14d054..b5cb7c8 100644
--- a/include/linux/vrange.h
+++ b/include/linux/vrange.h
@@ -24,11 +24,13 @@ static inline int is_vrange_entry(swp_entry_t entry)
 	return swp_type(entry) == SWP_VRANGE;
 }
 
-static inline void vrange_root_init(struct vrange_root *vroot, int type)
+static inline void vrange_root_init(struct vrange_root *vroot, int type,
+								void *object)
 {
 	vroot->type = type;
 	vroot->v_rb = RB_ROOT;
 	mutex_init(&vroot->v_lock);
+	vroot->object = object;
 }
 
 static inline void vrange_lock(struct vrange_root *vroot)
@@ -59,7 +61,8 @@ extern bool vrange_addr_purged(struct vm_area_struct *vma,
 
 #else
 
-static inline void vrange_root_init(struct vrange_root *vroot, int type) {};
+static inline void vrange_root_init(struct vrange_root *vroot,
+					int type, void *obj) {};
 static inline void vrange_root_cleanup(struct vrange_root *vroot) {};
 static inline int vrange_fork(struct mm_struct *new, struct mm_struct *old)
 {
diff --git a/include/linux/vrange_types.h b/include/linux/vrange_types.h
index d1599ee..e8c5f1f 100644
--- a/include/linux/vrange_types.h
+++ b/include/linux/vrange_types.h
@@ -14,6 +14,7 @@ struct vrange_root {
 	struct mutex v_lock;		/* Protect v_rb */
 	enum vrange_type type;		/* range root type */
 	atomic_t refcount;
+	void *object;			/* mm_struct */
 };
 
 struct vrange {
diff --git a/mm/vrange.c b/mm/vrange.c
index b93d469..fad50c5 100644
--- a/mm/vrange.c
+++ b/mm/vrange.c
@@ -11,6 +11,8 @@
 #include <linux/hugetlb.h>
 #include "internal.h"
 #include <linux/mmu_notifier.h>
+#include <linux/mm_inline.h>
+#include <linux/migrate.h>
 
 static struct kmem_cache *vrange_cachep;
 static struct kmem_cache *vroot_cachep;
@@ -21,6 +23,11 @@ static struct vrange_list {
 	struct mutex lock;
 } vrange_list;
 
+struct vrange_walker {
+	struct vm_area_struct *vma;
+	struct list_head *pagelist;
+};
+
 static inline unsigned int vrange_size(struct vrange *range)
 {
 	return range->node.last + 1 - range->node.start;
@@ -68,6 +75,15 @@ static inline int __vroot_get(struct vrange_root *vroot)
 static inline void __vroot_put(struct vrange_root *vroot)
 {
 	if (atomic_dec_and_test(&vroot->refcount)) {
+		enum vrange_type type = vroot->type;
+		if (type == VRANGE_MM) {
+			struct mm_struct *mm = vroot->object;
+			mmdrop(mm);
+		} else if (type == VRANGE_FILE) {
+			/* TODO : */
+		} else
+			BUG();
+
 		WARN_ON(!RB_EMPTY_ROOT(&vroot->v_rb));
 		kmem_cache_free(vroot_cachep, vroot);
 	}
@@ -80,7 +96,7 @@ static bool __vroot_init_mm(struct vrange_root *vroot, struct mm_struct *mm)
 	spin_lock(&mm->page_table_lock);
 	if (!mm->vroot) {
 		mm->vroot = vroot;
-		vrange_root_init(mm->vroot, VRANGE_MM);
+		vrange_root_init(mm->vroot, VRANGE_MM, mm);
 		atomic_inc(&mm->mm_count);
 		ret = true;
 	}
@@ -97,7 +113,7 @@ static bool __vroot_init_mapping(struct vrange_root *vroot,
 	mutex_lock(&mapping->i_mmap_mutex);
 	if (!mapping->vroot) {
 		mapping->vroot = vroot;
-		vrange_root_init(mapping->vroot, VRANGE_FILE);
+		vrange_root_init(mapping->vroot, VRANGE_FILE, mapping);
 		/* XXX - inc ref count on mapping? */
 		ret = true;
 	}
@@ -918,11 +934,191 @@ static struct vrange *isolate_vrange(void)
 	return vrange;
 }
 
-static unsigned int discard_vrange(struct vrange *vrange)
+static unsigned int discard_vrange_pagelist(struct list_head *page_list)
 {
+	struct page *page;
+	unsigned int nr_discard = 0;
+	LIST_HEAD(ret_pages);
+	LIST_HEAD(free_pages);
+
+	while (!list_empty(page_list)) {
+		int err;
+		page = list_entry(page_list->prev, struct page, lru);
+		list_del(&page->lru);
+		if (!trylock_page(page)) {
+			list_add(&page->lru, &ret_pages);
+			continue;
+		}
+
+		/*
+		 * discard_vpage() returns with the page
+		 * unlocked if it is successful
+		 */
+		err = discard_vpage(page);
+		if (err) {
+			unlock_page(page);
+			list_add(&page->lru, &ret_pages);
+			continue;
+		}
+
+		ClearPageActive(page);
+		list_add(&page->lru, &free_pages);
+		dec_zone_page_state(page, NR_ISOLATED_ANON);
+		nr_discard++;
+	}
+
+	free_hot_cold_page_list(&free_pages, 1);
+	list_splice(&ret_pages, page_list);
+	return nr_discard;
+}
+
+static void vrange_pte_entry(pte_t pteval, unsigned long address,
+		unsigned ptent_size, struct mm_walk *walk)
+{
+	struct page *page;
+	struct vrange_walker *vw = walk->private;
+	struct vm_area_struct *vma = vw->vma;
+	struct list_head *pagelist = vw->pagelist;
+
+	if (pte_none(pteval))
+		return;
+
+	if (!pte_present(pteval))
+		return;
+
+	page = vm_normal_page(vma, address, pteval);
+	if (unlikely(!page))
+		return;
+
+	if (!PageLRU(page) || PageLocked(page))
+		return;
+
+	/* TODO : Support THP */
+	if (unlikely(PageCompound(page)))
+		return;
+
+	if (isolate_lru_page(page))
+		return;
+
+	list_add(&page->lru, pagelist);
+
+	VM_BUG_ON(page_is_file_cache(page));
+	inc_zone_page_state(page, NR_ISOLATED_ANON);
+}
+
+static int vrange_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+		struct mm_walk *walk)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	for (; addr != end; pte++, addr += PAGE_SIZE)
+		vrange_pte_entry(*pte, addr, PAGE_SIZE, walk);
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
+
 	return 0;
 }
 
+static unsigned int discard_vma_pages(struct mm_struct *mm,
+		struct vm_area_struct *vma, unsigned long start,
+		unsigned long end)
+{
+	unsigned int ret = 0;
+	LIST_HEAD(pagelist);
+	struct vrange_walker vw;
+	struct mm_walk vrange_walk = {
+		.pmd_entry = vrange_pte_range,
+		.mm = vma->vm_mm,
+		.private = &vw,
+	};
+
+	vw.pagelist = &pagelist;
+	vw.vma = vma;
+
+	walk_page_range(start, end, &vrange_walk);
+
+	if (!list_empty(&pagelist))
+		ret = discard_vrange_pagelist(&pagelist);
+
+	putback_lru_pages(&pagelist);
+	return ret;
+}
+
+/*
+ * vrange->owner isn't stable because the caller doesn't hold
+ * vrange_lock, so avoid touching vrange->owner.
+ */
+static int __discard_vrange_anon(struct mm_struct *mm, struct vrange *vrange,
+					unsigned int *ret_discard)
+{
+	struct vm_area_struct *vma;
+	unsigned int nr_discard = 0;
+	unsigned long start = vrange->node.start;
+	unsigned long end = vrange->node.last + 1;
+	int ret = 0;
+
+	/* Pin mm_users so the vmas can't be destroyed while the process exits */
+	if (!atomic_inc_not_zero(&mm->mm_users))
+		return ret;
+
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		mmput(mm);
+		ret = -EBUSY;
+		goto out; /* this vrange could be retried */
+	}
+
+	vma = find_vma(mm, start);
+	if (!vma || (vma->vm_start >= end))
+		goto out_unlock;
+
+	for (; vma; vma = vma->vm_next) {
+		if (vma->vm_start >= end)
+			break;
+		BUG_ON(vma->vm_flags & (VM_SPECIAL|VM_LOCKED|VM_MIXEDMAP|
+					VM_HUGETLB));
+		cond_resched();
+		nr_discard += discard_vma_pages(mm, vma,
+				max_t(unsigned long, start, vma->vm_start),
+				min_t(unsigned long, end, vma->vm_end));
+	}
+out_unlock:
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+	*ret_discard = nr_discard;
+out:
+	return ret;
+}
+
+static int discard_vrange(struct vrange *vrange)
+{
+	int ret = 0;
+	struct mm_struct *mm;
+	struct vrange_root *vroot;
+	unsigned int nr_discard = 0;
+	vroot = vrange_get_vroot(vrange);
+	if (!vroot)
+		return 0;
+
+	/* TODO : handle VRANGE_FILE */
+	if (vroot->type != VRANGE_MM)
+		goto out;
+
+	/*
+	 * vrange->owner can race with __vrange_remove, but that's
+	 * okay because the subfunctions will check it again
+	 */
+	if (vrange->owner == NULL)
+		goto out;
+
+	mm = vroot->object;
+	ret = __discard_vrange_anon(mm, vrange, &nr_discard);
+out:
+	__vroot_put(vroot);
+	return nr_discard;
+}
+
 static int shrink_vrange(struct shrinker *s, struct shrink_control *sc)
 {
 	struct vrange *range = NULL;
-- 
1.8.3.2