summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mm/vmscan.c58
1 files changed, 49 insertions, 9 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3584067800e1..d3488828331a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2198,10 +2198,40 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
}
#endif
+/*
+ * pgdat_balanced is used when checking if a node is balanced for high-order
+ * allocations. Only zones that meet watermarks and are in a zone allowed
+ * by the caller's classzone_idx are added to balanced_pages. The total of
+ * balanced pages must be at least 25% of the zones allowed by classzone_idx
+ * for the node to be considered balanced. Forcing all zones to be balanced
+ * for high orders can cause excessive reclaim when there are imbalanced zones.
+ * The choice of 25% is due to
+ *   o a 16M DMA zone that is balanced will not balance a zone on any
+ *     reasonable sized machine
+ *   o On all other machines, the top zone must be at least a reasonable
+ *     percentage of the middle zones. For example, on 32-bit x86, highmem
+ *     would need to be at least 256M for it to balance a whole node.
+ *     Similarly, on x86-64 the Normal zone would need to be at least 1G
+ *     to balance a node on its own. These seemed like reasonable ratios.
+ */
+static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
+						int classzone_idx)
+{
+	unsigned long present_pages = 0;
+	int i;
+
+	for (i = 0; i <= classzone_idx; i++)
+		present_pages += pgdat->node_zones[i].present_pages;
+	/* ">=": exactly 25%, or a range with no pages at all, is balanced */
+	return balanced_pages >= (present_pages >> 2);
+}
+
/* is kswapd sleeping prematurely? */
static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
{
int i;
+ unsigned long balanced = 0;
+ bool all_zones_ok = true;
/* If a direct reclaimer woke kswapd within HZ/10, it's premature */
if (remaining)
@@ -2219,10 +2249,20 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
0, 0))
- return 1;
+ all_zones_ok = false;
+ else
+ balanced += zone->present_pages;
}
- return 0;
+ /*
+ * For high-order requests, the balanced zones must contain at least
+ * 25% of the node's pages for kswapd to sleep. For order-0, all zones
+ * must be balanced.
+ */
+ if (order)
+ return pgdat_balanced(pgdat, balanced, 0);
+ else
+ return !all_zones_ok;
}
/*
@@ -2250,7 +2290,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
int classzone_idx)
{
int all_zones_ok;
- int any_zone_ok;
+ unsigned long balanced;
int priority;
int i;
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
@@ -2284,7 +2324,7 @@ loop_again:
disable_swap_token();
all_zones_ok = 1;
- any_zone_ok = 0;
+ balanced = 0;
/*
* Scan in the highmem->dma direction for the highest
@@ -2404,11 +2444,11 @@ loop_again:
*/
zone_clear_flag(zone, ZONE_CONGESTED);
if (i <= classzone_idx)
- any_zone_ok = 1;
+ balanced += zone->present_pages;
}
}
- if (all_zones_ok || (order && any_zone_ok))
+ if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))
break; /* kswapd: all done */
/*
* OK, kswapd is getting into trouble. Take a nap, then take
@@ -2434,10 +2474,10 @@ out:
/*
* order-0: All zones must meet high watermark for a balanced node
- * high-order: Any zone below pgdats classzone_idx must meet the high
- * watermark for a balanced node
+ * high-order: Balanced zones must make up at least 25% of the node
+ * for the node to be balanced
*/
- if (!(all_zones_ok || (order && any_zone_ok))) {
+ if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))) {
cond_resched();
try_to_freeze();