diff options
-rw-r--r-- | fs/ext4/ext4.h | 23 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 96 | ||||
-rw-r--r-- | include/trace/events/ext4.h | 16 |
3 files changed, 82 insertions, 53 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2a7e254cbae3..914475cbb060 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -128,6 +128,23 @@ enum SHIFT_DIRECTION { }; /* + * Number of criterias defined. For each criteria, mballoc has slightly + * different way of finding the required blocks nad usually, higher the + * criteria the slower the allocation. We start at lower criterias and keep + * falling back to higher ones if we are not able to find any blocks. + */ +#define EXT4_MB_NUM_CRS 4 +/* + * All possible allocation criterias for mballoc + */ +enum criteria { + CR0, + CR1, + CR2, + CR3, +}; + +/* * Flags used in mballoc's allocation_context flags field. * * Also used to show what's going on for debugging purposes when the @@ -1544,9 +1561,9 @@ struct ext4_sb_info { atomic_t s_bal_2orders; /* 2^order hits */ atomic_t s_bal_cr0_bad_suggestions; atomic_t s_bal_cr1_bad_suggestions; - atomic64_t s_bal_cX_groups_considered[4]; - atomic64_t s_bal_cX_hits[4]; - atomic64_t s_bal_cX_failed[4]; /* cX loop didn't find blocks */ + atomic64_t s_bal_cX_groups_considered[EXT4_MB_NUM_CRS]; + atomic64_t s_bal_cX_hits[EXT4_MB_NUM_CRS]; + atomic64_t s_bal_cX_failed[EXT4_MB_NUM_CRS]; /* cX loop didn't find blocks */ atomic_t s_mb_buddies_generated; /* number of buddies generated */ atomic64_t s_mb_generation_time; atomic_t s_mb_lost_chunks; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8a3946f1bdd9..4359280d2fa7 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -154,19 +154,19 @@ * structures to decide the order in which groups are to be traversed for * fulfilling an allocation request. * - * At CR = 0, we look for groups which have the largest_free_order >= the order + * At CR0 , we look for groups which have the largest_free_order >= the order * of the request. We directly look at the largest free order list in the data * structure (1) above where largest_free_order = order of the request. If that * list is empty, we look at remaining list in the increasing order of - * largest_free_order. This allows us to perform CR = 0 lookup in O(1) time. + * largest_free_order. This allows us to perform CR0 lookup in O(1) time. * - * At CR = 1, we only consider groups where average fragment size > request + * At CR1, we only consider groups where average fragment size > request * size. So, we lookup a group which has average fragment size just above or * equal to request size using our average fragment size group lists (data * structure 2) in O(1) time. * * If "mb_optimize_scan" mount option is not set, mballoc traverses groups in - * linear order which requires O(N) search time for each CR 0 and CR 1 phase. + * linear order which requires O(N) search time for each CR0 and CR1 phase. * * The regular allocator (using the buddy cache) supports a few tunables. * @@ -409,7 +409,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac); static bool ext4_mb_good_group(struct ext4_allocation_context *ac, - ext4_group_t group, int cr); + ext4_group_t group, enum criteria cr); static int ext4_try_to_trim_range(struct super_block *sb, struct ext4_buddy *e4b, ext4_grpblk_t start, @@ -859,7 +859,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) * cr level needs an update. */ static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac, - int *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *iter, *grp; @@ -884,8 +884,8 @@ static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac, list_for_each_entry(iter, &sbi->s_mb_largest_free_orders[i], bb_largest_free_order_node) { if (sbi->s_mb_stats) - atomic64_inc(&sbi->s_bal_cX_groups_considered[0]); - if (likely(ext4_mb_good_group(ac, iter->bb_group, 0))) { + atomic64_inc(&sbi->s_bal_cX_groups_considered[CR0]); + if (likely(ext4_mb_good_group(ac, iter->bb_group, CR0))) { grp = iter; break; } @@ -897,7 +897,7 @@ static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac, if (!grp) { /* Increment cr and search again */ - *new_cr = 1; + *new_cr = CR1; } else { *group = grp->bb_group; ac->ac_flags |= EXT4_MB_CR0_OPTIMIZED; @@ -909,7 +909,7 @@ static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac, * order. Updates *new_cr if cr level needs an update. */ static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac, - int *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *grp = NULL, *iter; @@ -932,8 +932,8 @@ static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac, list_for_each_entry(iter, &sbi->s_mb_avg_fragment_size[i], bb_avg_fragment_size_node) { if (sbi->s_mb_stats) - atomic64_inc(&sbi->s_bal_cX_groups_considered[1]); - if (likely(ext4_mb_good_group(ac, iter->bb_group, 1))) { + atomic64_inc(&sbi->s_bal_cX_groups_considered[CR1]); + if (likely(ext4_mb_good_group(ac, iter->bb_group, CR1))) { grp = iter; break; } @@ -947,7 +947,7 @@ static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac, *group = grp->bb_group; ac->ac_flags |= EXT4_MB_CR1_OPTIMIZED; } else { - *new_cr = 2; + *new_cr = CR2; } } @@ -955,7 +955,7 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac) { if (unlikely(!test_opt2(ac->ac_sb, MB_OPTIMIZE_SCAN))) return 0; - if (ac->ac_criteria >= 2) + if (ac->ac_criteria >= CR2) return 0; if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) return 0; @@ -1000,7 +1000,7 @@ inc_and_return: * @ngroups Total number of groups */ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac, - int *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) { *new_cr = ac->ac_criteria; @@ -1009,9 +1009,9 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac, return; } - if (*new_cr == 0) { + if (*new_cr == CR0) { ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups); - } else if (*new_cr == 1) { + } else if (*new_cr == CR1) { ext4_mb_choose_next_group_cr1(ac, new_cr, group, ngroups); } else { /* @@ -2408,13 +2408,13 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, * for the allocation or not. */ static bool ext4_mb_good_group(struct ext4_allocation_context *ac, - ext4_group_t group, int cr) + ext4_group_t group, enum criteria cr) { ext4_grpblk_t free, fragments; int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); - BUG_ON(cr < 0 || cr >= 4); + BUG_ON(cr < CR0 || cr >= EXT4_MB_NUM_CRS); if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp) || !grp)) return false; @@ -2428,7 +2428,7 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac, return false; switch (cr) { - case 0: + case CR0: BUG_ON(ac->ac_2order == 0); /* Avoid using the first bg of a flexgroup for data files */ @@ -2447,15 +2447,15 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac, return false; return true; - case 1: + case CR1: if ((free / fragments) >= ac->ac_g_ex.fe_len) return true; break; - case 2: + case CR2: if (free >= ac->ac_g_ex.fe_len) return true; break; - case 3: + case CR3: return true; default: BUG(); @@ -2476,7 +2476,7 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac, * out"! */ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, - ext4_group_t group, int cr) + ext4_group_t group, enum criteria cr) { struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); struct super_block *sb = ac->ac_sb; @@ -2496,7 +2496,7 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, free = grp->bb_free; if (free == 0) goto out; - if (cr <= 2 && free < ac->ac_g_ex.fe_len) + if (cr <= CR2 && free < ac->ac_g_ex.fe_len) goto out; if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) goto out; @@ -2511,7 +2511,7 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, ext4_get_group_desc(sb, group, NULL); int ret; - /* cr=0/1 is a very optimistic search to find large + /* cr=CR0/CR1 is a very optimistic search to find large * good chunks almost for free. If buddy data is not * ready, then this optimization makes no sense. But * we never skip the first block group in a flex_bg, @@ -2519,7 +2519,7 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac, * and we want to make sure we locate metadata blocks * in the first block group in the flex_bg if possible. */ - if (cr < 2 && + if (cr < CR2 && (!sbi->s_log_groups_per_flex || ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) && !(ext4_has_group_desc_csum(sb) && @@ -2625,7 +2625,7 @@ static noinline_for_stack int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) { ext4_group_t prefetch_grp = 0, ngroups, group, i; - int cr = -1, new_cr; + enum criteria cr, new_cr; int err = 0, first_err = 0; unsigned int nr = 0, prefetch_ios = 0; struct ext4_sb_info *sbi; @@ -2683,13 +2683,13 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) } /* Let's just scan groups to find more-less suitable blocks */ - cr = ac->ac_2order ? 0 : 1; + cr = ac->ac_2order ? CR0 : CR1; /* - * cr == 0 try to get exact allocation, - * cr == 3 try to get anything + * cr == CR0 try to get exact allocation, + * cr == CR3 try to get anything */ repeat: - for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) { + for (; cr < EXT4_MB_NUM_CRS && ac->ac_status == AC_STATUS_CONTINUE; cr++) { ac->ac_criteria = cr; /* * searching for the right group start @@ -2716,7 +2716,7 @@ repeat: * spend a lot of time loading imperfect groups */ if ((prefetch_grp == group) && - (cr > 1 || + (cr > CR1 || prefetch_ios < sbi->s_mb_prefetch_limit)) { unsigned int curr_ios = prefetch_ios; @@ -2758,9 +2758,9 @@ repeat: } ac->ac_groups_scanned++; - if (cr == 0) + if (cr == CR0) ext4_mb_simple_scan_group(ac, &e4b); - else if (cr == 1 && sbi->s_stripe && + else if (cr == CR1 && sbi->s_stripe && !(ac->ac_g_ex.fe_len % EXT4_B2C(sbi, sbi->s_stripe))) ext4_mb_scan_aligned(ac, &e4b); @@ -2801,7 +2801,7 @@ repeat: ac->ac_b_ex.fe_len = 0; ac->ac_status = AC_STATUS_CONTINUE; ac->ac_flags |= EXT4_MB_HINT_FIRST; - cr = 3; + cr = CR3; goto repeat; } } @@ -2926,36 +2926,36 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset) seq_printf(seq, "\tgroups_scanned: %u\n", atomic_read(&sbi->s_bal_groups_scanned)); seq_puts(seq, "\tcr0_stats:\n"); - seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[0])); + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR0])); seq_printf(seq, "\t\tgroups_considered: %llu\n", - atomic64_read(&sbi->s_bal_cX_groups_considered[0])); + atomic64_read(&sbi->s_bal_cX_groups_considered[CR0])); seq_printf(seq, "\t\tuseless_loops: %llu\n", - atomic64_read(&sbi->s_bal_cX_failed[0])); + atomic64_read(&sbi->s_bal_cX_failed[CR0])); seq_printf(seq, "\t\tbad_suggestions: %u\n", atomic_read(&sbi->s_bal_cr0_bad_suggestions)); seq_puts(seq, "\tcr1_stats:\n"); - seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[1])); + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR1])); seq_printf(seq, "\t\tgroups_considered: %llu\n", - atomic64_read(&sbi->s_bal_cX_groups_considered[1])); + atomic64_read(&sbi->s_bal_cX_groups_considered[CR1])); seq_printf(seq, "\t\tuseless_loops: %llu\n", - atomic64_read(&sbi->s_bal_cX_failed[1])); + atomic64_read(&sbi->s_bal_cX_failed[CR1])); seq_printf(seq, "\t\tbad_suggestions: %u\n", atomic_read(&sbi->s_bal_cr1_bad_suggestions)); seq_puts(seq, "\tcr2_stats:\n"); - seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[2])); + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR2])); seq_printf(seq, "\t\tgroups_considered: %llu\n", - atomic64_read(&sbi->s_bal_cX_groups_considered[2])); + atomic64_read(&sbi->s_bal_cX_groups_considered[CR2])); seq_printf(seq, "\t\tuseless_loops: %llu\n", - atomic64_read(&sbi->s_bal_cX_failed[2])); + atomic64_read(&sbi->s_bal_cX_failed[CR2])); seq_puts(seq, "\tcr3_stats:\n"); - seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[3])); + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR3])); seq_printf(seq, "\t\tgroups_considered: %llu\n", - atomic64_read(&sbi->s_bal_cX_groups_considered[3])); + atomic64_read(&sbi->s_bal_cX_groups_considered[CR3])); seq_printf(seq, "\t\tuseless_loops: %llu\n", - atomic64_read(&sbi->s_bal_cX_failed[3])); + atomic64_read(&sbi->s_bal_cX_failed[CR3])); seq_printf(seq, "\textents_scanned: %u\n", atomic_read(&sbi->s_bal_ex_scanned)); seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals)); seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders)); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 54cd509ced0f..51eab82e897c 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -120,6 +120,18 @@ TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX); { EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}, \ { EXT4_FC_REASON_ENCRYPTED_FILENAME, "ENCRYPTED_FILENAME"}) +TRACE_DEFINE_ENUM(CR0); +TRACE_DEFINE_ENUM(CR1); +TRACE_DEFINE_ENUM(CR2); +TRACE_DEFINE_ENUM(CR3); + +#define show_criteria(cr) \ + __print_symbolic(cr, \ + { CR0, "CR0" }, \ + { CR1, "CR1" }, \ + { CR2, "CR2" }, \ + { CR3, "CR3" }) + TRACE_EVENT(ext4_other_inode_update_time, TP_PROTO(struct inode *inode, ino_t orig_ino), @@ -1063,7 +1075,7 @@ TRACE_EVENT(ext4_mballoc_alloc, ), TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u " - "result %u/%d/%u@%u blks %u grps %u cr %u flags %s " + "result %u/%d/%u@%u blks %u grps %u cr %s flags %s " "tail %u broken %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, @@ -1073,7 +1085,7 @@ TRACE_EVENT(ext4_mballoc_alloc, __entry->goal_len, __entry->goal_logical, __entry->result_group, __entry->result_start, __entry->result_len, __entry->result_logical, - __entry->found, __entry->groups, __entry->cr, + __entry->found, __entry->groups, show_criteria(__entry->cr), show_mballoc_flags(__entry->flags), __entry->tail, __entry->buddy ? 1 << __entry->buddy : 0) ); |