diff options
Diffstat (limited to 'kernel/fs/xfs/xfs_log.c')
-rw-r--r-- | kernel/fs/xfs/xfs_log.c | 231 |
1 files changed, 154 insertions, 77 deletions
diff --git a/kernel/fs/xfs/xfs_log.c b/kernel/fs/xfs/xfs_log.c index bcc7cfabb..f52c72a1a 100644 --- a/kernel/fs/xfs/xfs_log.c +++ b/kernel/fs/xfs/xfs_log.c @@ -109,7 +109,7 @@ xlog_ungrant_log_space( STATIC void xlog_verify_dest_ptr( struct xlog *log, - char *ptr); + void *ptr); STATIC void xlog_verify_grant_tail( struct xlog *log); @@ -268,7 +268,7 @@ xlog_grant_head_wait( __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock(&head->lock); - XFS_STATS_INC(xs_sleep_logspace); + XFS_STATS_INC(log->l_mp, xs_sleep_logspace); trace_xfs_log_grant_sleep(log, tic); schedule(); @@ -379,7 +379,7 @@ xfs_log_regrant( if (XLOG_FORCED_SHUTDOWN(log)) return -EIO; - XFS_STATS_INC(xs_try_logspace); + XFS_STATS_INC(mp, xs_try_logspace); /* * This is a new transaction on the ticket, so we need to change the @@ -448,7 +448,7 @@ xfs_log_reserve( if (XLOG_FORCED_SHUTDOWN(log)) return -EIO; - XFS_STATS_INC(xs_try_logspace); + XFS_STATS_INC(mp, xs_try_logspace); ASSERT(*ticp == NULL); tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, @@ -513,7 +513,7 @@ xfs_log_done( struct xfs_mount *mp, struct xlog_ticket *ticket, struct xlog_in_core **iclog, - uint flags) + bool regrant) { struct xlog *log = mp->m_log; xfs_lsn_t lsn = 0; @@ -526,14 +526,11 @@ xfs_log_done( (((ticket->t_flags & XLOG_TIC_INITED) == 0) && (xlog_commit_record(log, ticket, iclog, &lsn)))) { lsn = (xfs_lsn_t) -1; - if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { - flags |= XFS_LOG_REL_PERM_RESERV; - } + regrant = false; } - if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 || - (flags & XFS_LOG_REL_PERM_RESERV)) { + if (!regrant) { trace_xfs_log_done_nonperm(log, ticket); /* @@ -541,7 +538,6 @@ xfs_log_done( * request has been made to release a permanent reservation. */ xlog_ungrant_log_space(log, ticket); - xfs_log_ticket_put(ticket); } else { trace_xfs_log_done_perm(log, ticket); @@ -553,6 +549,7 @@ xfs_log_done( ticket->t_flags |= XLOG_TIC_INITED; } + xfs_log_ticket_put(ticket); return lsn; } @@ -671,9 +668,9 @@ xfs_log_mount( ASSERT(0); goto out_free_log; } + xfs_crit(mp, "Log size out of supported range."); xfs_crit(mp, -"Log size out of supported range. Continuing onwards, but if log hangs are\n" -"experienced then please report this message in the bug report."); +"Continuing onwards, but if log hangs are experienced then please report this message in the bug report."); } /* @@ -703,6 +700,7 @@ xfs_log_mount( if (error) { xfs_warn(mp, "log mount/recovery failed: error %d", error); + xlog_recover_cancel(mp->m_log); goto out_destroy_ail; } } @@ -743,18 +741,35 @@ out: * it. */ int -xfs_log_mount_finish(xfs_mount_t *mp) +xfs_log_mount_finish( + struct xfs_mount *mp) { int error = 0; - if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { - error = xlog_recover_finish(mp->m_log); - if (!error) - xfs_log_work_queue(mp); - } else { + if (mp->m_flags & XFS_MOUNT_NORECOVERY) { ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); + return 0; } + error = xlog_recover_finish(mp->m_log); + if (!error) + xfs_log_work_queue(mp); + + return error; +} + +/* + * The mount has failed. Cancel the recovery if it hasn't completed and destroy + * the log. + */ +int +xfs_log_mount_cancel( + struct xfs_mount *mp) +{ + int error; + + error = xlog_recover_cancel(mp->m_log); + xfs_log_unmount(mp); return error; } @@ -1145,11 +1160,13 @@ xlog_space_left( * In this case we just want to return the size of the * log as the amount of space left. */ + xfs_alert(log->l_mp, "xlog_space_left: head behind tail"); xfs_alert(log->l_mp, - "xlog_space_left: head behind tail\n" - " tail_cycle = %d, tail_bytes = %d\n" - " GH cycle = %d, GH bytes = %d", - tail_cycle, tail_bytes, head_cycle, head_bytes); + " tail_cycle = %d, tail_bytes = %d", + tail_cycle, tail_bytes); + xfs_alert(log->l_mp, + " GH cycle = %d, GH bytes = %d", + head_cycle, head_bytes); ASSERT(0); free_bytes = log->l_logsize; } @@ -1447,7 +1464,7 @@ xlog_alloc_log( iclog->ic_bp = bp; iclog->ic_data = bp->b_addr; #ifdef DEBUG - log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); + log->l_iclog_bak[i] = &iclog->ic_header; #endif head = &iclog->ic_header; memset(head, 0, sizeof(xlog_rec_header_t)); @@ -1602,7 +1619,7 @@ xlog_pack_data( int i, j, k; int size = iclog->ic_offset + roundoff; __be32 cycle_lsn; - xfs_caddr_t dp; + char *dp; cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn); @@ -1655,8 +1672,13 @@ xlog_cksum( if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { union xlog_in_core2 *xhdr = (union xlog_in_core2 *)rhead; int i; + int xheads; + + xheads = size / XLOG_HEADER_CYCLE_SIZE; + if (size % XLOG_HEADER_CYCLE_SIZE) + xheads++; - for (i = 1; i < log->l_iclog_heads; i++) { + for (i = 1; i < xheads; i++) { crc = crc32c(crc, &xhdr[i].hic_xheader, sizeof(struct xlog_rec_ext_header)); } @@ -1746,7 +1768,7 @@ xlog_sync( int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); int size; - XFS_STATS_INC(xs_log_writes); + XFS_STATS_INC(log->l_mp, xs_log_writes); ASSERT(atomic_read(&iclog->ic_refcnt) == 0); /* Add for LR header */ @@ -1783,7 +1805,7 @@ xlog_sync( bp = iclog->ic_bp; XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); - XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); + XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count)); /* Do we need to split this write into 2 parts? */ if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { @@ -2031,26 +2053,24 @@ xlog_print_tic_res( "SWAPEXT" }; - xfs_warn(mp, - "xlog_write: reservation summary:\n" - " trans type = %s (%u)\n" - " unit res = %d bytes\n" - " current res = %d bytes\n" - " total reg = %u bytes (o/flow = %u bytes)\n" - " ophdrs = %u (ophdr space = %u bytes)\n" - " ophdr + reg = %u bytes\n" - " num regions = %u", - ((ticket->t_trans_type <= 0 || - ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? + xfs_warn(mp, "xlog_write: reservation summary:"); + xfs_warn(mp, " trans type = %s (%u)", + ((ticket->t_trans_type <= 0 || + ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), - ticket->t_trans_type, - ticket->t_unit_res, - ticket->t_curr_res, - ticket->t_res_arr_sum, ticket->t_res_o_flow, - ticket->t_res_num_ophdrs, ophdr_spc, - ticket->t_res_arr_sum + - ticket->t_res_o_flow + ophdr_spc, - ticket->t_res_num); + ticket->t_trans_type); + xfs_warn(mp, " unit res = %d bytes", + ticket->t_unit_res); + xfs_warn(mp, " current res = %d bytes", + ticket->t_curr_res); + xfs_warn(mp, " total reg = %u bytes (o/flow = %u bytes)", + ticket->t_res_arr_sum, ticket->t_res_o_flow); + xfs_warn(mp, " ophdrs = %u (ophdr space = %u bytes)", + ticket->t_res_num_ophdrs, ophdr_spc); + xfs_warn(mp, " ophdr + reg = %u bytes", + ticket->t_res_arr_sum + ticket->t_res_o_flow + ophdr_spc); + xfs_warn(mp, " num regions = %u", + ticket->t_res_num); for (i = 0; i < ticket->t_res_num; i++) { uint r_type = ticket->t_res_arr[i].r_type; @@ -2402,11 +2422,20 @@ xlog_write( &partial_copy_len); xlog_verify_dest_ptr(log, ptr); - /* copy region */ + /* + * Copy region. + * + * Unmount records just log an opheader, so can have + * empty payloads with no data region to copy. Hence we + * only copy the payload if the vector says it has data + * to copy. + */ ASSERT(copy_len >= 0); - memcpy(ptr, reg->i_addr + copy_off, copy_len); - xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len); - + if (copy_len > 0) { + memcpy(ptr, reg->i_addr + copy_off, copy_len); + xlog_write_adv_cnt(&ptr, &len, &log_offset, + copy_len); + } copy_len += start_rec_copy + sizeof(xlog_op_header_t); record_cnt++; data_cnt += contwr ? copy_len : 0; @@ -2893,7 +2922,7 @@ restart: iclog = log->l_iclog; if (iclog->ic_state != XLOG_STATE_ACTIVE) { - XFS_STATS_INC(xs_log_noiclogs); + XFS_STATS_INC(log->l_mp, xs_log_noiclogs); /* Wait for log writes to have flushed */ xlog_wait(&log->l_flush_wait, &log->l_icloglock); @@ -3145,11 +3174,19 @@ xlog_state_switch_iclogs( } if (log->l_curr_block >= log->l_logBBsize) { + /* + * Rewind the current block before the cycle is bumped to make + * sure that the combined LSN never transiently moves forward + * when the log wraps to the next cycle. This is to support the + * unlocked sample of these fields from xlog_valid_lsn(). Most + * other cases should acquire l_icloglock. + */ + log->l_curr_block -= log->l_logBBsize; + ASSERT(log->l_curr_block >= 0); + smp_wmb(); log->l_curr_cycle++; if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM) log->l_curr_cycle++; - log->l_curr_block -= log->l_logBBsize; - ASSERT(log->l_curr_block >= 0); } ASSERT(iclog == log->l_iclog); log->l_iclog = iclog->ic_next; @@ -3192,7 +3229,7 @@ _xfs_log_force( struct xlog_in_core *iclog; xfs_lsn_t lsn; - XFS_STATS_INC(xs_log_force); + XFS_STATS_INC(mp, xs_log_force); xlog_cil_force(log); @@ -3277,7 +3314,7 @@ maybe_sleep: spin_unlock(&log->l_icloglock); return -EIO; } - XFS_STATS_INC(xs_log_force_sleep); + XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); /* * No need to grab the log lock here since we're @@ -3342,7 +3379,7 @@ _xfs_log_force_lsn( ASSERT(lsn != 0); - XFS_STATS_INC(xs_log_force); + XFS_STATS_INC(mp, xs_log_force); lsn = xlog_cil_force_lsn(log, lsn); if (lsn == NULLCOMMITLSN) @@ -3391,7 +3428,7 @@ try_again: (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); - XFS_STATS_INC(xs_log_force_sleep); + XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_prev->ic_write_wait, &log->l_icloglock); @@ -3421,7 +3458,7 @@ try_again: spin_unlock(&log->l_icloglock); return -EIO; } - XFS_STATS_INC(xs_log_force_sleep); + XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); /* * No need to grab the log lock here since we're @@ -3664,7 +3701,7 @@ xlog_ticket_alloc( void xlog_verify_dest_ptr( struct xlog *log, - char *ptr) + void *ptr) { int i; int good_ptr = 0; @@ -3767,9 +3804,8 @@ xlog_verify_iclog( xlog_op_header_t *ophead; xlog_in_core_t *icptr; xlog_in_core_2_t *xhdr; - xfs_caddr_t ptr; - xfs_caddr_t base_ptr; - __psint_t field_offset; + void *base_ptr, *ptr, *p; + ptrdiff_t field_offset; __uint8_t clientid; int len, i, j, k, op_len; int idx; @@ -3788,9 +3824,9 @@ xlog_verify_iclog( if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) xfs_emerg(log->l_mp, "%s: invalid magic num", __func__); - ptr = (xfs_caddr_t) &iclog->ic_header; - for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; - ptr += BBSIZE) { + base_ptr = ptr = &iclog->ic_header; + p = &iclog->ic_header; + for (ptr += BBSIZE; ptr < base_ptr + count; ptr += BBSIZE) { if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) xfs_emerg(log->l_mp, "%s: unexpected magic num", __func__); @@ -3798,20 +3834,19 @@ xlog_verify_iclog( /* check fields */ len = be32_to_cpu(iclog->ic_header.h_num_logops); - ptr = iclog->ic_datap; - base_ptr = ptr; - ophead = (xlog_op_header_t *)ptr; + base_ptr = ptr = iclog->ic_datap; + ophead = ptr; xhdr = iclog->ic_data; for (i = 0; i < len; i++) { - ophead = (xlog_op_header_t *)ptr; + ophead = ptr; /* clientid is only 1 byte */ - field_offset = (__psint_t) - ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr); + p = &ophead->oh_clientid; + field_offset = p - base_ptr; if (!syncing || (field_offset & 0x1ff)) { clientid = ophead->oh_clientid; } else { - idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap); + idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap); if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); @@ -3829,13 +3864,13 @@ xlog_verify_iclog( (unsigned long)field_offset); /* check length */ - field_offset = (__psint_t) - ((xfs_caddr_t)&(ophead->oh_len) - base_ptr); + p = &ophead->oh_len; + field_offset = p - base_ptr; if (!syncing || (field_offset & 0x1ff)) { op_len = be32_to_cpu(ophead->oh_len); } else { - idx = BTOBBT((__psint_t)&ophead->oh_len - - (__psint_t)iclog->ic_datap); + idx = BTOBBT((uintptr_t)&ophead->oh_len - + (uintptr_t)iclog->ic_datap); if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); @@ -4005,3 +4040,45 @@ xlog_iclogs_empty( return 1; } +/* + * Verify that an LSN stamped into a piece of metadata is valid. This is + * intended for use in read verifiers on v5 superblocks. + */ +bool +xfs_log_check_lsn( + struct xfs_mount *mp, + xfs_lsn_t lsn) +{ + struct xlog *log = mp->m_log; + bool valid; + + /* + * norecovery mode skips mount-time log processing and unconditionally + * resets the in-core LSN. We can't validate in this mode, but + * modifications are not allowed anyways so just return true. + */ + if (mp->m_flags & XFS_MOUNT_NORECOVERY) + return true; + + /* + * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is + * handled by recovery and thus safe to ignore here. + */ + if (lsn == NULLCOMMITLSN) + return true; + + valid = xlog_valid_lsn(mp->m_log, lsn); + + /* warn the user about what's gone wrong before verifier failure */ + if (!valid) { + spin_lock(&log->l_icloglock); + xfs_warn(mp, +"Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). " +"Please unmount and run xfs_repair (>= v4.3) to resolve.", + CYCLE_LSN(lsn), BLOCK_LSN(lsn), + log->l_curr_cycle, log->l_curr_block); + spin_unlock(&log->l_icloglock); + } + + return valid; +} |