diff options
author | Yunhong Jiang <yunhong.jiang@intel.com> | 2015-08-04 12:17:53 -0700 |
---|---|---|
committer | Yunhong Jiang <yunhong.jiang@intel.com> | 2015-08-04 15:44:42 -0700 |
commit | 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (patch) | |
tree | 1c9cafbcd35f783a87880a10f85d1a060db1a563 /kernel/fs/ocfs2/extent_map.c | |
parent | 98260f3884f4a202f9ca5eabed40b1354c489b29 (diff) |
Add the rt linux 4.1.3-rt3 as base
Import the rt linux 4.1.3-rt3 as OPNFV kvm base.
It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and
the base is:
commit 0917f823c59692d751951bf5ea699a2d1e2f26a2
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat Jul 25 12:13:34 2015 +0200
Prepare v4.1.3-rt3
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
We lose all the git history this way and it's not good. We
should apply another opnfv project repo in future.
Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Diffstat (limited to 'kernel/fs/ocfs2/extent_map.c')
-rw-r--r-- | kernel/fs/ocfs2/extent_map.c | 994 |
1 files changed, 994 insertions, 0 deletions
diff --git a/kernel/fs/ocfs2/extent_map.c b/kernel/fs/ocfs2/extent_map.c new file mode 100644 index 000000000..767370b65 --- /dev/null +++ b/kernel/fs/ocfs2/extent_map.c @@ -0,0 +1,994 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * extent_map.c + * + * Block/Cluster mapping functions + * + * Copyright (C) 2004 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License, version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/fiemap.h> + +#include <cluster/masklog.h> + +#include "ocfs2.h" + +#include "alloc.h" +#include "dlmglue.h" +#include "extent_map.h" +#include "inode.h" +#include "super.h" +#include "symlink.h" +#include "ocfs2_trace.h" + +#include "buffer_head_io.h" + +/* + * The extent caching implementation is intentionally trivial. + * + * We only cache a small number of extents stored directly on the + * inode, so linear order operations are acceptable. If we ever want + * to increase the size of the extent map, then these algorithms must + * get smarter. + */ + +void ocfs2_extent_map_init(struct inode *inode) +{ + struct ocfs2_inode_info *oi = OCFS2_I(inode); + + oi->ip_extent_map.em_num_items = 0; + INIT_LIST_HEAD(&oi->ip_extent_map.em_list); +} + +static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em, + unsigned int cpos, + struct ocfs2_extent_map_item **ret_emi) +{ + unsigned int range; + struct ocfs2_extent_map_item *emi; + + *ret_emi = NULL; + + list_for_each_entry(emi, &em->em_list, ei_list) { + range = emi->ei_cpos + emi->ei_clusters; + + if (cpos >= emi->ei_cpos && cpos < range) { + list_move(&emi->ei_list, &em->em_list); + + *ret_emi = emi; + break; + } + } +} + +static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos, + unsigned int *phys, unsigned int *len, + unsigned int *flags) +{ + unsigned int coff; + struct ocfs2_inode_info *oi = OCFS2_I(inode); + struct ocfs2_extent_map_item *emi; + + spin_lock(&oi->ip_lock); + + __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi); + if (emi) { + coff = cpos - emi->ei_cpos; + *phys = emi->ei_phys + coff; + if (len) + *len = emi->ei_clusters - coff; + if (flags) + *flags = emi->ei_flags; + } + + spin_unlock(&oi->ip_lock); + + if (emi == NULL) + return -ENOENT; + + return 0; +} + +/* + * Forget about all clusters equal to or greater than cpos. + */ +void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos) +{ + struct ocfs2_extent_map_item *emi, *n; + struct ocfs2_inode_info *oi = OCFS2_I(inode); + struct ocfs2_extent_map *em = &oi->ip_extent_map; + LIST_HEAD(tmp_list); + unsigned int range; + + spin_lock(&oi->ip_lock); + list_for_each_entry_safe(emi, n, &em->em_list, ei_list) { + if (emi->ei_cpos >= cpos) { + /* Full truncate of this record. */ + list_move(&emi->ei_list, &tmp_list); + BUG_ON(em->em_num_items == 0); + em->em_num_items--; + continue; + } + + range = emi->ei_cpos + emi->ei_clusters; + if (range > cpos) { + /* Partial truncate */ + emi->ei_clusters = cpos - emi->ei_cpos; + } + } + spin_unlock(&oi->ip_lock); + + list_for_each_entry_safe(emi, n, &tmp_list, ei_list) { + list_del(&emi->ei_list); + kfree(emi); + } +} + +/* + * Is any part of emi2 contained within emi1 + */ +static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1, + struct ocfs2_extent_map_item *emi2) +{ + unsigned int range1, range2; + + /* + * Check if logical start of emi2 is inside emi1 + */ + range1 = emi1->ei_cpos + emi1->ei_clusters; + if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1) + return 1; + + /* + * Check if logical end of emi2 is inside emi1 + */ + range2 = emi2->ei_cpos + emi2->ei_clusters; + if (range2 > emi1->ei_cpos && range2 <= range1) + return 1; + + return 0; +} + +static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest, + struct ocfs2_extent_map_item *src) +{ + dest->ei_cpos = src->ei_cpos; + dest->ei_phys = src->ei_phys; + dest->ei_clusters = src->ei_clusters; + dest->ei_flags = src->ei_flags; +} + +/* + * Try to merge emi with ins. Returns 1 if merge succeeds, zero + * otherwise. + */ +static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi, + struct ocfs2_extent_map_item *ins) +{ + /* + * Handle contiguousness + */ + if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) && + ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) && + ins->ei_flags == emi->ei_flags) { + emi->ei_clusters += ins->ei_clusters; + return 1; + } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys && + (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos && + ins->ei_flags == emi->ei_flags) { + emi->ei_phys = ins->ei_phys; + emi->ei_cpos = ins->ei_cpos; + emi->ei_clusters += ins->ei_clusters; + return 1; + } + + /* + * Overlapping extents - this shouldn't happen unless we've + * split an extent to change it's flags. That is exceedingly + * rare, so there's no sense in trying to optimize it yet. + */ + if (ocfs2_ei_is_contained(emi, ins) || + ocfs2_ei_is_contained(ins, emi)) { + ocfs2_copy_emi_fields(emi, ins); + return 1; + } + + /* No merge was possible. */ + return 0; +} + +/* + * In order to reduce complexity on the caller, this insert function + * is intentionally liberal in what it will accept. + * + * The only rule is that the truncate call *must* be used whenever + * records have been deleted. This avoids inserting overlapping + * records with different physical mappings. + */ +void ocfs2_extent_map_insert_rec(struct inode *inode, + struct ocfs2_extent_rec *rec) +{ + struct ocfs2_inode_info *oi = OCFS2_I(inode); + struct ocfs2_extent_map *em = &oi->ip_extent_map; + struct ocfs2_extent_map_item *emi, *new_emi = NULL; + struct ocfs2_extent_map_item ins; + + ins.ei_cpos = le32_to_cpu(rec->e_cpos); + ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb, + le64_to_cpu(rec->e_blkno)); + ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters); + ins.ei_flags = rec->e_flags; + +search: + spin_lock(&oi->ip_lock); + + list_for_each_entry(emi, &em->em_list, ei_list) { + if (ocfs2_try_to_merge_extent_map(emi, &ins)) { + list_move(&emi->ei_list, &em->em_list); + spin_unlock(&oi->ip_lock); + goto out; + } + } + + /* + * No item could be merged. + * + * Either allocate and add a new item, or overwrite the last recently + * inserted. + */ + + if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) { + if (new_emi == NULL) { + spin_unlock(&oi->ip_lock); + + new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS); + if (new_emi == NULL) + goto out; + + goto search; + } + + ocfs2_copy_emi_fields(new_emi, &ins); + list_add(&new_emi->ei_list, &em->em_list); + em->em_num_items++; + new_emi = NULL; + } else { + BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0); + emi = list_entry(em->em_list.prev, + struct ocfs2_extent_map_item, ei_list); + list_move(&emi->ei_list, &em->em_list); + ocfs2_copy_emi_fields(emi, &ins); + } + + spin_unlock(&oi->ip_lock); + +out: + kfree(new_emi); +} + +static int ocfs2_last_eb_is_empty(struct inode *inode, + struct ocfs2_dinode *di) +{ + int ret, next_free; + u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk); + struct buffer_head *eb_bh = NULL; + struct ocfs2_extent_block *eb; + struct ocfs2_extent_list *el; + + ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + eb = (struct ocfs2_extent_block *) eb_bh->b_data; + el = &eb->h_list; + + if (el->l_tree_depth) { + ocfs2_error(inode->i_sb, + "Inode %lu has non zero tree depth in " + "leaf block %llu\n", inode->i_ino, + (unsigned long long)eb_bh->b_blocknr); + ret = -EROFS; + goto out; + } + + next_free = le16_to_cpu(el->l_next_free_rec); + + if (next_free == 0 || + (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) + ret = 1; + +out: + brelse(eb_bh); + return ret; +} + +/* + * Return the 1st index within el which contains an extent start + * larger than v_cluster. + */ +static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el, + u32 v_cluster) +{ + int i; + struct ocfs2_extent_rec *rec; + + for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { + rec = &el->l_recs[i]; + + if (v_cluster < le32_to_cpu(rec->e_cpos)) + break; + } + + return i; +} + +/* + * Figure out the size of a hole which starts at v_cluster within the given + * extent list. + * + * If there is no more allocation past v_cluster, we return the maximum + * cluster size minus v_cluster. + * + * If we have in-inode extents, then el points to the dinode list and + * eb_bh is NULL. Otherwise, eb_bh should point to the extent block + * containing el. + */ +int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci, + struct ocfs2_extent_list *el, + struct buffer_head *eb_bh, + u32 v_cluster, + u32 *num_clusters) +{ + int ret, i; + struct buffer_head *next_eb_bh = NULL; + struct ocfs2_extent_block *eb, *next_eb; + + i = ocfs2_search_for_hole_index(el, v_cluster); + + if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) { + eb = (struct ocfs2_extent_block *)eb_bh->b_data; + + /* + * Check the next leaf for any extents. + */ + + if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) + goto no_more_extents; + + ret = ocfs2_read_extent_block(ci, + le64_to_cpu(eb->h_next_leaf_blk), + &next_eb_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data; + el = &next_eb->h_list; + i = ocfs2_search_for_hole_index(el, v_cluster); + } + +no_more_extents: + if (i == le16_to_cpu(el->l_next_free_rec)) { + /* + * We're at the end of our existing allocation. Just + * return the maximum number of clusters we could + * possibly allocate. + */ + *num_clusters = UINT_MAX - v_cluster; + } else { + *num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster; + } + + ret = 0; +out: + brelse(next_eb_bh); + return ret; +} + +static int ocfs2_get_clusters_nocache(struct inode *inode, + struct buffer_head *di_bh, + u32 v_cluster, unsigned int *hole_len, + struct ocfs2_extent_rec *ret_rec, + unsigned int *is_last) +{ + int i, ret, tree_height, len; + struct ocfs2_dinode *di; + struct ocfs2_extent_block *uninitialized_var(eb); + struct ocfs2_extent_list *el; + struct ocfs2_extent_rec *rec; + struct buffer_head *eb_bh = NULL; + + memset(ret_rec, 0, sizeof(*ret_rec)); + if (is_last) + *is_last = 0; + + di = (struct ocfs2_dinode *) di_bh->b_data; + el = &di->id2.i_list; + tree_height = le16_to_cpu(el->l_tree_depth); + + if (tree_height > 0) { + ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster, + &eb_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + eb = (struct ocfs2_extent_block *) eb_bh->b_data; + el = &eb->h_list; + + if (el->l_tree_depth) { + ocfs2_error(inode->i_sb, + "Inode %lu has non zero tree depth in " + "leaf block %llu\n", inode->i_ino, + (unsigned long long)eb_bh->b_blocknr); + ret = -EROFS; + goto out; + } + } + + i = ocfs2_search_extent_list(el, v_cluster); + if (i == -1) { + /* + * Holes can be larger than the maximum size of an + * extent, so we return their lengths in a separate + * field. + */ + if (hole_len) { + ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode), + el, eb_bh, + v_cluster, &len); + if (ret) { + mlog_errno(ret); + goto out; + } + + *hole_len = len; + } + goto out_hole; + } + + rec = &el->l_recs[i]; + + BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); + + if (!rec->e_blkno) { + ocfs2_error(inode->i_sb, "Inode %lu has bad extent " + "record (%u, %u, 0)", inode->i_ino, + le32_to_cpu(rec->e_cpos), + ocfs2_rec_clusters(el, rec)); + ret = -EROFS; + goto out; + } + + *ret_rec = *rec; + + /* + * Checking for last extent is potentially expensive - we + * might have to look at the next leaf over to see if it's + * empty. + * + * The first two checks are to see whether the caller even + * cares for this information, and if the extent is at least + * the last in it's list. + * + * If those hold true, then the extent is last if any of the + * additional conditions hold true: + * - Extent list is in-inode + * - Extent list is right-most + * - Extent list is 2nd to rightmost, with empty right-most + */ + if (is_last) { + if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) { + if (tree_height == 0) + *is_last = 1; + else if (eb->h_blkno == di->i_last_eb_blk) + *is_last = 1; + else if (eb->h_next_leaf_blk == di->i_last_eb_blk) { + ret = ocfs2_last_eb_is_empty(inode, di); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + if (ret == 1) + *is_last = 1; + } + } + } + +out_hole: + ret = 0; +out: + brelse(eb_bh); + return ret; +} + +static void ocfs2_relative_extent_offsets(struct super_block *sb, + u32 v_cluster, + struct ocfs2_extent_rec *rec, + u32 *p_cluster, u32 *num_clusters) + +{ + u32 coff = v_cluster - le32_to_cpu(rec->e_cpos); + + *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno)); + *p_cluster = *p_cluster + coff; + + if (num_clusters) + *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff; +} + +int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, + u32 *p_cluster, u32 *num_clusters, + struct ocfs2_extent_list *el, + unsigned int *extent_flags) +{ + int ret = 0, i; + struct buffer_head *eb_bh = NULL; + struct ocfs2_extent_block *eb; + struct ocfs2_extent_rec *rec; + u32 coff; + + if (el->l_tree_depth) { + ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster, + &eb_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + eb = (struct ocfs2_extent_block *) eb_bh->b_data; + el = &eb->h_list; + + if (el->l_tree_depth) { + ocfs2_error(inode->i_sb, + "Inode %lu has non zero tree depth in " + "xattr leaf block %llu\n", inode->i_ino, + (unsigned long long)eb_bh->b_blocknr); + ret = -EROFS; + goto out; + } + } + + i = ocfs2_search_extent_list(el, v_cluster); + if (i == -1) { + ret = -EROFS; + mlog_errno(ret); + goto out; + } else { + rec = &el->l_recs[i]; + BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); + + if (!rec->e_blkno) { + ocfs2_error(inode->i_sb, "Inode %lu has bad extent " + "record (%u, %u, 0) in xattr", inode->i_ino, + le32_to_cpu(rec->e_cpos), + ocfs2_rec_clusters(el, rec)); + ret = -EROFS; + goto out; + } + coff = v_cluster - le32_to_cpu(rec->e_cpos); + *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, + le64_to_cpu(rec->e_blkno)); + *p_cluster = *p_cluster + coff; + if (num_clusters) + *num_clusters = ocfs2_rec_clusters(el, rec) - coff; + + if (extent_flags) + *extent_flags = rec->e_flags; + } +out: + if (eb_bh) + brelse(eb_bh); + return ret; +} + +int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, + u32 *p_cluster, u32 *num_clusters, + unsigned int *extent_flags) +{ + int ret; + unsigned int uninitialized_var(hole_len), flags = 0; + struct buffer_head *di_bh = NULL; + struct ocfs2_extent_rec rec; + + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + ret = -ERANGE; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, + num_clusters, extent_flags); + if (ret == 0) + goto out; + + ret = ocfs2_read_inode_block(inode, &di_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len, + &rec, NULL); + if (ret) { + mlog_errno(ret); + goto out; + } + + if (rec.e_blkno == 0ULL) { + /* + * A hole was found. Return some canned values that + * callers can key on. If asked for, num_clusters will + * be populated with the size of the hole. + */ + *p_cluster = 0; + if (num_clusters) { + *num_clusters = hole_len; + } + } else { + ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec, + p_cluster, num_clusters); + flags = rec.e_flags; + + ocfs2_extent_map_insert_rec(inode, &rec); + } + + if (extent_flags) + *extent_flags = flags; + +out: + brelse(di_bh); + return ret; +} + +/* + * This expects alloc_sem to be held. The allocation cannot change at + * all while the map is in the process of being updated. + */ +int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, + u64 *ret_count, unsigned int *extent_flags) +{ + int ret; + int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); + u32 cpos, num_clusters, p_cluster; + u64 boff = 0; + + cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno); + + ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters, + extent_flags); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* + * p_cluster == 0 indicates a hole. + */ + if (p_cluster) { + boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); + boff += (v_blkno & (u64)(bpc - 1)); + } + + *p_blkno = boff; + + if (ret_count) { + *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); + *ret_count -= v_blkno & (u64)(bpc - 1); + } + +out: + return ret; +} + +/* + * The ocfs2_fiemap_inline() may be a little bit misleading, since + * it not only handles the fiemap for inlined files, but also deals + * with the fast symlink, cause they have no difference for extent + * mapping per se. + */ +static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, + struct fiemap_extent_info *fieinfo, + u64 map_start) +{ + int ret; + unsigned int id_count; + struct ocfs2_dinode *di; + u64 phys; + u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST; + struct ocfs2_inode_info *oi = OCFS2_I(inode); + + di = (struct ocfs2_dinode *)di_bh->b_data; + if (ocfs2_inode_is_fast_symlink(inode)) + id_count = ocfs2_fast_symlink_chars(inode->i_sb); + else + id_count = le16_to_cpu(di->id2.i_data.id_count); + + if (map_start < id_count) { + phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; + if (ocfs2_inode_is_fast_symlink(inode)) + phys += offsetof(struct ocfs2_dinode, id2.i_symlink); + else + phys += offsetof(struct ocfs2_dinode, + id2.i_data.id_data); + + ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, + flags); + if (ret < 0) + return ret; + } + + return 0; +} + +#define OCFS2_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) + +int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 map_start, u64 map_len) +{ + int ret, is_last; + u32 mapping_end, cpos; + unsigned int hole_size; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + u64 len_bytes, phys_bytes, virt_bytes; + struct buffer_head *di_bh = NULL; + struct ocfs2_extent_rec rec; + + ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS); + if (ret) + return ret; + + ret = ocfs2_inode_lock(inode, &di_bh, 0); + if (ret) { + mlog_errno(ret); + goto out; + } + + down_read(&OCFS2_I(inode)->ip_alloc_sem); + + /* + * Handle inline-data and fast symlink separately. + */ + if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) || + ocfs2_inode_is_fast_symlink(inode)) { + ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); + goto out_unlock; + } + + cpos = map_start >> osb->s_clustersize_bits; + mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, + map_start + map_len); + is_last = 0; + while (cpos < mapping_end && !is_last) { + u32 fe_flags; + + ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, + &hole_size, &rec, &is_last); + if (ret) { + mlog_errno(ret); + goto out_unlock; + } + + if (rec.e_blkno == 0ULL) { + cpos += hole_size; + continue; + } + + fe_flags = 0; + if (rec.e_flags & OCFS2_EXT_UNWRITTEN) + fe_flags |= FIEMAP_EXTENT_UNWRITTEN; + if (rec.e_flags & OCFS2_EXT_REFCOUNTED) + fe_flags |= FIEMAP_EXTENT_SHARED; + if (is_last) + fe_flags |= FIEMAP_EXTENT_LAST; + len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; + phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits; + virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits; + + ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes, + len_bytes, fe_flags); + if (ret) + break; + + cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters); + } + + if (ret > 0) + ret = 0; + +out_unlock: + brelse(di_bh); + + up_read(&OCFS2_I(inode)->ip_alloc_sem); + + ocfs2_inode_unlock(inode, 0); +out: + + return ret; +} + +int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) +{ + struct inode *inode = file->f_mapping->host; + int ret; + unsigned int is_last = 0, is_data = 0; + u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; + u32 cpos, cend, clen, hole_size; + u64 extoff, extlen; + struct buffer_head *di_bh = NULL; + struct ocfs2_extent_rec rec; + + BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE); + + ret = ocfs2_inode_lock(inode, &di_bh, 0); + if (ret) { + mlog_errno(ret); + goto out; + } + + down_read(&OCFS2_I(inode)->ip_alloc_sem); + + if (*offset >= i_size_read(inode)) { + ret = -ENXIO; + goto out_unlock; + } + + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + if (whence == SEEK_HOLE) + *offset = i_size_read(inode); + goto out_unlock; + } + + clen = 0; + cpos = *offset >> cs_bits; + cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)); + + while (cpos < cend && !is_last) { + ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size, + &rec, &is_last); + if (ret) { + mlog_errno(ret); + goto out_unlock; + } + + extoff = cpos; + extoff <<= cs_bits; + + if (rec.e_blkno == 0ULL) { + clen = hole_size; + is_data = 0; + } else { + clen = le16_to_cpu(rec.e_leaf_clusters) - + (cpos - le32_to_cpu(rec.e_cpos)); + is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1; + } + + if ((!is_data && whence == SEEK_HOLE) || + (is_data && whence == SEEK_DATA)) { + if (extoff > *offset) + *offset = extoff; + goto out_unlock; + } + + if (!is_last) + cpos += clen; + } + + if (whence == SEEK_HOLE) { + extoff = cpos; + extoff <<= cs_bits; + extlen = clen; + extlen <<= cs_bits; + + if ((extoff + extlen) > i_size_read(inode)) + extlen = i_size_read(inode) - extoff; + extoff += extlen; + if (extoff > *offset) + *offset = extoff; + goto out_unlock; + } + + ret = -ENXIO; + +out_unlock: + + brelse(di_bh); + + up_read(&OCFS2_I(inode)->ip_alloc_sem); + + ocfs2_inode_unlock(inode, 0); +out: + return ret; +} + +int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, + struct buffer_head *bhs[], int flags, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)) +{ + int rc = 0; + u64 p_block, p_count; + int i, count, done = 0; + + trace_ocfs2_read_virt_blocks( + inode, (unsigned long long)v_block, nr, bhs, flags, + validate); + + if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >= + i_size_read(inode)) { + BUG_ON(!(flags & OCFS2_BH_READAHEAD)); + goto out; + } + + while (done < nr) { + down_read(&OCFS2_I(inode)->ip_alloc_sem); + rc = ocfs2_extent_map_get_blocks(inode, v_block + done, + &p_block, &p_count, NULL); + up_read(&OCFS2_I(inode)->ip_alloc_sem); + if (rc) { + mlog_errno(rc); + break; + } + + if (!p_block) { + rc = -EIO; + mlog(ML_ERROR, + "Inode #%llu contains a hole at offset %llu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)(v_block + done) << + inode->i_sb->s_blocksize_bits); + break; + } + + count = nr - done; + if (p_count < count) + count = p_count; + + /* + * If the caller passed us bhs, they should have come + * from a previous readahead call to this function. Thus, + * they should have the right b_blocknr. + */ + for (i = 0; i < count; i++) { + if (!bhs[done + i]) + continue; + BUG_ON(bhs[done + i]->b_blocknr != (p_block + i)); + } + + rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count, + bhs + done, flags, validate); + if (rc) { + mlog_errno(rc); + break; + } + done += count; + } + +out: + return rc; +} + + |