/*
 * linux/fs/block_dev.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2001 Andrea Arcangeli SuSE
 */

/*
 * NOTE(review): the directives on the next line carry no header name in
 * this copy of the file; only "internal.h" survived.  The names have to be
 * restored from the original source before this can be built.
 */
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include
#include "internal.h"

/*
 * A block_device and a VFS inode held in one structure.  BDEV_I() and
 * I_BDEV() below go from the inode to the container and to the embedded
 * block_device respectively.
 */
struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};

/* Declared early; the initializer appears further down in this file. */
static const struct address_space_operations def_blk_aops;

/* Map an inode that is the vfs_inode member of a bdev_inode to its container. */
static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
	return container_of(inode, struct bdev_inode, vfs_inode);
}

/* Return the block_device stored in the same bdev_inode as @inode. */
struct block_device *I_BDEV(struct inode *inode)
{
	return &BDEV_I(inode)->bdev;
}
EXPORT_SYMBOL(I_BDEV);

/*
 * Write @bdev's inode back for as long as I_DIRTY is set in its i_state.
 *
 * i_state is only examined with inode->i_lock held; the lock is dropped
 * around write_inode_now().  A failed writeback is reported with a
 * rate-limited warning and does not by itself end the loop.
 */
static void bdev_write_inode(struct block_device *bdev)
{
	struct inode *inode = bdev->bd_inode;
	int ret;

	spin_lock(&inode->i_lock);
	while (inode->i_state & I_DIRTY) {
		spin_unlock(&inode->i_lock);
		ret = write_inode_now(inode, true);
		if (ret) {
			char name[BDEVNAME_SIZE];
			pr_warn_ratelimited("VFS: Dirty inode writeback failed "
					    "for block device %s (err=%d).\n",
					    bdevname(bdev, name), ret);
		}
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
}

/* Kill _all_ buffers and pagecache, dirty or not. */
void kill_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	/* nothing to do when the mapping has neither pages nor shadow entries */
	if (mapping->nrpages == 0 && mapping->nrshadows == 0)
		return;

	invalidate_bh_lrus();
	truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(kill_bdev);

/* Invalidate clean unused buffers and pagecache. */
void invalidate_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages == 0)
		return;

	invalidate_bh_lrus();
	lru_add_drain_all();	/* make sure all lru add caches are flushed */
	invalidate_mapping_pages(mapping, 0, -1);
	/* 99% of the time, we don't need to flush the cleancache on the bdev.
	 * But, for the strange corners, let's be cautious
	 */
	cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(invalidate_bdev);

/*
 * Set the block size of @bdev to @size.
 *
 * Returns 0 on success (including when @size already is the current block
 * size) and -EINVAL when @size is rejected by one of the checks below.
 * When the size really changes, the device is synced first and its cached
 * pages are dropped with kill_bdev() afterwards.
 */
int set_blocksize(struct block_device *bdev, int size)
{
	/* Size must be a power of two, and between 512 and PAGE_SIZE */
	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
		return -EINVAL;

	/* Size cannot be smaller than the size supported by the device */
	if (size < bdev_logical_block_size(bdev))
		return -EINVAL;

	/* Don't change the size if it is same as current */
	if (bdev->bd_block_size != size) {
		sync_blockdev(bdev);
		bdev->bd_block_size = size;
		bdev->bd_inode->i_blkbits = blksize_bits(size);
		kill_bdev(bdev);
	}
	return 0;
}
EXPORT_SYMBOL(set_blocksize);

/*
 * Set the block size of the device behind @sb and mirror it in the
 * superblock.  Returns the new block size, or 0 if set_blocksize() failed.
 */
int sb_set_blocksize(struct super_block *sb, int size)
{
	if (set_blocksize(sb->s_bdev, size))
		return 0;
	/* If we get here, we know size is power of two
	 * and its value is between 512 and PAGE_SIZE */
	sb->s_blocksize = size;
	sb->s_blocksize_bits = blksize_bits(size);
	return sb->s_blocksize;
}
EXPORT_SYMBOL(sb_set_blocksize);

/*
 * Like sb_set_blocksize(), but @size is first raised to the logical block
 * size of the device if it is smaller than that.
 */
int sb_min_blocksize(struct super_block *sb, int size)
{
	int minsize = bdev_logical_block_size(sb->s_bdev);
	if (size < minsize)
		size = minsize;
	return sb_set_blocksize(sb, size);
}
EXPORT_SYMBOL(sb_min_blocksize);

/*
 * get_block callback for the block device itself: block @iblock of the
 * inode is block @iblock of the device.  @create is not looked at and the
 * function always returns 0.
 */
static int
blkdev_get_block(struct inode *inode, sector_t iblock,
		 struct buffer_head *bh, int create)
{
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}

/*
 * Direct I/O entry point: DAX inodes go through dax_do_io(), everything
 * else through __blockdev_direct_IO().  Both paths use blkdev_get_block()
 * and pass DIO_SKIP_DIO_COUNT.
 */
static ssize_t
blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;

	if (IS_DAX(inode))
		return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
				 NULL, DIO_SKIP_DIO_COUNT);
	return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
				    blkdev_get_block, NULL, NULL,
				    DIO_SKIP_DIO_COUNT);
}

/*
 * Start writeback of @bdev's mapping (@wait == 0, via filemap_flush()) or
 * write it out and wait (@wait != 0, via filemap_write_and_wait()).
 * A NULL @bdev is accepted and yields 0.
 */
int __sync_blockdev(struct block_device *bdev, int wait)
{
	if (!bdev)
		return 0;
	if (!wait)
		return filemap_flush(bdev->bd_inode->i_mapping);
	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
}

/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping. Does not take the superblock lock.
 */
int sync_blockdev(struct block_device *bdev)
{
	return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);

/*
 * Write out and wait upon all dirty data associated with this
 * device. Filesystem data as well as the underlying block
 * device. Takes the superblock lock.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	if (sb) {
		int res = sync_filesystem(sb);
		drop_super(sb);
		return res;
	}
	/* no superblock found for this device: sync the device only */
	return sync_blockdev(bdev);
}
EXPORT_SYMBOL(fsync_bdev);

/**
 * freeze_bdev -- lock a filesystem and force it into a consistent state
 * @bdev: blockdevice to lock
 *
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can unfreeze the frozen filesystem actually when multiple
 * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
 * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
 * actually.
 */
/* NOTE(review): this copy of the file breaks off in the middle of the next declaration. */
struct super_block *freeze_bdev(str
#ifndef _STRONGARM_H
#define _STRONGARM_H

#include "exec/memory.h"

/*
 * Base addresses of the SA_* regions.  The SA_CSn and SA_SDCSn bases are
 * 0x08000000 apart within their runs; the two SA_PCMCIA_CSn bases are
 * 0x10000000 apart.
 * (Presumably the SA-1110 physical memory map: static chip selects, PCMCIA
 * sockets and SDRAM banks -- confirm against the datasheet.)
 */
#define SA_CS0          0x00000000
#define SA_CS1          0x08000000
#define SA_CS2          0x10000000
#define SA_CS3          0x18000000
#define SA_PCMCIA_CS0   0x20000000
#define SA_PCMCIA_CS1   0x30000000
#define SA_CS4          0x40000000
#define SA_CS5          0x48000000
/* system registers here (no base constant is defined for them in this header) */
#define SA_SDCS0        0xc0000000
#define SA_SDCS1        0xc8000000
#define SA_SDCS2        0xd0000000
#define SA_SDCS3        0xd8000000

/*
 * Indices of the SA_PIC_* sources, numbered consecutively from 0 to 31.
 * Every value is spelled out so that an entry can be matched to its number
 * without counting lines.
 * (Presumably the input lines of the interrupt controller -- confirm
 * against the PIC device model.)
 */
enum {
    SA_PIC_GPIO0_EDGE  = 0,
    SA_PIC_GPIO1_EDGE  = 1,
    SA_PIC_GPIO2_EDGE  = 2,
    SA_PIC_GPIO3_EDGE  = 3,
    SA_PIC_GPIO4_EDGE  = 4,
    SA_PIC_GPIO5_EDGE  = 5,
    SA_PIC_GPIO6_EDGE  = 6,
    SA_PIC_GPIO7_EDGE  = 7,
    SA_PIC_GPIO8_EDGE  = 8,
    SA_PIC_GPIO9_EDGE  = 9,
    SA_PIC_GPIO10_EDGE = 10,
    SA_PIC_GPIOX_EDGE  = 11,
    SA_PIC_LCD         = 12,
    SA_PIC_UDC         = 13,
    SA_PIC_RSVD1       = 14,
    SA_PIC_UART1       = 15,
    SA_PIC_UART2       = 16,
    SA_PIC_UART3       = 17,
    SA_PIC_MCP         = 18,
    SA_PIC_SSP         = 19,
    SA_PIC_DMA_CH0     = 20,
    SA_PIC_DMA_CH1     = 21,
    SA_PIC_DMA_CH2     = 22,
    SA_PIC_DMA_CH3     = 23,
    SA_PIC_DMA_CH4     = 24,
    SA_PIC_DMA_CH5     = 25,
    SA_PIC_OSTC0       = 26,
    SA_PIC_OSTC1       = 27,
    SA_PIC_OSTC2       = 28,
    SA_PIC_OSTC3       = 29,
    SA_PIC_RTC_HZ      = 30,
    SA_PIC_RTC_ALARM   = 31,
};

/*
 * State handed back by sa1110_init().
 *
 * NOTE(review): ARMCPU, DeviceState and SSIBus are used here, but the only
 * include visible in this header is "exec/memory.h"; whether that is enough
 * to declare them cannot be told from this file -- confirm the header is
 * self-contained.
 */
typedef struct {
    ARMCPU *cpu;            /* the CPU object */
    MemoryRegion sdram;     /* held by value, not by pointer */
    DeviceState *pic;       /* presumably the interrupt controller (cf. SA_PIC_*) -- confirm */
    DeviceState *gpio;      /* presumably the GPIO block -- confirm */
    DeviceState *ppc;       /* NOTE(review): meaning not evident from this header */
    DeviceState *ssp;       /* presumably the SSP device -- confirm */
    SSIBus *ssp_bus;        /* presumably the bus exposed by @ssp -- confirm */
} StrongARMState;

/*
 * Declared here; the definition is not part of this header.
 *
 * NOTE(review): judging by the names, this sets up an SA-1110 system inside
 * @sysmem with @sdram_size of SDRAM for CPU revision @rev and returns its
 * state -- confirm against the definition, including the unit of
 * @sdram_size and who owns the returned pointer.
 */
StrongARMState *sa1110_init(MemoryRegion *sysmem,
                            unsigned int sdram_size, const char *rev);

#endif
equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Prepare to claim @bdev. This function fails if @bdev is already
 * claimed by another holder and waits if another claiming is in
 * progress. This function doesn't actually claim. On successful
 * return, the caller has ownership of bd_claiming and bd_holder[s].
 *
 * CONTEXT:
 * spin_lock(&bdev_lock). Might release bdev_lock, sleep and regrab
 * it multiple times.
 *
 * RETURNS:
 * 0 if @bdev can be claimed, -EBUSY otherwise.
 */
static int bd_prepare_to_claim(struct block_device *bdev,
			       struct block_device *whole, void *holder)
{
retry:
	/* if someone else claimed, fail */
	if (!bd_may_claim(bdev, whole, holder))
		return -EBUSY;

	/* if claiming is already in progress, wait for it to finish */
	if (whole->bd_claiming) {
		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
		DEFINE_WAIT(wait);

		/*
		 * Queue ourselves before dropping bdev_lock, sleep without
		 * the lock, and take it again before re-checking everything
		 * from the top.
		 */
		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&bdev_lock);
		schedule();
		finish_wait(wq, &wait);
		spin_lock(&bdev_lock);
		goto retry;
	}

	/* yay, all mine */
	return 0;
}

/**
 * bd_start_claiming - start claiming a block device
 * @bdev: block device of interest
 * @holder: holder trying to claim @bdev
 *
 * @bdev is about to be opened exclusively. Check @bdev can be opened
 * exclusively and mark that an exclusive open is in progress. Each
 * successful call to this function must be matched with a call to
 * either bd_finish_claiming() or bd_abort_claiming() (which do not
 * fail).
 *
 * This function is used to gain exclusive access to the block device
 * without actually causing other exclusive open attempts to fail. It
 * should be used when the open sequence itself requires exclusive
 * access but may subsequently fail.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to the block device containing @bdev on success, ERR_PTR()
 * value on failure.
*/
static struct block_device *bd_start_claiming(struct block_device *bdev,
					      void *holder)
{
	struct gendisk *disk;
	struct block_device *whole;
	int partno, err;

	might_sleep();

	/*
	 * @bdev might not have been initialized properly yet, look up
	 * and grab the outer block device the hard way.
	 */
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		return ERR_PTR(-ENXIO);

	/*
	 * Normally, @bdev should equal what's returned from bdget_disk()
	 * if partno is 0; however, some drivers (floppy) use multiple
	 * bdev's for the same physical device and @bdev may be one of the
	 * aliases. Keep @bdev if partno is 0. This means claimer
	 * tracking is broken for those devices but it has always been that
	 * way.
	 */
	if (partno)
		whole = bdget_disk(disk, 0);
	else
		whole = bdgrab(bdev);

	/*
	 * The disk was only needed to find @whole.
	 * (Presumably these two calls drop what get_gendisk() took -- confirm.)
	 */
	module_put(disk->fops->owner);
	put_disk(disk);
	if (!whole)
		return ERR_PTR(-ENOMEM);

	/* prepare to claim, if successful, mark claiming in progress */
	spin_lock(&bdev_lock);

	err = bd_prepare_to_claim(bdev, whole, holder);
	if (err == 0) {
		whole->bd_claiming = holder;
		spin_unlock(&bdev_lock);
		return whole;
	} else {
		/* the reference on @whole is given up again on failure */
		spin_unlock(&bdev_lock);
		bdput(whole);
		return ERR_PTR(err);
	}
}

#ifdef CONFIG_SYSFS
/*
 * One entry of bdev->bd_holder_disks: a holding disk and the number of
 * times bd_link_disk_holder() has been called for it.
 */
struct bd_holder_disk {
	struct list_head list;
	struct gendisk *disk;
	int refcnt;
};

/* Find the bd_holder_disk entry of @bdev that refers to @disk, or NULL. */
static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
						  struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
		if (holder->disk == disk)
			return holder;
	return NULL;
}

/* Create a sysfs link in @from that points at @to and carries @to's name. */
static int add_symlink(struct kobject *from, struct kobject *to)
{
	return sysfs_create_link(from, to, kobject_name(to));
}

/* Remove the link created by add_symlink() for the same pair. */
static void del_symlink(struct kobject *from, struct kobject *to)
{
	sysfs_remove_link(from, kobject_name(to));
}

/**
 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * This functions creates the following sysfs symlinks.
*
 * - from "slaves" directory of the holder @disk to the claimed @bdev
 * - from "holders" directory of the @bdev to the holder @disk
 *
 * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
 * passed to bd_link_disk_holder(), then:
 *
 * /sys/block/dm-0/slaves/sda --> /sys/block/sda
 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 *
 * The caller must have claimed @bdev before calling this function and
 * ensure that both @bdev and @disk are valid during the creation and
 * lifetime of these symlinks.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;
	int ret = 0;

	mutex_lock(&bdev->bd_mutex);

	WARN_ON_ONCE(!bdev->bd_holder);

	/* FIXME: remove the following once add_disk() handles errors */
	if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
		goto out_unlock;

	/* an existing entry for @disk only gets its count bumped */
	holder = bd_find_holder_disk(bdev, disk);
	if (holder) {
		holder->refcnt++;
		goto out_unlock;
	}

	holder = kzalloc(sizeof(*holder), GFP_KERNEL);
	if (!holder) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	INIT_LIST_HEAD(&holder->list);
	holder->disk = disk;
	holder->refcnt = 1;

	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
	if (ret)
		goto out_free;

	ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
	if (ret)
		goto out_del;
	/*
	 * bdev could be deleted beneath us which would implicitly destroy
	 * the holder directory. Hold on to it.
	 */
	kobject_get(bdev->bd_part->holder_dir);

	list_add(&holder->list, &bdev->bd_holder_disks);
	goto out_unlock;

	/* error unwinding: undo in reverse order of setup */
out_del:
	del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
out_free:
	kfree(holder);
out_unlock:
	mutex_unlock(&bdev->bd_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);

/**
 * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
 * @bdev: the calimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * CONTEXT:
 * Might sleep.
*/
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	mutex_lock(&bdev->bd_mutex);

	holder = bd_find_holder_disk(bdev, disk);

	/*
	 * A missing entry only triggers a one-time warning.  Otherwise the
	 * links, the kobject reference and the entry itself go away once
	 * the count reaches zero.
	 */
	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
		del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
		del_symlink(bdev->bd_part->holder_dir,
			    &disk_to_dev(disk)->kobj);
		kobject_put(bdev->bd_part->holder_dir);
		list_del_init(&holder->list);
		kfree(holder);
	}

	mutex_unlock(&bdev->bd_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif

/**
 * flush_disk - invalidates all buffer-cache entries on a disk
 *
 * @bdev: struct block device to be flushed
 * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Invalidates all buffer-cache entries on a disk. It should be called
 * when a disk has been changed -- either by a media change or online
 * resize.
 */
static void flush_disk(struct block_device *bdev, bool kill_dirty)
{
	if (__invalidate_device(bdev, kill_dirty)) {
		/* stays empty in the message when there is no bd_disk */
		char name[BDEVNAME_SIZE] = "";

		if (bdev->bd_disk)
			disk_name(bdev->bd_disk, 0, name);
		printk(KERN_WARNING "VFS: busy inodes on changed media or "
		       "resized disk %s\n", name);
	}

	if (!bdev->bd_disk)
		return;
	/* flag the device so that a later open rescans its partitions */
	if (disk_part_scan_enabled(bdev->bd_disk))
		bdev->bd_invalidated = 1;
}

/**
 * check_disk_size_change - checks for disk size change and adjusts bdev size.
 * @disk: struct gendisk to check
 * @bdev: struct bdev to adjust.
 *
 * This routine checks to see if the bdev size does not match the disk size
 * and adjusts it if it differs.
*/ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) { loff_t disk_size, bdev_size; disk_size = (loff_t)get_capacity(disk) << 9; bdev_size = i_size_read(bdev->bd_inode); if (disk_size != bdev_size) { char name[BDEVNAME_SIZE]; disk_name(disk, 0, name); printk(KERN_INFO "%s: detected capacity change from %lld to %lld\n", name, bdev_size, disk_size); i_size_write(bdev->bd_inode, disk_size); flush_disk(bdev, false); } } EXPORT_SYMBOL(check_disk_size_change); /** * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back * @disk: struct gendisk to be revalidated * * This routine is a wrapper for lower-level driver's revalidate_disk * call-backs. It is used to do common pre and post operations needed * for all revalidate_disk operations. */ int revalidate_disk(struct gendisk *disk) { struct block_device *bdev; int ret = 0; if (disk->fops->revalidate_disk) ret = disk->fops->revalidate_disk(disk); blk_integrity_revalidate(disk); bdev = bdget_disk(disk, 0); if (!bdev) return ret; mutex_lock(&bdev->bd_mutex); check_disk_size_change(disk, bdev); bdev->bd_invalidated = 0; mutex_unlock(&bdev->bd_mutex); bdput(bdev); return ret; } EXPORT_SYMBOL(revalidate_disk); /* * This routine checks whether a removable media has been changed, * and invalidates all buffer-cache-entries in that case. This * is a relatively slow routine, so we have to try to minimize using * it. Thus it is called only upon a 'mount' or 'open'. This * is the best way of combining speed and utility, I think. 
* People changing diskettes in the middle of an operation deserve
 * to lose :-)
 */
int check_disk_change(struct block_device *bdev)
{
	struct gendisk *disk = bdev->bd_disk;
	const struct block_device_operations *bdops = disk->fops;
	unsigned int events;

	/* both event types are cleared, but only a media change is acted upon */
	events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
				   DISK_EVENT_EJECT_REQUEST);
	if (!(events & DISK_EVENT_MEDIA_CHANGE))
		return 0;

	flush_disk(bdev, true);
	if (bdops->revalidate_disk)
		bdops->revalidate_disk(bdev->bd_disk);
	return 1;
}

EXPORT_SYMBOL(check_disk_change);

/*
 * Store @size as the inode size of @bdev (under i_mutex) and pick the block
 * size: starting from the logical block size, keep doubling while the
 * result is still below PAGE_CACHE_SIZE and @size does not have that bit
 * set.
 */
void bd_set_size(struct block_device *bdev, loff_t size)
{
	unsigned bsize = bdev_logical_block_size(bdev);

	mutex_lock(&bdev->bd_inode->i_mutex);
	i_size_write(bdev->bd_inode, size);
	mutex_unlock(&bdev->bd_inode->i_mutex);
	while (bsize < PAGE_CACHE_SIZE) {
		if (size & bsize)
			break;
		bsize <<= 1;
	}
	bdev->bd_block_size = bsize;
	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}
EXPORT_SYMBOL(bd_set_size);

static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

/*
 * bd_mutex locking:
 *
 * mutex_lock(part->bd_mutex)
 * mutex_lock_nested(whole->bd_mutex, 1)
 */

/*
 * Open @bdev.  @for_part is non-zero when the whole device is being opened
 * on behalf of one of its partitions (see the recursive call below).
 *
 * Returns 0 on success.  On every failure path the function ends with
 * bdput(bdev), i.e. the caller's reference on @bdev is dropped.
 */
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk;
	struct module *owner;
	int ret;
	int partno;
	int perm = 0;

	if (mode & FMODE_READ)
		perm |= MAY_READ;
	if (mode & FMODE_WRITE)
		perm |= MAY_WRITE;
	/*
	 * hooks: /n/, see "layering violations".
	 */
	if (!for_part) {
		ret = devcgroup_inode_permission(bdev->bd_inode, perm);
		if (ret != 0) {
			bdput(bdev);
			return ret;
		}
	}

 restart:

	ret = -ENXIO;
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		goto out;
	owner = disk->fops->owner;

	disk_block_events(disk);
	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (!bdev->bd_openers) {
		/* first opener: set the block_device up from scratch */
		bdev->bd_disk = disk;
		bdev->bd_queue = disk->queue;
		bdev->bd_contains = bdev;
		bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
		if (!partno) {
			/* whole device */
			ret = -ENXIO;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!bdev->bd_part)
				goto out_clear;

			ret = 0;
			if (disk->fops->open) {
				ret = disk->fops->open(bdev, mode);
				if (ret == -ERESTARTSYS) {
					/* Lost a race with 'disk' being
					 * deleted, try again.
					 * See md.c
					 */
					disk_put_part(bdev->bd_part);
					bdev->bd_part = NULL;
					bdev->bd_disk = NULL;
					bdev->bd_queue = NULL;
					mutex_unlock(&bdev->bd_mutex);
					disk_unblock_events(disk);
					put_disk(disk);
					module_put(owner);
					goto restart;
				}
			}

			if (!ret)
				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);

			/*
			 * If the device is invalidated, rescan partition
			 * if open succeeded or failed with -ENOMEDIUM.
			 * The latter is necessary to prevent ghost
			 * partitions on a removed medium.
			 */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(disk, bdev);
			}
			if (ret)
				goto out_clear;
		} else {
			/* partition: open the whole device first */
			struct block_device *whole;
			whole = bdget_disk(disk, 0);
			ret = -ENOMEM;
			if (!whole)
				goto out_clear;
			BUG_ON(for_part);
			ret = __blkdev_get(whole, mode, 1);
			if (ret)
				goto out_clear;
			bdev->bd_contains = whole;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!(disk->flags & GENHD_FL_UP) ||
			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
				ret = -ENXIO;
				goto out_clear;
			}
			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
			/*
			 * If the partition is not aligned on a page
			 * boundary, we can't do dax I/O to it.
			 */
			if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
			    (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
				bdev->bd_inode->i_flags &= ~S_DAX;
		}
	} else {
		/* already open: only whole devices call the driver again */
		if (bdev->bd_contains == bdev) {
			ret = 0;
			if (bdev->bd_disk->fops->open)
				ret = bdev->bd_disk->fops->open(bdev, mode);
			/* the same as first opener case, read comment there */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(bdev->bd_disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(bdev->bd_disk, bdev);
			}
			if (ret)
				goto out_unlock_bdev;
		}
		/* only one opener holds refs to the module and disk */
		put_disk(disk);
		module_put(owner);
	}
	bdev->bd_openers++;
	if (for_part)
		bdev->bd_part_count++;
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	return 0;

	/* undo the first-opener setup, including the open of the whole device */
 out_clear:
	disk_put_part(bdev->bd_part);
	bdev->bd_disk = NULL;
	bdev->bd_part = NULL;
	bdev->bd_queue = NULL;
	if (bdev != bdev->bd_contains)
		__blkdev_put(bdev->bd_contains, mode, 1);
	bdev->bd_contains = NULL;
 out_unlock_bdev:
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	put_disk(disk);
	module_put(owner);
 out:
	bdput(bdev);

	return ret;
}

/**
 * blkdev_get - open a block device
 * @bdev: block_device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is
 * open with exclusive access. Specifying %FMODE_EXCL with %NULL
 * @holder is invalid. Exclusive opens may nest for the same @holder.
 *
 * On success, the reference count of @bdev is unchanged. On failure,
 * @bdev is put.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
*/
int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
	struct block_device *whole = NULL;
	int res;

	WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);

	/* exclusive open: mark a claim as being in progress before opening */
	if ((mode & FMODE_EXCL) && holder) {
		whole = bd_start_claiming(bdev, holder);
		if (IS_ERR(whole)) {
			bdput(bdev);
			return PTR_ERR(whole);
		}
	}

	res = __blkdev_get(bdev, mode, 0);

	if (whole) {
		struct gendisk *disk = whole->bd_disk;

		/*
		 * NOTE(review): when __blkdev_get() fails it has already done
		 * bdput(bdev) (see its "out" label), yet @bdev is dereferenced
		 * below -- confirm that something else keeps @bdev alive here.
		 */
		/* finish claiming */
		mutex_lock(&bdev->bd_mutex);
		spin_lock(&bdev_lock);

		if (!res) {
			BUG_ON(!bd_may_claim(bdev, whole, holder));
			/*
			 * Note that for a whole device bd_holders
			 * will be incremented twice, and bd_holder
			 * will be set to bd_may_claim before being
			 * set to holder
			 */
			whole->bd_holders++;
			whole->bd_holder = bd_may_claim;
			bdev->bd_holders++;
			bdev->bd_holder = holder;
		}

		/* tell others that we're done */
		BUG_ON(whole->bd_claiming != holder);
		whole->bd_claiming = NULL;
		wake_up_bit(&whole->bd_claiming, 0);

		spin_unlock(&bdev_lock);

		/*
		 * Block event polling for write claims if requested. Any
		 * write holder makes the write_holder state stick until
		 * all are released. This is good enough and tracking
		 * individual writeable reference is too fragile given the
		 * way @mode is used in blkdev_get/put().
		 */
		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
		    (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
			bdev->bd_write_holder = true;
			disk_block_events(disk);
		}

		mutex_unlock(&bdev->bd_mutex);
		/* drop the reference returned by bd_start_claiming() */
		bdput(whole);
	}

	return res;
}
EXPORT_SYMBOL(blkdev_get);

/**
 * blkdev_get_by_path - open a block device by name
 * @path: path to the block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by the device file at @path. @mode
 * and @holder are identical to blkdev_get().
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
*/
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
					void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = lookup_bdev(path);
	if (IS_ERR(bdev))
		return bdev;

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	/* a write open of a read-only device is undone and refused */
	if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
		blkdev_put(bdev, mode);
		return ERR_PTR(-EACCES);
	}

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_path);

/**
 * blkdev_get_by_dev - open a block device by device number
 * @dev: device number of block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by device number @dev. @mode and
 * @holder are identical to blkdev_get().
 *
 * Use it ONLY if you really do not have anything better - i.e. when
 * you are behind a truly sucky interface and all you are given is a
 * device number. _Never_ to be used for internal purposes. If you
 * ever need it - reconsider your API.
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */
struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = bdget(dev);
	if (!bdev)
		return ERR_PTR(-ENOMEM);

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_dev);

/*
 * ->open() of def_blk_fops: translate the O_* open flags into FMODE_* bits
 * and open the block device behind @inode with @filp as the holder.
 */
static int blkdev_open(struct inode * inode, struct file * filp)
{
	struct block_device *bdev;

	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly. Some mkfs
	 * binary needs it. We might want to drop this workaround
	 * during an unstable branch.
	 */
	filp->f_flags |= O_LARGEFILE;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
	if (filp->f_flags & O_EXCL)
		filp->f_mode |= FMODE_EXCL;
	/* access mode 3 is mapped to FMODE_WRITE_IOCTL */
	if ((filp->f_flags & O_ACCMODE) == 3)
		filp->f_mode |= FMODE_WRITE_IOCTL;

	bdev = bd_acquire(inode);
	if (bdev == NULL)
		return -ENOMEM;

	filp->f_mapping = bdev->bd_inode->i_mapping;

	return blkdev_get(bdev, filp->f_mode, filp);
}

/*
 * Drop one opener of @bdev.  @for_part is non-zero when the whole device is
 * being released on behalf of a partition (the recursive call at the end).
 * The last opener syncs the device, drops its caches and detaches it from
 * the disk.
 */
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk = bdev->bd_disk;
	struct block_device *victim = NULL;

	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (for_part)
		bdev->bd_part_count--;

	if (!--bdev->bd_openers) {
		WARN_ON_ONCE(bdev->bd_holders);
		sync_blockdev(bdev);
		kill_bdev(bdev);

		bdev_write_inode(bdev);
		/*
		 * Detaching bdev inode from its wb in __destroy_inode()
		 * is too late: the queue which embeds its bdi (along with
		 * root wb) can be gone as soon as we put_disk() below.
		 */
		inode_detach_wb(bdev->bd_inode);
	}
	if (bdev->bd_contains == bdev) {
		if (disk->fops->release)
			disk->fops->release(disk, mode);
	}
	if (!bdev->bd_openers) {
		struct module *owner = disk->fops->owner;

		disk_put_part(bdev->bd_part);
		bdev->bd_part = NULL;
		bdev->bd_disk = NULL;
		/* a partition also releases its whole device, after unlocking */
		if (bdev != bdev->bd_contains)
			victim = bdev->bd_contains;
		bdev->bd_contains = NULL;

		put_disk(disk);
		module_put(owner);
	}
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	if (victim)
		__blkdev_put(victim, mode, 1);
}

/*
 * Release an open of @bdev obtained with @mode: give up the exclusive claim
 * if @mode contains FMODE_EXCL, flush media-change events, then drop the
 * opener through __blkdev_put().
 */
void blkdev_put(struct block_device *bdev, fmode_t mode)
{
	mutex_lock(&bdev->bd_mutex);

	if (mode & FMODE_EXCL) {
		bool bdev_free;

		/*
		 * Release a claim on the device. The holder fields
		 * are protected with bdev_lock. bd_mutex is to
		 * synchronize disk_holder unlinking.
		 */
		spin_lock(&bdev_lock);

		WARN_ON_ONCE(--bdev->bd_holders < 0);
		WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);

		/* bd_contains might point to self, check in a separate step */
		if ((bdev_free = !bdev->bd_holders))
			bdev->bd_holder = NULL;
		if (!bdev->bd_contains->bd_holders)
			bdev->bd_contains->bd_holder = NULL;

		spin_unlock(&bdev_lock);

		/*
		 * If this was the last claim, remove holder link and
		 * unblock evpoll if it was a write holder.
		 */
		if (bdev_free && bdev->bd_write_holder) {
			disk_unblock_events(bdev->bd_disk);
			bdev->bd_write_holder = false;
		}
	}

	/*
	 * Trigger event checking and tell drivers to flush MEDIA_CHANGE
	 * event. This is to ensure detection of media removal commanded
	 * from userland - e.g. eject(1).
	 */
	disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);

	mutex_unlock(&bdev->bd_mutex);

	__blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);

/* ->release() of def_blk_fops: put the device that blkdev_open() opened. */
static int blkdev_close(struct inode * inode, struct file * filp)
{
	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
	blkdev_put(bdev, filp->f_mode);
	return 0;
}

/* ->unlocked_ioctl() of def_blk_fops: forward to blkdev_ioctl(). */
static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	struct block_device *bdev = I_BDEV(file->f_mapping->host);
	fmode_t mode = file->f_mode;

	/*
	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
	 * to update it before every ioctl.
	 */
	if (file->f_flags & O_NDELAY)
		mode |= FMODE_NDELAY;
	else
		mode &= ~FMODE_NDELAY;

	return blkdev_ioctl(bdev, mode, cmd, arg);
}

/*
 * Write data to the block device. Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
*/ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *bd_inode = file->f_mapping->host; loff_t size = i_size_read(bd_inode); struct blk_plug plug; ssize_t ret; if (bdev_read_only(I_BDEV(bd_inode))) return -EPERM; if (!iov_iter_count(from)) return 0; if (iocb->ki_pos >= size) return -ENOSPC; iov_iter_truncate(from, size - iocb->ki_pos); blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); if (ret > 0) { ssize_t err; err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } blk_finish_plug(&plug); return ret; } EXPORT_SYMBOL_GPL(blkdev_write_iter); ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct inode *bd_inode = file->f_mapping->host; loff_t size = i_size_read(bd_inode); loff_t pos = iocb->ki_pos; if (pos >= size) return 0; size -= pos; iov_iter_truncate(to, size); return generic_file_read_iter(iocb, to); } EXPORT_SYMBOL_GPL(blkdev_read_iter); /* * Try to release a page associated with block device when the system * is under memory pressure. 
*/
static int blkdev_releasepage(struct page *page, gfp_t wait)
{
	struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;

	/* a superblock on the device may supply its own release method */
	if (super && super->s_op->bdev_try_to_free_page)
		return super->s_op->bdev_try_to_free_page(super, page, wait);

	return try_to_free_buffers(page);
}

/*
 * Address-space operations of the block device mapping.
 * NOTE(review): blkdev_readpage, blkdev_readpages, blkdev_writepage,
 * blkdev_write_begin and blkdev_write_end are not defined in the visible
 * part of this file -- presumably they are in the portion that is missing.
 */
static const struct address_space_operations def_blk_aops = {
	.readpage	= blkdev_readpage,
	.readpages	= blkdev_readpages,
	.writepage	= blkdev_writepage,
	.write_begin	= blkdev_write_begin,
	.write_end	= blkdev_write_end,
	.writepages	= generic_writepages,
	.releasepage	= blkdev_releasepage,
	.direct_IO	= blkdev_direct_IO,
	.is_dirty_writeback = buffer_check_dirty_writeback,
};

/*
 * File operations of block device files.
 * NOTE(review): block_llseek and blkdev_fsync are not defined in the
 * visible part of this file either.
 */
const struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= block_llseek,
	.read_iter	= blkdev_read_iter,
	.write_iter	= blkdev_write_iter,
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
	.unlocked_ioctl	= block_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
};

/*
 * Issue an ioctl on @bdev from kernel code: the address limit is switched
 * to KERNEL_DS around the blkdev_ioctl() call and restored afterwards.
 * The mode passed to blkdev_ioctl() is 0.
 */
int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
	int res;
	mm_segment_t old_fs = get_fs();
	set_fs(KERNEL_DS);
	res = blkdev_ioctl(bdev, 0, cmd, arg);
	set_fs(old_fs);
	return res;
}

EXPORT_SYMBOL(ioctl_by_bdev);

/**
 * lookup_bdev - lookup a struct block_device by name
 * @pathname: special file representing the block device
 *
 * Get a reference to the blockdevice at @pathname in the current
 * namespace if possible and return it. Return ERR_PTR(error)
 * otherwise.
*/
struct block_device *lookup_bdev(const char *pathname)
{
	struct block_device *bdev;
	struct inode *inode;
	struct path path;
	int error;

	if (!pathname || !*pathname)
		return ERR_PTR(-EINVAL);

	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
	if (error)
		return ERR_PTR(error);

	/* "error" is preset before each check so that "fail" can report it */
	inode = d_backing_inode(path.dentry);
	error = -ENOTBLK;
	if (!S_ISBLK(inode->i_mode))
		goto fail;
	error = -EACCES;
	if (path.mnt->mnt_flags & MNT_NODEV)
		goto fail;
	error = -ENOMEM;
	bdev = bd_acquire(inode);
	if (!bdev)
		goto fail;
out:
	/* the path reference is dropped on success and failure alike */
	path_put(&path);
	return bdev;
fail:
	bdev = ERR_PTR(error);
	goto out;
}
EXPORT_SYMBOL(lookup_bdev);

/*
 * Invalidate what is cached for @bdev: if a superblock is found for the
 * device, shrink its dcache and invalidate its inodes (@kill_dirty is
 * passed on to invalidate_inodes()), then invalidate the device's own
 * buffers and pagecache.
 *
 * Returns the result of invalidate_inodes(), or 0 when there is no
 * superblock.
 */
int __invalidate_device(struct block_device *bdev, bool kill_dirty)
{
	struct super_block *sb = get_super(bdev);
	int res = 0;

	if (sb) {
		/*
		 * no need to lock the super, get_super holds the
		 * read mutex so the filesystem cannot go away
		 * under us (->put_super runs with the write lock
		 * held).
		 */
		shrink_dcache_sb(sb);
		res = invalidate_inodes(sb, kill_dirty);
		drop_super(sb);
	}
	invalidate_bdev(bdev);
	return res;
}
EXPORT_SYMBOL(__invalidate_device);

/*
 * Call @func(bdev, @arg) for every inode on blockdev_superblock's inode
 * list that is not being freed or set up and has pages in its mapping.
 * @func runs with neither s_inode_list_lock nor i_lock held.
 */
void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
{
	struct inode *inode, *old_inode = NULL;

	spin_lock(&blockdev_superblock->s_inode_list_lock);
	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;

		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
		    mapping->nrpages == 0) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&blockdev_superblock->s_inode_list_lock);
		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * s_inode_list_lock. We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * s_inode_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
		old_inode = inode;

		func(I_BDEV(inode), arg);

		spin_lock(&blockdev_superblock->s_inode_list_lock);
	}
	spin_unlock(&blockdev_superblock->s_inode_list_lock);
	/* release the reference kept from the last iteration */
	iput(old_inode);
}