diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/gfs2/bmap.c | 26 | ||||
| -rw-r--r-- | fs/iomap/buffered-io.c | 96 | ||||
| -rw-r--r-- | fs/iomap/trace.h | 2 | ||||
| -rw-r--r-- | fs/xfs/xfs_aops.c | 128 | ||||
| -rw-r--r-- | fs/zonefs/file.c | 28 |
5 files changed, 157 insertions, 123 deletions
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 7703d0471139..0cc41de54aba 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2469,23 +2469,25 @@ out: return error; } -static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode, - loff_t offset, unsigned int len) +static ssize_t gfs2_writeback_range(struct iomap_writepage_ctx *wpc, + struct folio *folio, u64 offset, unsigned int len, u64 end_pos) { - int ret; - - if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode)))) + if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(wpc->inode)))) return -EIO; - if (offset >= wpc->iomap.offset && - offset < wpc->iomap.offset + wpc->iomap.length) - return 0; + if (offset < wpc->iomap.offset || + offset >= wpc->iomap.offset + wpc->iomap.length) { + int ret; - memset(&wpc->iomap, 0, sizeof(wpc->iomap)); - ret = gfs2_iomap_get(inode, offset, INT_MAX, &wpc->iomap); - return ret; + memset(&wpc->iomap, 0, sizeof(wpc->iomap)); + ret = gfs2_iomap_get(wpc->inode, offset, INT_MAX, &wpc->iomap); + if (ret) + return ret; + } + + return iomap_add_to_ioend(wpc, folio, offset, end_pos, len); } const struct iomap_writeback_ops gfs2_writeback_ops = { - .map_blocks = gfs2_map_blocks, + .writeback_range = gfs2_writeback_range, }; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 93b2a90e6867..c558ac15bc87 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1668,14 +1668,30 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, * At the end of a writeback pass, there will be a cached ioend remaining on the * writepage context that the caller will need to submit. */ -static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, - struct folio *folio, loff_t pos, loff_t end_pos, unsigned len) +ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, + loff_t pos, loff_t end_pos, unsigned int dirty_len) { struct iomap_folio_state *ifs = folio->private; size_t poff = offset_in_folio(folio, pos); unsigned int ioend_flags = 0; + unsigned int map_len = min_t(u64, dirty_len, + wpc->iomap.offset + wpc->iomap.length - pos); int error; + trace_iomap_add_to_ioend(wpc->inode, pos, dirty_len, &wpc->iomap); + + WARN_ON_ONCE(!folio->private && map_len < dirty_len); + + switch (wpc->iomap.type) { + case IOMAP_INLINE: + WARN_ON_ONCE(1); + return -EIO; + case IOMAP_HOLE: + return map_len; + default: + break; + } + if (wpc->iomap.type == IOMAP_UNWRITTEN) ioend_flags |= IOMAP_IOEND_UNWRITTEN; if (wpc->iomap.flags & IOMAP_F_SHARED) @@ -1693,11 +1709,11 @@ new_ioend: wpc->ioend = iomap_alloc_ioend(wpc, pos, ioend_flags); } - if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff)) + if (!bio_add_folio(&wpc->ioend->io_bio, folio, map_len, poff)) goto new_ioend; if (ifs) - atomic_add(len, &ifs->write_bytes_pending); + atomic_add(map_len, &ifs->write_bytes_pending); /* * Clamp io_offset and io_size to the incore EOF so that ondisk @@ -1740,63 +1756,39 @@ new_ioend: * Note that this defeats the ability to chain the ioends of * appending writes. */ - wpc->ioend->io_size += len; + wpc->ioend->io_size += map_len; if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos) wpc->ioend->io_size = end_pos - wpc->ioend->io_offset; - wbc_account_cgroup_owner(wpc->wbc, folio, len); - return 0; + wbc_account_cgroup_owner(wpc->wbc, folio, map_len); + return map_len; } +EXPORT_SYMBOL_GPL(iomap_add_to_ioend); -static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc, - struct folio *folio, u64 pos, u64 end_pos, unsigned dirty_len, +static int iomap_writeback_range(struct iomap_writepage_ctx *wpc, + struct folio *folio, u64 pos, u32 rlen, u64 end_pos, bool *wb_pending) { - int error; - do { - unsigned map_len; - - error = wpc->ops->map_blocks(wpc, wpc->inode, pos, dirty_len); - if (error) - break; - trace_iomap_writepage_map(wpc->inode, pos, dirty_len, - &wpc->iomap); + ssize_t ret; - map_len = min_t(u64, dirty_len, - wpc->iomap.offset + wpc->iomap.length - pos); - WARN_ON_ONCE(!folio->private && map_len < dirty_len); + ret = wpc->ops->writeback_range(wpc, folio, pos, rlen, end_pos); + if (WARN_ON_ONCE(ret == 0 || ret > rlen)) + return -EIO; + if (ret < 0) + return ret; + rlen -= ret; + pos += ret; - switch (wpc->iomap.type) { - case IOMAP_INLINE: - WARN_ON_ONCE(1); - error = -EIO; - break; - case IOMAP_HOLE: - break; - default: - error = iomap_add_to_ioend(wpc, folio, pos, end_pos, - map_len); - if (!error) - *wb_pending = true; - break; - } - dirty_len -= map_len; - pos += map_len; - } while (dirty_len && !error); + /* + * Holes are not be written back by ->writeback_range, so track + * if we did handle anything that is not a hole here. + */ + if (wpc->iomap.type != IOMAP_HOLE) + *wb_pending = true; + } while (rlen); - /* - * We cannot cancel the ioend directly here on error. We may have - * already set other pages under writeback and hence we have to run I/O - * completion to mark the error state of the pages under writeback - * appropriately. - * - * Just let the file system know what portion of the folio failed to - * map. - */ - if (error && wpc->ops->discard_folio) - wpc->ops->discard_folio(folio, pos); - return error; + return 0; } /* @@ -1908,8 +1900,8 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, */ end_aligned = round_up(end_pos, i_blocksize(inode)); while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) { - error = iomap_writepage_map_blocks(wpc, folio, pos, end_pos, - rlen, &wb_pending); + error = iomap_writeback_range(wpc, folio, pos, rlen, end_pos, + &wb_pending); if (error) break; pos += rlen; diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index 455cc6f90be0..aaea02c9560a 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -169,7 +169,7 @@ DEFINE_EVENT(iomap_class, name, \ DEFINE_IOMAP_EVENT(iomap_iter_dstmap); DEFINE_IOMAP_EVENT(iomap_iter_srcmap); -TRACE_EVENT(iomap_writepage_map, +TRACE_EVENT(iomap_add_to_ioend, TP_PROTO(struct inode *inode, u64 pos, unsigned int dirty_len, struct iomap *iomap), TP_ARGS(inode, pos, dirty_len, iomap), diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 65485a52df3b..f6d44ab78442 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -234,6 +234,47 @@ xfs_end_bio( } /* + * We cannot cancel the ioend directly on error. We may have already set other + * pages under writeback and hence we have to run I/O completion to mark the + * error state of the pages under writeback appropriately. + * + * If the folio has delalloc blocks on it, the caller is asking us to punch them + * out. If we don't, we can leave a stale delalloc mapping covered by a clean + * page that needs to be dirtied again before the delalloc mapping can be + * converted. This stale delalloc mapping can trip up a later direct I/O read + * operation on the same region. + * + * We prevent this by truncating away the delalloc regions on the folio. Because + * they are delalloc, we can do this without needing a transaction. Indeed - if + * we get ENOSPC errors, we have to be able to do this truncation without a + * transaction as there is no space left for block reservation (typically why + * we see a ENOSPC in writeback). + */ +static void +xfs_discard_folio( + struct folio *folio, + loff_t pos) +{ + struct xfs_inode *ip = XFS_I(folio->mapping->host); + struct xfs_mount *mp = ip->i_mount; + + if (xfs_is_shutdown(mp)) + return; + + xfs_alert_ratelimited(mp, + "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.", + folio, ip->i_ino, pos); + + /* + * The end of the punch range is always the offset of the first + * byte of the next folio. Hence the end offset is only dependent on the + * folio itself and not the start offset that is passed in. + */ + xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos, + folio_pos(folio) + folio_size(folio), NULL); +} + +/* * Fast revalidation of the cached writeback mapping. Return true if the current * mapping is valid, false otherwise. */ @@ -278,13 +319,12 @@ xfs_imap_valid( static int xfs_map_blocks( struct iomap_writepage_ctx *wpc, - struct inode *inode, loff_t offset, unsigned int len) { - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(wpc->inode); struct xfs_mount *mp = ip->i_mount; - ssize_t count = i_blocksize(inode); + ssize_t count = i_blocksize(wpc->inode); xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); xfs_fileoff_t cow_fsb; @@ -436,6 +476,24 @@ allocate_blocks: return 0; } +static ssize_t +xfs_writeback_range( + struct iomap_writepage_ctx *wpc, + struct folio *folio, + u64 offset, + unsigned int len, + u64 end_pos) +{ + ssize_t ret; + + ret = xfs_map_blocks(wpc, offset, len); + if (!ret) + ret = iomap_add_to_ioend(wpc, folio, offset, end_pos, len); + if (ret < 0) + xfs_discard_folio(folio, offset); + return ret; +} + static bool xfs_ioend_needs_wq_completion( struct iomap_ioend *ioend) @@ -488,47 +546,9 @@ xfs_submit_ioend( return 0; } -/* - * If the folio has delalloc blocks on it, the caller is asking us to punch them - * out. If we don't, we can leave a stale delalloc mapping covered by a clean - * page that needs to be dirtied again before the delalloc mapping can be - * converted. This stale delalloc mapping can trip up a later direct I/O read - * operation on the same region. - * - * We prevent this by truncating away the delalloc regions on the folio. Because - * they are delalloc, we can do this without needing a transaction. Indeed - if - * we get ENOSPC errors, we have to be able to do this truncation without a - * transaction as there is no space left for block reservation (typically why - * we see a ENOSPC in writeback). - */ -static void -xfs_discard_folio( - struct folio *folio, - loff_t pos) -{ - struct xfs_inode *ip = XFS_I(folio->mapping->host); - struct xfs_mount *mp = ip->i_mount; - - if (xfs_is_shutdown(mp)) - return; - - xfs_alert_ratelimited(mp, - "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.", - folio, ip->i_ino, pos); - - /* - * The end of the punch range is always the offset of the first - * byte of the next folio. Hence the end offset is only dependent on the - * folio itself and not the start offset that is passed in. - */ - xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos, - folio_pos(folio) + folio_size(folio), NULL); -} - static const struct iomap_writeback_ops xfs_writeback_ops = { - .map_blocks = xfs_map_blocks, + .writeback_range = xfs_writeback_range, .submit_ioend = xfs_submit_ioend, - .discard_folio = xfs_discard_folio, }; struct xfs_zoned_writepage_ctx { @@ -545,11 +565,10 @@ XFS_ZWPC(struct iomap_writepage_ctx *ctx) static int xfs_zoned_map_blocks( struct iomap_writepage_ctx *wpc, - struct inode *inode, loff_t offset, unsigned int len) { - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(wpc->inode); struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + len); @@ -608,6 +627,24 @@ xfs_zoned_map_blocks( return 0; } +static ssize_t +xfs_zoned_writeback_range( + struct iomap_writepage_ctx *wpc, + struct folio *folio, + u64 offset, + unsigned int len, + u64 end_pos) +{ + ssize_t ret; + + ret = xfs_zoned_map_blocks(wpc, offset, len); + if (!ret) + ret = iomap_add_to_ioend(wpc, folio, offset, end_pos, len); + if (ret < 0) + xfs_discard_folio(folio, offset); + return ret; +} + static int xfs_zoned_submit_ioend( struct iomap_writepage_ctx *wpc, @@ -621,9 +658,8 @@ xfs_zoned_submit_ioend( } static const struct iomap_writeback_ops xfs_zoned_writeback_ops = { - .map_blocks = xfs_zoned_map_blocks, + .writeback_range = xfs_zoned_writeback_range, .submit_ioend = xfs_zoned_submit_ioend, - .discard_folio = xfs_discard_folio, }; STATIC int diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index edca4bbe4b72..c88e2c851753 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -124,29 +124,33 @@ static void zonefs_readahead(struct readahead_control *rac) * Map blocks for page writeback. This is used only on conventional zone files, * which implies that the page range can only be within the fixed inode size. */ -static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, - struct inode *inode, loff_t offset, - unsigned int len) +static ssize_t zonefs_writeback_range(struct iomap_writepage_ctx *wpc, + struct folio *folio, u64 offset, unsigned len, u64 end_pos) { - struct zonefs_zone *z = zonefs_inode_zone(inode); + struct zonefs_zone *z = zonefs_inode_zone(wpc->inode); if (WARN_ON_ONCE(zonefs_zone_is_seq(z))) return -EIO; - if (WARN_ON_ONCE(offset >= i_size_read(inode))) + if (WARN_ON_ONCE(offset >= i_size_read(wpc->inode))) return -EIO; /* If the mapping is already OK, nothing needs to be done */ - if (offset >= wpc->iomap.offset && - offset < wpc->iomap.offset + wpc->iomap.length) - return 0; + if (offset < wpc->iomap.offset || + offset >= wpc->iomap.offset + wpc->iomap.length) { + int error; + + error = zonefs_write_iomap_begin(wpc->inode, offset, + z->z_capacity - offset, IOMAP_WRITE, + &wpc->iomap, NULL); + if (error) + return error; + } - return zonefs_write_iomap_begin(inode, offset, - z->z_capacity - offset, - IOMAP_WRITE, &wpc->iomap, NULL); + return iomap_add_to_ioend(wpc, folio, offset, end_pos, len); } static const struct iomap_writeback_ops zonefs_writeback_ops = { - .map_blocks = zonefs_write_map_blocks, + .writeback_range = zonefs_writeback_range, }; static int zonefs_writepages(struct address_space *mapping, |
