Skip to content

Commit

Permalink
readahead: Add block device support for readahead
Browse files Browse the repository at this point in the history
Signed-off-by: Pedro Falcato <pedro.falcato@gmail.com>
  • Loading branch information
heatd committed Nov 17, 2024
1 parent e2dbb83 commit b4069e3
Show file tree
Hide file tree
Showing 6 changed files with 185 additions and 7 deletions.
1 change: 1 addition & 0 deletions kernel/include/onyx/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,5 +212,6 @@ int block_set_bsize(struct blockdev *bdev, unsigned int block_size);
int bdev_do_open(struct blockdev *bdev, bool exclusive);
void bdev_release(struct blockdev *bdev);
unsigned int bdev_sector_size(struct blockdev *bdev);
u64 bdev_get_size(struct blockdev *bdev);
__END_CDECLS
#endif
1 change: 1 addition & 0 deletions kernel/include/onyx/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ struct block_buf
#define BLOCKBUF_FLAG_WRITEBACK (1 << 1)
#define BLOCKBUF_FLAG_UPTODATE (1 << 2)
#define BLOCKBUF_FLAG_AREAD (1 << 3)
#define BLOCKBUF_FLAG_HOLE (1 << 4)

static inline bool bb_test_and_set(struct block_buf *buf, unsigned int flag)
{
Expand Down
7 changes: 6 additions & 1 deletion kernel/kernel/fs/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,19 @@ struct hd_geometry

static int block_reread_parts(struct blockdev *bdev);

/**
 * Compute the capacity of a block device in bytes.
 *
 * @param bdev Block device to query
 * @return bdev->nr_sectors multiplied by bdev->sector_size
 */
u64 bdev_get_size(struct blockdev *bdev)
{
    /* Capacity is the sector count scaled by the size of a single sector. */
    u64 size_in_bytes = bdev->nr_sectors * bdev->sector_size;
    return size_in_bytes;
}

unsigned int blkdev_ioctl(int request, void *argp, struct file *f)
{
auto d = (blockdev *) f->f_ino->i_helper;

switch (request)
{
case BLKGETSIZE64: {
u64 len = d->nr_sectors * d->sector_size;
u64 len = bdev_get_size(d);
return copy_to_user(argp, &len, sizeof(u64));
}

Expand Down
176 changes: 171 additions & 5 deletions kernel/kernel/fs/buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ bool page_has_writeback_bufs(struct page *p)
struct block_buf *page_add_blockbuf(struct page *page, unsigned int page_off)
{
assert(page->flags & PAGE_FLAG_BUFFER);
DCHECK(page_locked(page));
CHECK_PAGE(page_locked(page), page);

auto buf = (struct block_buf *) kmem_cache_alloc(buffer_cache, GFP_KERNEL);
if (!buf)
Expand Down Expand Up @@ -255,9 +255,6 @@ void page_destroy_block_bufs(struct page *page)

ssize_t bbuffer_readpage(struct page *p, size_t off, struct inode *ino)
{
p->flags |= PAGE_FLAG_BUFFER;
p->priv = 0;

auto blkdev = reinterpret_cast<blockdev *>(ino->i_helper);
DCHECK(blkdev != nullptr);

Expand Down Expand Up @@ -291,6 +288,9 @@ ssize_t bbuffer_readpage(struct page *p, size_t off, struct inode *ino)
if (iost < 0)
return iost;

if (!page_test_set_buffer(p))
goto skip_setup;

for (size_t i = 0; i < nr_blocks; i++)
{
struct block_buf *b;
Expand All @@ -307,7 +307,10 @@ ssize_t bbuffer_readpage(struct page *p, size_t off, struct inode *ino)
curr_off += block_size;
}

p->flags |= PAGE_FLAG_UPTODATE;
skip_setup:
page_set_uptodate(p);
for (struct block_buf *b = (struct block_buf *) p->priv; b; b = b->next)
bb_test_and_set(b, BLOCKBUF_FLAG_UPTODATE);
return PAGE_SIZE;
}

Expand Down Expand Up @@ -411,6 +414,168 @@ static ssize_t buffer_directio(struct file *filp, size_t off, iovec_iter *iter,
return to_read;
}

/**
 * Completion callback (b_end_io) for the read BIOs submitted by buffer_readpages.
 *
 * For every page vector in the bio, the block_bufs fully covered by the vector have
 * BLOCKBUF_FLAG_AREAD cleared and BLOCKBUF_FLAG_UPTODATE set. If every other block_buf of the
 * page is already up to date, the page is unlocked; it is additionally flagged
 * PAGE_FLAG_UPTODATE when the bio status is BIO_REQ_DONE.
 *
 * @param bio The bio that finished
 */
static void buffer_readpages_endio(struct bio_req *bio) NO_THREAD_SAFETY_ANALYSIS
{
    for (size_t vec = 0; vec < bio->nr_vecs; vec++)
    {
        struct page_iov *seg = &bio->vec[vec];
        struct page *pg = seg->page;
        DCHECK(page_locked(pg));

        struct block_buf *first = (struct block_buf *) pg->priv;
        bool page_complete = true;

        /* The head buffer's pagestate_lock is held while the whole buffer list is examined. */
        spin_lock(&first->pagestate_lock);

        for (struct block_buf *buf = first; buf != nullptr; buf = buf->next)
        {
            const bool starts_inside = buf->page_off >= seg->page_off;
            const bool ends_inside =
                buf->page_off + buf->block_size <= seg->page_off + seg->length;

            if (!(starts_inside && ends_inside))
            {
                /* Not part of this vector: it only matters whether it still lacks data. */
                if (!bb_test_flag(buf, BLOCKBUF_FLAG_UPTODATE))
                    page_complete = false;
                continue;
            }

            /* This buffer was covered by the IO that just finished. */
            bb_clear_flag(buf, BLOCKBUF_FLAG_AREAD);
            CHECK(bb_test_and_set(buf, BLOCKBUF_FLAG_UPTODATE));
        }

        spin_unlock(&first->pagestate_lock);

        /* Some other buffer is not up to date yet, so this completion leaves the page locked. */
        if (!page_complete)
            continue;

        if ((bio->flags & BIO_STATUS_MASK) == BIO_REQ_DONE)
            page_test_set_flag(pg, PAGE_FLAG_UPTODATE);
        unlock_page(pg);
    }
}

/**
 * readpages hook for block-device-backed inodes (installed as buffer_ops.readpages).
 *
 * For each page handed out by readpages_next_page(), attaches block_bufs to the page if it has
 * none yet and submits read BIOs for the blocks that still need data. IO completion is handled
 * by buffer_readpages_endio.
 *
 * Pages are presumably handed out locked and referenced: this function drops the reference on
 * every page it processes and unlocks the page itself only when it submitted no IO for it.
 *
 * @param state readpages iteration state, consumed through readpages_next_page()
 * @param ino Inode whose i_helper is the backing blockdev
 * @return 0 once readpages_next_page() returns no more pages; -ENOMEM on allocation failure, or
 *         the negative value returned by bio_submit_request()
 */
static int buffer_readpages(struct readpages_state *state,
struct inode *ino) NO_THREAD_SAFETY_ANALYSIS
{
blockdev *blkdev = reinterpret_cast<blockdev *>(ino->i_helper);
int st;
struct page *page;
/* Number of BIOs successfully submitted for the current page (reset for every page). */
unsigned int nr_ios = 0;
auto block_size = blkdev->block_size;
/* Device size in blocks: sector count divided by the number of sectors per block. */
u64 nblocks = blkdev->nr_sectors / (block_size / blkdev->sector_size);

while ((page = readpages_next_page(state)))
{
const unsigned long pgoff = page->pageoff;
nr_ios = 0;
auto nr_blocks = PAGE_SIZE / block_size;
/* Index of the first block backing this page. */
size_t starting_block_nr = (pgoff << PAGE_SHIFT) / block_size;
size_t curr_off = 0;

/* Presumably page_test_set_flag() returns false when the flag was already set, i.e. the page
 * already has block_bufs attached and only the per-buffer pass below is needed - TODO confirm. */
if (!page_test_set_flag(page, PAGE_FLAG_BUFFER))
goto skip_setup;

/* First time we see this page: attach one block_buf per block. */
for (size_t i = 0; i < nr_blocks; i++)
{
struct block_buf *b;
if (!(b = page_add_blockbuf(page, curr_off)))
{
page_destroy_block_bufs(page);
st = -ENOMEM;
goto out_err;
}

b->block_nr = starting_block_nr + i;
/* Blocks at or past the end of the device are marked as up-to-date holes, so the
 * per-buffer pass below never issues IO for them. */
if (b->block_nr >= nblocks)
bb_test_and_set(b, BLOCKBUF_FLAG_HOLE | BLOCKBUF_FLAG_UPTODATE);
b->block_size = block_size;
b->dev = blkdev;
curr_off += block_size;
}

/* The whole page lies within the device. */
if (starting_block_nr + nr_blocks <= nblocks)
{
/* Fast, simple case. Fire off a single BIO for this whole contiguous page. This makes
 * it so we can fire off larger BIOs for, e.g., NVMe, which then increases the chance of
 * it getting merged with other bios, etc.
 */
struct block_buf *b = (struct block_buf *) page->priv;
struct bio_req *bio = bio_alloc(GFP_NOFS, 1);
if (!bio)
{
/* NOTE(review): the block_bufs attached above are left on the page here. */
st = -ENOMEM;
goto out_err;
}

/* Only the head buffer is flagged AREAD; buffer_readpages_endio clears AREAD on every
 * buffer covered by the completed vector. */
bb_test_and_set(b, BLOCKBUF_FLAG_AREAD);

/* Convert the block number to a sector number. */
bio->sector_number = b->block_nr * (block_size / blkdev->sector_size);
bio->flags = BIO_REQ_READ_OP;
bio->b_end_io = buffer_readpages_endio;
bio_push_pages(bio, page, 0, PAGE_SIZE);
st = bio_submit_request(blkdev, bio);
/* Presumably drops our reference to the bio - TODO confirm. */
bio_put(bio);

if (st < 0)
{
bb_clear_flag(b, BLOCKBUF_FLAG_AREAD);
goto out_err;
}

nr_ios++;
goto end_read;
}

skip_setup:
/* Per-buffer path: issue one BIO for each block that still needs reading. Reached for pages
 * that already had buffers and for freshly set up pages that extend past the device end. */
for (struct block_buf *b = (struct block_buf *) page->priv; b != nullptr; b = b->next)
{
sector_t block = b->block_nr;
if (bb_test_flag(b, BLOCKBUF_FLAG_UPTODATE))
continue;
if (bb_test_flag(b, BLOCKBUF_FLAG_HOLE))
continue;
/* Presumably false means AREAD was already set, i.e. a read for this block is already in
 * flight - TODO confirm. */
if (!bb_test_and_set(b, BLOCKBUF_FLAG_AREAD))
continue;
CHECK(!bb_test_flag(b, BLOCKBUF_FLAG_UPTODATE));

struct bio_req *bio = bio_alloc(GFP_NOFS, 1);
if (!bio)
{
bb_clear_flag(b, BLOCKBUF_FLAG_AREAD);
st = -ENOMEM;
goto out_err;
}

/* Note: We do not need to ref, we hold the lock, no one can throw this page away
 * while locked (almost like an implicit reference). */
bio->sector_number = block * (block_size / blkdev->sector_size);
bio->flags = BIO_REQ_READ_OP;
bio->b_end_io = buffer_readpages_endio;
bio_push_pages(bio, page, b->page_off, b->block_size);
st = bio_submit_request(blkdev, bio);
bio_put(bio);

if (st < 0)
{
bb_clear_flag(b, BLOCKBUF_FLAG_AREAD);
goto out_err;
}

nr_ios++;
}

end_read:
/* No IO was submitted for this page, so no endio callback will unlock it: do it here. */
if (nr_ios == 0)
unlock_page(page);
page_unref(page);
}

return 0;
out_err:
/* On error, release the page we're holding. We do not unlock it if we submitted any IOs for the
 * page; the endio callback will do it for us.
 * NOTE(review): buffer_readpages_endio only unlocks the page once every buffer is UPTODATE. If a
 * later block fails after earlier BIOs were submitted, the failed block is never marked
 * UPTODATE, so the page may stay locked - confirm. */
if (nr_ios == 0)
unlock_page(page);
page_unref(page);
return st;
}

static int block_prepare_write(struct inode *ino, struct page *page, size_t page_off, size_t offset,
size_t len)
{
Expand All @@ -437,6 +602,7 @@ struct file_ops buffer_ops = {
.writepages = filemap_writepages,
.fsyncdata = filemap_writepages,
.directio = buffer_directio,
.readpages = buffer_readpages,
};

struct block_buf *sb_read_block(const struct superblock *sb, unsigned long block)
Expand Down
2 changes: 1 addition & 1 deletion kernel/kernel/fs/filemap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ int filemap_find_page(struct inode *ino, size_t pgoff, unsigned int flags, struc
page_promote_referenced(p);
}

if (!(flags & (FIND_PAGE_NO_READPAGE | FIND_PAGE_NO_RA)) && ra_state && !S_ISBLK(ino->i_mode))
if (!(flags & (FIND_PAGE_NO_READPAGE | FIND_PAGE_NO_RA)) && ra_state)
{
rw_lock_read(&ino->i_pages->truncate_lock);
/* If we found PAGE_FLAG_READAHEAD, kick off more IO */
Expand Down
5 changes: 5 additions & 0 deletions kernel/kernel/fs/readahead.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ static void readpages_finish(struct readpages_state *state) NO_THREAD_SAFETY_ANA
}
}

u64 bdev_get_size(struct blockdev *bdev);

static int filemap_do_readahead(struct inode *inode, struct readahead_state *ra_state,
unsigned long pgoff) NO_THREAD_SAFETY_ANALYSIS
{
Expand All @@ -76,6 +78,9 @@ static int filemap_do_readahead(struct inode *inode, struct readahead_state *ra_
size_t endpg;
struct blk_plug plug;

if (S_ISBLK(inode->i_mode))
size = bdev_get_size(inode->i_helper);

/* Do basic bounds checks on our readahead window */
if (!size)
return 0;
Expand Down

0 comments on commit b4069e3

Please sign in to comment.