/* This file contains the heart of the mechanism used to read (and write)
 * files.  Read and write requests are split up into chunks that do not cross
 * block boundaries.  Each chunk is then processed in turn.  Reads on special
 * files are also detected and handled.
 *
 * The entry points into this file are
 *   do_read:	 perform the READ system call by calling read_write
 *   read_write: actually do the work of READ and WRITE
 *   rw_chunk:	 read or write (part of) one block
 *   read_map:	 given an inode and file position, look up its block number
 *   rd_indir:	 read an entry in an indirect block
 *   rahead:	 (private) fetch a block and prefetch the blocks following it
 */

#include "fs.h"
#include <fcntl.h>
#include <string.h>
#include <minix/com.h>
#include <sys/ioctl.h>
#include "assert.h"
INIT_ASSERT
#include "buf.h"
#include "file.h"
#include "fproc.h"
#include "inode.h"
#include "param.h"
#include "pipe.h"
#include "super.h"

FORWARD _PROTOTYPE( struct buf *rahead, (struct inode *rip,
		block_t baseblock, u64_t position, unsigned bytes_ahead));


/*===========================================================================*
 *				do_read					     *
 *===========================================================================*/
PUBLIC int do_read()
{
/* Perform the READ system call.  The work is done by read_write, which is
 * called with READING and the arguments read_fd, read_buffer and read_nbytes;
 * its result is returned unchanged.
 */

  assert(fs_call == READ);

  return(read_write(READING, read_fd, read_buffer, read_nbytes));
}


/*===========================================================================*
 *				read_write				     *
 *===========================================================================*/
PUBLIC int read_write(rw_flag, fd, buffer, nbytes)
int rw_flag;			/* READING or WRITING */
int fd;				/* file descriptor to transfer on */
char *buffer;			/* virtual address of the user buffer */
unsigned nbytes;		/* number of bytes to transfer */
{
/* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call.
 *
 * Pipes and FIFOs are handed to pipe_rdwr and character special files to
 * dev_rw.  Regular files and block special files are transferred here: the
 * request is cut into chunks that do not cross a block boundary and each
 * chunk is passed to rw_chunk.
 *
 * The result is the number of bytes transferred if all went well, otherwise
 * the error code (EINVAL for a byte count that is negative as an int, EBADF
 * if the descriptor was not opened for this kind of access, EFBIG if a write
 * would make the file too big, or whatever the lower layers reported).
 */

  register struct inode *rip;
  register struct filp *f;
  u64_t position, f_size;
  struct partition dev_geom;
  unsigned int off, cum_io;
  unsigned long max_size;
  int oflags, r, chunk, usr, seg, block_spec, char_spec;
  int regular;
  dev_t dev;
  mode_t mode_word;
  int pipefifo;
  int fs_err= EGENERIC;

  /* MM loads segments by putting the process slot number and the segment
   * number in m.m1_i3.
   */
  if (who == MM_PROC_NR && m.m1_i3 != 0) {
	usr = (m.m1_i3 >> 4);
	seg = (m.m1_i3 & 0xF);
	assert(usr > 0 && usr < NR_PROCS);
	assert(seg >= 0 && seg < NR_SEGS);
  } else {
	usr = who;		/* normal case */
	seg = SEG_D;
  }

  /* If the file descriptor is valid, get the inode, size and mode. */
  if ((int)nbytes < 0) return(EINVAL);
  if ( (f = get_filp(fd, &fs_err)) == NIL_FILP) return(fs_err);
  if ( ((f->filp_mode) & (rw_flag == READING ? R_BIT : W_BIT)) == 0)
	return(EBADF);
  position = f->filp_pos;
  if (nbytes == 0) return(0);	/* so char special files need not check for 0*/
  oflags = f->filp_flags;
  rip = f->filp_ino;
  r = OK;
  cum_io = 0;
  mode_word = rip->i_mode & I_TYPE;

  regular = mode_word == I_REGULAR;
  pipefifo = mode_word == I_NAMED_PIPE;
  char_spec = mode_word == I_CHAR_SPECIAL;
  block_spec = mode_word == I_BLOCK_SPECIAL;
  rdwt_err = OK;		/* set to EIO if disk error occurs */

  if (pipefifo)
  {
  	/* A non-negative result is a byte count, anything else a status. */
  	r= pipe_rdwr(fd, rw_flag, buffer, nbytes);
  	if (r >= 0)
  	{
  		cum_io += r;
  		position = add64u(position, r);
  		r= OK;
  	}
  }
  else if (char_spec) { 			/* Character special file. */
	/* underlying device is closed, reply EIO. */
	if (f->filp_int_flags & FIF_CLOSED)
		r= EIO;
	dev = (dev_t) rip->i_zone[0];
	if (r == OK) {
		r = dev_rw(rw_flag == READING ? DEV_READ : DEV_WRITE,
		  dev, who, position, buffer, nbytes, fd, oflags & O_NONBLOCK);
	}
	if (r >= 0) {
		cum_io = r;
		position = add64u(position, r);
		r = OK;
	}
  } else {
	if (block_spec) {
		/* We need to know how far we can write a device. */
		f_size = make64(-1L, -1L);	/* infinite */
		if (rw_flag == WRITING) {
			dev = (dev_t) rip->i_zone[0];
			r= dev_ioctl(dev, FS_PROC_NR, DIOCGETP,
						  (char *)&dev_geom, -1, 0);
			if (r == OK) f_size = dev_geom.size;
		}
	} else {
		f_size = cvul64(rip->i_size);
	}

	if (rw_flag == WRITING && !block_spec) {
		/* Check for O_APPEND flag.  An appending write always starts
		 * at the present EOF, so the position must be settled before
		 * the size check; otherwise the check is made against the
		 * position kept in the filp instead of the place where the
		 * data really goes.
		 */
		if (oflags & O_APPEND) position = f_size;

		/* Check in advance to see if file will grow too big.  A
		 * request larger than the maximum file size can never fit;
		 * testing for it first keeps 'max_size - nbytes' from
		 * wrapping around to a huge value that lets the write pass.
		 */
		max_size = (unsigned long) rip->i_sp->s_max_size;
		if (nbytes > max_size ||
				cmp64ul(position, max_size - nbytes) > 0)
			return(EFBIG);

		/* Clear the zone containing present EOF if hole about
		 * to be created.  This is necessary because all unwritten
		 * blocks prior to the EOF must read as zeros.
		 */
		if (cmp64(position, f_size) > 0)
			clear_zone(rip, rip->i_size, 0);
	}

	/* Split the transfer into chunks that don't span two blocks. */
	while (nbytes != 0) {
		off = rem64u(position, BLOCK_SIZE);	/* offset in blk */
		chunk = BLOCK_SIZE - off;
		if (chunk > (unsigned) nbytes) chunk = nbytes;

		/* Reads, and any transfer on a block device, stop at the
		 * end of the file or device; the last chunk is trimmed.
		 */
		if (rw_flag == READING || block_spec) {
			if (cmp64(position, f_size) >= 0) break;  /* at EOF */
			if (cmp64(add64u(position, chunk), f_size) > 0)
				chunk = diff64(f_size, position);
		}

		/* Read or write 'chunk' bytes. */
		r = rw_chunk(rip, position, off, chunk, (unsigned) nbytes,
			     rw_flag, buffer, seg, usr);
		if (r != OK) break;	/* EOF reached */
		if (rdwt_err < 0) break;

		/* Update counters and pointers. */
		buffer += chunk;	/* user buffer address */
		nbytes -= chunk;	/* bytes yet to be read */
		cum_io += chunk;	/* bytes read so far */
		position = add64u(position, chunk); /* position within file */
	}
  }

  /* On write, update file size and access time. */
  if (rw_flag == WRITING) {
	if (regular) {
		/* Only a regular file grows; f_size was set for it above. */
		if (cmp64(position, f_size) > 0) {
			rip->i_size = cv64ul(position);
			rip->i_dirt = DIRTY;
		}
	}
  }

  /* New file position. */
  if (!pipefifo) f->filp_pos = position;

  rip->i_seek = NO_SEEK;		/* forget we seeked */

  if (rdwt_err != OK) r = rdwt_err;	/* check for disk error */
  if (rdwt_err == END_OF_FILE) r = OK;
  if (r == OK) {
	if (rw_flag == READING) {
		rip->i_update |= ATIME;
		rip->i_dirt |= GRIMY;
	} else {
		rip->i_update |= CTIME | MTIME;
		rip->i_dirt |= regular ? DIRTY : GRIMY;
	}
	/* inode is thus now dirty */
	return(cum_io);
  } else {
	return(r);
  }
}


/*===========================================================================*
 *				rw_chunk				     *
 *===========================================================================*/
PUBLIC int rw_chunk(rip, position, off, chunk, left, rw_flag, buff, seg, usr)
register struct inode *rip;	/* pointer to inode for file to be rd/wr */
u64_t position;			/* position within file to read or write */
unsigned off;			/* off within the current block */
int chunk;			/* number of bytes to read or write */
unsigned left;			/* max number of bytes wanted after position */
int rw_flag;			/* READING or WRITING */
char *buff;			/* virtual address of the user buffer */
int seg;			/* SEG_T or SEG_D segment in user space */
int usr;			/* which user process */
{
/* Move 'chunk' bytes between the user buffer and the one block that holds
 * 'position'.  The value returned is the result of the copy (sys_copy), or
 * the error set by new_block when no block could be added to the file.
 */

  register struct buf *bp;
  register int r;
  int how, is_blk_dev;
  block_t blk;
  dev_t dev;
  int fs_err= EGENERIC;

  /* Which block, on which device? */
  is_blk_dev = ((rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL);
  if (is_blk_dev) {
	blk = div64u(position, BLOCK_SIZE);
	dev = (dev_t) rip->i_zone[0];
  } else {
	blk = read_map(rip, cv64ul(position));
	dev = rip->i_dev;
  }

  /* Get hold of a buffer for that block. */
  if (!is_blk_dev && blk == NO_BLOCK) {
	/* The file has no block at this position. */
	if (rw_flag != READING) {
		/* Writing: create a block and enter it in the inode. */
		bp = new_block(rip, cv64ul(position),
					chunk != BLOCK_SIZE, &fs_err);
		if (bp == NIL_BUF) return(fs_err);
	} else {
		/* Reading: a nonexistent block must read as all zeros. */
		bp = get_block(NO_DEV, NO_BLOCK, BF_ALLOC);
		memset(bp->b_data, 0, BLOCK_SIZE);
	}
  } else if (rw_flag == READING) {
	/* Read and read ahead if convenient. */
	bp = rahead(rip, blk, position, left);
  } else {
	/* A block that is only partly overwritten is normally read in first.
	 * That is not needed when the whole block is replaced, nor when the
	 * write starts at the beginning of a block at or past the end of the
	 * file; then a buffer is merely acquired.
	 */
	how = (chunk == BLOCK_SIZE ||
		(!is_blk_dev && off == 0 && cv64ul(position) >= rip->i_size))
			? BF_ALLOC : BF_NORMAL;
	bp = get_block(dev, blk, how);
  }

  /* In all cases, bp now points to a valid buffer. */
  if (rw_flag != READING) {
	/* User space -> block buffer; the buffer must be written back. */
	r = sys_copy(usr, seg, (phys_bytes) buff,
			FS_PROC_NR, SEG_D, (phys_bytes) (bp->b_data+off),
			(phys_bytes) chunk);
	if (!is_blk_dev) {
		bp->b_dirt |= (rip->i_nlinks == 0) ? GRIMY : DIRTY;
	} else {
		bp->b_dirt = DIRTY;
	}
	bp->b_valid = 1;
  } else {
	/* Block buffer -> user space. */
	r = sys_copy(FS_PROC_NR, SEG_D, (phys_bytes) (bp->b_data+off),
			usr, seg, (phys_bytes) buff,
			(phys_bytes) chunk);
  }

  put_block(bp);
  return(r);
}


/*===========================================================================*
 *				read_map				     *
 *===========================================================================*/
PUBLIC block_t read_map(rip, position)
register struct inode *rip;	/* ptr to inode to map from */
uoff_t position;		/* position in file whose blk wanted */
{
/* Given an inode and a position within the corresponding file, locate the
 * block (not zone) number in which that position is to be found and return it.
 * NO_BLOCK is returned if the file has no zone there (a hole), or if the
 * position lies beyond what the double indirect block is able to map.
 */

  register struct buf *bp;
  register zone_t z;
  int scale, boff, dzones, nr_indirects, index, zind, ex;
  block_t b;
  long excess, zone, block_pos;
  
  scale = rip->i_sp->s_log_zone_size;	/* for block-zone conversion */
  block_pos = position/BLOCK_SIZE;	/* relative blk # in file */
  zone = block_pos >> scale;	/* position's zone */
  boff = (int) (block_pos - (zone << scale) ); /* relative blk # within zone */
  dzones = rip->i_sp->s_ndzones;
  nr_indirects = rip->i_sp->s_nindirs;

  /* Is 'position' to be found in the inode itself? */
  if (zone < dzones) {
	zind = (int) zone;	/* index should be an int */
	z = rip->i_zone[zind];
	if (z == NO_ZONE) return(NO_BLOCK);
	b = ((block_t) z << scale) + boff;
	return(b);
  }

  /* It is not in the inode, so it must be single or double indirect. */
  excess = zone - dzones;	/* first Vx_NR_DZONES don't count */

  if (excess < nr_indirects) {
	/* 'position' can be located via the single indirect block. */
	z = rip->i_zone[dzones];
  } else {
	/* 'position' can be located via the double indirect block. */
	if ( (z = rip->i_zone[dzones+1]) == NO_ZONE) return(NO_BLOCK);
	excess -= nr_indirects;			/* single indir doesn't count*/

	/* The double indirect block holds nr_indirects entries.  Anything
	 * further out cannot be mapped; indexing with it would read past the
	 * end of the block.  Compare as longs, before narrowing to an int.
	 */
	if (excess/nr_indirects >= nr_indirects) return(NO_BLOCK);

	b = (block_t) z << scale;
	bp = get_block(rip->i_dev, b, BF_NORMAL);/* get double indirect block */
	index = (int) (excess/nr_indirects);
	z = rd_indir(bp, index);		/* z= zone for single*/
	put_block(bp);				/* release double ind block */
	excess = excess % nr_indirects;		/* index into single ind blk */
  }

  /* 'z' is zone num for single indirect block; 'excess' is index into it. */
  if (z == NO_ZONE) return(NO_BLOCK);
  b = (block_t) z << scale;			/* b is blk # for single ind */
  bp = get_block(rip->i_dev, b, BF_NORMAL);	/* get single indirect block */
  ex = (int) excess;				/* need an integer */
  z = rd_indir(bp, ex);				/* get block pointed to */
  put_block(bp);				/* release single indir blk */
  if (z == NO_ZONE) return(NO_BLOCK);
  b = ((block_t) z << scale) + boff;
  return(b);
}


/*===========================================================================*
 *				rd_indir				     *
 *===========================================================================*/
PUBLIC zone_t rd_indir(bp, index)
struct buf *bp;			/* pointer to indirect block */
int index;			/* index into *bp */
{
/* Fetch entry 'index' from the indirect block in 'bp' and return it as a zone
 * number.  The super block of the block's device tells whether the entries
 * are V1 or V2 entries; they are passed through conv2 or conv4 together with
 * the super block's s_native field.  An entry that is neither NO_ZONE nor
 * within [s_firstdatazone, s_zones) is reported and leads to a panic.
 */

  struct super_block *sp;
  zone_t zone;

  /* The super block is needed to find the file system type. */
  sp = get_super(bp->b_dev);

  /* Pick up the entry; V2 zones are longs, V1 zones are shorts. */
  if (sp->s_version != V1) {
	zone = (zone_t) conv4(sp->s_native, (long) bp->b_v2_ind[index]);
  } else {
	zone = (zone_t) conv2(sp->s_native, (int)  bp->b_v1_ind[index]);
  }

  /* "No zone" and any zone inside the data area are fine. */
  if (zone == NO_ZONE) return(zone);
  if (zone >= (zone_t) sp->s_firstdatazone && zone < sp->s_zones)
	return(zone);

  /* Anything else means the indirect block is damaged. */
  printf("Illegal zone number %ld in indirect block, index %d\n",
	 (long) zone, index);
  panic("check file system", NO_NUM);
  return(zone);
}


/*===========================================================================*
 *				rahead					     *
 *===========================================================================*/
PRIVATE struct buf *rahead(rip, baseblock, position, bytes_ahead)
register struct inode *rip;	/* pointer to inode for file to be read */
block_t baseblock;		/* block at current position */
u64_t position;			/* position within file */
unsigned bytes_ahead;		/* bytes beyond position for immediate use */
{
/* Fetch a block from the cache or the device.  If a physical read is
 * required, prefetch as many more blocks as convenient into the cache.
 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
 * The device driver may decide it knows better and stop reading at a
 * cylinder boundary (or after an error).  The first block is not optional,
 * and any errors are only reported for it.
 *
 * The value returned is the buffer acquired for 'baseblock'.  It is still
 * held when this function returns; the buffers acquired for the blocks after
 * it are given back with put_block before the read is started.
 */

/* Minimum number of blocks to prefetch. */
# define BLOCKS_MINIMUM		64

  int block_spec;
  unsigned blocks_ahead;
  block_t block, blocks_left, ind1_blk;
  dev_t dev;
  struct buf *bp;
  struct buf *read_q, **q_last;

  /* For a block special file the device number is kept in zone 0 of the
   * inode, otherwise the blocks are on the device the inode lives on.
   */
  block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
  if (block_spec) {
	dev = (dev_t) rip->i_zone[0];
  } else {
	dev = rip->i_dev;
  }

  /* BF_ALLOC: only acquire a buffer here, reading is done further down. */
  bp = get_block(dev, baseblock, BF_ALLOC);
  if (bp->b_valid) return(bp);		/* block already in the cache */

  /* The best guess for the number of blocks to prefetch:  A lot.
   * It is impossible to tell what the device looks like, so we don't even
   * try to guess the geometry, but leave it to the driver.
   *
   * The floppy driver can read a full track with no rotational delay, and it
   * avoids reading partial tracks if it can, so handing it enough buffers to
   * read two tracks is perfect.  (Two, because some diskette types have
   * an odd number of sectors per track, so a block may span tracks.)
   *
   * The disk drivers don't try to be smart.  With todays disks it is
   * impossible to tell what the real geometry looks like, so it is best to
   * read as much as you can.  With luck the caching on the drive allows
   * for a little time to start the next read.
   *
   * The current solution below is a bit of a hack, it just reads blocks from
   * the current file position hoping that more of the file can be found.  A
   * better solution must look at the already available zone pointers and
   * indirect blocks (but don't call read_map!).
   */

  /* Count from the start of the current block and round up to whole blocks. */
  bytes_ahead += rem64u(position, BLOCK_SIZE);
  blocks_ahead = (bytes_ahead + BLOCK_SIZE - 1) / BLOCK_SIZE;

  if (block_spec) {
	/* No file size to go by; the request maximum is the only limit. */
	blocks_left = NR_IOREQS;
  } else {
	/* Number of blocks from the current one up to the end of the file. */
	block = div64u(position, BLOCK_SIZE);
	blocks_left = (rip->i_size + BLOCK_SIZE - 1) / BLOCK_SIZE;

	/* Go for the first indirect block if we are in its neighborhood. */
	ind1_blk = (block_t) rip->i_sp->s_ndzones << rip->i_sp->s_log_zone_size;
	if (block <= ind1_blk && blocks_left > ind1_blk) {
		blocks_ahead++;
		blocks_left++;
	}
	blocks_left -= block;
  }

  /* Read at least the minimum number of blocks, but not after a seek. */
  if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
	blocks_ahead = BLOCKS_MINIMUM;

  /* Can't go past end of file. */
  if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;

  /* No more than the maximum request. */
  if (blocks_ahead > NR_IOREQS) blocks_ahead = NR_IOREQS;

  /* Acquire block buffers.  They are chained through b_list, with read_q
   * as the head and q_last pointing at the link to fill in next.  The
   * buffer for baseblock, acquired above, becomes the first element.
   */
  q_last = &read_q;
  block = baseblock;
  for (;;) {
	*q_last = bp;
	q_last = &bp->b_list;

	/* The buffer just chained counts as one of the blocks wanted. */
	if (--blocks_ahead == 0) break;

	/* Don't trash the cache, leave 4 free. */
	if (bufs_in_use >= nr_bufs - 4) break;

	block++;

	bp = get_block(dev, block, BF_ALLOC);
	if (bp->b_valid) {
		/* Oops, block already in the cache, get out. */
		put_block(bp);
		/* NOTE(review): the buffer is touched after it was released;
		 * presumably this takes back usage credit given by get_block
		 * and put_block -- confirm against the cache code.
		 */
		bp->b_usage -= 2;	/* don't count this access */
		break;
	}
  }
  /* Terminate the chain. */
  *q_last = NIL_BUF;

  /* Put all but the first block back in the cache. */
  bp = read_q;
  while ((bp = bp->b_list) != NIL_BUF) put_block(bp);

  /* Hand the whole chain to rw_scattered to be read. */
  rw_scattered(read_q, READING);
  return(read_q);
}


/*
 * $PchId: read.c,v 1.6 1996/02/29 23:09:24 philip Exp $
 */
