/*
 * ocfsmain.c
 *
 * ocfs file system driver entry point
 *
 * Copyright (C) 2002 Oracle Corporation.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * Authors: Neeraj Goyal, Suchit Kaura, Kurt Hackel, Sunil Mushran,
 *          Manish Singh, Wim Coekaerts
 */

#define OCFSMAIN_PRIVATE_DECLS

#include <ocfs.h>
#include <linux/iobuf.h>
#include <linux/sysctl.h>

/* Tracing: this file's log macros are tagged with the INIT debug context. */
#define OCFS_DEBUG_CONTEXT      OCFS_DEBUG_CONTEXT_INIT

/*
 * On kernels >= 2.4.18 the "_sz" kiovec helpers are mapped onto the plain
 * alloc_kiovec()/free_kiovec() calls; the third (bh) argument is dropped.
 */
#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,18)
#define free_kiovec_sz(nr, buf, bh)     free_kiovec(nr, buf)
#define alloc_kiovec_sz(nr, buf, bh)    alloc_kiovec(nr, buf)
#endif

/*
** Globals
**
** All of these are (re)initialised by ocfs_driver_entry() at module load.
*/
ocfs_global_ctxt OcfsGlobalCtxt;	/* Driver-wide context; zeroed at load */
spinlock_t osb_id_lock;			/* Held while osb_id is written */
__u32 osb_id;             /* Keeps track of next available OSB Id */
spinlock_t mount_cnt_lock;		/* Held while mount_cnt is written */
__u32 mount_cnt;          /* Number of volumes currently mounted */

/* Binary sysctl id used for the "ocfs" directory under "kernel". */
#define KERN_OCFS 989 
/*
 * Leaf entries: kernel/ocfs/{debug_level,debug_context,debug_exclude}.
 * Each is a 0644 integer handled by proc_dointvec / sysctl_intvec and is
 * backed directly by the module variable of the same name.
 */
static ctl_table ocfs_dbg_table[] = {
	{1, "debug_level", &debug_level, sizeof (__u32), 0644, NULL, &proc_dointvec, 
	&sysctl_intvec, NULL, NULL, NULL},
	{2, "debug_context", &debug_context, sizeof (__u32), 0644, NULL, &proc_dointvec, 
	&sysctl_intvec, NULL, NULL, NULL},
	{3, "debug_exclude", &debug_exclude, sizeof (__u32), 0644, NULL, &proc_dointvec, 
	&sysctl_intvec, NULL, NULL, NULL},
	{0}
};

/* The "ocfs" directory (mode 0555) that holds the debug entries. */
static ctl_table ocfs_kern_table[] = {
       	{KERN_OCFS, "ocfs", NULL, 0, 0555, ocfs_dbg_table}, 
	{0} 
};
/* Root of the tree handed to register_sysctl_table(): "kernel" -> "ocfs". */
static ctl_table ocfs_root_table[] = {
		{CTL_KERN, "kernel", NULL, 0, 0555, ocfs_kern_table},
		{0}
};
/* Handle returned by register_sysctl_table(); NULL until registered. */
static struct ctl_table_header *ocfs_table_header = NULL;

/* Forward declaration; defined later in this file. */
int ocfs_get_block2 (struct inode *inode, long iblock, long *oblock, int len);

/* kvec read/write entry points, only referenced when AIO support is built. */
#ifdef AIO_ENABLED
int ocfs_kvec_read(struct file *filp, kvec_cb_t cb, size_t size, loff_t pos);
int ocfs_kvec_write(struct file *filp, kvec_cb_t cb, size_t size, loff_t pos);
#endif

/*
** ops
*/
/* Dentry operations; installed on each dentry by ocfs_lookup(). */
static struct dentry_operations ocfs_dentry_ops = {
	.d_revalidate = ocfs_dentry_revalidate	// let's test it out!
};

/*
 * Superblock operations; installed as sb->s_op by ocfs_read_super().
 * remount_fs is only provided when ALLOW_CACHE_FS is defined.
 */
static struct super_operations ocfs_sops = {
	.statfs = ocfs_statfs,
	.put_inode = ocfs_put_inode,
	.clear_inode = ocfs_clear_inode,
	//put_inode =           force_delete,
	//delete_inode =        ocfs_delete_inode, 
	.read_inode = ocfs_read_inode,
	.read_inode2 = ocfs_read_inode2,
	.put_super = ocfs_put_super,
#ifdef ALLOW_CACHE_FS
	.remount_fs = ocfs_remount,
#endif

};

/*
 * Address-space (page cache) operations; attached to every inode's
 * i_mapping by ocfs_populate_inode() and ocfs_read_inode2().
 */
static struct address_space_operations ocfs_aops = {
	.readpage = ocfs_readpage,
	.writepage = ocfs_writepage,
	.prepare_write = ocfs_prepare_write,
	.commit_write = ocfs_commit_write,
#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,10)
	/*
	 * we are only adding this here as a dummy basically, just need open with O_DIRECT
	 * to succeed, we still call ocfs_rw_direct()
	 */
	.direct_IO = ocfs_direct_IO
#endif
};

/*
 * File operations for regular files (set as i_fop for S_IFREG inodes by
 * ocfs_populate_inode()).  The kvec/aio hooks exist only with AIO_ENABLED.
 */
static struct file_operations ocfs_fops = {
	.read = ocfs_file_read,
	.write = ocfs_file_write,
	.mmap = generic_file_mmap,
	.fsync = ocfs_sync_file,
	.flush = ocfs_flush,
	.release = ocfs_file_release,
	.open = ocfs_file_open,
	.ioctl = ocfs_ioctl,
#ifdef AIO_ENABLED
	.kvec_read = ocfs_kvec_read,
	.kvec_write = ocfs_kvec_write,
	.aio_read = generic_file_aio_read,
        .aio_write = generic_file_aio_write,
#endif

};

/*
 * File operations for directories (set as i_fop for S_IFDIR inodes).
 * No .open hook is installed; the entry below is deliberately disabled.
 */
static struct file_operations ocfs_dops = {
	.read = generic_read_dir,
	.readdir = ocfs_readdir,
	.fsync = ocfs_sync_file,
	.release = ocfs_file_release,
	//.open =                ocfs_file_open,
	.ioctl = ocfs_ioctl
};

/*
 * Inode operations for directories.  Note that .rmdir is served by the
 * same handler as .unlink (ocfs_unlink).
 */
static struct inode_operations ocfs_dir_iops = {
	.create = ocfs_create,
	.lookup = ocfs_lookup,
	.link = ocfs_link,
	.unlink = ocfs_unlink,
	.rmdir = ocfs_unlink,
	.symlink = ocfs_symlink,
	.mkdir = ocfs_mkdir,
	.mknod = ocfs_mknod,
	.rename = ocfs_rename,
//    .revalidate =          ocfs_inode_revalidate,
	.setattr = ocfs_setattr,
	.getattr = ocfs_getattr,
};

/* Inode operations for regular files: attribute get/set only. */
static struct inode_operations ocfs_file_iops = {
	.setattr = ocfs_setattr,
	.getattr = ocfs_getattr,
//    .revalidate =          ocfs_inode_revalidate,
};

/*
 * Module (insmod) parameters.
 *
 * The string parameters are validated and copied into OcfsGlobalCtxt by
 * ocfs_read_params(); the debug_* values are also exported through the
 * sysctl table above.
 *
 * All numeric parameters are __u32, so they are registered with the "i"
 * (int) type code.  The "l" (long) code makes the module loader store a
 * long, which is wider than the variable on 64-bit platforms.
 */
char *node_name = NULL;
__u32 node_number = OCFS_INVALID_NODE_NUM;
__u32 debug_context = 0;
__u32 debug_level = 0;
__u32 debug_exclude = 0;
char *ip_address = NULL;
__u32 ip_port = 0;
char *guid = NULL;
__u32 cs = 0;
__u32 comm_voting = 0;

MODULE_PARM (node_name, "s");
MODULE_PARM_DESC(node_name, "Name of this machine in the cluster");
MODULE_PARM (node_number, "i");
MODULE_PARM_DESC(node_number, "Slot number for this machine within volume");
MODULE_PARM (debug_context, "i");
MODULE_PARM_DESC(debug_context, "Debug context");
MODULE_PARM (debug_level, "i");
MODULE_PARM_DESC(debug_level, "Debug level");
MODULE_PARM (debug_exclude, "i");
MODULE_PARM_DESC(debug_exclude, "Process ID to exclude from tracing");
MODULE_PARM (ip_address, "s");
MODULE_PARM_DESC(ip_address, "IP address for the network dlm on this node");
MODULE_PARM (ip_port, "i");
MODULE_PARM_DESC(ip_port, "Port number for the network dlm on this node");
MODULE_PARM (guid, "s");
MODULE_PARM_DESC(guid, "GUID for this machine");
MODULE_PARM (cs, "i");
MODULE_PARM_DESC(cs, "Checksum");
MODULE_PARM (comm_voting, "i");
MODULE_PARM_DESC(comm_voting, "Enable/Disable network dlm");

/*
 * ocfs_parse_options()
 *
 * Parses a comma separated mount option string in place, e.g.
 *     gid=9999,uid=9999,[no]cache,reclaimid
 *
 * *cache and *reclaim_id are always reset to false first; *uid and *gid
 * are only written when the matching option is present.  A NULL option
 * string is accepted.
 *
 * Returns 0 on success, 1 if an option is unknown or malformed.
 */
static int ocfs_parse_options (char *options, __u32 * uid, __u32 * gid,
			       bool * cache, bool * reclaim_id)
{
	char *opt;
	char *arg;
	int ret = 1;

	LOG_ENTRY ();

	*cache = false;
	*reclaim_id = false;

	if (options == NULL) {
		ret = 0;
		goto bail;
	}

	opt = strtok (options, ",");
	while (opt != NULL) {
		/* Split "name=value" into two NUL terminated strings */
		arg = strchr (opt, '=');
		if (arg != NULL) {
			*arg = 0;
			arg++;
		}

		if (strcmp (opt, "gid") == 0) {
			if (arg == NULL || *arg == 0) {
				LOG_ERROR_STR
				    ("gid option requires an argument");
				goto bail;
			}
			*gid = simple_strtoul (arg, &arg, 0);
			/* anything left over means the number was malformed */
			if (*arg != 0) {
				LOG_ERROR_ARGS ("Invalid gid option: %s",
						arg);
				goto bail;
			}
		} else if (strcmp (opt, "uid") == 0) {
			if (arg == NULL || *arg == 0) {
				LOG_ERROR_STR
				    ("ERROR: uid option requires an argument");
				goto bail;
			}
			*uid = simple_strtoul (arg, &arg, 0);
			if (*arg != 0) {
				LOG_ERROR_ARGS ("Invalid uid option: %s",
						arg);
				goto bail;
			}
		} else if (strcmp (opt, "cache") == 0) {
			*cache = true;
		} else if (strcmp (opt, "nocache") == 0) {
			*cache = false;
		} else if (strcmp (opt, "reclaimid") == 0) {
			*reclaim_id = true;
		} else {
			LOG_ERROR_ARGS ("Invalid mount option: %s", opt);
			goto bail;
		}

		opt = strtok (NULL, ",");
	}

	ret = 0;

      bail:
	LOG_EXIT_LONG (ret);
	return ret;
}				/* ocfs_parse_options */

/*
 * ocfs_read_super()
 *
 */
static struct super_block *ocfs_read_super (struct super_block *sb, void *data,
					    int silent)
{
	struct dentry *root;
	int status;
	struct inode *inode = NULL;
	__u32 uid = current->fsuid;
	__u32 gid = current->fsgid;
	bool c;
	bool reclaim_id;
        ocfs_super *osb = NULL;

	LOG_ENTRY ();

	MOD_INC_USE_COUNT;

	if (ocfs_parse_options (data, &uid, &gid, &c, &reclaim_id) != 0) {
		LOG_ERROR_STR ("ocfs_read_super: bad mount option");
		goto read_super_error;
	}

	/* TODO: fix this */
	sb->s_blocksize = 512;
	sb->s_blocksize_bits = 9;
#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,18)
	status = set_blocksize (sb->s_dev, 512);
	if (status < 0) {
		LOG_ERROR_STR ("ocfs_read_super: set_blocksize failed!");
		goto read_super_error;
	}
#else
	set_blocksize (sb->s_dev, 512);
#endif

	sb->s_magic = OCFS_MAGIC;
	sb->s_op = &ocfs_sops;
	sb->s_flags |= MS_NOATIME;

	/* this is needed to support O_LARGE_FILE */
	sb->s_maxbytes = OCFS_LINUX_MAX_FILE_SIZE;

	status = ocfs_mount_volume (sb, reclaim_id);
        osb = ((ocfs_super *)(sb->u.generic_sbp));
	if (status < 0 || !osb)
	if (status < 0 || !osb) {
		LOG_ERROR_STR ("Error mounting volume");
		goto read_super_error;
	}

#ifdef ALLOW_CACHE_MOUNT
	osb->cache_fs = c;		/* set caching type */
#else
	osb->cache_fs = false;
#endif

	inode = iget4 (sb, OCFS_ROOT_INODE_NUMBER, 0, NULL);
	if (!inode) {
		LOG_ERROR_STATUS (status);
		goto read_super_error;
	}

	root = d_alloc_root (inode);
	if (!root) {
		LOG_ERROR_STATUS (status);
		iput (inode);
		goto read_super_error;
	}

	sb->s_root = root;

	printk ("ocfs: Mounting device (%u,%u) in node %d\n",
		MAJOR(sb->s_dev), MINOR(sb->s_dev), osb->node_num);

	LOG_EXIT_PTR (sb);
	return sb;

      read_super_error:
	if (osb)
		ocfs_dismount_volume (sb);

	MOD_DEC_USE_COUNT;
	if (inode != NULL) {
		iput (inode);
		inode = NULL;
	}

	LOG_EXIT_PTR (0);
	return NULL;
}				/* ocfs_read_super */

/*
 * Filesystem type "ocfs": mounted through ocfs_read_super() and flagged
 * FS_REQUIRES_DEV.  Registered in ocfs_driver_entry() and unregistered in
 * ocfs_driver_exit().
 */
static DECLARE_FSTYPE (ocfs_fs_type, "ocfs", ocfs_read_super, FS_REQUIRES_DEV);

/*
 * ocfs_driver_entry()
 *
 * Driver entry point. Called on insmod.
 *
 * Registers the sysctl tree, initialises the global context, reads the
 * insmod parameters, sets up the memory lists, the DLM and the proc
 * interface, and finally registers the filesystem type.
 *
 * Returns 0 on success or a negative errno.  On failure the memory lists
 * and the sysctl table set up here are released again.
 */
static int __init ocfs_driver_entry (void)
{
	int status = 0;

	LOG_ENTRY ();

	ocfs_version_print ();

	ocfs_table_header = register_sysctl_table(ocfs_root_table, 0);
	if (!ocfs_table_header) {
		LOG_ERROR_STATUS(status = -ENOMEM);
		goto leave;
	}

	memset (&OcfsGlobalCtxt, 0, sizeof (ocfs_global_ctxt));
	memset (&OcfsIpcCtxt, 0, sizeof (ocfs_ipc_ctxt));

#ifdef OCFS_LINUX_MEM_DEBUG
	INIT_LIST_HEAD (&(OcfsGlobalCtxt.item_list));
#endif
	INIT_LIST_HEAD (&(OcfsGlobalCtxt.osb_next));

	/* Read remaining insmod params */
	if ((status = ocfs_read_params ()) < 0)
		goto leave;

	/* Initialize some required fields */
	OcfsGlobalCtxt.obj_id.type = OCFS_TYPE_GLOBAL_DATA;
	OcfsGlobalCtxt.obj_id.size = sizeof (ocfs_global_ctxt);

	/* Initialize the global data resource */
	ocfs_init_sem (&(OcfsGlobalCtxt.res));
	OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_GLBL_CTXT_RESOURCE_INITIALIZED);

	/* Initialize the memory slabs for oin, ofile, and file entry */
	status = ocfs_initialize_mem_lists ();
	if (status < 0) {
		LOG_ERROR_STATUS (status);
		goto leave;
	}

	/* Initialize the DLM */
	status = ocfs_init_dlm ();
	if (status < 0) {
		LOG_ERROR_STATUS (status);
		goto leave;
	}

	OcfsGlobalCtxt.hbm = DISK_HBEAT_COMM_ON;

	spin_lock_init (&osb_id_lock);
	spin_lock (&osb_id_lock);
	osb_id = 0;
	spin_unlock (&osb_id_lock);

	spin_lock_init (&mount_cnt_lock);
	spin_lock (&mount_cnt_lock);
	mount_cnt = 0;
	spin_unlock (&mount_cnt_lock);

	spin_lock_init (&OcfsGlobalCtxt.comm_seq_lock);
	spin_lock (&OcfsGlobalCtxt.comm_seq_lock);
	OcfsGlobalCtxt.comm_seq_num = 0;
	spin_unlock (&OcfsGlobalCtxt.comm_seq_lock);

	/* Initialize the proc interface */
	ocfs_proc_init ();

	/*
	 * Register the filesystem last, and treat a failure like any other
	 * init failure so the resources above are not left behind.
	 */
	status = register_filesystem (&ocfs_fs_type);
	if (status < 0) {
		LOG_ERROR_STATUS (status);
		ocfs_proc_deinit ();
	}

      leave:
	if (status < 0) {
		/*
		 * NOTE(review): nothing here undoes ocfs_init_dlm(); confirm
		 * whether it needs an explicit teardown on this path.
		 */

		/* Free up lookaside lists */
		if (OcfsGlobalCtxt.flags & OCFS_FLAG_MEM_LISTS_INITIALIZED)
			ocfs_free_mem_lists ();

		/* Delete the global context resource */
		if (OcfsGlobalCtxt.flags & OCFS_FLAG_GLBL_CTXT_RESOURCE_INITIALIZED)
			OCFS_CLEAR_FLAG (OcfsGlobalCtxt.flags,
				       OCFS_FLAG_GLBL_CTXT_RESOURCE_INITIALIZED);

		if (ocfs_table_header)
			unregister_sysctl_table(ocfs_table_header);
	}

	LOG_EXIT_STATUS (status);

	/* Propagate the real error code rather than a bare -1 */
	return status;
}				/* ocfs_driver_entry */

/*
 * ocfs_read_params()
 *
 * Validates the insmod parameters and stores them in OcfsGlobalCtxt.
 * Every parameter is checked even after an earlier one has failed, so
 * each problem gets its own log line.
 *
 * Returns 0 when everything is acceptable, -EINVAL otherwise.
 */
int ocfs_read_params(void)
{
	int status = 0;
	__u32 check_sum = 0;
	int i;

	/* Preferred node slot is optional */
	if (node_number != OCFS_INVALID_NODE_NUM) {
		if (node_number >= OCFS_MAXIMUM_NODES) {
			status = -EINVAL;
			LOG_ERROR_STR("'node_number' must be between 0 and 31");
		} else {
			OcfsGlobalCtxt.pref_node_num = node_number;
			LOG_TRACE_ARGS("Preferred node number: %d\n",
				       node_number);
		}
	}

	/* An unset port falls back to the default */
	OcfsGlobalCtxt.comm_info.ip_port =
	    (ip_port != 0) ? ip_port : OCFS_IPC_DEFAULT_PORT;
	LOG_TRACE_ARGS("IP port: %d\n", OcfsGlobalCtxt.comm_info.ip_port);

	if (!node_name || strlen (node_name) >= MAX_NODE_NAME_LENGTH) {
		status = -EINVAL;
		LOG_ERROR_STR ("'node_name' not set or too long");
	} else {
		OcfsGlobalCtxt.node_name = node_name;
		LOG_TRACE_ARGS ("Node name: %s\n", OcfsGlobalCtxt.node_name);
	}

	if (!ip_address || strlen (ip_address) >= MAX_IP_ADDR_LEN) {
		status = -EINVAL;
		LOG_ERROR_STR ("'ip_address' not set or too long");
	} else {
		OcfsGlobalCtxt.comm_info.ip_addr = ip_address;
		LOG_TRACE_ARGS ("IP address: %s\n", ip_address);
	}

	if (!guid || strlen (guid) != GUID_LEN) {
		status = -EINVAL;
		LOG_ERROR_STR ("'guid' not set correctly");
	} else {
		memcpy (&OcfsGlobalCtxt.guid.guid, guid, GUID_LEN);
		LOG_TRACE_ARGS ("Node guid: %s\n", guid);
	}

	/* The checksum is only verified once all other parameters passed */
	if (status == 0) {
		for (i = 0; i < GUID_LEN; i++)
			check_sum += (__u32) guid[i];

		if (check_sum != cs) {
			status = -EINVAL;
			LOG_ERROR_STR ("load module using load_ocfs");
		}
	}

	/* hardcoding... not used yet */
	OcfsGlobalCtxt.comm_info.type = OCFS_UDP;
	OcfsGlobalCtxt.comm_info.ip_mask = NULL;

	return status;
}				/* ocfs_read_params */


#ifdef OCFS_LINUX_MEM_DEBUG
/*
 * ocfs_memcheck()
 *
 * Walks OcfsGlobalCtxt.item_list and emits one error line for every
 * entry found on it, tagged with the allocation kind.
 */
static void ocfs_memcheck (void)
{
	struct list_head *pos;
	struct list_head *next;
	alloc_item *leak;
	char *kind;
	char detail[20];

	list_for_each_safe (pos, next, &OcfsGlobalCtxt.item_list) {
		leak = list_entry (pos, alloc_item, list);

		if (leak->type == SLAB_ITEM) {
			/* TODO: use the actual slab name */
			kind = "SLAB";
			snprintf(detail, sizeof (detail), "slab=%p",
				 leak->u.slab);
		} else {
			/* every non-slab kind reports its length */
			if (leak->type == KMALLOC_ITEM)
				kind = "KMALLOC";
			else if (leak->type == VMALLOC_ITEM)
				kind = "VMALLOC";
			else
				kind = "UNKNOWN";
			snprintf(detail, sizeof (detail), "size=%d",
				 leak->u.length);
		}

		LOG_ERROR_ARGS ("unfreed %s mem %x: %s tag='%s'", kind,
				leak->address, detail, leak->tag);
	}
}  /* ocfs_memcheck */

#endif				/* OCFS_LINUX_MEM_DEBUG */

/*
 * ocfs_driver_exit()
 *
 * Called on rmmod
 *
 * Tears the driver down: removes the sysctl tree, flags shutdown on the
 * global context and on every volume still on the osb list (committing
 * each volume's cache), frees the memory lists, removes the proc
 * interface and unregisters the filesystem type.
 */
static void __exit ocfs_driver_exit (void)
{
	ocfs_super *osb = NULL;
	struct list_head *osb_entry;
	struct list_head *osb_tmp;

	LOG_ENTRY ();

	if (ocfs_table_header)
        	unregister_sysctl_table(ocfs_table_header);

	/* Signal DLM thread to exit */
	ocfs_down_sem (&(OcfsGlobalCtxt.res), true);
	OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_SHUTDOWN_VOL_THREAD);

	/* _safe variant: each osb is unlinked from the list inside the loop */
	list_for_each_safe (osb_entry, osb_tmp, &(OcfsGlobalCtxt.osb_next)) {
		osb = list_entry (osb_entry, ocfs_super, osb_next);

		/* Mark the volume as shutting down under its own semaphore */
		ocfs_down_sem (&osb->osb_res, true);
		OCFS_SET_FLAG (osb->osb_flags, OCFS_OSB_FLAGS_SHUTDOWN);
		ocfs_up_sem (&osb->osb_res);

		/* Commit the volume's cache with needs_flush raised */
		osb->needs_flush = true;
		ocfs_trans_in_progress(osb);
		ocfs_commit_cache (osb, true);
		osb->needs_flush = false;

		list_del (&osb->osb_next);
	}

	if (OcfsGlobalCtxt.flags & OCFS_FLAG_MEM_LISTS_INITIALIZED)
		ocfs_free_mem_lists ();

	ocfs_up_sem (&(OcfsGlobalCtxt.res));

	/* Deinit the proc interface */
	ocfs_proc_deinit ();

	unregister_filesystem (&ocfs_fs_type);

#ifdef OCFS_LINUX_MEM_DEBUG
	/* Report whatever is still on the debug allocation list */
	ocfs_memcheck ();
#endif

	printk("Unloaded OCFS Driver module\n");
	LOG_EXIT ();
	return;
}				/* ocfs_driver_exit */

/*
 * here's how inodes get read from disk:
 * iget4 -> find_inode -> OCFS_FIND_INODE
 * found? : return the in-memory inode
 * not found? : get_new_inode -> OCFS_READ_INODE2
 */

/*
 * ocfs_find_inode()
 *
 * This is the iget4 helper function
 *
 * Decides whether an in-memory inode with a matching inode number is
 * really the file described by opaque (an ocfs_find_inode_args).  The
 * inode's recorded disk offset must equal the first extent offset for
 * directories, or args->offset for everything else.  On a match the
 * inode is refreshed from the file entry in args.
 *
 * Returns 1 on a match, 0 otherwise (including NULL arguments).
 */
int ocfs_find_inode (struct inode *inode, unsigned long ino, void *opaque)
{
	ocfs_find_inode_args *args;
	ocfs_inode *oin;
	int ret = 0;
	__u64 fileOff;
	mode_t mode;

	LOG_ENTRY_ARGS ("(0x%p, %lu, 0x%p)\n", inode, ino, opaque);

	if (opaque == NULL || inode == NULL)
		goto bail;
	args = (ocfs_find_inode_args *) opaque;

	if (ino != inode->i_ino) {
		goto bail;
	}

	if (!ocfs_linux_get_inode_offset (inode, &fileOff, NULL)) {
		LOG_TRACE_STR ("error getting inode offset");
		goto bail;
	}

	/*
	 * The trace formats print each 64-bit offset as a "%u.%u" pair, so
	 * every offset has to be split with HILO() to match the format.
	 */
	if (S_ISDIR (inode->i_mode)) {
		LOG_TRACE_STR ("find_inode -> S_ISDIR\n");
		if (args->entry->extents[0].disk_off != fileOff) {
			LOG_TRACE_ARGS
			    ("DIR : inode number same but full offset does not match: %u.%u != %u.%u\n",
			     HILO (args->entry->extents[0].disk_off),
			     HILO (fileOff));
			goto bail;
		}
	} else if (args->offset != fileOff) {
		LOG_TRACE_ARGS
		    ("FILE : inode number same but full offset does not match: %u.%u != %u.%u\n",
		     HILO (args->offset), HILO (fileOff));
		goto bail;
	}

	/* not sure if this is appropriate, but we have the most 
	 * current file entry so why not use it? */
	mode = args->entry->prot_bits;

	/* Translate the on-disk attribute (CDSL bit ignored) to a file type */
	switch (args->entry->attribs & (~OCFS_ATTRIB_FILE_CDSL)) {
	    case OCFS_ATTRIB_DIRECTORY:
		    mode |= S_IFDIR;
		    break;
	    case OCFS_ATTRIB_CHAR:
		    mode |= S_IFCHR;
		    inode->i_rdev =
			MKDEV (args->entry->dev_major, args->entry->dev_minor);
		    break;
	    case OCFS_ATTRIB_BLOCK:
		    mode |= S_IFBLK;
		    inode->i_rdev =
			MKDEV (args->entry->dev_major, args->entry->dev_minor);
		    break;
	    case OCFS_ATTRIB_FIFO:
		    mode |= S_IFIFO;
		    break;
	    case OCFS_ATTRIB_SYMLINK:
		    mode |= S_IFLNK;
		    break;
	    case OCFS_ATTRIB_SOCKET:
		    mode |= S_IFSOCK;
		    break;
	    case OCFS_ATTRIB_REG:
	    default:
		    mode |= S_IFREG;
		    break;
	}
	oin = NULL;		/* set it back to our current OIN if we have one */
	if (inode_data_is_oin (inode))
		oin = ((ocfs_inode *)inode->u.generic_ip);
	ocfs_populate_inode (inode, args->entry, mode, oin);
	ret = 1;
      bail:
	LOG_EXIT_LONG (ret);
	return ret;
}				/* ocfs_find_inode */

/*
 * ocfs_populate_inode()
 *
 * Fills a VFS inode from an on-disk file entry.
 *
 *   inode  - inode to fill; i_sb must already be set
 *   fe     - file entry supplying owner, size and timestamps
 *   mode   - complete mode (type and permission bits) for the inode
 *   genptr - when non-NULL it is attached with SET_INODE_OIN(); otherwise
 *            the entry's disk offset is recorded with SET_INODE_OFFSET()
 */
static void ocfs_populate_inode (struct inode *inode, ocfs_file_entry *fe,
				 umode_t mode, void *genptr)
{
	struct super_block *vfs_sb;
	ocfs_super *vol;
	__u64 disk_off;

	LOG_ENTRY_ARGS ("(0x%p, %u, size:%u)\n", inode, mode, fe->file_size);

	vfs_sb = inode->i_sb;
	vol = ((ocfs_super *)(vfs_sb->u.generic_sbp));

	/* Directories are keyed by their first extent, the rest by sector */
	if (S_ISDIR (mode))
		disk_off = fe->extents[0].disk_off;
	else
		disk_off = fe->this_sector;

	/* Identity and ownership */
	inode->i_dev = vfs_sb->s_dev;
	inode->i_mode = mode;
	inode->i_uid = fe->uid;
	inode->i_gid = fe->gid;

	/* Size accounting */
	inode->i_blksize = (__u32) vol->vol_layout.cluster_size;	// sb->s_blocksize;
	inode->i_blocks =
	    (fe->file_size + vfs_sb->s_blocksize) >> vfs_sb->s_blocksize_bits;

	/* Page cache hooks; access times are not tracked */
	inode->i_mapping->a_ops = &ocfs_aops;
	inode->i_attr_flags |= ATTR_FLAG_NOATIME;
	inode->i_flags |= S_NOATIME;

	/* atime mirrors the modification time */
	inode->i_atime = fe->modify_time;
	inode->i_mtime = fe->modify_time;
	inode->i_ctime = fe->create_time;

	if (genptr) {
		SET_INODE_OIN (inode, genptr);
	} else {
		SET_INODE_OFFSET (inode, disk_off);
	}

	/* Per-type operation tables and size */
	if (S_ISREG (inode->i_mode)) {
		inode->i_fop = &ocfs_fops;
		inode->i_op = &ocfs_file_iops;
		inode->i_size = fe->file_size;
		inode->i_rdev = vfs_sb->s_dev;
	} else if (S_ISDIR (inode->i_mode)) {
		inode->i_op = &ocfs_dir_iops;
		inode->i_fop = &ocfs_dops;
		inode->i_size = OCFS_DEFAULT_DIR_NODE_SIZE;
		inode->i_rdev = vfs_sb->s_dev;
	} else if (S_ISLNK (inode->i_mode)) {
		inode->i_op = &page_symlink_inode_operations;
		//inode->i_fop = &ocfs_fops;
		inode->i_size = fe->file_size;
		inode->i_rdev = vfs_sb->s_dev;
	} else {
		/* device nodes, fifos, sockets: i_rdev was set by the caller */
		init_special_inode (inode, mode, inode->i_rdev);
	}

	LOG_EXIT ();
	return;
}				/* ocfs_populate_inode */

/*
 * ocfs_read_inode2()
 *
 * by this point, i_sb, i_dev, i_ino are filled in
 *
 */
static void ocfs_read_inode2 (struct inode *inode, void *opaque)
{
	struct super_block *sb;
	ocfs_find_inode_args *args;
	ocfs_super *osb;
	ocfs_inode *newoin;
	umode_t mode;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", inode, opaque);

	if (inode == NULL || inode->i_sb == NULL) {
		LOG_ERROR_STR ("bad inode");
		goto bail;
	}
	sb = inode->i_sb;
	osb = ((ocfs_super *)(sb->u.generic_sbp));
	if (inode->i_ino == OCFS_ROOT_INODE_NUMBER) {
		inode->i_mode = S_IFDIR | osb->vol_layout.prot_bits;
		inode->i_blksize = (__u32) osb->vol_layout.cluster_size;
		inode->i_size = OCFS_DEFAULT_DIR_NODE_SIZE;
		inode->i_blocks = OCFS_DEFAULT_DIR_NODE_SIZE;
		inode->i_rdev = inode->i_dev;	/* is this correct?! */
		inode->i_mapping->a_ops = &ocfs_aops;
		inode->i_attr_flags |= ATTR_FLAG_NOATIME;
		inode->i_flags |= S_NOATIME;
		inode->i_atime = CURRENT_TIME;
		inode->i_mtime = CURRENT_TIME;
		inode->i_ctime = CURRENT_TIME;
		inode->i_op = &ocfs_dir_iops;
		inode->i_fop = &ocfs_dops;
		inode->i_uid = osb->vol_layout.uid;
		inode->i_gid = osb->vol_layout.gid;
		SET_INODE_OIN (inode, osb->oin_root_dir);
		goto bail;
	}

	if (opaque == NULL) {
		make_bad_inode (inode);
		goto bail;
	}

	args = (ocfs_find_inode_args *) opaque;
	newoin = NULL;

	mode = args->entry->prot_bits;

	switch (args->entry->attribs & (~OCFS_ATTRIB_FILE_CDSL)) {
	    case OCFS_ATTRIB_DIRECTORY:
		    mode |= S_IFDIR;
		    break;
	    case OCFS_ATTRIB_CHAR:
		    inode->i_rdev =
			MKDEV (args->entry->dev_major, args->entry->dev_minor);
		    mode |= S_IFCHR;
		    break;
	    case OCFS_ATTRIB_BLOCK:
		    inode->i_rdev =
			MKDEV (args->entry->dev_major, args->entry->dev_minor);
		    mode |= S_IFBLK;
		    break;
	    case OCFS_ATTRIB_FIFO:
		    mode |= S_IFIFO;
		    break;
	    case OCFS_ATTRIB_SYMLINK:
		    mode |= S_IFLNK;
		    break;
	    case OCFS_ATTRIB_SOCKET:
		    mode |= S_IFSOCK;
		    break;
	    case OCFS_ATTRIB_REG:
	    default:
		    mode |= S_IFREG;
		    break;
	}
	ocfs_populate_inode (inode, args->entry, mode, newoin);

      bail:
	LOG_EXIT ();
	return;
}				/* ocfs_read_inode2 */

/*
 * ocfs_read_inode()
 *
 * Plain read_inode hook.  It has no file entry to read from, so it
 * unconditionally marks the inode bad; inodes are populated through
 * ocfs_read_inode2() instead.
 */
static void ocfs_read_inode (struct inode *inode)
{
	make_bad_inode (inode);
}				/* ocfs_read_inode() */

/*
 * ocfs_lookup()
 *
 * Directory lookup hook.  Searches the parent directory on disk for the
 * dentry's name and, when found, obtains the inode through iget4().  If
 * the on-disk search reports a negative status the dentry is added with
 * a NULL inode.
 *
 * Returns NULL on success, or an ERR_PTR: -ENAMETOOLONG, -ENOMEM, or
 * -EACCES when the parent offset or the inode cannot be obtained.
 */
static struct dentry *ocfs_lookup (struct inode *dir, struct dentry *dentry)
{
	struct super_block *sb = dir->i_sb;
	ocfs_super *osb = ((ocfs_super *)(sb->u.generic_sbp));
	ocfs_find_inode_args find_args;
	ocfs_file_entry *entry = NULL;
	ocfs_inode *dir_oin = NULL;
	struct inode *found = NULL;
	struct dentry *result;
	__u64 dir_off;
	int rc;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", dir, dentry,
                        dentry->d_name.len, dentry->d_name.name);

	/* Hold a reference on the parent for the duration of the lookup */
	atomic_inc (&dir->i_count);

	if (dentry->d_name.len > OCFS_MAX_FILENAME_LENGTH) {
		result = ERR_PTR (-ENAMETOOLONG);
		goto bail;
	}

	if (!ocfs_linux_get_inode_offset (dir, &dir_off, &dir_oin)) {
		LOG_ERROR_STR ("bad offset in parent inode");
		result = ERR_PTR (-EACCES);
		goto bail;
	}

	entry = ocfs_allocate_file_entry();
	if (entry == NULL) {
		result = ERR_PTR (-ENOMEM);
		goto bail;
	}

	rc = ocfs_find_files_on_disk (osb, dir_off, &(dentry->d_name), entry, NULL);
	if (rc >= 0) {
		find_args.offset = entry->this_sector;
		find_args.entry = entry;
		found = iget4 (sb, LO (entry->this_sector),
			       (find_inode_t) ocfs_find_inode,
			       (void *) (&find_args));

		/* A bad inode is dropped and treated like a missing one */
		if (found != NULL && is_bad_inode (found)) {
			iput (found);
			found = NULL;
		}
		if (found == NULL) {
			result = ERR_PTR (-EACCES);
			goto bail;
		}
	}

	dentry->d_op = &ocfs_dentry_ops;
	d_add (dentry, found);
	result = NULL;

      bail:
	if (entry != NULL)
		ocfs_release_file_entry (entry);

	atomic_dec (&dir->i_count);
	LOG_EXIT_PTR (result);
	return result;
}				/* ocfs_lookup */

/*
 * ocfs_statfs()
 *
 * Fills buf with volume usage figures.  The total comes from the
 * in-memory cluster bitmap size; the used count is read from the bitmap
 * lock sector on disk.
 *
 * Returns 0 on success, or the negative status of the disk read.  On
 * failure buf is left untouched, so callers never see figures computed
 * from an unread buffer.
 */
static int ocfs_statfs (struct super_block *sb, struct statfs *buf)
{
        ocfs_super *osb = NULL;
        __u32 numbits, freebits = 0;
        // ocfs_lock_res *pLockResource;
        int status = 0;
        /* sized to match the read below */
        __u8 lockbuf[OCFS_SECTOR_SIZE];
        ocfs_bitmap_lock *bm_lock = (ocfs_bitmap_lock *)lockbuf;

        LOG_ENTRY_ARGS ("(0x%p, 0x%p)\n", sb, buf);

        osb = ((ocfs_super *)(sb->u.generic_sbp));
        numbits = osb->cluster_bitmap.size;
        
        status = ocfs_read_force_disk (osb, lockbuf, OCFS_SECTOR_SIZE, (__u64)OCFS_BITMAP_LOCK_OFFSET);
        if (status < 0) {
                LOG_ERROR_STATUS (status);
                goto bail;
        }
        status = 0;

        /* Guard against a used count larger than the bitmap */
        if (numbits >= bm_lock->used_bits)
            freebits = numbits - bm_lock->used_bits;

        buf->f_type = OCFS_MAGIC;
        buf->f_bsize = sb->s_blocksize;
        buf->f_namelen = OCFS_MAX_FILENAME_LENGTH;
        /* cluster_size >> 9 converts clusters to 512-byte blocks */
        buf->f_blocks =
            (unsigned long) ((unsigned long) (numbits) *
                             (unsigned long) (osb->vol_layout.
                                              cluster_size >> 9));
        buf->f_bfree =
            (unsigned long) (freebits * (osb->vol_layout.cluster_size >> 9));
        buf->f_bavail = buf->f_bfree;
        buf->f_files = (unsigned long) (numbits);
        buf->f_ffree = (unsigned long) (numbits) - freebits;

      bail:
        LOG_EXIT_LONG (status);
        return status;
}                               /* ocfs_statfs */



/*
 * ocfs_block_symlink()
 *
 * Writes a symlink target into page 0 of the inode's mapping.
 *
 *   inode   - symlink inode
 *   symname - target path
 *   len     - number of bytes in symname plus one; len - 1 bytes are
 *             written
 *
 * Returns 0 on success, -ENOMEM if the page cannot be obtained, or the
 * error from prepare_write/readpage.
 */
static int ocfs_block_symlink (struct inode *inode, const char *symname, int len)
{
	struct address_space *mapping;
	struct page *page;
	int err = -ENOMEM;
	char *kaddr;

	LOG_ENTRY ();

	mapping = inode->i_mapping;
	page = grab_cache_page (mapping, 0);

	if (!page)
		goto fail;
	err = mapping->a_ops->prepare_write (NULL, page, 0, len - 1);
	if (err) {
		LOG_ERROR_STATUS (err);
		goto fail_map;
	}

	/* Zero the whole page so the target is NUL terminated and padded */
	kaddr = page_address (page);
	memset (kaddr, 0, PAGE_CACHE_SIZE);
	memcpy (kaddr, symname, len - 1);
	mapping->a_ops->commit_write (NULL, page, 0, len - 1);
	err = mapping->a_ops->readpage (NULL, page);
	wait_on_page (page);
	page_cache_release (page);
	if (err < 0) {
		LOG_ERROR_STATUS (err);
		goto fail;
	}

	mark_inode_dirty (inode);
	/* Balance LOG_ENTRY on the success path as well */
	LOG_EXIT_STATUS (0);
	return 0;

      fail_map:
	UnlockPage (page);
	page_cache_release (page);
      fail:
	LOG_EXIT_STATUS (err);
	return err;
}				/* ocfs_block_symlink */

/*
 * ocfs_symlink_get_block()
 *  
 */
int ocfs_symlink_get_block (struct inode *inode,
			long iblock, struct buffer_head *bh_result, int create)
{
	int err = -EIO;
	ocfs_super *osb;
	int status;
	ocfs_file_entry *fe = NULL;
	__u64 entryOffset;

	LOG_ENTRY_ARGS ("(0x%p, %d, 0x%p, %d)\n", inode, iblock, bh_result,
			create);

	if (!inode) {
		LOG_ERROR_STR ("bad inode");
		goto bail;
	}
	osb = ((ocfs_super *)(inode->i_sb->u.generic_sbp));

	if ((iblock << 9) > PATH_MAX + 1) {
		LOG_ERROR_ARGS ("file offset > PATH_MAX: %u.%u", iblock << 9);
		goto bail;
	}

	fe = ocfs_allocate_file_entry ();
	if (fe == NULL) {
		LOG_ERROR_STATUS (status = -ENOMEM);
		goto bail;
	}
	if (!ocfs_linux_get_inode_offset (inode, &entryOffset, NULL)) {
		LOG_ERROR_STR ("could not get inode offset!");
		goto bail;
	}
	status = ocfs_read_file_entry (osb, fe, entryOffset);
	if (status < 0) {
		LOG_ERROR_STATUS (status);
		goto bail;
	}

	if (!IS_VALID_FILE_ENTRY (fe)) {
		LOG_ERROR_ARGS ("Invalid fe at offset %u.%u", HI(entryOffset),
				LO(entryOffset));
		goto bail;
	}
	if ((iblock << 9) >= (__s64) fe->alloc_size) {
		LOG_ERROR_ARGS
		    ("file offset is outside the allocated size: %u.%u",
		     iblock << 9);
		goto bail;
	}

	bh_result->b_dev = inode->i_dev;
	bh_result->b_blocknr = fe->extents[0].disk_off >> 9;
	bh_result->b_blocknr += iblock;
	bh_result->b_state |= (1UL << BH_Mapped);
	err = 0;

      bail:
	if (fe != NULL)
		ocfs_release_file_entry (fe);
	LOG_EXIT_LONG (err);
	return err;
}				/* ocfs_symlink_get_block */

/*
 * ocfs_get_block()
 *
 * get_block callback: maps logical block iblock of inode to a disk block
 * in bh_result.  Symlinks are handed to ocfs_symlink_get_block(); every
 * other inode must carry an oin.  When create is set the buffer is also
 * flagged BH_New.
 *
 * Returns 0 on success, -EIO on a bad inode, a failed allocation lookup,
 * or a lookup that resolves to disk block 0.
 */
int ocfs_get_block (struct inode *inode,
		long iblock, struct buffer_head *bh_result, int create)
{
	int err = -EIO;
	ocfs_inode *oin;
	ocfs_super *osb;
	__s64 vbo = 0;
	__s64 lbo = 0;
	__u32 len, numExts;
	int status;
	void *ioRuns = NULL;

	LOG_ENTRY_ARGS ("(0x%p, %ld, 0x%p, %d)\n", inode, iblock, bh_result,
			create);

	/* Validate the pointer before looking at i_mode */
	if (!inode) {
		LOG_ERROR_STR ("bad inode or inode has no oin");
		goto bail;
	}

	if (S_ISLNK (inode->i_mode)) {
		err = ocfs_symlink_get_block (inode, iblock, bh_result, create);
		goto bail;
	}

	if (!inode_data_is_oin (inode)) {
		LOG_ERROR_STR ("bad inode or inode has no oin");
		goto bail;
	}

	oin = ((ocfs_inode *)inode->u.generic_ip);
	osb = (ocfs_super *) oin->osb;

	vbo = (__s64) iblock << inode->i_sb->s_blocksize_bits;

	/* Only the start of the block is looked up */
	len = 1;
	status = ocfs_lookup_file_allocation (osb, oin, vbo, &lbo,
					   len, &numExts, &ioRuns);
	if (status < 0) {
		LOG_ERROR_STATUS (status);
		goto bail;
	}

	if (create) {
		/* TODO */
		LOG_TRACE_ARGS ("CREATE: offset: %ld -> block#: %ld\n", iblock,
				(long) (lbo >> inode->i_sb->s_blocksize_bits));
		/* goto bail; */
	}
	bh_result->b_dev = inode->i_dev;
	bh_result->b_blocknr = lbo >> inode->i_sb->s_blocksize_bits;
	bh_result->b_state |= (1UL << BH_Mapped);
	if (create)
		bh_result->b_state |= (1UL << BH_New);
	err = 0;

	/* Block 0 is never a valid data block here; treat it as an error */
	if (bh_result->b_blocknr == 0) {
		LOG_ERROR_ARGS("vbo:%u.%u, lbo:%u.%u, fe:%u.%u", HILO(vbo),
			       HILO(lbo), HILO(oin->file_disk_off));
		err = -EIO;
	}

      bail:
	ocfs_safefree (ioRuns);
	LOG_EXIT_LONG (err);
	return err;
}				/* ocfs_get_block */

/*
 * ocfs_get_block2()
 *
 * Translates logical block iblock of inode into a disk block number,
 * stored in *oblock.  The len argument is ignored: the lookup is always
 * done with a length of 1.
 *
 * Returns 0 on success, -1 on a bad inode or a failed allocation lookup.
 * *oblock is always written; it is 0 when inode is NULL.
 */
int ocfs_get_block2 (struct inode *inode, long iblock, long *oblock, int len)
{
	int err = 0;
	ocfs_inode *oin;
	ocfs_super *osb;
	__s64 vbo = 0;
	__s64 lbo = 0;
	__u32 numExts;
	int status;
	void *ioRuns = NULL;

	LOG_ENTRY_ARGS ("(0x%p, %ld)\n", inode, iblock);

	if (!inode || !inode_data_is_oin (inode)) {
		LOG_ERROR_STR ("bad inode or inode has no oin");
		err = -1;
		goto bail;
	}
	oin = ((ocfs_inode *)inode->u.generic_ip);
	osb = (ocfs_super *) oin->osb;

	vbo = (__s64) iblock << inode->i_sb->s_blocksize_bits;
	len = 1;
	status = ocfs_lookup_file_allocation (osb, oin, vbo, &lbo,
					len, &numExts, &ioRuns);
	if (status < 0) {
		LOG_ERROR_STATUS (status);
		err = -1;
		goto bail;
	}

      bail:
	if (ioRuns != NULL) {
		ocfs_safefree (ioRuns);
	}
	/* inode may be NULL on the error path; do not dereference it then */
	if (inode)
		*oblock = lbo >> inode->i_sb->s_blocksize_bits;
	else
		*oblock = 0;
	LOG_EXIT_LONG (err);
	return err;
}				/* ocfs_get_block2 */

/*
 * ocfs_file_write()
 *
 * write() entry point.  After validating the inode/oin and the volume
 * state it:
 *   - recomputes oin->cache_enabled from the lock resource (cache is on
 *     only when this node is the master and holds the cache lock);
 *   - for non O_DIRECT writes, takes oin->main_res and refuses a second
 *     concurrent buffered writer (-EIO);
 *   - re-verifies the oin if needed;
 *   - extends the allocation when the write ends beyond alloc_size;
 *   - performs the write through ocfs_rw_direct() or generic_file_write();
 *   - when the write ends beyond the old i_size, updates i_size and pushes
 *     the new size to disk with FLAG_FILE_UPDATE;
 *   - on exit, syncs the inode buffers for buffered writes when caching is
 *     disabled.
 *
 * Returns the value produced by the underlying write, 0 for a zero byte
 * write, or a negative errno (-EIO, -ENOSPC, -EINTR).
 */
static ssize_t ocfs_file_write (struct file *filp, const char *buf, size_t count,
		 loff_t * ppos)
{
	int ret = 0;
	int saAcquired = false, acquired = false, writingAtEOF = false;
	ocfs_inode *oin = NULL;
	ocfs_super *osb = NULL;
	struct inode *inode = filp->f_dentry->d_inode;
	int status;
	__u64 newsize;
	ocfs_lock_res *lockres = NULL;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, '%*s')\n", filp, buf, count,
                        filp->f_dentry->d_name.len, filp->f_dentry->d_name.name);

	/* happy write of zero bytes */
	if (count == 0) {
		ret = 0;
		goto bail;
	}

	if (!inode || !inode_data_is_oin (inode)) {
		LOG_TRACE_STR ("bad inode or inode has no oin");
		ret = -EIO;
		goto bail;
	}
	oin = ((ocfs_inode *)inode->u.generic_ip);
	osb = (ocfs_super *) oin->osb;

	lockres = oin->lock_res;
	if (lockres == NULL) {
		ret = -EIO;
		goto bail;
	}

	/* from here on lockres is non-NULL and holds a reference of ours */
	ocfs_get_lockres (lockres);

	if ((lockres->master_node_num != osb->node_num) ||
	    (lockres->lock_state != OCFS_DLM_ENABLE_CACHE_LOCK)) {
		oin->cache_enabled = false;
	} else {
		oin->cache_enabled = true;
	}

	if (osb->osb_flags & OCFS_OSB_FLAGS_SHUTDOWN) {
		LOG_TRACE_STR ("Volume has already started shutdown");
		ret = -EIO;
		goto bail;
	}

	if (filp->f_flags & O_DIRECT) {
		/* anything special for o_direct? */
		LOG_TRACE_STR ("O_DIRECT");
	} else {
		ocfs_down_sem (&(oin->main_res), true);
		acquired = true;

		LOG_TRACE_ARGS ("non O_DIRECT write, fileopencount=%d\n",
				oin->open_hndl_cnt);
		if (oin->open_hndl_cnt > 1) {
			if (oin->oin_flags & OCFS_OIN_OPEN_FOR_WRITE) {
				LOG_TRACE_STR
				    ("uh oh! someone else is doing non O_DIRECT writes!\n");
				ret = -EIO;
				goto bail;
			} else {
				LOG_TRACE_STR
				    ("there are other readers, but you're the first writer\n");
				/* remember to drop the writer flag on exit */
				saAcquired = true;
				OCFS_SET_FLAG(oin->oin_flags, OCFS_OIN_OPEN_FOR_WRITE);
			}
		}
	}

	if (OIN_NEEDS_VERIFICATION (oin)) {
		LOG_TRACE_STR ("OIN_NEEDS_VERIFICATION");
		status = ocfs_verify_update_oin (osb, oin);
		if (status < 0) {
			LOG_TRACE_STR ("ocfs_verify_update_oin failed");
			LOG_TRACE_STR ("TODO: disable volume");
			ret = -EIO;
			goto bail;
		}
	}

	/* main_res is only held across the checks above, not the I/O */
	if (acquired) {
		ocfs_up_sem (&(oin->main_res));
		acquired = false;
	}

	if (filp->f_flags & O_APPEND) 
		newsize = count + inode->i_size;
	else
		newsize = count + *ppos;

	if (newsize > inode->i_size) {
		writingAtEOF = true;
		LOG_TRACE_STR ("Writing at EOF"); 
	}

	LOG_TRACE_ARGS ("ppos=%u.%u newsize=%u.%u cursize=%u.%u\n",
			HI (*ppos), LO (*ppos), HI (newsize), LO (newsize),
			HI (inode->i_size), LO (inode->i_size));

	if (newsize > oin->alloc_size) {
		LOG_TRACE_ARGS
		    ("Will need more allocation: have=%u.%u, need=%u.%u\n",
		     HI (oin->alloc_size), LO (oin->alloc_size), HI (newsize),
		     LO (newsize));

		status =
		    ocfs_create_modify_file (osb, oin->parent_dirnode_off, oin,
				      NULL, newsize, &oin->file_disk_off, 
                                      FLAG_FILE_EXTEND, NULL, NULL);
		if (status < 0) {
			/* any failure other than -EINTR is reported as -ENOSPC */
			if (status != -EINTR && status != -ENOSPC) {
				LOG_ERROR_STATUS (status);
				ret = -ENOSPC;
			} else
				ret = status;
			goto bail;
		}
	}

	if (filp->f_flags & O_DIRECT) 
		ret = ocfs_rw_direct (WRITE, filp, (char *) buf, count, ppos);
	else
		ret = generic_file_write (filp, buf, count, ppos);

	if (writingAtEOF && ret >= 0) {
		struct iattr attr;

		LOG_TRACE_STR
		    ("Generic_file_write ok, asking for OIN update now");
		inode->i_size = newsize;
		memset (&attr, 0, sizeof (struct iattr));
		attr.ia_valid |= ATTR_SIZE;
		attr.ia_size = newsize;
		status =
		    ocfs_create_modify_file (osb, oin->parent_dirnode_off, oin,
				      NULL, newsize,
				      &oin->file_disk_off, FLAG_FILE_UPDATE,
				      NULL, &attr);
		if (status < 0) {
			if (status != -EINTR) {
				LOG_ERROR_STATUS (status);
				ret = -EIO;
			} else
				ret = status;
		}
	}

      bail:
	if (saAcquired) {
		OCFS_CLEAR_FLAG(oin->oin_flags, OCFS_OIN_OPEN_FOR_WRITE);
	}
	if (acquired) {
		ocfs_up_sem (&(oin->main_res));
		acquired = false;
	}
        if (inode && oin && !oin->cache_enabled && !(filp->f_flags & O_DIRECT)) {
                fsync_inode_buffers(inode);
        }

	/*
	 * The early exits (zero byte write, bad inode, missing lock resource)
	 * reach this point without a reference having been taken; lockres is
	 * still NULL on all of them, so only balance a reference we hold.
	 */
	if (lockres != NULL)
		ocfs_put_lockres (lockres);

	LOG_EXIT_LONG (ret);
	return ret;
}				/* ocfs_file_write */

/*
 * ocfs_file_read()
 *
 * read() entry point.  Buffered reads take oin->main_res while the oin
 * is (optionally) re-verified and drop it again before the data is
 * read; O_DIRECT reads skip the semaphore.  The transfer itself goes
 * through ocfs_rw_direct() for O_DIRECT files and generic_file_read()
 * otherwise.
 *
 * Returns whatever the underlying read returns, -EINVAL for an inode
 * without an oin, or -EIO if the oin could not be verified.
 */
static ssize_t ocfs_file_read (struct file *filp, char *buf, size_t count, loff_t * ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	ocfs_inode *oin = NULL;
	ocfs_super *osb = NULL;
	int holding_main_res = false;
	int verify_status;
	int ret = 0;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, '%*s')\n", filp, buf, count,
                        filp->f_dentry->d_name.len, filp->f_dentry->d_name.name);

	if (!inode || !inode_data_is_oin (inode)) {
		LOG_ERROR_STR ("Bad inode or inode has no oin");
		ret = -EINVAL;
		goto bail;
	}

	oin = ((ocfs_inode *)inode->u.generic_ip);
	osb = (ocfs_super *) oin->osb;

	if (!(filp->f_flags & O_DIRECT)) {
		/* buffered path: serialize against other users of the oin */
		ocfs_down_sem (&(oin->main_res), true);
		holding_main_res = true;
	} else {
		/* anything special for o_direct? */
		LOG_TRACE_STR ("O_DIRECT");
	}

	if (OIN_NEEDS_VERIFICATION (oin)) {
		verify_status = ocfs_verify_update_oin (osb, oin);
		if (verify_status < 0) {
			LOG_TRACE_STR ("ocfs_verify_update_oin failed");
			LOG_TRACE_STR ("TODO: disable volume");
			ret = -EIO;
			goto bail;
		}
	}

	/* the semaphore only covers the verification, not the read */
	if (holding_main_res) {
		ocfs_up_sem (&(oin->main_res));
		holding_main_res = false;
	}

	if (filp->f_flags & O_DIRECT) {
		ret = ocfs_rw_direct (READ, filp, buf, count, ppos);
	} else {
		ret = generic_file_read (filp, buf, count, ppos);
	}

	if (ret == -EINVAL) {
		LOG_TRACE_STR ("Generic_file_read returned -EINVAL");
	}

      bail:
	if (holding_main_res) {
		ocfs_up_sem (&(oin->main_res));
	}

	LOG_EXIT_LONG (ret);
	return ret;
}				/* ocfs_file_read */

/*
 * ocfs_readpage()
 *
 * readpage address-space hook: hands the page to block_read_full_page()
 * with ocfs_get_block as the block mapper and returns its result.
 */
static int ocfs_readpage (struct file *file, struct page *page)
{
	int status;

	LOG_ENTRY_ARGS ("(0x%p, %u)\n", file, (page ? page->index : 0));

	status = block_read_full_page (page, ocfs_get_block);

	LOG_EXIT_LONG (status);

	return status;
}				/* ocfs_readpage */

/*
 * ocfs_writepage()
 *
 * writepage address-space hook: hands the page to
 * block_write_full_page() with ocfs_get_block as the block mapper and
 * returns its result.
 */
static int ocfs_writepage (struct page *page)
{
	int status;

	LOG_ENTRY_ARGS ("(0x%p)\n", page);

	status = block_write_full_page (page, ocfs_get_block);

	LOG_EXIT_LONG (status);

	return status;
}				/* ocfs_writepage */

#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,10)
/*
 * ocfs_direct_IO()
 *
 * Placeholder for the direct_IO hook.  Direct I/O no longer goes through
 * this function, so reaching it is treated as a bug
 * (hint from sct@redhat.com).  The second parameter is a struct file on
 * SUSE builds and a struct inode everywhere else.
 */
#ifndef SUSE
static int ocfs_direct_IO (int rw,
		struct inode *inode,
		struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
#else
static int ocfs_direct_IO (int rw,
		struct file *filp,
		struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
#endif
{
	/* should never be called any more */
	BUG();
	return -1;
}				/* ocfs_direct_IO */
#endif

/*
 * ocfs_prepare_write()
 *
 * prepare_write address-space hook: forwards the byte range [from, to)
 * of the page to block_prepare_write() with ocfs_get_block as the block
 * mapper and returns its result.
 */
static int ocfs_prepare_write (struct file *file,
		    struct page *page, unsigned from, unsigned to)
{
	int status;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);

	status = block_prepare_write (page, from, to, ocfs_get_block);

	LOG_EXIT_LONG (status);

	return status;
}				/* ocfs_prepare_write */

/* log2 of the sector size assumed by the direct I/O code below */
#define SECTOR_BITS 9
/* sector size in bytes derived from SECTOR_BITS (512) */
#define SECTOR_SIZE (1U << SECTOR_BITS)
/* mask selecting the offset-within-sector bits of a byte offset */
#define SECTOR_MASK (SECTOR_SIZE - 1)

/*
 * ocfs_rw_direct()
 *
 * Perform an O_DIRECT read or write on behalf of ocfs_file_read() /
 * ocfs_file_write() using a single kiobuf.
 *
 * rw:   READ or WRITE
 * filp: open file
 * buf:  user buffer
 * size: number of bytes requested; reads are clipped to the file size
 * offp: in/out file position, advanced by the number of bytes moved
 *
 * The offset and size must be multiples of 512 bytes, otherwise -EINVAL
 * is returned.  The request is cut into runs of physically contiguous
 * sectors: starting from the first sector, cluster-sized pieces
 * (inode->i_blksize >> 9 sectors) are merged for as long as
 * ocfs_get_block2() reports that the next piece directly follows the
 * previous one on disk.  Each run is then transferred in chunks of at
 * most max_sectors blocks with map_user_kiobuf()/brw_kiovec().
 *
 * Returns the number of bytes transferred, 0 for an empty request, a read
 * starting past i_size, or a failed block lookup, or a negative errno.
 */
static ssize_t ocfs_rw_direct (int rw, struct file *filp, char *buf, size_t size,
		loff_t * offp)
{
	struct kiobuf *iobuf;
	int err = 0;
	unsigned long blocknr, blocks, myiosize;
	size_t transferred;
	int iosize, clustersize;
	int i;
	struct inode *inode = filp->f_dentry->d_inode;
	int max_sectors;
	int nbhs;
	int ret = 0;
	bool large_io = false;
	bool inuse = false;	/* true while iobuf holds a live allocation */
	unsigned long blocks_end_cluster = 0;

	unsigned long firstlogic;
	long firstphys;
	/*
	 * Initialized because the "goto doio" shortcut below skips the lookup
	 * that normally fills it in, yet it is still copied at loop end.
	 */
	long nextphys = 0;
	unsigned long nextlogic = 0;
	unsigned long totalioblocks = 0;

	#warning not all devices have 512 byte sectors
	/* max sectors is 1024 in 2.4.9
	 * max data is 512kb  
	 */

	err = -EINVAL;
	if (size == 0) {
		printk("direct write of 0 byte\n");
		return 0;
	}

	if (rw == READ) {
		if (inode->i_size < *offp)	/* read past end of file */
			return 0;
		if (size > (inode->i_size - *offp))
			size = inode->i_size - *offp;
	}

	/* make sure we are aligned to either 4kb or 512 byte IO */
#ifndef LARGEIOS
	if ((*offp & 511) || (size & 511))
		/* if not, then fail, we need either to do dio */
		return err;

	max_sectors = KIO_MAX_SECTORS;
	large_io = false;
#endif
#ifdef LARGEIOS
	if ((*offp & 4095) || (size & 4095)) {
		/* if it's not 4kb, then 512 */
		if ((*offp & 511) || (size & 511))
			/* if not, then fail, we need either to do dio */
			return err;
		max_sectors = KIO_MAX_SECTORS;	/* for 2.4.9 - 1024 */
	} /* ok we 're 4kb aligned, lets see if the buffer is */
	else {
		if (!((unsigned long) buf & 4095)) {
			/* yippie we are .. we can do 4kb size io's */
			large_io = true;
			/* for 2.4.9 */
			max_sectors = KIO_MAX_SECTORS / 8;
		} else {
			max_sectors = KIO_MAX_SECTORS;
			large_io = false;
		}

	}
#endif

	/* find out how far we are to the end of our cluster */

	err = 0;
	if (size)
		err = -ENXIO;

	/* Split the IO into KIO_MAX_SECTORS chunks, mapping and */
	/* unmapping the single kiobuf as we go to perform each chunk of IO. */

	transferred = 0;
	blocknr = *offp >> SECTOR_BITS;
	clustersize = inode->i_blksize >> 9;
	myiosize = size >> 9;
	blocks_end_cluster = clustersize - (blocknr % clustersize);
	firstlogic = blocknr;
	totalioblocks = 0;

	ret = ocfs_get_block2 (inode, blocknr, &firstphys, 512);
	if (ret == -1) {
		err = 0;
		goto out;
	}
	while (myiosize > 0) {
		if (blocks_end_cluster + 1 > myiosize) {
			/* the rest of the request ends inside this cluster */
			totalioblocks += myiosize;
			myiosize = 0;
			goto doio;
		} else {
			totalioblocks += blocks_end_cluster;
			myiosize -= blocks_end_cluster;
			nextlogic = firstlogic + blocks_end_cluster;
		}
again:
		ret = ocfs_get_block2 (inode, nextlogic, &nextphys, 512);
		if (ret == -1) {
			err = 0;
			goto out;
		}
		if (nextphys == (firstphys + totalioblocks)) {
			/* merge ok: next piece is physically contiguous */
			blocks_end_cluster = clustersize - (nextlogic % clustersize);
			if (blocks_end_cluster + 1 > myiosize) {
				totalioblocks += myiosize;
				myiosize = 0;
			} else {
				totalioblocks += blocks_end_cluster;
				myiosize -= blocks_end_cluster;
				nextlogic = nextlogic + blocks_end_cluster;
				goto again;
			}
		}
doio:
		size = totalioblocks << 9;
		if (large_io)
			nbhs = (size >> 12);
		else
			nbhs = (size >> SECTOR_BITS);
		if (nbhs > max_sectors)
			nbhs = max_sectors;
		err = alloc_kiovec_sz (1, &iobuf, &nbhs);
		if (err)
			goto out;
		/*
		 * Only mark the kiovec live once the allocation succeeded, so
		 * the exit path never frees an iobuf that was not set up.
		 */
		inuse = true;
		totalioblocks = 0;
		while (size > 0) {
			if (large_io) {
				blocks = size >> 12;
				if (blocks > max_sectors)
					blocks = max_sectors;
				iosize = blocks << 12;
			} else {
				blocks = size >> SECTOR_BITS;
				if (blocks > max_sectors)
					blocks = max_sectors;
				iosize = blocks << SECTOR_BITS;
			}
			if (!blocks)
				break;
			err = map_user_kiobuf (rw, iobuf, (unsigned long) buf, iosize);
			if (err) {
				printk("error in mapping iobuf\n");
				break;
			}
			/* get the blocknr depending on io size for all blocks */
			/* since we are always within the extent we only need to get the first block */
			iobuf->blocks[0] = firstphys + totalioblocks;

			if (large_io) {
				blocknr += 8;
				/* 512 byte sector number -> 4kb block number */
				iobuf->blocks[0] = iobuf->blocks[0] / 8;
			} else {
				blocknr++;
			}

			for (i = 1; i < blocks; i++) {
				if (large_io) {
					blocknr += 8;
				} else {
					blocknr++;
				}
				iobuf->blocks[i] = iobuf->blocks[0] + i;
			}
			err =
				brw_kiovec (rw, 1, &iobuf, inode->i_dev, iobuf->blocks,
					large_io ? 4096 : 512);
#ifdef SUSE
			if (rw == READ &&  err > 0)
				mark_dirty_kiobuf(iobuf, err);
#endif
			if (err >= 0) {
				transferred += err;
				size -= err;
				buf += err;
				totalioblocks += blocks;
			} else {
				printk(
				       "ocfs_rw_direct : brw_kiovec() %d\n",
				       err);	
				unmap_kiobuf(iobuf);
				break;
			}

			unmap_kiobuf (iobuf);
			if (err != iosize) {
				printk("err is %d iosize = %u\n", err, iosize);
				break;
			}
		}
		if (err < 0) {
			/* iobuf is still allocated; released at "out" */
			printk("need to fail out\n");
			break;
		}
		free_kiovec_sz(1, &iobuf, &nbhs);
		inuse = false;
		/* start the next run at the piece that could not be merged */
		totalioblocks = 0;
		firstlogic = nextlogic;
		firstphys = nextphys;
	}
	if (transferred) {
		*offp += transferred;
		err = transferred;
	}

out:
	if (inuse)
		free_kiovec_sz (1, &iobuf, &nbhs);
	return err;
}				/* ocfs_rw_direct */

#ifdef AIO_ENABLED
static int ocfs_kvec_rw(struct file *filp, int rw, kvec_cb_t cb, size_t size, loff_t pos);

/* kvec read entry point: delegates to ocfs_kvec_rw() with READ. */
int ocfs_kvec_read(struct file *file, kvec_cb_t cb, size_t size, loff_t pos)
{
	int status = ocfs_kvec_rw(file, READ, cb, size, pos);

	return status;
}

/* kvec write entry point: delegates to ocfs_kvec_rw() with WRITE. */
int ocfs_kvec_write(struct file *file, kvec_cb_t cb, size_t size, loff_t pos)
{
	int status = ocfs_kvec_rw(file, WRITE, cb, size, pos);

	return status;
}

/*
 * Common worker for ocfs_kvec_read()/ocfs_kvec_write().
 *
 * An empty request, or one starting exactly at i_size, invokes the
 * callback with 0 and returns 0.  A position beyond i_size yields
 * -ENXIO; a negative or non 512-byte aligned position/size, or a veclet
 * whose offset/length is not 512-byte aligned, yields -EINVAL.  The
 * transfer is limited to KIO_MAX_SECTORS sectors and to the end of the
 * cluster containing the first sector, then handed to brw_kvec_async().
 * A failed block lookup returns 0.
 */
int ocfs_kvec_rw(struct file *filp, int rw, kvec_cb_t cb, size_t size, loff_t pos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	int max_sectors = KIO_MAX_SECTORS;
	int sector_bits = SECTOR_BITS;
	int sector_mask = 511;
	int clustersize;
	int err;
	unsigned int idx;
	unsigned long blocknr, blocks;
	unsigned long to_cluster_end;
	long firstphys;

	if (!size || (pos == inode->i_size)) {
		err = 0;
		cb.fn(cb.data, cb.vec, err);
		return err;
	}

	if (pos >= inode->i_size)
		return -ENXIO;

	if ((pos < 0) || (pos & 511) || (size & 511))
		return -EINVAL;

	for (idx = 0; idx < cb.vec->nr; idx++) {
		if ((cb.vec->veclet[idx].offset & sector_mask) ||
		    (cb.vec->veclet[idx].length & sector_mask)) {
			printk("veclet offset/length wrong");
			return -EINVAL;
		}
	}

	/* first sector and sector count, capped at max_sectors */
	blocknr = pos >> SECTOR_BITS;
	blocks = size >> SECTOR_BITS;
	if (blocks > max_sectors)
		blocks = max_sectors;
	if (!blocks)
		return -ENXIO;

	/* sectors left from blocknr up to the end of its cluster */
	clustersize = inode->i_blksize >> 9;
	to_cluster_end = clustersize - (blocknr % clustersize);

	if (ocfs_get_block2(inode, blocknr, &firstphys, 512) == -1)
		return 0;

	if (blocks > to_cluster_end)
		blocks = to_cluster_end;

	return brw_kvec_async(rw, cb, inode->i_dev, blocks, firstphys, sector_bits);
}
#endif

/*
 * ocfs_commit_write()
 *
 * commit_write address-space hook: forwards the request unchanged to
 * generic_commit_write() and returns its result.
 */
static int ocfs_commit_write (struct file *file,
		   struct page *page, unsigned from, unsigned to)
{
	int status;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);

	status = generic_commit_write (file, page, from, to);

	LOG_EXIT_LONG (status);

	return status;
}				/* ocfs_commit_write */

/*
 * ocfs_create_or_open_file()
 *
 * Shared worker for create (inode == NULL) and open (inode != NULL).
 *
 * inode:    inode being opened, or NULL for a create request
 * dir:      parent directory inode
 * dentry:   dentry naming the file
 * mode:     creation mode bits for create; the open flags for open
 *           (ocfs_file_open passes file->f_flags)
 * newofile: out - receives the newly allocated ocfs_file on success
 * dev:      device number used when creating char/block nodes
 *
 * Open: uses the oin attached to the inode when there is one that is
 * neither in teardown nor marked delete-on-close; otherwise the entry is
 * looked up on disk and a new oin is built from it.  O_DIRECT and non
 * O_DIRECT opens of an already open file are checked against each other,
 * then an ofile is allocated and the open counts are bumped.
 *
 * Create: when the name is not found on disk, the entry is created
 * through ocfs_create_modify_file(), read back, and a fresh ofile/oin
 * pair is set up for it.
 *
 * Returns 0 on success or a negative status.
 */
static int ocfs_create_or_open_file (struct inode *inode,
			  struct inode *dir, struct dentry *dentry,
			  int mode, ocfs_file ** newofile, int dev)
{
	int create = (inode == NULL);
	int status = 0;
	ocfs_super *osb = NULL;
	ocfs_file *OFile = NULL;
	ocfs_inode *ParentOin = NULL;
	ocfs_inode *NewOIN = NULL;
	ocfs_inode *oin = NULL;
	bool bAcquiredOSB = false;
	bool bAcquiredOIN = false;
	bool bClearInUse = false;
	bool new_oin = false;
	ocfs_file_entry *fe = NULL;
	__u64 ParentDirNodeOffset;
	__u64 parentDirCluster;
	struct file *NewFileObject = NULL;
	__u64 allocSize = 0;
	__u64 endofFile = 0;
	ocfs_sem *oin_sem = NULL;

	LOG_ENTRY_ARGS ("(%s '%*s')\n", create?"create":"open",
                        dentry->d_name.len, dentry->d_name.name);

	OCFS_ASSERT (dir->i_sb);
	OCFS_ASSERT (newofile);
	osb = ((ocfs_super *)(dir->i_sb->u.generic_sbp));
	OCFS_ASSERT (osb);
	OCFS_ASSERT (osb->obj_id.type == OCFS_TYPE_OSB);

	if (!ocfs_linux_get_inode_offset (dir, &ParentDirNodeOffset, &ParentOin)) {
		LOG_ERROR_STATUS (status = -ENOENT);
		goto leave;
	}
	parentDirCluster = ParentDirNodeOffset;

	ocfs_down_sem (&(osb->osb_res), true);
	bAcquiredOSB = true;

	/*  If the volume has been shutdown, fail the request */
	if (osb->osb_flags & OCFS_OSB_FLAGS_SHUTDOWN) {
		LOG_ERROR_STR ("Volume has been shutdown");
		status = -EACCES;
		goto leave;
	}
	ocfs_up_sem (&(osb->osb_res));
	bAcquiredOSB = false;

	if ((fe = ocfs_allocate_file_entry ()) == NULL) {
		LOG_ERROR_STATUS (status = -ENOMEM);
		goto leave;
	}

	if (create)
		status = -ENOENT;
	else {
		/* kch - for an open request we are already given the 
		 * inode, and therefore we are given the oin too */
		down(&inode->i_sem);
		oin = NULL;
		if (inode_data_is_oin (inode))
			oin = ((ocfs_inode *)inode->u.generic_ip);
		status = -EFAIL;
		if (oin != NULL) {

			if (!(oin->oin_flags & OCFS_OIN_IN_TEARDOWN) &&
			    !(oin->oin_flags & OCFS_OIN_DELETE_ON_CLOSE)) {
				OCFS_SET_FLAG (oin->oin_flags, OCFS_OIN_IN_USE);

				status = 0;
			}
			if (status < 0) {
				if (oin->oin_flags & OCFS_OIN_IN_TEARDOWN)
					LOG_ERROR_ARGS ("oin (%p) in teardown", oin);
				else
					LOG_ERROR_ARGS ("oin (%p) deleted", oin);
			}
		} else {
			/* now it IS possible to have an inode but no OIN attached yet
			 * must be loaded now to open file */
			status = -ENOENT;
		}
		up(&inode->i_sem);
	}

	if (status < 0) {
		if (status != -ENOENT) {
			LOG_ERROR_STATUS (status);
			goto leave;
		}

		/*  Look on the disk now ... */
		status = ocfs_find_files_on_disk (osb, ParentDirNodeOffset, &(dentry->d_name),
					  fe, NULL);
		if (status >= 0) {
			oin = NULL;
			ocfs_down_sem (&(osb->osb_res), true);
			bAcquiredOSB = true;
			status = ocfs_create_oin_from_entry (osb, fe, &oin,
						     parentDirCluster, NULL);
			new_oin = true;
			ocfs_up_sem (&(osb->osb_res));
			bAcquiredOSB = false;

			if (status >= 0) {
				/*  Set Oin in Use... */
				bClearInUse = true;
			}

			if (status < 0) {
				if (status != -ENOENT && status != -EINTR) {
					LOG_ERROR_STATUS (status);
					goto leave;
				}
			}
		} else if (!create) {
			LOG_TRACE_STR
			    ("Open request made for nonexistent file!");
			status = -ENOENT;
			goto leave;
		}
	}

	if (status < 0) {	/* not found on disk or in mem */
		if (!create || status != -ENOENT) {
			LOG_ERROR_STATUS (status);
			goto leave;
		}
	} else {
		bClearInUse = true;
	}

	if (status < 0) {	/* the CREATE case */
		__u64 fileEntry = 0;
		ocfs_file_entry *tempFileEnt;

		if ((tempFileEnt = ocfs_allocate_file_entry ()) == NULL) {
			LOG_ERROR_STATUS (status = -ENOMEM);
			goto leave;
		}

		/* must pass a partially filled FILE_ENTRY to set */
		/* linux-only fields */
		memset (tempFileEnt, 0, sizeof (ocfs_file_entry));
		tempFileEnt->uid = current->fsuid;
		tempFileEnt->gid = current->fsgid;
		tempFileEnt->prot_bits = mode & 0007777;
		if (S_ISCHR (mode) || S_ISBLK (mode)) {
			tempFileEnt->dev_major = MAJOR (dev);
			tempFileEnt->dev_minor = MINOR (dev);
		} else {
			tempFileEnt->dev_major = MAJOR (dir->i_sb->s_dev);
			tempFileEnt->dev_minor = MINOR (dir->i_sb->s_dev);
		}

		if (S_ISLNK (mode))
			tempFileEnt->attribs |= OCFS_ATTRIB_SYMLINK;
		else if (S_ISCHR (mode))
			tempFileEnt->attribs |= OCFS_ATTRIB_CHAR;
		else if (S_ISBLK (mode))
			tempFileEnt->attribs |= OCFS_ATTRIB_BLOCK;
		else if (S_ISFIFO (mode))
			tempFileEnt->attribs |= OCFS_ATTRIB_FIFO;
		else if (S_ISSOCK (mode))
			tempFileEnt->attribs |= OCFS_ATTRIB_SOCKET;
		else if (S_ISDIR (mode))
			tempFileEnt->attribs |= OCFS_ATTRIB_DIRECTORY;
		else
			tempFileEnt->attribs |= OCFS_ATTRIB_REG;

		OCFS_ASSERT (create);
		/*  We need the dir_node for the file being created. */
		/*  Create the file here using the new algorithm... */
		fileEntry = 0;

		status =
		    ocfs_create_modify_file (osb, parentDirCluster, NULL, &(dentry->d_name),
				      0, &fileEntry,
				      S_ISDIR (mode) ? FLAG_FILE_CREATE_DIR :
				      FLAG_FILE_CREATE, tempFileEnt, NULL);

		ocfs_release_file_entry (tempFileEnt);

		if (status < 0) {
			if (status != -EINTR)
				LOG_ERROR_STATUS (status);
			goto leave;
		}

		/* read the freshly created entry back from disk */
		status = ocfs_find_files_on_disk (osb, parentDirCluster, &(dentry->d_name),
					  fe, NULL);
		if (status < 0) {
			LOG_ERROR_STATUS (status);
			goto leave;
		}

		fileEntry = fe->this_sector;

		/*  Create a new ofile here ... */
		OFile = ocfs_allocate_ofile ();
		if (OFile == NULL) {
			LOG_ERROR_STATUS (status = -ENOMEM);
			goto leave;
		}

		OFile->k_file = NewFileObject;

		status = ocfs_create_new_oin (&OFile->oin, &allocSize, &endofFile,
					   NewFileObject, osb);
		if (status < 0) {
			/*  Release the memory for the OFile we allocated above */
			ocfs_release_ofile (OFile);
			LOG_ERROR_STATUS (status);
			goto leave;
		}

		status = ocfs_initialize_oin (OFile->oin, osb,
				   OCFS_OIN_CACHE_UPDATE | (S_ISDIR (mode) ?
							    OCFS_OIN_DIRECTORY :
							    0), NewFileObject,
				   fileEntry,
				   S_ISDIR (mode) ? fe->extents[0].
				   disk_off : fileEntry);
		if (status < 0) {
			/*
			 * Fetch the oin pointer before the ofile is released;
			 * OFile must not be read once it has been handed back.
			 */
			ocfs_inode *failed_oin = OFile->oin;

			if (status != -EINTR)
				LOG_ERROR_STATUS (status);
			ocfs_release_ofile (OFile);
			ocfs_release_oin (failed_oin, true);
			goto leave;
		}

		if (ParentOin)
			OCFS_CLEAR_FLAG (ParentOin->oin_flags, OCFS_OIN_IN_USE);

		if (OFile->oin->lock_res != NULL) {
			OFile->oin->lock_res->master_node_num =
			    DISK_LOCK_CURRENT_MASTER (fe);
			OFile->oin->lock_res->lock_state =
			    DISK_LOCK_FILE_LOCK (fe);
		}

		/*  Insert the OFile on the OIN list */
		NewOIN = OFile->oin;

		NewOIN->chng_seq_num = DISK_LOCK_SEQNUM (fe);
		NewOIN->parent_dirnode_off = parentDirCluster;

		if (S_ISDIR (mode)) 
			NewOIN->dir_disk_off = fe->extents[0].disk_off;
		status = 0;
		*newofile = OFile;
		goto leave;
	} else {		/* the OPEN case */
		/*
		 * A create request ends up here when the name was found on
		 * disk after all.  There is no inode to attach to in that
		 * case, and everything below dereferences it, so fail the
		 * request instead.
		 * NOTE(review): the oin built from the on-disk entry is not
		 * torn down on this path - confirm the proper cleanup.
		 */
		if (inode == NULL) {
			LOG_TRACE_STR ("Create request made for existing file!");
			status = -EEXIST;
			goto leave;
		}

		/* check if another process doing an open */
		/* concurrently has just set the oin */
		down(&inode->i_sem);
		if (new_oin) {
			if (inode_data_is_oin (inode)) {
				/* delete the oin we just made */
				oin->inode = NULL;
				oin->lock_res = NULL;
				ocfs_release_oin(oin, true);
				/* and use the correct one */
				oin = (ocfs_inode *)inode->u.generic_ip;
			} else {
				oin->inode = inode;
				SET_INODE_OIN (inode, oin);
			}
		}

		/* we should now have a single oin regardless */
		/* of how many concurrent openers at this point */
		/* so take the oin->main_res so we won't need the i_sem */

		up(&inode->i_sem);

		oin_sem = &(oin->main_res);
		if (!bAcquiredOIN) {
			ocfs_down_sem (oin_sem, true);
			bAcquiredOIN = true;
		}

		if (oin->oin_flags & OCFS_OIN_DELETE_ON_CLOSE) {
			LOG_TRACE_STR
			    ("oin has DELETE_ON_CLOSE set, returning DELETE_PENDING");
			status = -ENOENT;
			goto leave;
		}

		/* only call ocfs_verify_update_oin if there's a good inode */
		if (oin->inode == inode && OIN_NEEDS_VERIFICATION(oin)) {
			status = ocfs_verify_update_oin (osb, oin);
			if (status < 0) {
				/*  disable VOLUME TODO */
				LOG_ERROR_STATUS (status);
				goto leave;
			}
		}
		if (oin->open_hndl_cnt > 0) {
			/*  The OIN is currently in use by some thread. */
			/*  We must check whether the requested access/share access */
			/*  conflicts with the existing open operations. */

			LOG_TRACE_ARGS ("oin->open_hndl_cnt > 0! : %u\n",
					oin->open_hndl_cnt);
			if (!(mode & O_DIRECT)) {
				/*
				 * NOTE(review): O_RDONLY is 0 on Linux, so
				 * !(mode & O_RDONLY) is presumably always
				 * true - confirm the intended access check.
				 */
				if ((oin->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO) && !(mode & O_RDONLY)) {
					status = -EACCES;
					LOG_TRACE_STR("file is already open O_DIRECT, "
						      "cannot open non O_DIRECT");
					goto leave;
				}
				OCFS_CLEAR_FLAG(oin->oin_flags, OCFS_OIN_OPEN_FOR_DIRECTIO);
			} else if (mode & O_DIRECT) {
				if (!(oin->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO)) {
					status = -EACCES;
					LOG_TRACE_STR("file is already open non O_DIRECT, "
						      "cannot open O_DIRECT");
					goto leave;
				}
				OCFS_SET_FLAG(oin->oin_flags, OCFS_OIN_OPEN_FOR_DIRECTIO);

			}
			status = 0;
		} else {
			/* first opener decides the direct-io mode of the oin */
			ocfs_delete_all_extent_maps (oin);
			if (mode & O_DIRECT)
				OCFS_SET_FLAG(oin->oin_flags, OCFS_OIN_OPEN_FOR_DIRECTIO);
			else 
				OCFS_CLEAR_FLAG(oin->oin_flags, OCFS_OIN_OPEN_FOR_DIRECTIO);
		}

		/*  Allocate a new OFile */
		OFile = ocfs_allocate_ofile ();
		if (OFile == NULL) {
			LOG_ERROR_STATUS (status = -ENOMEM);
			goto leave;
		}

		/*  Setup the OFile and insert it on the oin list */
		OFile->k_file = NewFileObject;
		OFile->oin = oin;
		OFile->oin->open_hndl_cnt++;

		/* We should clear the in use now as we are safe from the case */
		/* where the voting thread can vote and we have an open in */
		/* progress */
		OCFS_CLEAR_FLAG (OFile->oin->oin_flags, OCFS_OIN_IN_USE);
		if (ParentOin)
			OCFS_CLEAR_FLAG (ParentOin->oin_flags, OCFS_OIN_IN_USE);

		OCFS_ASSERT (OFile->oin);

		if (bAcquiredOIN) {
			ocfs_up_sem (oin_sem);
			bAcquiredOIN = false;
		}

		ocfs_down_sem (&(osb->osb_res), true);
		bAcquiredOSB = true;
		(osb->file_open_cnt)++;
		ocfs_up_sem (&(osb->osb_res));
		bAcquiredOSB = false;

		*newofile = OFile;
		status = 0;
		goto leave;
	}

      leave:

	if (bClearInUse) {
		/* take the oin semaphore if it is known and not yet held */
		if (!bAcquiredOIN && oin_sem) {
			ocfs_down_sem (oin_sem, true);
			bAcquiredOIN = true;
		}

		OCFS_CLEAR_FLAG (oin->oin_flags, OCFS_OIN_IN_USE);

		if (bAcquiredOIN && oin_sem) {
			ocfs_up_sem (oin_sem);
			bAcquiredOIN = false;
		}
	}

	if (bAcquiredOIN && oin_sem) {
		ocfs_up_sem (oin_sem);
		bAcquiredOIN = false;
	}

	if (bAcquiredOSB) {
		ocfs_up_sem (&(osb->osb_res));
		bAcquiredOSB = false;
	}

	ocfs_release_file_entry (fe);

	LOG_EXIT_STATUS (status);
	return (status);
}				/* ocfs_create_or_open_file */

/*
 * ocfs_file_open()
 *
 * open() entry point.  Takes a reference on the parent directory inode,
 * asks ocfs_create_or_open_file() for an ofile and stores it in
 * file->private_data.  On failure the parent reference is dropped again;
 * -ENOENT, -ENOMEM, -EACCES and -EINTR are passed through, every other
 * failure is logged and reported as -EACCES.
 */
static int ocfs_file_open (struct inode *inode, struct file *file)
{
	struct dentry *dentry = file->f_dentry;
	struct inode *parent = dentry->d_parent->d_inode;
	ocfs_file *ofile = NULL;
	int kiovec_err = 0;
	int status;
	int ret;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", inode, file, 
                        file->f_dentry->d_name.len, file->f_dentry->d_name.name);

	atomic_inc (&parent->i_count);
	status = ocfs_create_or_open_file (inode, parent, dentry, file->f_flags,
					   &ofile, NODEV);
	if (status >= 0) {
		/* tie the new ofile and the struct file together */
		file->private_data = (void *) ofile;
		ofile->k_file = file;
		ret = 0;
	} else {
		switch (status) {
		case -ENOENT:
		case -ENOMEM:
		case -EACCES:
		case -EINTR:
			ret = status;
			break;
		default:
			LOG_ERROR_STATUS (status);
			ret = -EACCES;
			break;
		}
		/* the open failed, give the parent reference back */
		atomic_dec (&parent->i_count);
	}

	LOG_TRACE_ARGS
	    ("exiting: file=%p dentry=%p inode=%p oin=%p kiovec=%d\n",
	     file, file->f_dentry, file->f_dentry->d_inode,
	     ((ocfs_inode *)file->f_dentry->d_inode->u.generic_ip), kiovec_err);
	LOG_EXIT_LONG (ret);
	return ret;
}				/* ocfs_file_open */

/*
 * ocfs_mknod()
 *
 */
static int ocfs_mknod (struct inode *dir, struct dentry *dentry, int mode, int dev)
{
	int status;
	struct inode *inode;
	ocfs_file *newofile = NULL;
	int error = -EACCES;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, %d, '%*s')", dir, dentry, mode,
			dev, dentry->d_name.len, dentry->d_name.name);

	atomic_inc (&dir->i_count);

	status = ocfs_create_or_open_file (NULL, dir, dentry, mode, &newofile,
					   dev);
	if (status >= 0) {
		ocfs_inode *oin;

		if (newofile == NULL || newofile->oin == NULL) {
			LOG_ERROR_STR ("OFile returned is bad. TODO: cleanup!");
			goto bail;
		}

		oin = newofile->oin;
		inode = new_inode (dir->i_sb);
		error = PTR_ERR (inode);
		if (!IS_ERR (inode)) {
			ocfs_file_entry fe;

			fe.extents[0].disk_off = fe.this_sector = 0;
			fe.uid = current->fsuid;
			fe.gid = current->fsgid;
			fe.file_size = 0;
			fe.modify_time = fe.create_time = CURRENT_TIME;
			inode->i_rdev = dev;
			inode->i_ino = LO (oin->file_disk_off);
			oin->inode = inode;

			ocfs_populate_inode (inode, &fe, mode, oin);
			insert_inode_hash (inode);
			d_instantiate (dentry, inode);
			error = 0;
		} else {
			LOG_ERROR_ARGS ("new_inode failed! error=%d", error);
		}
	} else if (status == -ENOSPC) {
		LOG_ERROR_STR ("Disk is full");
		error = -ENOSPC;
	} else if (status == -EINTR) {
		error = -EINTR;
	} else {
		LOG_ERROR_STATUS (status);
	}

      bail:
	atomic_dec (&dir->i_count);

	/* uh, hmmm... */
	if (newofile != NULL)
		ocfs_release_ofile (newofile);

	LOG_EXIT_LONG (error);
	return error;
}				/* ocfs_mknod */

/*
 * ocfs_mkdir()
 *
 * mkdir inode operation: creates the directory through ocfs_mknod()
 * with S_IFDIR added to the requested mode.
 */
static int ocfs_mkdir (struct inode *dir, struct dentry *dentry, int mode)
{
	int status;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, '%*s')\n", dir, dentry, mode,
                        dentry->d_name.len, dentry->d_name.name);

	status = ocfs_mknod (dir, dentry, mode | S_IFDIR, NODEV);

	LOG_EXIT_LONG (status);

	return status;
}				/* ocfs_mkdir */

/*
 * ocfs_create()
 *
 * create inode operation: creates a regular file through ocfs_mknod()
 * with S_IFREG added to the requested mode.
 */
static int ocfs_create (struct inode *dir, struct dentry *dentry, int mode)
{
	int status;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, '%*s')\n", dir, dentry, mode,
                        dentry->d_name.len, dentry->d_name.name);

	status = ocfs_mknod (dir, dentry, mode | S_IFREG, NODEV);

	LOG_EXIT_LONG (status);

	return status;
}				/* ocfs_create */

/*
 * ocfs_link()
 *
 * link inode operation.  Hard links are not implemented: the function
 * reads and validates the file entry of the source inode (logging any
 * problem it finds) and then always returns -EPERM.
 */
static int ocfs_link (struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
	int status = 0;
	int ret = -EPERM;	/* the only value this function ever returns */
	__u64 off;
	ocfs_file_entry *fe;
	struct inode *inode;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, 0x%p, old='%*s' new='%*s')\n", old_dentry, dir, dentry,
                        old_dentry->d_name.len, old_dentry->d_name.name,
                        dentry->d_name.len, dentry->d_name.name);

	inode = old_dentry->d_inode;
	fe = ocfs_allocate_file_entry ();
	if (fe == NULL) {
		status = -ENOMEM;
		LOG_ERROR_STATUS (status);
		goto bail;
	}
	if (!ocfs_linux_get_inode_offset (inode, &off, NULL)) {
		LOG_ERROR_STR ("could not get inode offset!");
		goto bail;
	}

	status = ocfs_read_file_entry (((ocfs_super *)(inode->i_sb->u.generic_sbp)), fe, off);
	if (status < 0) {
		LOG_ERROR_STATUS (status);
		goto bail;
	}
	if (!IS_VALID_FILE_ENTRY (fe)) {
		LOG_ERROR_ARGS ("Invalid fe at offset %u.%u", HI(off), LO(off));
		goto bail;
	}

      bail:
	if (fe) {
		ocfs_release_file_entry (fe);
	}

	/* trace the value that is actually handed back to the caller */
	LOG_EXIT_LONG (ret);
	return ret;
}				/* ocfs_link */

/*
 * ocfs_positive()
 *
 * Returns 1 when the dentry has an inode attached and is still hashed,
 * 0 otherwise.
 */
static inline int ocfs_positive (struct dentry *dentry)
{
	int positive = 0;

	LOG_ENTRY_ARGS ("(0x%p, '%*s')\n", dentry, 
                        dentry->d_name.len, dentry->d_name.name);

	if (dentry->d_inode && !d_unhashed (dentry))
		positive = 1;

	LOG_EXIT_LONG (positive);
	return positive;
}				/* ocfs_positive */

/*
 * ocfs_empty()
 *
 * Walks the dentry's d_subdirs list under dcache_lock.  Returns 0 as
 * soon as a child is found that has an inode and is still hashed;
 * returns 1 when no such child exists.
 */
static int ocfs_empty (struct dentry *dentry)
{
	struct list_head *pos;
	int empty = 1;

	LOG_ENTRY_ARGS ("(0x%p, '%*s')\n", dentry,
                        dentry->d_name.len, dentry->d_name.name);

	spin_lock (&dcache_lock);
	for (pos = dentry->d_subdirs.next; pos != &dentry->d_subdirs;
	     pos = pos->next) {
		struct dentry *child = list_entry (pos, struct dentry, d_child);

		if (child->d_inode && !d_unhashed (child)) {
			empty = 0;
			break;
		}
	}
	spin_unlock (&dcache_lock);

	LOG_EXIT_LONG (empty);
	return empty;
}				/* ocfs_empty */

/*
 * ocfs_unlink()
 *
 * Removes the object named by dentry from directory dir.
 *
 * Returns 0 on success.  Returns -EBUSY when the inode or dentry is
 * still referenced (i_count > 1 or d_count > 2), when ocfs_empty()
 * reports live children, or when ocfs_set_disposition_information()
 * fails with a status other than -ENOTEMPTY, -EPERM, -EBUSY or -EINTR
 * (those four are passed through unchanged).  Returns -EIO when the
 * inode is flagged as carrying an oin but the pointer is NULL.
 */
static int ocfs_unlink (struct inode *dir, struct dentry *dentry)
{
	struct inode *inode;
	ocfs_inode *oin = NULL;
	ocfs_super *osb = NULL;
	ocfs_lock_res *lockres;
	__u64 off;
	int status, tmpstat;
	int retval = -EBUSY;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", dir, dentry,
                        dentry->d_name.len, dentry->d_name.name);

	inode = dentry->d_inode;

	/* Somebody else still holds the inode or the dentry. */
	if (atomic_read (&inode->i_count) > 1 ||
	    atomic_read (&dentry->d_count) > 2)
		goto bail;

	if (!ocfs_empty (dentry)) {
		LOG_TRACE_STR ("dentry is not empty, cannot delete");
		goto bail;
	}

	status = ocfs_set_disposition_information (dir, dentry);
	if (status < 0) {
		if (status == -ENOTEMPTY || status == -EPERM ||
		    status == -EBUSY || status == -EINTR) {
			retval = status;
		} else {
			LOG_ERROR_STATUS (status);
			retval = -EBUSY;
		}
		goto bail;
	}

	if (inode_data_is_oin (inode)) {
		oin = (ocfs_inode *) inode->u.generic_ip;
		if (oin == NULL) {
			LOG_ERROR_STR ("inode has bad oin");
			retval = -EIO;
			goto bail;
		}
	}

	if (oin) {
		ocfs_release_oin (oin, true);
	} else {
		/* No oin: drop the sector node for this offset, if one exists. */
		osb = (ocfs_super *) (inode->i_sb->u.generic_sbp);
		if (osb && ocfs_linux_get_inode_offset (inode, &off, NULL)) {
			tmpstat = ocfs_lookup_sector_node (osb, off, &lockres);
			if (tmpstat >= 0 && lockres) {
				ocfs_remove_sector_node (osb, lockres);
				ocfs_put_lockres (lockres);
			} else {
				LOG_TRACE_STR ("lookup sectornode failed");
			}
		}
	}

	inode->i_nlink--;
	/*
	 * A dput(dentry) used to be considered here to avoid a BUG() in
	 * prune_dcache(); it is believed to be unnecessary and stays out.
	 */
	retval = 0;

      bail:
	LOG_EXIT_LONG (retval);
	return retval;
}				/* ocfs_unlink */

/*
 * ocfs_rename()
 *
 * Renames old_dentry (in old_dir) to new_dentry (in new_dir).
 *
 * Returns 0 on success, -EBUSY when the source inode or dentry is still
 * referenced, -ENOTEMPTY when a directory is renamed onto an existing
 * dentry that ocfs_empty() reports as populated, and otherwise the
 * mapped result of ocfs_set_rename_information(): -EINTR and -ENOENT
 * are passed through, every other failure becomes -ENOENT.
 */
static int ocfs_rename (struct inode *old_dir, struct dentry *old_dentry,
			struct inode *new_dir, struct dentry *new_dentry)
{
	struct inode *source = old_dentry->d_inode;
	struct inode *target = new_dentry->d_inode;
	int status;
	int error = 0;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, 0x%p, 0x%p, from='%*s' to='%*s')\n",
			old_dir, old_dentry, new_dir, new_dentry,
                        old_dentry->d_name.len, old_dentry->d_name.name,
                        new_dentry->d_name.len, new_dentry->d_name.name);

	if (atomic_read (&source->i_count) > 1) {
		error = -EBUSY;
		goto bail;
	}

	if (atomic_read (&old_dentry->d_count) > 1) {
#if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,18)
		/* Try to shed cached children before giving up. */
		shrink_dcache_parent (old_dentry);
		if (atomic_read (&old_dentry->d_count) > 1) {
			error = -EBUSY;
			goto bail;
		}
#else
		error = -EBUSY;
		goto bail;
#endif
	}

	/* Only an existing target needs the emptiness check. */
	if (target && S_ISDIR (source->i_mode) && !ocfs_empty (new_dentry)) {
		error = -ENOTEMPTY;
		LOG_TRACE_STR ("New (directory) dentry NOT empty!");
		goto bail;
	}

	/*
	 * Note: an EMLINK check against OCFS_LINK_MAX for directory
	 * renames onto a new name was once sketched here but is disabled.
	 */
	status = ocfs_set_rename_information (old_dir, old_dentry, new_dir,
					      new_dentry);
	if (status < 0) {
		if (status == -EINTR || status == -ENOENT) {
			error = status;
		} else {
			LOG_ERROR_STATUS (status);
			error = -ENOENT;
		}
		goto bail;
	}

	/* In-memory link-count and timestamp bookkeeping. */
	source->i_nlink++;
	if (target) {
		new_dir->i_mtime = new_dir->i_ctime = CURRENT_TIME;
		if (S_ISDIR (source->i_mode))
			target->i_nlink--;
		target->i_nlink--;
	} else if (S_ISDIR (source->i_mode)) {
		new_dir->i_nlink++;
	}

	source->i_nlink--;
	if (S_ISDIR (source->i_mode)) {
		new_dir->i_mtime = new_dir->i_ctime = CURRENT_TIME;
		old_dir->i_nlink--;
	}
	/* mark_inode_dirty() on the four inodes is intentionally not called. */

      bail:
	LOG_EXIT_LONG (error);
	return error;
}				/* ocfs_rename */

/*
 * ocfs_symlink()
 *
 * Creates a symbolic link named by dentry in dir whose target is
 * symname.  The link inode is created through ocfs_mknod(), extended to
 * strlen(symname) bytes with ocfs_create_modify_file(), and then filled
 * by ocfs_block_symlink().
 *
 * Returns 0 on success, -EIO when the parent or file offsets (or the
 * volume) cannot be determined or oin verification fails, the
 * ocfs_mknod() error if node creation fails, -EINTR/-ENOSPC from the
 * extend step (any other extend failure is reported as -ENOSPC), or the
 * ocfs_block_symlink() result.
 *
 * dir->i_count is raised for the duration of the call.
 */
static int ocfs_symlink (struct inode *dir, struct dentry *dentry, const char *symname)
{
	int error;
	ocfs_super *osb = NULL;
	ocfs_inode *oin = NULL;
	struct inode *inode, *parentInode = NULL;
	__u64 fileOff, parentOff;
	__u64 newsize;
	int status;
	int l;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, symname='%s' actual='%*s')\n", dir, dentry, symname,
                        dentry->d_name.len, dentry->d_name.name);

	atomic_inc (&dir->i_count);

	/* This check used to be repeated twice verbatim; once is enough. */
	if (!dentry->d_parent || !dentry->d_parent->d_inode) {
		LOG_ERROR_STR ("failed to get parent inode!");
		error = -EIO;
		goto bail;
	}
	parentInode = dentry->d_parent->d_inode;

	error = ocfs_mknod (dir, dentry, S_IFLNK | S_IRWXUGO, NODEV);
	if (error)
		goto bail;

	l = strlen (symname) + 1;	/* length including the NUL */
	newsize = l - 1;		/* on-disk size excludes it */

	/* now that d_inode points to something */
	inode = dentry->d_inode;

	if (!ocfs_linux_get_inode_offset (parentInode, &parentOff, NULL)) {
		LOG_ERROR_STR ("failed to get parent offset!");
		error = -EIO;
		goto bail;
	}

	/*
	 * Without the file offset there is nothing valid to extend;
	 * previously an uninitialized fileOff was passed on.
	 */
	if (!ocfs_linux_get_inode_offset (inode, &fileOff, &oin)) {
		LOG_ERROR_STR ("failed to get file offset!");
		error = -EIO;
		goto bail;
	}

	if (oin != NULL) {
		osb = (ocfs_super *) oin->osb;
		ocfs_down_sem (&(oin->main_res), true);
		if (OIN_NEEDS_VERIFICATION (oin)) {
			LOG_TRACE_STR ("OIN_NEEDS_VERIFICATION");
			status = ocfs_verify_update_oin (osb, oin);
			if (status < 0) {
				LOG_TRACE_STR
				    ("ocfs_verify_update_oin failed");
				LOG_TRACE_STR ("TODO: disable volume");
				ocfs_up_sem (&(oin->main_res));
				error = -EIO;
				goto bail;
			}
		}
		ocfs_up_sem (&(oin->main_res));
	}

	/*
	 * osb was only ever set from the oin; fall back to the superblock
	 * so a NULL volume pointer is never handed to the extend call.
	 */
	if (osb == NULL)
		osb = (ocfs_super *) (dir->i_sb->u.generic_sbp);
	if (osb == NULL) {
		LOG_ERROR_STR ("Invalid OSB");
		error = -EIO;
		goto bail;
	}

	status =
	    ocfs_create_modify_file (osb, parentOff, oin, NULL, newsize,
			      &fileOff, FLAG_FILE_EXTEND, NULL, NULL);

	if (status < 0) {
		if (status != -EINTR && status != -ENOSPC) {
			LOG_ERROR_STATUS (status);
			error = -ENOSPC;
		} else
			error = status;
		goto bail;
	}

	if (oin != NULL) {
		ocfs_down_sem (&(oin->main_res), true);
		inode->i_size = newsize;
		ocfs_up_sem (&(oin->main_res));
	}
	error = ocfs_block_symlink (inode, symname, l);
	if (error < 0)
		LOG_ERROR_STATUS (error);

      bail:
	atomic_dec (&dir->i_count);
	LOG_EXIT_LONG (error);
	return error;
}				/* ocfs_symlink */

/*
 * ocfs_file_release()
 *
 * Release handler for an open file or directory.  Always returns 0.
 *
 * Directories: frees the ofile's curr_dir_buf (if any), releases the
 * ofile and returns; the parent's i_count is not touched on this path.
 *
 * Regular files: releases the ofile, decrements osb->file_open_cnt and
 * oin->open_hndl_cnt, then either releases the oin outright (when this
 * dentry's d_count is zero and the oin is neither marked for deletion
 * nor in use) or offers it to ocfs_release_cached_oin().  Finally the
 * parent directory inode's i_count is decremented.
 */
static int ocfs_file_release (struct inode *inode, struct file *file)
{
	//int nbhs = KIO_MAX_SECTORS;
	ocfs_file *ofile = NULL;
        ocfs_super * osb;
        ocfs_inode *oin;
        struct dentry *dentry;
        struct inode *parent;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", inode, file,
                        file->f_dentry->d_name.len, file->f_dentry->d_name.name);

	dentry = file->f_dentry;

	/* private_data, when set, is this open's ocfs_file. */
	if (file->private_data)
                ofile = (ocfs_file *) file->private_data;


        /* dir */
	if (S_ISDIR (inode->i_mode)) {
	        /* fix all this - need a real open/close for directories */
		if (ofile) {
                        if (ofile->curr_dir_buf) {
			        ocfs_safefree (ofile->curr_dir_buf);
			        ofile->curr_dir_buf = NULL;
		        }
		        // hmm
		        // if (ofile->f_iobuf)
		        //        free_kiovec_sz(1, &ofile->f_iobuf, &nbhs);
		        ocfs_release_ofile (ofile);
                }
                /* Directories skip the parent i_count decrement below. */
                goto bail;
        }

        /* file */
        osb = ((ocfs_super *)(inode->i_sb->u.generic_sbp));
        oin = NULL;
                
        /* No ofile: nothing to release, only the parent count to drop. */
        if (ofile == NULL)
                goto do_parent_dec;

        oin = ofile->oin;
        OCFS_ASSERT (oin);

        /*
         * main_res is held from here until one of the ocfs_up_sem() calls
         * below; osb_res is taken inside it only around the two counters.
         */
        ocfs_down_sem (&(oin->main_res), true);
        ocfs_release_ofile (ofile);
        ocfs_down_sem (&(osb->osb_res), true);
        osb->file_open_cnt--;
        oin->open_hndl_cnt--;
        ocfs_up_sem (&(osb->osb_res));
                
        /* The root directory's oin is never torn down from here. */
        if (oin->oin_flags & OCFS_OIN_ROOT_DIRECTORY) {
                ocfs_up_sem (&(oin->main_res));
                goto do_parent_dec;
        }
                
        LOG_TRACE_ARGS ("openhandles: %d / osbfiles: %d / refcount: %d\n",
                         oin->open_hndl_cnt, osb->file_open_cnt,
                        atomic_read(&dentry->d_count)); 
               
        /* FIXME: other call sites walk every dentry of the inode; this   */
        /* one only looks at the dentry used for the open.  That is       */
        /* tolerated because hard links are not supported yet.            */
        if (!atomic_read(&dentry->d_count)) { 
		if (oin->oin_flags & OCFS_OIN_OPEN_FOR_DIRECTIO) {
			OCFS_CLEAR_FLAG(oin->oin_flags, OCFS_OIN_OPEN_FOR_DIRECTIO);
               }
                /* Pending deletion or still in use: leave the oin alone. */
                if (oin->oin_flags & OCFS_OIN_NEEDS_DELETION ||
                    oin->oin_flags & OCFS_OIN_IN_USE) {
                        ocfs_up_sem (&(oin->main_res));
                        goto do_parent_dec; 
                }
                /* main_res is dropped before the oin is released. */
                ocfs_up_sem (&(oin->main_res));
                ocfs_release_oin (oin, true);
        } else {
                /* Dentry still referenced: let the cached-oin path decide. */
                ocfs_up_sem (&(oin->main_res));
                ocfs_release_cached_oin (osb, oin);
        }
        
do_parent_dec:
        /* NOTE(review): presumably balances an increment taken at open - confirm. */
        if (dentry && dentry->d_parent && 
            dentry->d_parent->d_inode) {
                parent = dentry->d_parent->d_inode;
                if (parent)
                        atomic_dec (&parent->i_count);
        }

bail:
	LOG_EXIT_LONG (0);
	return 0;
}				/* ocfs_file_release */

/*
 * ocfs_flush()
 *
 * Writes out the dirty buffers attached to the file's inode via
 * fsync_inode_buffers() and returns that call's result, so a failed
 * write-back is reported to the caller instead of being dropped.
 */
static int ocfs_flush (struct file *file)
{
	int ret;

	LOG_ENTRY_ARGS ("(0x%p, '%*s')\n", file,
                        file->f_dentry->d_name.len, file->f_dentry->d_name.name);

	ret = fsync_inode_buffers (file->f_dentry->d_inode);

	LOG_EXIT_LONG (ret);
	return ret;
}				/* ocfs_flush */

/*
 * ocfs_sync_file()
 *
 * fsync handler: writes out the dirty buffers attached to the dentry's
 * inode via fsync_inode_buffers() and returns that call's result, so
 * fsync no longer reports success when write-back failed.  The file
 * and datasync arguments are only logged.
 */
static int ocfs_sync_file (struct file *file, struct dentry *dentry, int datasync)
{
	int ret;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, %d, '%*s')\n", file, dentry, datasync,
                        dentry->d_name.len, dentry->d_name.name);

	ret = fsync_inode_buffers (dentry->d_inode);

	LOG_EXIT_LONG (ret);
	return ret;
}				/* ocfs_sync_file */

/*
 * ocfs_put_super()
 *
 * Superblock release hook.  Syncs the device with fsync_no_super() and
 * drops the module use count; no other teardown happens here.
 */
static void ocfs_put_super (struct super_block *sb)
{
	LOG_ENTRY_ARGS ("(0x%p)\n", sb);

	/* Flush the device first, then give back the module reference. */
	fsync_no_super (sb->s_dev);
	LOG_TRACE_STR ("put super... do nothing!  DONE!!!!");
	MOD_DEC_USE_COUNT;

	LOG_EXIT ();
}				/* ocfs_put_super */

/*
 * ocfs_readdir()
 *
 * Directory iteration.  filp->f_pos selects what is emitted:
 *   0   - the "." entry (one entry per call),
 *   1   - the ".." entry (one entry per call),
 *   >=2 - on-disk entries found by ocfs_find_files_on_disk(), until it
 *         reports no more entries or filldir() refuses one.
 *
 * When filldir() refuses an on-disk entry, that entry is remembered in
 * ofile->filldir and replayed first on the next call.  If the replay is
 * refused as well, the remembered entry is kept untouched (it used to
 * be overwritten with whatever the scratch file entry contained).
 *
 * Returns 0 normally, -ENOTDIR if the inode is not a directory and
 * -ENOMEM on allocation failure.
 */
static int ocfs_readdir (struct file *filp, void *dirent, filldir_t filldir)
{
	int pos;
	struct inode *inode;
	struct super_block *sb;
	ocfs_super *osb;
	ocfs_inode *oin;
	ocfs_file *ofile;
	ocfs_file_entry *entry = NULL;
	__u64 rootOff;
	int ret = 0;

	/*
	 * Validate before the entry trace: its arguments dereference
	 * filp->f_dentry, so the check must come first to be of any use.
	 */
	if (!filp ||
	    !filp->f_dentry ||
	    !filp->f_dentry->d_inode || !filp->f_dentry->d_inode->i_sb) {
		LOG_TRACE_STR ("Bad file pointer");
		return 0;
	}

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", filp, dirent, 
                        filp->f_dentry->d_name.len, filp->f_dentry->d_name.name);

	pos = filp->f_pos;
	inode = filp->f_dentry->d_inode;
	sb = inode->i_sb;
	if (!sb->u.generic_sbp) {
		LOG_TRACE_STR ("Invalid OSB");
		goto bail;
	}
	osb = ((ocfs_super *)(sb->u.generic_sbp));

	if (!ocfs_linux_get_inode_offset (inode, &rootOff, &oin)) {
		LOG_TRACE_STR ("Inode has no OIN");
		goto bail;
	}

	if (!S_ISDIR (inode->i_mode)) {
		LOG_TRACE_STR ("Not a dir");
		ret = -ENOTDIR;
		goto bail;
	}

	switch (pos) {
	case 0:
		if (filldir (dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
			break;
		pos++;
		filp->f_pos++;
		break;

	case 1:
		if (filldir (dirent, "..", 2, 1,
			     filp->f_dentry->d_parent->d_inode->i_ino,
			     DT_DIR) < 0)
			break;
		pos++;
		filp->f_pos++;
		break;

	default:
		/* First on-disk entry: start over with a fresh ofile. */
		if (pos == 2) {
			ocfs_safefree (filp->private_data);
			filp->private_data = (void *) ocfs_allocate_ofile ();
			if (filp->private_data == NULL) {
				LOG_TRACE_STR ("Failed to allocate OFile");
				ret = -ENOMEM;
				goto bail;
			}
		}

		entry = ocfs_allocate_file_entry ();
		if (entry == NULL) {
			LOG_TRACE_STR ("Failed to allocate file entry");
			ret = -ENOMEM;
			goto bail;
		}
		ofile = (ocfs_file *) filp->private_data;

		while (1) {
			int r;

			/* todo - find out if we need locking */
			if (ofile->filldir.ino != 0) {
				/* Replay the entry that did not fit last time. */
				r = filldir (dirent, ofile->filldir.fname,
					     strlen (ofile->filldir.fname),
					     ofile->filldir.pos,
					     ofile->filldir.ino, DT_UNKNOWN);
				if (r < 0)
					goto bail;	/* still pending; keep it saved */
				ofile->filldir.ino = 0;
			} else {
				if (ocfs_find_files_on_disk (osb, rootOff, NULL,
							     entry, ofile) < 0)
					break;
				r = filldir (dirent, entry->filename,
					     strlen (entry->filename),
					     filp->f_pos,
					     LO (entry->this_sector),
					     DT_UNKNOWN);
				if (r < 0) {
					/* Remember it for the next call. */
					memcpy (ofile->filldir.fname,
						entry->filename,
						OCFS_MAX_FILENAME_LENGTH);
					ofile->filldir.pos = filp->f_pos;
					ofile->filldir.ino =
					    LO (entry->this_sector);
					goto bail;
				}
			}
			pos++;
			filp->f_pos++;
		}
		break;
	}

      bail:
	if (entry)
		ocfs_release_file_entry (entry);
	LOG_EXIT_LONG (ret);
	return ret;
}				/* ocfs_readdir */

/*
 * ocfs_put_inode()
 *
 * Called when a reference to the inode is dropped.  If the inode
 * carries an oin and its i_count reads 1, the oin's extent maps are
 * deleted.
 */
static void ocfs_put_inode (struct inode *inode)
{
	LOG_ENTRY_ARGS ("(0x%p)\n", inode);
	/* i_count is an atomic_t; read it rather than passing it to %d. */
	LOG_TRACE_ARGS ("put_inode: count=%d\n", atomic_read (&inode->i_count));

	if (inode_data_is_oin (inode) && atomic_read (&inode->i_count) == 1) {
		ocfs_inode *oin = (ocfs_inode *) inode->u.generic_ip;

		ocfs_delete_all_extent_maps (oin);
	}

	LOG_EXIT ();
	return;
}				/* ocfs_put_inode */

/*
 * ocfs_clear_inode()
 *
 * Tears down the OCFS state hanging off an inode.
 *
 * Inode with an oin: if it is the volume's root directory oin, the
 * device is synced and the volume dismounted; otherwise every ofile on
 * the oin is released, its extent maps are deleted and the oin itself
 * is released.
 *
 * Inode without an oin: the sector node registered for the inode's
 * disk offset (if any) is detached from its oin and removed.
 */
static void ocfs_clear_inode (struct inode *inode)
{
	ocfs_super *osb;

	LOG_ENTRY_ARGS ("(ino=0x%p)\n", inode);

	if (!inode)
		goto bail;

	if (inode_data_is_oin (inode)) {
		ocfs_inode *oin;
		ocfs_file *ofile;
		struct list_head *cur;
		struct list_head *next;

		LOG_TRACE_STR ("inode with oin : clear inode");

		oin = (ocfs_inode *) inode->u.generic_ip;
		osb = (ocfs_super *) oin->osb;

		if (oin == osb->oin_root_dir) {
			LOG_TRACE_STR
			    ("this is the root inode, doing cleanup now!");
			fsync_no_super (inode->i_sb->s_dev);
			LOG_TRACE_STR ("syncing past root inode");
			LOG_TRACE_STR ("calling dismount");
			ocfs_dismount_volume (inode->i_sb);
			goto bail;
		}

		/* Safe iteration: each ofile is released while walking. */
		list_for_each_safe (cur, next, &(oin->next_ofile)) {
			ofile = list_entry (cur, ocfs_file, next_ofile);
			ocfs_release_ofile (ofile);
		}

		ocfs_delete_all_extent_maps (oin);

		ocfs_release_cached_oin (osb, oin);
		ocfs_release_oin (oin, true);
		LOG_TRACE_STR ("yeah! done with deallocs!");
	} else {
		__u64 fileOff;
		ocfs_lock_res *lockres = NULL;

		osb = (ocfs_super *) (inode->i_sb->u.generic_sbp);

		if (!ocfs_linux_get_inode_offset (inode, &fileOff, NULL)) {
			LOG_TRACE_STR ("Could not find offset");
			goto bail;
		}

		if (ocfs_lookup_sector_node (osb, fileOff, &lockres) != 0) {
			LOG_TRACE_STR
			    ("hashtable has already been destroyed.  skipping.");
			goto bail;
		}

		if (!lockres) {
			LOG_TRACE_STR ("lockres in hash is null");
			goto bail;
		}

		/* Break the oin <-> lockres link before removing the node. */
		if (lockres->oin) {
			ocfs_put_lockres (lockres->oin->lock_res);
			lockres->oin->lock_res = NULL;
			lockres->oin = NULL;
		}
		ocfs_remove_sector_node (osb, lockres);
		ocfs_put_lockres (lockres);
	}

      bail:
	LOG_EXIT ();
	return;
}				/* ocfs_clear_inode */

#if 0
/*
 * ocfs_delete_inode()
 *
 * Compiled out.  Placeholder for an inode deletion hook: it only emits
 * entry/trace/exit log messages and performs no cleanup.
 */
static void ocfs_delete_inode (struct inode *inode)
{
	LOG_ENTRY ();

	LOG_TRACE_STR ("Inode being junked, need to do cleanup here");

	LOG_EXIT ();
	return;
}				/* ocfs_delete_inode */
#endif

/*
 * ocfs_setattr()
 *
 * Applies the attribute changes described by attr to the dentry's
 * inode.  Only mode, uid, gid, size and the three timestamps are
 * handled; a request with none of those bits set is ignored.
 *
 * A size change is first pushed to disk with ocfs_create_modify_file()
 * (FLAG_FILE_EXTEND, or FLAG_FILE_TRUNCATE when built with
 * RECLAIM_SPACE_ON_TRUNCATE and the file shrinks); then all attributes
 * are written with FLAG_FILE_UPDATE and finally copied into the
 * in-memory inode with inode_setattr().
 *
 * Returns 0 on success or when nothing was done (no supported bit set,
 * missing parent, or the dentry is the volume root - these keep their
 * historical "silent success" behaviour).  Returns the
 * inode_change_ok() error, -EIO when the disk offsets cannot be
 * determined or the update fails, -ENOSPC/-EINTR from the resize step
 * (other resize failures are reported as -ENOSPC), or -EINTR from the
 * update step.
 *
 * The parent inode's i_count is raised for the duration of the call.
 */
static int ocfs_setattr (struct dentry *dentry, struct iattr *attr)
{
	struct inode *parentInode;
	struct inode *inode = dentry->d_inode;
	int error = 0;
	__u64 newsize;
	int status;
	ocfs_inode *oin = NULL;
	ocfs_super *osb = NULL;
	__u64 parentOff, fileOff;
	ocfs_file_entry *fe = NULL;

	LOG_ENTRY_ARGS ("(0x%p, '%*s')\n", dentry,
                        dentry->d_name.len, dentry->d_name.name);

	osb = ((ocfs_super *)(inode->i_sb->u.generic_sbp));

	if (!dentry->d_parent || !dentry->d_parent->d_inode) {
		LOG_ERROR_STR ("bad inode or root inode");
		goto bail2;
	}
	if (dentry == inode->i_sb->s_root) {
		LOG_ERROR_STR("changes to root inode not allowed");
		goto bail2;
	}

	parentInode = dentry->d_parent->d_inode;
	atomic_inc (&parentInode->i_count);
	newsize = attr->ia_size;

	if (attr->ia_valid & ATTR_MODE)
		LOG_TRACE_ARGS ("mode change: %d\n", attr->ia_mode);
	if (attr->ia_valid & ATTR_UID)
		LOG_TRACE_ARGS ("uid change: %d\n", attr->ia_uid);
	if (attr->ia_valid & ATTR_GID)
		LOG_TRACE_ARGS ("gid change: %d\n", attr->ia_gid);
	if (attr->ia_valid & ATTR_SIZE)
		LOG_TRACE_STR ("size change...");
	if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME))
		LOG_TRACE_STR ("time change...");

	if (!(attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME |
				ATTR_SIZE | ATTR_GID | ATTR_UID | ATTR_MODE))) {
		LOG_TRACE_STR
		    ("can only change mode, uid, gid, size and time.  exiting!");
		goto bail;
	}

	error = inode_change_ok (inode, attr);
	if (error)
		goto bail;

	/* get the file and parent offsets, and the file oin if present */
	if (!ocfs_linux_get_inode_offset (inode, &fileOff, &oin) ||
	    !ocfs_linux_get_inode_offset (parentInode, &parentOff, NULL)) {
		LOG_ERROR_STR ("error getting inode offset");
		/* Nothing was changed: report it instead of returning 0. */
		error = -EIO;
		goto bail;
	}

	if (attr->ia_valid & ATTR_SIZE) {
		__u32 flags;

		if (oin != NULL) {
			ocfs_down_sem (&(oin->main_res), true);
			if (OIN_NEEDS_VERIFICATION (oin)) {
				LOG_TRACE_STR ("OIN_NEEDS_VERIFICATION");
				status = ocfs_verify_update_oin (osb, oin);
				if (status < 0) {
					LOG_ERROR_STATUS (status);
					LOG_TRACE_STR ("TODO: disable volume");
					ocfs_up_sem (&(oin->main_res));
					error = -EIO;
					goto bail;
				}
			}
			ocfs_up_sem (&(oin->main_res));
		}

#ifdef RECLAIM_SPACE_ON_TRUNCATE
		if (inode->i_size > newsize)
			flags = FLAG_FILE_TRUNCATE;
		else
#endif
			flags = FLAG_FILE_EXTEND;

		status = ocfs_create_modify_file (osb, parentOff, oin,
				     NULL, newsize, &fileOff,
				     flags, NULL, NULL);

		if (status < 0) {
			if (status != -EINTR && status != -ENOSPC) {
				LOG_ERROR_STATUS (status);
				error = -ENOSPC;
			} else
				error = status;
			goto bail;
		}

		if (oin != NULL) {
			ocfs_down_sem (&(oin->main_res), true);
#ifdef RECLAIM_SPACE_ON_TRUNCATE
			if (flags==FLAG_FILE_TRUNCATE) {
				ocfs_delete_all_extent_maps(oin);
			}
#endif
			inode->i_size = newsize;
			ocfs_up_sem (&(oin->main_res));
		}
	}

	/* if directory, put FILE_ENTRY ptr into fileOff */
	if (S_ISDIR (inode->i_mode))
		ocfs_linux_get_dir_entry_offset (osb, &fileOff, parentOff, &(dentry->d_name), &fe);

	/* A fileOff of -1 means there is no entry to update. */
	status = -EFAIL;
	if (fileOff != -1)
		status =
		    ocfs_create_modify_file (osb, parentOff, NULL, NULL, newsize,
				      &fileOff, FLAG_FILE_UPDATE, NULL, attr);
	if (status < 0) {
		if (status != -EINTR) {
			LOG_ERROR_STATUS (status);
			error = -EIO;
		} else
			error = status;
		goto bail;
	}

	inode_setattr (inode, attr);

      bail:
	atomic_dec (&parentInode->i_count);
      bail2:
	/* fe is only set on the directory path; guard like the other callers. */
	if (fe)
		ocfs_release_file_entry(fe);
	LOG_EXIT_LONG (error);
	return error;
}				/* ocfs_setattr */

/*
 * ocfs_getattr()
 *
 */
static int ocfs_getattr (struct dentry *dentry, struct iattr *attr)
{
	ocfs_inode *oin;
	struct inode *inode;
        struct super_block *sb = dentry->d_inode->i_sb;
	int status;

	LOG_ENTRY_ARGS ("(0x%p, 0x%p, '%*s')\n", dentry, attr,
                        dentry->d_name.len, dentry->d_name.name);

	inode = dentry->d_inode;
	if (inode == NULL || !inode_data_is_oin (inode))
		goto bail;
	oin = ((ocfs_inode *)inode->u.generic_ip);
	if (oin == ((ocfs_super *)(sb->u.generic_sbp))->oin_root_dir)
		goto bail;
	if (oin != NULL) {
		ocfs_down_sem (&(oin->main_res), true);
		status = ocfs_verify_update_oin (oin->osb, oin);
		if (status < 0)
			LOG_ERROR_STATUS (status);
		ocfs_up_sem (&(oin->main_res));
	}

      bail:
	LOG_EXIT_LONG (0);
	return 0;
}				/* ocfs_getattr */

/*
 * ocfs_dentry_revalidate()
 *
 */
static int ocfs_dentry_revalidate (struct dentry *dentry, int flags)
{
	int ret = 0;    /* if all else fails, just return false */
	int tmpstat = 0;
	ocfs_file_entry *fe = NULL;
	struct inode *inode;
	ocfs_inode *oin;
	ocfs_super *osb;
	__u64 off;
	ocfs_find_inode_args args;
        struct qstr q;
	
        LOG_ENTRY_ARGS ("(0x%p, %d, '%*s')\n", dentry, flags,
                        dentry->d_name.len, dentry->d_name.name);

	if ((inode = dentry->d_inode) == NULL ||
	    (osb = (ocfs_super *)(inode->i_sb->u.generic_sbp)) == NULL)
                goto bail;

	if (osb->publ_map == (1 << osb->node_num)) {
		LOG_TRACE_STR ("Only node alive.  revalidate=true.");
		ret = 1;
		goto bail;
	}

        /* check for oin */
	if (inode_data_is_oin (inode)) {
                ocfs_lock_res *res = NULL;
                ret = 1;  /* with an oin we cannot fail revalidate */
		oin = ((ocfs_inode *)inode->u.generic_ip);

                if (ocfs_lookup_sector_node (osb, oin->file_disk_off, &res)==0) {
                        /* if I hold cache lock, no revalidate needed */
                        if (res->lock_type == OCFS_DLM_ENABLE_CACHE_LOCK &&
                            res->master_node_num == osb->node_num) {
				ocfs_put_lockres (res);
                                goto bail;
                        } else
				ocfs_put_lockres (res);
                }
                /* hit the disk */
                /* TODO: optimize */
		ocfs_down_sem (&(oin->main_res), true);
		oin->needs_verification = true;
		tmpstat = ocfs_verify_update_oin(osb, oin);
		if (tmpstat < 0)
			LOG_ERROR_STATUS (tmpstat);
		ocfs_up_sem (&(oin->main_res));
		goto bail;
	} 
        
        /* no oin for this dentry, must hit the disk */
        /* TODO: optimize */
        if (S_ISDIR (inode->i_mode)) {
		__u64 parentOff;
                struct inode *parent = dentry->d_parent->d_inode;

                /* shouldn't be revalidating root dir, need offset to parent, and fe for this dir */
		if (osb->oin_root_dir->inode == inode ||
		    !ocfs_linux_get_inode_offset (parent, &parentOff, NULL) ||
		    !ocfs_linux_get_dir_entry_offset (osb, &off, parentOff, &(dentry->d_name), &fe)) 
                        goto bail;
	} else if (ocfs_linux_get_inode_offset (inode, &off, NULL)) {
                if (ocfs_force_get_file_entry (osb, &fe, off, true) < 0)
                        goto bail;                    
        } else {
                /* icky failure case :( */
                goto bail;
        }            

        /* we now have a file entry to call read_inode */
        q.name = fe->filename;
        q.len = strlen(fe->filename);
        if (fe->sync_flags & OCFS_SYNC_FLAG_MARK_FOR_DELETION ||
            fe->sync_flags & OCFS_SYNC_FLAG_NAME_DELETED ||
            fe->sync_flags & OCFS_SYNC_FLAG_DELETED ||
            ocfs_compare_qstr(&dentry->d_name, &q) != 0) {
                LOG_TRACE_STR("found the file entry, but it has been deleted or renamed!");
                ret = 0;  /* it is now officially stale :) */
        } else {
	        args.offset = fe->this_sector;
	        args.entry = fe;
	        ocfs_read_inode2 (inode, (void *) &args);
	        ret = 1;
        }
        
bail:
        if (fe)
                ocfs_release_file_entry (fe);
	LOG_EXIT_LONG (ret);
	return ret;
}				/* ocfs_dentry_revalidate */

#if 0
    /*
     * ocfs_inode_revalidate()
     *
     * Compiled out.  Would re-verify the oin of the dentry's inode with
     * ocfs_verify_update_oin() and return -ESTALE if that fails, 0
     * otherwise.  Inodes without an oin and the root directory oin are
     * skipped.
     */
    static int ocfs_inode_revalidate (struct dentry *dentry)
    {
            int ret;		/*  -ESTALE */
            struct inode *inode;
            ocfs_inode *oin;
            int status;

            LOG_ENTRY_ARGS ("(0x%p)\n", dentry);

            ret = 0;
            inode = dentry->d_inode;
            if (inode == NULL || !inode_data_is_oin (inode))
                    goto bail;
            oin = ((ocfs_inode *)inode->u.generic_ip);
            /*
             * NOTE(review): "->" binds tighter than the cast, so as written
             * the cast applies to the oin_root_dir member access rather than
             * to generic_sbp; needs extra parentheses if ever re-enabled.
             */
            if (oin == (ocfs_super *)(dentry->d_inode->i_sb->u.generic_sbp)->oin_root_dir)
                    goto bail;
            if (oin != NULL)
    //    if (OIN_NEEDS_VERIFICATION(oin))
            {
                    LOG_TRACE_STR ("OIN needs verification");
                    status = ocfs_verify_update_oin (oin->osb, oin);
                    if (status < 0) {
                            LOG_ERROR_STR ("ocfs_verify_update_oin failed!");
                            ret = -ESTALE;
                            goto bail;
                    }
            }

          bail:
            LOG_EXIT_LONG (ret);
            return ret;
    }				/* ocfs_inode_revalidate */
#endif

/*
 * ocfs_release_cached_oin()
 *
 * Detaches an idle oin from its lock resource.
 *
 * The oin is considered busy - and left untouched - when any dentry of
 * its inode has a non-zero d_count, when it still has open handles, or
 * when OCFS_OIN_IN_USE is set.  Otherwise it is flagged
 * OCFS_OIN_IN_TEARDOWN, its lock resource is unlinked from the oin and
 * from the cache list, the matching sector node is removed from the
 * hash, and the oin's reference on the lock resource is dropped.
 *
 * A temporary reference is taken on the lock resource while it is being
 * worked on; it is dropped at exit only if it was actually taken.
 * Previously the exit path dropped a reference on every path, including
 * those that bailed out before taking one.
 */
void ocfs_release_cached_oin (ocfs_super * osb, ocfs_inode * oin)
{
	ocfs_lock_res *lockResource = NULL;
	ocfs_lock_res *held = NULL;	/* non-NULL once our reference is taken */
	ocfs_lock_res *val=NULL;
	struct dentry *dentry;
	struct list_head *iter;
	struct list_head *temp_iter;
	struct inode *inode;
	int refcount = 0;
	int status = 0;

	LOG_ENTRY_ARGS ("(oin = 0x%p)\n", oin);

	if (oin == NULL)
		goto bail;

	ocfs_down_sem (&(oin->main_res), true);
	inode = oin->inode;

	/* Sum the reference counts of every dentry aliasing this inode. */
	if (inode) {
		list_for_each_safe (iter, temp_iter, &(inode->i_dentry)) {
			dentry = list_entry (iter, struct dentry, d_alias);
			refcount += atomic_read(&dentry->d_count);
		}
	}

	if (refcount != 0 || oin->open_hndl_cnt != 0 ||
	    oin->oin_flags & OCFS_OIN_IN_USE) {
		/* Still busy: nothing to release. */
		ocfs_up_sem (&(oin->main_res));
		goto bail;
	}

	OCFS_SET_FLAG (oin->oin_flags, OCFS_OIN_IN_TEARDOWN);
	ocfs_up_sem (&(oin->main_res));

	lockResource = (ocfs_lock_res *) oin->lock_res;
	if (lockResource == NULL) {
		LOG_ERROR_STR ("lockres=null");
		goto bail;
	}

	if (lockResource->signature != 0x55AA) {
		LOG_ERROR_STR("Invalid lock resource");
		goto bail;
	}
	ocfs_get_lockres (lockResource);
	held = lockResource;

	if (lockResource->sector_num == 0 || lockResource->oin != oin)
		goto bail;

	lockResource->oin = NULL;
	if (lockResource->in_cache_list) {
		list_del (& (lockResource-> cache_list));
		lockResource->in_cache_list = false;
	}

	status = ocfs_lookup_sector_node (osb, lockResource->sector_num, &val);
	if (status >= 0) {
		if (val == lockResource)
			ocfs_remove_sector_node (osb, val);
		else
			LOG_ERROR_ARGS("(lres=0x%p) != (val=0x%p)",
				       lockResource, val);
		/* Drop the reference returned by the lookup. */
		ocfs_put_lockres (val);
	} else {
		if (status == -EFAIL) {
			ocfs_put_lockres (oin->lock_res);
			oin->lock_res = NULL;
			LOG_TRACE_ARGS ("hashtable already destroyed\n");
			goto bail;
		}
		LOG_ERROR_ARGS("lres=0x%p is not in the hash!",
			       lockResource);
	}

	/* Drop the oin's own reference on the lock resource. */
	ocfs_put_lockres (oin->lock_res);
	oin->lock_res = NULL;

      bail:
	/* Balance the ocfs_get_lockres() above, if we got that far. */
	if (held != NULL)
		ocfs_put_lockres (held);
	LOG_EXIT ();
	return;
}				/* ocfs_release_cached_oin */

#ifdef CDTOR_FOR_SLAB
/* Slab constructor: stamps a new lock resource with its 0x55AA signature. */
static void lockres_ctor(void *p, kmem_cache_t *slab, unsigned long flags)
{
	ocfs_lock_res *fresh = (ocfs_lock_res *) p;

	fresh->signature = 0x55AA;
}

/* Slab destructor: frees the lock resource only if its signature is intact. */
static void lockres_dtor(void *p, kmem_cache_t *slab, unsigned long flags)
{
	ocfs_lock_res *dying = (ocfs_lock_res *) p;

	if (dying->signature != 0x55AA)
		return;

	ocfs_free_lockres(dying);
}
#endif

/*
 * ocfs_initialize_mem_lists()
 *
 */
int ocfs_initialize_mem_lists (void)
{
	OcfsGlobalCtxt.oin_cache = kmem_cache_create ("oin_cache",
		sizeof (ocfs_inode) + OCFS_POINTER_SIZE, 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN,
		NULL, NULL);

	OcfsGlobalCtxt.ofile_cache = kmem_cache_create ("ofile_cache",
		sizeof (ocfs_file) + OCFS_POINTER_SIZE, 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN,
		NULL, NULL);

	OcfsGlobalCtxt.lockres_cache = kmem_cache_create ("lockres_cache",
		sizeof (ocfs_lock_res) + OCFS_POINTER_SIZE, 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN,
		NULL, NULL);

	OcfsGlobalCtxt.fe_cache = kmem_cache_create ("fileentry_cache",
		OCFS_SECTOR_SIZE, 0, SLAB_NO_REAP | SLAB_HWCACHE_ALIGN, NULL, NULL);

	OCFS_SET_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_MEM_LISTS_INITIALIZED);

	return 0;
}				/* ocfs_initialize_mem_lists */

/*
 * ocfs_free_mem_lists()
 *
 */
void ocfs_free_mem_lists (void)
{
	kmem_cache_destroy (OcfsGlobalCtxt.oin_cache);
	kmem_cache_destroy (OcfsGlobalCtxt.ofile_cache);
	kmem_cache_destroy (OcfsGlobalCtxt.fe_cache);
	kmem_cache_destroy (OcfsGlobalCtxt.lockres_cache);
	OCFS_CLEAR_FLAG (OcfsGlobalCtxt.flags, OCFS_FLAG_MEM_LISTS_INITIALIZED);
}				/* ocfs_free_mem_lists */

/*
 * ocfs_set_exclusive_mount_flag()
 *
 */
static void ocfs_set_exclusive_mount_flag (struct super_block *sb, int val)
{
	struct buffer_head *bh;
	ocfs_vol_disk_hdr *hdr;

	LOG_ENTRY ();

	bh = bread (sb->s_dev, 0, 512);
	hdr = (ocfs_vol_disk_hdr *) bh->b_data;
	hdr->excl_mount = val;
	mark_buffer_dirty (bh);
	ll_rw_block (WRITE, 1, &bh);
	wait_on_buffer (bh);
	bforget (bh);

	LOG_EXIT ();
}				/* ocfs_set_exclusive_mount_flag */

/*
 * ocfs_remount()
 *
 */
int ocfs_remount (struct super_block *sb, int *flags, char *data)
{
	int status;
	ocfs_lock_res *lr = NULL;
	ocfs_super *osb;
	__u8 *buffer = NULL;
	int ret = 0;
	__u32 uid = current->fsuid;
	__u32 gid = current->fsgid;
	int length;
	bool c;
	bool reclaim_id;
	ocfs_file_entry	*fe = NULL;

	LOG_ENTRY ();

	fe = ocfs_allocate_file_entry();
	if (fe == NULL) {
		LOG_ERROR_STATUS (ret = -ENOMEM);
		goto bail;
	}

	ocfs_parse_options (data, &uid, &gid, &c, &reclaim_id);
	osb = (ocfs_super *)(sb->u.generic_sbp);

	if (!c) {
		osb->cache_fs = false;
		ocfs_set_exclusive_mount_flag (sb, NOT_MOUNTED_EXCLUSIVE);
		fsync_no_super (sb->s_dev);
		LOG_ERROR_STR ("remounted with nocache");
		ret = 0;
		goto bail;
	}

	length = (OCFS_MAXIMUM_NODES * osb->sect_size);
	buffer = ocfs_malloc (length);
	if (buffer == NULL) {
		LOG_ERROR_STATUS (ret = -ENOMEM);
		goto bail;
	}

	status = ocfs_acquire_lock (osb, OCFS_VOLUME_LOCK_OFFSET,
				    OCFS_DLM_EXCLUSIVE_LOCK, FLAG_FILE_CREATE,
				    &lr, fe);
	if (status < 0) {
		LOG_ERROR_STATUS (status);
		ret = -EBUSY;
		goto bail;
	}

	memset (buffer, 0, length);
	{
		bool save = osb->cache_fs;

		osb->cache_fs = false;
		status = ocfs_read_disk (osb, buffer, length,
					 osb->vol_layout.publ_sect_off);
		osb->cache_fs = save;
	}
	if (status >= 0) {
		int i;

		ocfs_down_sem (&(osb->osb_res), true);
		ocfs_update_publish_map (osb, buffer, false);
		ocfs_up_sem (&(osb->osb_res));

		for (i = 0; i < OCFS_MAXIMUM_NODES; i++) {
			if (IS_NODE_ALIVE
			    (osb->publ_map, i, OCFS_MAXIMUM_NODES))
				LOG_ERROR_ARGS ("node #%d is alive", i);
		}
		LOG_TRACE_ARGS ("publishmap = %u.%u\n", osb->publ_map);
		if (osb->publ_map == (1 << osb->node_num)) {
			ocfs_set_exclusive_mount_flag (sb, (osb->node_num));
			fsync_no_super (sb->s_dev);
			LOG_ERROR_STR ("remount synced device");
			ret = 0;
		} else {
			LOG_ERROR_STR ("failed to remount device");
		}
	}

	ocfs_safefree (buffer);
	status = ocfs_release_lock (osb, OCFS_VOLUME_LOCK_OFFSET,
				    OCFS_DLM_EXCLUSIVE_LOCK, 0, lr, fe);
	if (ret == 0) {
		osb->cache_fs = true;
	}

      bail:
	ocfs_release_file_entry(fe);
	ocfs_put_lockres (lr);
	LOG_EXIT_LONG (ret);
	return ret;
}				/* ocfs_remount */

/*
 * Module glue: ocfs_driver_entry is registered as the module's init
 * function, ocfs_driver_exit as its exit function, and the module is
 * declared to be GPL licensed.
 */
module_init (ocfs_driver_entry);
module_exit (ocfs_driver_exit);
MODULE_LICENSE ("GPL");
