/* IBM_PROLOG_BEGIN_TAG                                                   */
/* This is an automatically generated prolog.                             */
/*                                                                        */
/* bos720 src/bos/kernel/j2/include/j2_imap.h 1.32                        */
/*                                                                        */
/* Licensed Materials - Property of IBM                                   */
/*                                                                        */
/* Restricted Materials of IBM                                            */
/*                                                                        */
/* COPYRIGHT International Business Machines Corp. 1999,2009              */
/* All Rights Reserved                                                    */
/*                                                                        */
/* US Government Users Restricted Rights - Use, duplication or            */
/* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.      */
/*                                                                        */
/* IBM_PROLOG_END_TAG                                                     */

/* @(#)90     1.32  src/bos/kernel/j2/include/j2_imap.h, sysj2, bos720 4/22/09 12:55:48 */
/*
 * COMPONENT_NAME: (SYSJ2) JFS2 Physical File System
 *
 * FUNCTIONS:
 *
 * ORIGINS: 27
 *
 * (C) COPYRIGHT International Business Machines Corp. 1996, 1999
 * All Rights Reserved
 * Licensed Materials - Property of IBM
 *
 * US Government Users Restricted Rights - Use, duplication or
 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
 */

#ifndef	_H_J2_IMAP
#define _H_J2_IMAP	
/*
 * FUNCTION: disk inode allocation map manager
 */

/* Include j2_inode and j2_xtree out of tradition */
#include <j2/j2_inode.h>
#include <j2/j2_xtree.h>
#include <j2/j2_snapshot.h>

/*
 *	j2_imap.h: disk inode manager
 */

/*
 * Geometry of an inode allocation group (IAG) and of the inode map.
 * The following relationships hold between the values below and must
 * be maintained by hand if any of them changes:
 *	EXTSPERSUM == 1 << L2EXTSPERSUM
 *	EXTSPERIAG == SMAPSZ * EXTSPERSUM
 *	AMAPSIZE   == EXTSPERIAG * 4	(one 32-bit map word per extent)
 *	SMAPSIZE   == SMAPSZ * 4	(32-bit summary words)
 */
#define	EXTSPERIAG	128	/* number of disk inode extents per iag	*/
#define IMAPBLKNO	0	/* lblkno of dinomap within inode map	*/
#define SMAPSZ		4	/* number of words per summary map	*/
#define	EXTSPERSUM	32	/* number of extents per summary map entry */
#define	L2EXTSPERSUM	5	/* log2 of EXTSPERSUM			*/
#define	PGSPERIEXT	4	/* number of 4K pages per dinode extent */
#define	MAXIAGS		((1<<20)-1)	/* maximum number of iags	*/
#define	MAXAG		128	/* maximum number of allocation groups	*/

#define AMAPSIZE	512	/* bytes in each IAG allocation map (wmap, pmap) */
#define SMAPSIZE	16	/* bytes in each IAG summary map (inosmap, extsmap) */

/* convert inode number to iag number: drop the low L2INOSPERIAG bits,
 * i.e. the index of the inode within its iag.
 */
#define	INOTOIAG(ino)	((ino) >> L2INOSPERIAG)

/* convert iag number to logical block number of the iag page.
 * l2nbperpg is log2 of the number of blocks per page.  The "+ 1" skips
 * one page ahead of the iag pages (IMAPBLKNO places the dinomap control
 * page at lblkno 0).
 */
#define IAGTOLBLK(iagno,l2nbperpg)	(((iagno) + 1) << (l2nbperpg))

/* get the starting block number of the 4K page of an inode extent
 * that contains ino.
 *
 * (ino & (INOSPEREXT-1)) is the index of ino within its extent;
 * shifting right by L2INOSPERPAGE turns that into a page index within
 * the extent, and shifting left by l2nbperpg converts pages to blocks,
 * which is then added to the extent's starting address.
 */
#define INOPBLK(pxd,ino,l2nbperpg)    	(addressPXD((pxd)) +		\
	((((ino) & (INOSPEREXT-1)) >> L2INOSPERPAGE) << (l2nbperpg)))

/* number of backed inodes in inode extent. 0 if PXD address is NULL.
 *
 * Otherwise the extent length (in blocks) is shifted left by _l2bsize
 * (log2 of the block size) to get bytes, then right by L2jDISIZE
 * (presumably log2 of the on-disk inode size -- defined elsewhere) to
 * get a count of inodes.
 * NOTE: _ixpxd is expanded twice; pass an expression without side effects.
 */
#define IXnBackedInode(_ixpxd, _l2bsize) \
	( addressPXD(_ixpxd) ? \
 	  (((lengthPXD(_ixpxd) << (_l2bsize)) >> L2jDISIZE)) : 0 )

/* mask of backed inodes in inode extent bit map
 *
 * Yields a 32-bit mask whose _niix most-significant bits are set
 * (e.g. 1 -> 0x80000000, INOSPEREXT -> 0xffffffff).  The 0xffffffff
 * literal assumes INOSPEREXT == 32.
 *
 * _niix == 0 (which IXnBackedInode() returns for an extent with a NULL
 * PXD address) is handled explicitly: without the guard both shifts
 * would be by the full width of the operand, which is undefined
 * behaviour in C and only yields 0 on some hardware.
 *
 * The caller must ensure _niix <= INOSPEREXT.  _niix is expanded more
 * than once; pass an expression without side effects.
 */
#define IXBitMap(_niix)	\
    	(((_niix) == 0) ? 0U : \
	 ((0xffffffffU >> (INOSPEREXT - (_niix))) << (INOSPEREXT - (_niix))))

/* dinode buffer page size: the byte length of the inode extent
 * (length in blocks << _l2bsize), capped at BPSIZE.
 * NOTE: relies on MIN() and BPSIZE from other headers; depending on how
 * MIN() is written, _ixpxd may be evaluated more than once.
 */
#define IXPageSize(_ixpxd, _l2bsize) \
	MIN(BPSIZE, (lengthPXD(_ixpxd) << (_l2bsize)))

/*
 *	Serialization of inode allocation map
 *
 *	Each IAG is locked by obtaining the buffer for the IAG page.
 *
 *	Each AG has an exclusive lock which is used to control
 *	the serialization of the AG level information.
 *	This lock should be taken first whenever an AG
 *	level list will be modified or accessed.
 *
 *	There is an inode readers/writer lock for the inode map inode.
 *	A read lock needs to be taken whenever an IAG is read from the map or
 *	the global level information is read.
 *	A write lock needs to be taken whenever the global level information
 *	is modified or an atomic operation needs to be used.
 *
 *	The control page of the inode map is read into memory by diMount().
 *	Thereafter it should only be modified in memory and then it will be
 *	written out when the filesystem is unmounted by diUnmount().
 *
 *	imapctllock (im_imapctllock) is used to serialize the transactions
 *	on the ipimap ctl page.  It is taken in most functions that call
 *	siUpdatePSeries(), which update the intsnap information.  It is not
 *	used in siCreate() because the filesystem is in a quiesced state at
 *	that point.  It also needs to be taken by diNewIAG(), which updates
 *	lists on the ctl page.
 *
 *	The imapctllock is roughly paired with tlckIMAPCTL.  It is taken before
 *	txLock(tlckIMAPCTL) and released after txEnd().  Since txCommit()
 *	in the middle calls diUpdatePMap() which grabs the ipimap lock, the
 *	imapctllock must be taken before the ipimap lock everywhere.  It
 *	should also be taken after the iplist is locked.  Detailed lock
 *	ordering in diNewIAG() is described in its prolog.
 *
 *	The imapserieslock (im_snapserieslock) is a complex lock for
 *	read/write access to the snapshot series for both internal and
 *	external snapshots.  It makes sure the pSeries information in the
 *	imapctl page stays in step with the list of ipSnapshots in the
 *	wisSeries structure, and makes sure queries see the pSeries in a
 *	consistent state.  Although external snapshots do not store the
 *	pSeries under the imap directly, the ipimap acts as a gateway for
 *	access to both.
 */
/* iag free list lock (im_freelock)
 *
 * imap is a pointer to the in-core inode map; n is passed through
 * unchanged as the last argument of MUTEXLOCK_ALLOC().
 * The imap argument is parenthesized in every expansion so that pointer
 * expressions (e.g. "base + 1", "cond ? a : b") bind correctly to "->".
 */
#define	IAGFREE_LOCK_ALLOC(imap,n)\
	MUTEXLOCK_ALLOC(&(imap)->im_freelock,LOCK_ALLOC_PAGED,J2_IAGFREE_LOCK_CLASS,n)
#define	IAGFREE_LOCK_FREE(imap)	MUTEXLOCK_FREE(&(imap)->im_freelock)
#define	IAGFREE_LOCK_INIT(imap)	MUTEXLOCK_INIT(&(imap)->im_freelock)
#define IAGFREE_LOCK(imap)	MUTEXLOCK_LOCK(&(imap)->im_freelock)
#define IAGFREE_UNLOCK(imap)	MUTEXLOCK_UNLOCK(&(imap)->im_freelock)
/* filesystem-wide iag free list lock (im_fsfreelock)
 *
 * Uses the same lock class (J2_IAGFREE_LOCK_CLASS) as the iag free
 * list lock.  The imap argument is parenthesized in every expansion so
 * that pointer expressions bind correctly to "->".
 */
#define	FSIAGFREE_LOCK_ALLOC(imap,n)\
	MUTEXLOCK_ALLOC(&(imap)->im_fsfreelock,LOCK_ALLOC_PAGED,J2_IAGFREE_LOCK_CLASS,n)
#define	FSIAGFREE_LOCK_FREE(imap)	MUTEXLOCK_FREE(&(imap)->im_fsfreelock)
#define	FSIAGFREE_LOCK_INIT(imap)	MUTEXLOCK_INIT(&(imap)->im_fsfreelock)
#define FSIAGFREE_LOCK(imap)	MUTEXLOCK_LOCK(&(imap)->im_fsfreelock)
#define FSIAGFREE_UNLOCK(imap)	MUTEXLOCK_UNLOCK(&(imap)->im_fsfreelock)

/* imapctl lock used to serialize txns on ipimap ctl page.  See above.  */
#define IMAPCTL_LOCK_ALLOC(__imap, __n)\
	MUTEXLOCK_ALLOC(&(__imap)->im_imapctllock,LOCK_ALLOC_PAGED,J2_IMAP_LOCK_CLASS,__n)
#define	IMAPCTL_LOCK_FREE(__imap)	MUTEXLOCK_FREE(&(__imap)->im_imapctllock)
#define	IMAPCTL_LOCK_INIT(__imap)	MUTEXLOCK_INIT(&(__imap)->im_imapctllock)

/*
 * Acquire the imapctl lock and record that in the caller's flag:
 * __locktaken must be a modifiable lvalue and is set to TRUE.
 *
 * NOTE: this expands to a bare { } block rather than do { } while (0).
 * Written with a trailing ';' it cannot be the unbraced body of an
 * "if" that has an "else"; brace the call site in that situation.
 */
#define IMAPCTL_LOCK_TAKEN(__imap, __locktaken)		\
{							\
	MUTEXLOCK_LOCK(&(__imap)->im_imapctllock);	\
	__locktaken = TRUE;				\
}

/*
 * Release the imapctl lock only if __locktaken is set, then clear the
 * flag, so invoking it again with the same flag does not unlock a
 * second time.  Same bare-block caveat as IMAPCTL_LOCK_TAKEN.
 */
#define IMAPCTL_UNLOCK_TAKEN(__imap, __locktaken)		\
{								\
	if (__locktaken)					\
	{							\
		MUTEXLOCK_UNLOCK(&(__imap)->im_imapctllock);	\
		__locktaken = FALSE;				\
	}							\
}

/* snapseries used to serialize access to snapshot series, gated by ipimap.
 * See above
 *
 * All of the *_TAKEN / *_MINE macros below take a caller-supplied flag
 * (__locktaken, a modifiable lvalue) that records whether this call
 * site acquired the lock, and expand to a bare { } block rather than
 * do { } while (0): written with a trailing ';' they cannot be the
 * unbraced body of an "if" that has an "else".
 */
#define SNAPSERIES_LOCK_ALLOC(__imap, __n)\
	RDWRLOCK_ALLOC(&(__imap)->im_snapserieslock,LOCK_ALLOC_PAGED,J2_IMAP_LOCK_CLASS,__n)
#define	SNAPSERIES_LOCK_FREE(__imap)	RDWRLOCK_FREE(&(__imap)->im_snapserieslock)
#define	SNAPSERIES_LOCK_INIT(__imap)	RDWRLOCK_INIT(&(__imap)->im_snapserieslock)

/* take the series lock for read unconditionally; flag is set to TRUE */
#define SNAPSERIES_READLOCK_TAKEN(__imap, __locktaken)	\
{							\
	READ_LOCK(&(__imap)->im_snapserieslock);	\
	__locktaken = TRUE;				\
}

/*
 * take the series lock for read unless lock_mine() reports that the
 * lock is already held by the caller (presumably: owned by the current
 * thread -- confirm against the lock_mine() kernel service).  The flag
 * is TRUE only when this call acquired the lock, so the matching
 * SNAPSERIES_READUNLOCK_TAKEN releases only what was taken here.
 */
#define SNAPSERIES_READLOCK_MINE(__imap, __locktaken)	\
{							\
	if (!lock_mine(&(__imap)->im_snapserieslock)) {	\
		READ_LOCK(&(__imap)->im_snapserieslock);	\
		__locktaken = TRUE;				\
	} else						\
		__locktaken = FALSE;			\
}

/* drop the read lock if the flag says it was taken; clears the flag */
#define SNAPSERIES_READUNLOCK_TAKEN(__imap, __locktaken)	\
{								\
	if (__locktaken)					\
	{							\
		READ_UNLOCK(&(__imap)->im_snapserieslock);	\
		__locktaken = FALSE;				\
	}							\
}

/* take the series lock for write unconditionally; flag is set to TRUE */
#define SNAPSERIES_WRITELOCK_TAKEN(__imap, __locktaken)	\
{							\
	WRITE_LOCK(&(__imap)->im_snapserieslock);	\
	__locktaken = TRUE;				\
}

/* drop the write lock if the flag says it was taken; clears the flag */
#define SNAPSERIES_WRITEUNLOCK_TAKEN(__imap, __locktaken)	\
{								\
	if (__locktaken)					\
	{							\
		WRITE_UNLOCK(&(__imap)->im_snapserieslock);	\
		__locktaken = FALSE;				\
	}							\
}

/* per ag iag list locks (im_aglock[], one entry per allocation group)
 *
 * imap is a pointer to the in-core inode map; index/agno selects the
 * allocation group.  The imap argument is parenthesized in every
 * expansion so that pointer expressions bind correctly to "->".
 */
#define	AG_LOCK_ALLOC(imap,index,n)\
	MUTEXLOCK_ALLOC(&(imap)->im_aglock[index],LOCK_ALLOC_PAGED,J2_AG_LOCK_CLASS,n)
#define	AG_LOCK_FREE(imap,index)	MUTEXLOCK_FREE(&(imap)->im_aglock[index])
#define	AG_LOCK_INIT(imap,index)	MUTEXLOCK_INIT(&((imap)->im_aglock[index]))
#define AG_LOCK(imap,agno)	MUTEXLOCK_LOCK(&(imap)->im_aglock[agno])
#define AG_UNLOCK(imap,agno)	MUTEXLOCK_UNLOCK(&(imap)->im_aglock[agno])

/*
 * ATOMIC_ADD(addr, incr): add incr to the object addr.
 *
 * Despite its name, addr is the object itself (an lvalue), not a
 * pointer: the MP form takes its address for fetch_and_add().
 *
 * The UP form is parenthesized as a whole so that it stays a single
 * operand when used inside a larger expression.
 *
 * NOTE: the value of the expression differs between the two forms: the
 * MP form yields whatever fetch_and_add() returns (on AIX, the value
 * before the add -- confirm), the UP form yields the value after the
 * add.  Do not depend on the result.
 */
#if defined(_POWER_MP) || defined(SMP)
#define ATOMIC_ADD(addr, incr)	fetch_and_add(&(addr), (incr))
#else /* UP */
#define ATOMIC_ADD(addr, incr)	((addr) += (incr))
#endif

#define j2lwmInodePerAG	 5   	 /* minimum # of blocks for free inodes
			  	  * per AG, as a percentage (5%).
				  * ("lwm" presumably: low-water mark.)
				  */
/*
 *	inode allocation map:
 * 
 * inode allocation map consists of 
 * . the inode map control page dinomap_t and
 * . inode allocation group pages iag_t (per 4096 inodes)
 * which are addressed by standard J2 xtree.
 */
/*
 *	inode allocation group page (per 4096 inodes of an IAG)
 *
 *	On-disk layout, exactly one page (sizeof == PSIZE is asserted
 *	below): a 2048-byte half holding the header fields, summary maps
 *	and pad, followed by a 2048-byte half holding wmap, pmap and
 *	inoext.  The leading number in each field comment is the field
 *	size in bytes.  Fields must not be reordered or resized without
 *	adjusting pad[] so that the total stays at PSIZE.
 */
typedef struct {
	int64	agstart;	/* 8: starting block of ag		*/
	int32	iagnum;		/* 4: inode allocation group number 	*/
	int32	inofreefwd;	/* 4: ag inode free list forward 	*/
	int32	inofreeback;	/* 4: ag inode free list back 		*/
	int32	extfreefwd;	/* 4: ag inode extent free list forward */
	int32	extfreeback;	/* 4: ag inode extent free list back 	*/
	int32	iagfree;	/* 4: iag free list			*/

	/* summary map: 1 bit per inode extent */
	int32	inosmap[SMAPSZ];/* 16: sum map of mapwords w/ free inodes;
				 *	note: a 0 bit means the extent is
				 *	backed AND has at least one free
				 *	inode.  The bit is 1 if the extent is
				 *	not backed, and also 1 if the extent
				 *	is backed but all of its inodes are
				 *	in use.
				 */
	int32	extsmap[SMAPSZ];/* 16: sum map of mapwords w/ free extents */
	int32	nfreeinos;	/* 4: number of free inodes		*/
	int32	nfreeexts;	/* 4: number of free extents		*/
				/* (72) running total to this point	*/

	int32	dsnum;		/* 4: dataset number from aggregate imap*/
	int32	dsiagnum;	/* 4: dataset iag number */
	int32	dsiaglist;	/* 4: dataset iag list link */
				/* (84) running total to this point	*/

	uint8	pad[1964];		/* 1964: pad to 2048 bytes */

	/* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */
	uint32	wmap[EXTSPERIAG];	/* 512: working allocation map	*/
	uint32	pmap[EXTSPERIAG];	/* 512: persistent allocation map */
	pxd_t	inoext[EXTSPERIAG];	/* 1024: inode extent addresses */
} iag_t;				/* (4096) */

/* build-time check that the layout above is exactly one page */
RAS_FILE_ASSERT(iag_t, sizeof(iag_t) == PSIZE);

/*
 *	per AG control information (in inode map control page)
 *
 *	One entry per allocation group; dinomap_t carries an array of
 *	MAXAG of these (in_agctl[]).  16 bytes each.
 */
typedef struct
{
	int32	inofree;	/* 4: free inode list anchor      	*/
	int32	extfree;	/* 4: free extent list anchor 		*/
	int32	numinos;	/* 4: number of backed inodes 		*/
	int32	numfree;	/* 4: number of free inodes 		*/
} iagctl_t;			/* (16) */

/* J2DSET_CNAME_MAX: # chars in dataset descriptive name.  Must match
 * DSET_CNAME_MAX defined in dset.h; DSET_CNAME_MAX itself is not used
 * here because dset.h is not exported to user space.
 */
#define	J2DSET_CNAME_MAX	64

/*
 *	per fileset/aggregate inode map control page
 *
 *	On-disk layout, exactly one page (sizeof == PSIZE is asserted
 *	below):
 *	    64	prefix area
 *	 + 384	per imap extension (fsImap or dsImap overlay)
 *	 + 576	internal snapshot extension
 *	 + 1024	pad			(= 2048)
 *	 + 2048	per-AG control, MAXAG * sizeof(iagctl_t)
 *	Each union pairs a byte array that fixes the size of the region
 *	with the overlay struct(s) holding the fields actually in use.
 *	fsImap and dsImap share the same storage, so only one of the two
 *	is meaningful for a given control page.
 */
typedef struct
{
	/* 64: prefix area */
	union {
		uint8	prefix[64];

		struct {
			int32	in_freeiag;	 /* 4: free iag list anchor	*/
			int32	in_nextiag;	 /* 4: next free iag number	*/
			int32	in_numinos;	 /* 4: num of backed inodes */
			int32	in_numfree;	 /* 4: num of free backed inodes */

			int32	in_nbperiext;	 /* 4: num of blocks per inode extent */
			int32	in_l2nbperiext;	 /* 4: l2 of in_nbperiext */
			int32	in_diskblock;	 /* 4: defunct */
			int32	in_maxag;	 /* 4: defunct */

			int32	in_iaglisthead;	/* 4: dataset iag list head anchor */
			int32	in_iaglisttail;	/* 4: dataset iag list tail anchor */

		};	/* 40: bytes used of the 64-byte prefix */
	};

	/* 384: per imap extension */
	union {
		uint8	extension1[384];

		/* dataset management info per fsimap */
		struct {
			/* global filesystem wide values */
			int32	in_fsfreeiag;	/* 4: fs free iag list anchor	*/
			int32	in_fsnextiag;	/* 4: fs next free iag number	*/
			int32	in_fsnuminos;	/* 4: fs # backed inodes */
			int32	in_fsnumfree; 	/* 4: fs # free backed inodes */

			/* dataset */
			int32	in_dsDirectory;	/* 4: Dataset directory inum */
			int32	in_dsZombieList;	/* 4: dsZombieList anchor */

			/* snapshot */
			int32	in_numDSSnap;	/* 4: # dataset snapshots */
			int32	in_snapDir;	/* 4: Snapshot directory inum */
			int32	in_snapZombieList;	/* 4: snapshotZombieList anchor */
		} fsImap;	/* 36: */

		/* dataset management info per dsimap */
		struct {
			uint64	in_uuid[2];	/* 16: 128-bit UUID */
			uint64	in_auxuuid[2];	/* 16: 128-bit AUXUUID */
			char	in_dscname[J2DSET_CNAME_MAX];
					/* 64: Dataset descriptive name */
			char	in_dslname[J2_NAME_MAX+1];	/* 256: local name */
			int32	in_dsZombieNext;	/* 4: dsZombieList link */
			int32	in_dsZombiePrev;	/* 4: dsZombieList link */
			/* snapshotZombieList link fields are defined
			 * in sMapObject
			 */
			uint32	in_dsflags;	/* 4: ds flags */
		} dsImap;	/* 364 */
	};	/* 384: */

	/* 576: internal snapshot extension */
	union {
		uint8	extension2[576];

		struct {
			int32	in_snapType;	/* 4: Type of snapshot */
			isSeries_t	in_pSeries; /* 528: Internal snapshot series */
		};
	};

	uint8		pad[1024];	 /* 1024: pad to 2048 */

	/* 2048: AG control information */
	iagctl_t	in_agctl[MAXAG];
} dinomap_t;				/* (4096) */

/* build-time check that the layout above is exactly one page */
RAS_FILE_ASSERT(dinomap_t, sizeof(dinomap_t) == PSIZE);

/* log2 of the slot size, in bytes, used for logging purposes
 * (i.e. slots of 1 << 2 == 4 bytes)
 */
#define L2IMAPCTLSLOTSIZE	2

#ifdef _KERNEL

#include <j2/j2_txnmgr.h>
#include <dset/dset.h>

/* IP_HKWD_INUM: Used to get inode number for trace points
 *	This uses the high-order 32-bits for the fileset number since the inode
 *	number trace points are already passing the maximum number of params.
 *	If MAXIAGS changes to allow inodes with > 32 bit inode number, this
 *	approach will have to be modified.
 *
 *	For an inode in fileset FILESYSTEM_I the result is i_number alone;
 *	otherwise it is (i_fileset << 32) | i_number as a 64-bit value.
 *	IP appears several times in the expansion; pass an expression
 *	without side effects.
 */
#define	IP_HKWD_INUM(IP)	(((IP)->i_fileset == FILESYSTEM_I) ? (IP)->i_number \
			: ((((uint64)(IP)->i_fileset) << 32) | (IP)->i_number))

/*
 *	In-core inode map control page
 *
 *	Wraps a copy of the on-disk control page (im_imap) together with
 *	the locks and in-memory-only bookkeeping for one inode map.  The
 *	im_* shorthand macros that follow this struct reach into im_imap.
 *	The leading number in each field comment is the field size in
 *	bytes.
 */
typedef struct inomap
{
	dinomap_t	im_imap;	  /* 4096: on-disk inode allocation control */

	inode_t		*im_ipimap;	  /* 8: ptr to inode for imap	*/

	MUTEXLOCK_T	im_freelock;	  /* 8: iag free list lock	*/
	MUTEXLOCK_T	im_aglock[MAXAG]; /* 512: per AG locks		*/
					  /* NOTE(review): "512" looks stale --
					   * MAXAG (128) locks at the 8 bytes
					   * quoted for the other locks would
					   * be 1024; confirm.		*/
	uint32		*im_DBGdimap;	  /* NOTE(review): undocumented; the
					   * name suggests a debug aid -- confirm */
	MUTEXLOCK_T	im_fsfreelock;	  /* 8: fs iag free list lock	*/
	
	MUTEXLOCK_T	im_imapctllock;   /* 8: serialize ctl page txns:       *
					   *    taken before bmRead(imapctl),  *
					   * 	    and txLock(IMAPCTL)        *
					   *    released after txEnd()	       */
	RDWRLOCK_T	im_snapserieslock;/* 16: serialize access to snapshot  *
					   * series */

	/* dataset extension: the two structs share storage, so only the
	 * one matching the kind of imap (fsimap vs dsimap) is meaningful
	 */
	union {
		/* dataset management info per fsimap */
		struct {
			int32	im_dsnacount;	/* active dataset/dsnode count */

			/* list of instantiated datasets of the filesystem */
			inode_t	*im_fsdsList;	/* 8: anchor in fsimap */
		};

		/* dataset management info per dsimap */
		struct {
			dsnode_t	*im_dsnp;		/* 8: dsnode */
			int32	im_dsncount;	/* dsnode reference count */

			/* list of instantiated datasets of the filesystem */
			inode_t	*im_fsdsNext;	/* 8: link in dsimap */
			inode_t	*im_fsdsPrev;	/* 8: link in dsimap */
		};
	};

	/* snapshot extension */
	struct wisSeries	*im_wisSeries;	/* 8: ptr to snapshot series */
	/* array of instantiated snapshot generations bound with
	 * the filesystem/dataset is specified by snapshotSeries
	 */
} imap_t;

/*
 * Shorthand for fields of the on-disk control page embedded in imap_t
 * (im_imap), so that code can write imap->im_freeiag instead of
 * imap->im_imap.in_freeiag.  Being plain object-like macros, these
 * names are replaced wherever they appear as identifiers after this
 * point, so they must not be reused for unrelated variables or members.
 */

/* prefix area */
#define	im_freeiag	im_imap.in_freeiag
#define	im_nextiag	im_imap.in_nextiag
#define	im_numinos	im_imap.in_numinos
#define	im_numfree	im_imap.in_numfree
#define	im_nbperiext	im_imap.in_nbperiext
#define	im_l2nbperiext	im_imap.in_l2nbperiext
#define	im_agctl	im_imap.in_agctl

#define	im_iaglisthead	im_imap.in_iaglisthead
#define	im_iaglisttail	im_imap.in_iaglisttail

/* fsImap overlay (shares storage with the dsImap overlay) */
#define	im_fsfreeiag	im_imap.fsImap.in_fsfreeiag
#define	im_fsnextiag	im_imap.fsImap.in_fsnextiag
#define	im_fsnuminos	im_imap.fsImap.in_fsnuminos
#define	im_fsnumfree	im_imap.fsImap.in_fsnumfree

#define	im_dsDirectory	im_imap.fsImap.in_dsDirectory
#define	im_dsZombieList	im_imap.fsImap.in_dsZombieList
#define	im_numDSSnap	im_imap.fsImap.in_numDSSnap
#define	im_snapDir	im_imap.fsImap.in_snapDir
#define	im_snapZombieList	im_imap.fsImap.in_snapZombieList

/* dsImap overlay (shares storage with the fsImap overlay) */
#define	im_dsflags	im_imap.dsImap.in_dsflags
#define	im_uuid		im_imap.dsImap.in_uuid
#define	im_auxuuid	im_imap.dsImap.in_auxuuid
#define	im_dscname	im_imap.dsImap.in_dscname
#define	im_dslname	im_imap.dsImap.in_dslname

/* internal snapshot extension */
#define	im_pSeries	im_imap.in_pSeries
#define	im_snapType	im_imap.in_snapType

/* disk inode allocation request structure
 *
 * Passed by pointer to diAlloc(), diFree() and diAllocSpecial()
 * (see the prototypes in this header).
 */
typedef struct ialloc 
{
	ino64_t	ino;		/* inode number			*/
	pxd_t	ixpxd;		/* inode extent address 	*/
	pxd_t	iagpxd;		/* iag page address		*/
	int64	agstart;	/* start of ag holding inode 	*/
	inode_t	*ip;		/* for diFree(), diAlloc() */
} ialloc_t;

/* for standalone testdriver
 *
 * Shorthand for the two prefix-area fields that dinomap_t marks as
 * defunct (in_diskblock, in_maxag).
 */
#define	im_diskblock	im_imap.in_diskblock
#define	im_maxag	im_imap.in_maxag

/* external references
 *
 * Entry points of the disk inode map manager; only the declarations
 * live in this header.  All of them return int32 (presumably 0 on
 * success and an error number otherwise -- confirm against the
 * implementations).  "inode_t *" and "struct inode *" are both used
 * below; they are presumably the same type -- confirm in j2_inode.h.
 */
int32 diMount(inode_t *ipimap);
int32 diUnmount(inode_t *ipimap, uint32 mounterror, uint32 free_imap);
int32 diAlloc(inode_t *ipimap, boolean_t dir, struct ialloc *iad);
int32 diCOWIAG(struct inode *ipimap, struct inode *ipSnapshot,
	       struct inode *ip);
int32 diFree(struct inode *ipimap, struct ialloc *iad, int32 flag);
int32 diFreeExtent(int32 tid, inode_t *ipimap, pbuf_t *bp, int32 ixn);
int32 diRead(inode_t *ip);
int32 diReadSpecial(inode_t *ip, int64 offset);
int32 diWrite(int32 tid, inode_t *ip, struct txLock *ditlck, boolean_t updateData);
int32 diIAGRead(imap_t *imap, int32 iagno, jbuf_t **bpp);
int32 diUpdatePMap(inode_t *ip, ino64_t inum, boolean_t is_free, tblock_t *tblk);
int32 diSync(inode_t *ipimap);
int32 diRelocateIMap(inode_t *ipimap, int64 xfence,
	int64 *nBlockToMove, int64 *nBlockMoved);
int32 diRelocateIAG(int32 tid, inode_t *ipimap, int64 pbn,
	int32 index, int64 dxaddr);
int32 diRelocateExtent(inode_t *ipimap, int32 iagn, int32 ixn,
	inode_t **ixipList, int64 xfence);
int32 diResizeFS(inode_t *ipimap, inode_t *ipbmap, int64 xfence, int32 flag);
int32 diAllocSpecial(inode_t *ipmnt, ino64_t fsno, ino64_t inum, ialloc_t *iap);
#endif /* _KERNEL */

#endif	/* _H_J2_IMAP */
