/* IBM_PROLOG_BEGIN_TAG                                                   */
/* This is an automatically generated prolog.                             */
/*                                                                        */
/* bos720 src/bos/kernel/sys/vmdiskfs.h 1.3                               */
/*                                                                        */
/* Licensed Materials - Property of IBM                                   */
/*                                                                        */
/* COPYRIGHT International Business Machines Corp. 1988,2008              */
/* All Rights Reserved                                                    */
/*                                                                        */
/* US Government Users Restricted Rights - Use, duplication or            */
/* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.      */
/*                                                                        */
/* IBM_PROLOG_END_TAG                                                     */
/* @(#)03	1.3  src/bos/kernel/sys/vmdiskfs.h, sysvmm, bos720 3/12/08 15:23:02 */

#ifndef _H_VMDISKFS
#define _H_VMDISKFS

/*
 * COMPONENT_NAME: (SYSVMM) Virtual Memory Manager
 *
 * FUNCTIONS:
 *
 * ORIGINS: 27
 *
 */

#include <sys/types.h>

/*
 * the same kind of allocation map is used for disk fragments
 * and inodes. the description given below is in terms of
 * disk fragments but the same holds for inodes or fragments.
 * (allocation state of each is represented by a bit).
 */

/*
 * bit maps are grouped into a working map and a permanent map.
 * the working map represents the current allocation state
 * and the permanent map represents a committed state. the map
 * is written to disk periodically by log sync code or by
 * normal paging activity. if the system crashes, the info
 * in the log and the last copy of the permanent map on 
 * disk is used to compute the committed allocation state.
 * one bit in each map represents the state of a block (0 = free)
 * with the bit position in the map being equal to block number.
 */

/* in V3 file-systems, allocation requests are for a sequence of
 * 1 to 8 bits, all of which must be in a byte of the map. for
 * V4 requests are for a sequence of 1 to 32 bits. considering
 * the map as an array of double-words (64-bits), a request is
 * wholly satisfied with bits within a double-word.
 *
 * to avoid sequential scans of all the bytes in the map, a 
 * tree structure is maintained that specifies the longest
 * sequence in groups of words in a page. a leaf of  the tree
 * is a word of 4 bytes with each byte encoding the longest
 * sequence in 8-bytes of the map. a node at the next level
 * is a word each byte of which encodes the longest sequence
 * in the 4 bytes of a leaf word. the tree has 4-levels with
 * a fan-out of 4 at each level.
 */

/* the tree structure is contained in the first 512 bytes
 * of the vmdmap structure. these 512 bytes represent 
 * control information. in V3, the control information for
 * a page is located at the beginning of each page. in V4
 * the control information has been moved to pages that hold
 * only control information. thus in V4, pages 9*k are
 * control info pages and all other pages consist of only
 * bit maps. page 9*k contains the control info for pages 
 * 9*k + 1, 9*k + 2, .. 9*k+8. the control info for these is
 * mapped in page 9*k in the obvious way : the info for page
 * 9*k + n is at offset 512*(n - 1).
 */

/* allocation groups. disk blocks are aggregated into contiguous
 * blocks called allocation groups and a group of inodes is placed
 * in each disk-allocation group. the run-time block allocator
 * attempts to place the disk-blocks for a file near its inode
 * so that disk-seeks are minimized by preferentially allocating
 * from its allocation group.
 */

/* in converting an existing V3 file system to a V4 system,
 * the sizes of permitted allocation group sizes (expressed in
 * number of bits) must be considered. in V4 the size of a
 * group is constrained to be a power of 2 multiple of
 * MINAGSIZEV4 = 512 no larger than DBPERPAGEV4 = 16384.
 * with V3 it is constrained to be any multiple of MINAGSIZE = 256
 * which is also a divisor of DBPERPAGE = 7*2048. in V3 only
 * filesystems of size < 8 megabytes when created will have
 * a size other than AGDEFAULT = 2048. for a file system of
 * size greater than 4 and up to 7 megabytes, it is 7*256.
 * for all other small sizes, it is a power of 2 multiple
 * of 256 that is less than AGDEFAULT.
 */

/*
 * each (file-system) disk map and each inode map is kept in
 * its own segment. however, all paging space disk maps
 * are kept in one segment which is allocated at VMM init
 * and whose index in the scb table is the constant DMAPSIDX.
 * all disk maps are the same size DMAPSIZE (8 megabytes).
 * the offset of the first page of the map corresponding
 * to the (paging space) device with PDT index pdtx is equal
 * to pdtx*DMAPSIZE. paging space disk maps use the version 0
 * formats.
 */

/*
 * sizing constants for allocation maps. every macro whose expansion
 * is an expression is fully parenthesized so that it can be used
 * safely inside a larger expression (e.g. 2 * L2DMPAGES).
 */
#define MAXMAPSIZE	(16*(1 << 20))  	/* maximum map size in blocks */
#define DMAPSIZE	(1 << 23)		/* map size = 8 megabyte */
#define L2DMSIZE	23			/* log of DMAPSIZE	 */
#define L2DMPAGES	(L2DMSIZE - 12)		/* log of DMAPSIZE in 4k pages */
#define WPERPAGE 	(7*512/8)		/* work-map words V3 */
#define WPERPAGEV4	(8*512/8)		/* work-map words V4  */
#define DBWORD		32			/* bits per word */
#define DBPERDWORD	(DBWORD*2)		/* bits per double word */
#define L2DBWORD	5			/* log of DBWORD    */
#define DBPERPAGE	(DBWORD*WPERPAGE)	/* bits per page V3 */
#define DBPERPAGEV4	(DBWORD*WPERPAGEV4)	/* bits per page V4 */
#define MAXPGDEV	128			/* max number of paging disks */
#define LEAFIND		21			/* index left-most tree leaf */
#define TREESIZE	(64+16+4+1)		/* tree size in words */
#define MINAGSIZE	256			/* min alloc group V3  */
#define MINAGSIZEV4	512			/* min alloc group V4  */
#define MAXAGPAGE	(DBPERPAGE/MINAGSIZE)	/* max alloc groups per page */
#define AGDEFAULT	2048			/* dflt alloc gr size 4k pages*/
#define AGDEFAULTV4	2048			/* dflt alloc gr size 4k pages*/
#define CLDEFAULT	8			/* dflt cluster size V3 */
#define FSCLSIZE	4			/* fs realloc cluster size */
#define ALLOCMAPV3	0			/* version number for V3 maps */
#define ALLOCMAPV4	1			/* version number for V4 maps */
#define WPAR_PGD	2			/* Async MCR paging device */
#define FRAGDEFAULT	4096			/* default fragment size */
#define MINFRAGSIZE	512			/* min val fragsize */
#define MAXFRAGSIZE	4096			/* max val fragsize */
#define NBPIDEFAULT	4096			/* dflt num bytes per inode */
#define MINNBPI		512			/* min num bytes per inode */
#define MAXNBPI		131072			/* max num bytes per inode */
#define CLDEFAULTV4	32			/* default cluster size V4 */
#define CLDEFAULTBIG	64			/* default cluster size big allocation */
#define MAXAGSIZEV4	DBPERPAGEV4		/* largest ag size in bits */
#define LMAPCTL 	512			/* length control data in bytes */

/* one 4k page of an allocation map.
 *
 * the first 512 bytes of vmdmap represent control information.
 * in version V3, this is followed by an array of working-bit
 * maps and then an array of permanent bit maps. the size of
 * vmdmap is 4k bytes. in version V4, the remainder of the page
 * is an array of 7 more 512 byte control structures.
 *
 * NOTE(review): the 512-byte / 4k figures rely on 4-byte uint and
 * 2-byte short; field order and types must not be changed.
 */
struct vmdmap
{
	/* begin control data. 512 bytes */
	uint	mapsize;	/* number of fragments covered by map */
	uint	freecnt;	/* total number of free fragments */
	uint    agsize;		/* allocation group size in fragments */
	uint	agcnt;	        /* number of allocation groups in this page */
	uint 	totalags;	/* number of ags in map (page 0 only) */
	uint	lastalloc;	/* last fragment allocated (page 0 only) */
	uint	maptype;	/* type of map */
	uint 	clsize;		/* maximum sequence to allocate */
	uint	clmask;		/* encoded form of clsize, V3 only */
	uint	version;	/* version number */
	uint 	spare0;		/* spare */
	uint	spare1;		/* spare */
	short	agfree[MAXAGPAGE]; 	/* free counts, one per allocation group */
	uint    tree[TREESIZE];		/* tree of max-sequence lengths */
	uint	btree_nxt;      /* next uninitialized btree leaf (page 0 only) */
	caddr_t	btree;		/* btree pointer (page 0 only) */
#ifndef __64BIT__		/* The combination of the fields btree and
    				   spare2 should always be a double word,
    				   so builds without __64BIT__ pad here */
        int     spare2;         /* spare */
#endif
	/* end of control data.
	 * begin allocation maps for V3 (working maps, then permanent
	 * maps); for V4, more summary (control) info to the end of
	 * the 4k page.
	 */
	uint	mapsorsummary[2*WPERPAGE];  
};

#endif /* _H_VMDISKFS */
