/* IBM_PROLOG_BEGIN_TAG */ /* This is an automatically generated prolog. */ /* */ /* bos720 src/bos/kernel/j2/include/j2_imap.h 1.32 */ /* */ /* Licensed Materials - Property of IBM */ /* */ /* Restricted Materials of IBM */ /* */ /* COPYRIGHT International Business Machines Corp. 1999,2009 */ /* All Rights Reserved */ /* */ /* US Government Users Restricted Rights - Use, duplication or */ /* disclosure restricted by GSA ADP Schedule Contract with IBM Corp. */ /* */ /* IBM_PROLOG_END_TAG */ /* @(#)90 1.32 src/bos/kernel/j2/include/j2_imap.h, sysj2, bos720 4/22/09 12:55:48 */ /* * COMPONENT_NAME: (SYSJ2) JFS2 Physical File System * * FUNCTIONS: * * ORIGINS: 27 * * (C) COPYRIGHT International Business Machines Corp. 1996, 1999 * All Rights Reserved * Licensed Materials - Property of IBM * * US Government Users Restricted Rights - Use, duplication or * disclosure restricted by GSA ADP Schedule Contract with IBM Corp. */ #ifndef _H_J2_IMAP #define _H_J2_IMAP /* * FUNCTION: disk inode allocation map manager */ /* Include j2_inode and j2_xtree out of tradition */ #include #include #include /* * j2_imap.h: disk inode manager */ #define EXTSPERIAG 128 /* number of disk inode extent per iag */ #define IMAPBLKNO 0 /* lblkno of dinomap within inode map */ #define SMAPSZ 4 /* number of words per summary map */ #define EXTSPERSUM 32 /* number of extents per summary map entry */ #define L2EXTSPERSUM 5 /* l2 number of extents per summary map */ #define PGSPERIEXT 4 /* number of 4K pages per dinode extent */ #define MAXIAGS ((1<<20)-1) /* maximum number of iags */ #define MAXAG 128 /* maximum number of allocation groups */ #define AMAPSIZE 512 /* bytes in the IAG allocation maps */ #define SMAPSIZE 16 /* bytes in the IAG summary maps */ /* convert inode number to iag number */ #define INOTOIAG(ino) ((ino) >> L2INOSPERIAG) /* convert iag number to logical block number of the iag page */ #define IAGTOLBLK(iagno,l2nbperpg) (((iagno) + 1) << (l2nbperpg)) /* get the starting block number of the 4K page of an inode extent * that contains ino. */ #define INOPBLK(pxd,ino,l2nbperpg) (addressPXD((pxd)) + \ ((((ino) & (INOSPEREXT-1)) >> L2INOSPERPAGE) << (l2nbperpg))) /* number of backed inodes in inode extent. 0 if PXD address is NULL. */ #define IXnBackedInode(_ixpxd, _l2bsize) \ ( addressPXD(_ixpxd) ? \ (((lengthPXD(_ixpxd) << (_l2bsize)) >> L2jDISIZE)) : 0 ) /* mask of backed inodes in inode extent bit map */ #define IXBitMap(_niix) \ ((0xffffffff >> (INOSPEREXT - (_niix))) << (INOSPEREXT - (_niix))) /* dinode buffer page size */ #define IXPageSize(_ixpxd, _l2bsize) \ MIN(BPSIZE, (lengthPXD(_ixpxd) << (_l2bsize))) /* * Serialization of inode allocation map * * Each IAG is locked by obtaining the buffer for the IAG page. * * Each AG has a exclusive lock which is used to control * the serialization of the AG level information. * This lock should be taken first whenever an AG * level list will be modified or accessed. * * There is a inode readers/writer lock for the inode map inode. * A read lock needs to be taken whenever an IAG is read from the map or * the global level information is read. * A write lock needs to be taken whenever the global level information * is modified or an atomic operation needs to be used. * * The control page of the inode map is read into memory by diMount(). * Thereafter it should only be modified in memory and then it will be * written out when the filesystem is unmounted by diUnmount(). * * imapctllock is used to serialize the transactions on the ipimap ctl * page. It is taken in most functions that call siUpdatePSeries(), which * update the intsnap information. It is not used in siCreate() because * the filesystem is in a quiesced state at that point. It also needs to * be taken by diNewIAG(), which updates lists on the ctl page. * * The imapctllock is roughly paired with tlckIMAPCTL. It is taken before * txLock(tlckIMAPCTL) and released after txEnd(). Since txCommit() * in the middle calls diUpdatePMap() which grabs the ipimap lock, it * must be taken before the ipimap lock everywhere. It should also be * taken after the iplist is locked. Detailed lock ordering in diNewIAG() * is described in its prolog. * * The imapserieslock is a complex lock for read/write access to the snapshot * series for both internal and external snapshots to make sure the pseries * information in the imapctl page stays in * step with the list of ipSnapshots in the wisSeries structure, and to * make sure queries see the pSeries in a consistent state. Although external * snapshots do not store the pSeries under the imap directly, the ipimap * acts as a gateway for access to both. */ /* iag free list lock */ #define IAGFREE_LOCK_ALLOC(imap,n)\ MUTEXLOCK_ALLOC(&imap->im_freelock,LOCK_ALLOC_PAGED,J2_IAGFREE_LOCK_CLASS,n) #define IAGFREE_LOCK_FREE(imap) MUTEXLOCK_FREE(&imap->im_freelock) #define IAGFREE_LOCK_INIT(imap) MUTEXLOCK_INIT(&imap->im_freelock) #define IAGFREE_LOCK(imap) MUTEXLOCK_LOCK(&imap->im_freelock) #define IAGFREE_UNLOCK(imap) MUTEXLOCK_UNLOCK(&imap->im_freelock) #define FSIAGFREE_LOCK_ALLOC(imap,n)\ MUTEXLOCK_ALLOC(&imap->im_fsfreelock,LOCK_ALLOC_PAGED,J2_IAGFREE_LOCK_CLASS,n) #define FSIAGFREE_LOCK_FREE(imap) MUTEXLOCK_FREE(&imap->im_fsfreelock) #define FSIAGFREE_LOCK_INIT(imap) MUTEXLOCK_INIT(&imap->im_fsfreelock) #define FSIAGFREE_LOCK(imap) MUTEXLOCK_LOCK(&imap->im_fsfreelock) #define FSIAGFREE_UNLOCK(imap) MUTEXLOCK_UNLOCK(&imap->im_fsfreelock) /* imapctl lock used to serialize txns on ipimap ctl page. See above. */ #define IMAPCTL_LOCK_ALLOC(__imap, __n)\ MUTEXLOCK_ALLOC(&(__imap)->im_imapctllock,LOCK_ALLOC_PAGED,J2_IMAP_LOCK_CLASS,__n) #define IMAPCTL_LOCK_FREE(__imap) MUTEXLOCK_FREE(&(__imap)->im_imapctllock) #define IMAPCTL_LOCK_INIT(__imap) MUTEXLOCK_INIT(&(__imap)->im_imapctllock) #define IMAPCTL_LOCK_TAKEN(__imap, __locktaken) \ { \ MUTEXLOCK_LOCK(&(__imap)->im_imapctllock); \ __locktaken = TRUE; \ } #define IMAPCTL_UNLOCK_TAKEN(__imap, __locktaken) \ { \ if (__locktaken) \ { \ MUTEXLOCK_UNLOCK(&(__imap)->im_imapctllock); \ __locktaken = FALSE; \ } \ } /* snapseries used to serialize access to snapshot series, gated by ipimap. * See above */ #define SNAPSERIES_LOCK_ALLOC(__imap, __n)\ RDWRLOCK_ALLOC(&(__imap)->im_snapserieslock,LOCK_ALLOC_PAGED,J2_IMAP_LOCK_CLASS,__n) #define SNAPSERIES_LOCK_FREE(__imap) RDWRLOCK_FREE(&(__imap)->im_snapserieslock) #define SNAPSERIES_LOCK_INIT(__imap) RDWRLOCK_INIT(&(__imap)->im_snapserieslock) #define SNAPSERIES_READLOCK_TAKEN(__imap, __locktaken) \ { \ READ_LOCK(&(__imap)->im_snapserieslock); \ __locktaken = TRUE; \ } #define SNAPSERIES_READLOCK_MINE(__imap, __locktaken) \ { \ if (!lock_mine(&(__imap)->im_snapserieslock)) { \ READ_LOCK(&(__imap)->im_snapserieslock); \ __locktaken = TRUE; \ } else \ __locktaken = FALSE; \ } #define SNAPSERIES_READUNLOCK_TAKEN(__imap, __locktaken) \ { \ if (__locktaken) \ { \ READ_UNLOCK(&(__imap)->im_snapserieslock); \ __locktaken = FALSE; \ } \ } #define SNAPSERIES_WRITELOCK_TAKEN(__imap, __locktaken) \ { \ WRITE_LOCK(&(__imap)->im_snapserieslock); \ __locktaken = TRUE; \ } #define SNAPSERIES_WRITEUNLOCK_TAKEN(__imap, __locktaken) \ { \ if (__locktaken) \ { \ WRITE_UNLOCK(&(__imap)->im_snapserieslock); \ __locktaken = FALSE; \ } \ } /* per ag iag list locks */ #define AG_LOCK_ALLOC(imap,index,n)\ MUTEXLOCK_ALLOC(&imap->im_aglock[index],LOCK_ALLOC_PAGED,J2_AG_LOCK_CLASS,n) #define AG_LOCK_FREE(imap,index) MUTEXLOCK_FREE(&imap->im_aglock[index]) #define AG_LOCK_INIT(imap,index) MUTEXLOCK_INIT(&(imap->im_aglock[index])) #define AG_LOCK(imap,agno) MUTEXLOCK_LOCK(&imap->im_aglock[agno]) #define AG_UNLOCK(imap,agno) MUTEXLOCK_UNLOCK(&imap->im_aglock[agno]) #if defined(_POWER_MP) || defined(SMP) #define ATOMIC_ADD(addr, incr) fetch_and_add(&(addr), (incr)) #else /* UP */ #define ATOMIC_ADD(addr, incr) (addr) += (incr) #endif #define j2lwmInodePerAG 5 /* set a minimum # of blocks for free inode * per AG is 5%. */ /* * inode allocation map: * * inode allocation map consists of * . the inode map control page dinomap_t and * . inode allocation group pages iag_t (per 4096 inodes) * which are addressed by standard J2 xtree. */ /* * inode allocation group page (per 4096 inodes of an IAG) */ typedef struct { int64 agstart; /* 8: starting block of ag */ int32 iagnum; /* 4: inode allocation group number */ int32 inofreefwd; /* 4: ag inode free list forward */ int32 inofreeback; /* 4: ag inode free list back */ int32 extfreefwd; /* 4: ag inode extent free list forward */ int32 extfreeback; /* 4: ag inode extent free list back */ int32 iagfree; /* 4: iag free list */ /* summary map: 1 bit per inode extent */ int32 inosmap[SMAPSZ];/* 16: sum map of mapwords w/ free inodes; * note: this indicates free and backed * inodes, if the extent is not backed the * value will be 1. if the extent is * backed but all inodes are being used the * value will be 1. if the extent is * backed but at least one of the inodes is * free the value will be 0. */ int32 extsmap[SMAPSZ];/* 16: sum map of mapwords w/ free extents */ int32 nfreeinos; /* 4: number of free inodes */ int32 nfreeexts; /* 4: number of free extents */ /* (72) */ int32 dsnum; /* 4: dataset number from aggregate imap*/ int32 dsiagnum; /* 4: dataset iag number */ int32 dsiaglist; /* 4: dataset iag list link */ uint8 pad[1964]; /* 1964: pad to 2048 bytes */ /* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */ uint32 wmap[EXTSPERIAG]; /* 512: working allocation map */ uint32 pmap[EXTSPERIAG]; /* 512: persistent allocation map */ pxd_t inoext[EXTSPERIAG]; /* 1024: inode extent addresses */ } iag_t; /* (4096) */ RAS_FILE_ASSERT(iag_t, sizeof(iag_t) == PSIZE); /* * per AG control information (in inode map control page) */ typedef struct { int32 inofree; /* 4: free inode list anchor */ int32 extfree; /* 4: free extent list anchor */ int32 numinos; /* 4: number of backed inodes */ int32 numfree; /* 4: number of free inodes */ } iagctl_t; /* (16) */ /* J2DSET_CNAME_MAX: # chars in dataset descriptive name; matches DSET_CNAME_MAX * defined in dset.h; not used here since that is not exported to user space */ #define J2DSET_CNAME_MAX 64 /* * per fileset/aggregate inode map control page */ typedef struct { /* 64: prefix area */ union { uint8 prefix[64]; struct { int32 in_freeiag; /* 4: free iag list anchor */ int32 in_nextiag; /* 4: next free iag number */ int32 in_numinos; /* 4: num of backed inodes */ int32 in_numfree; /* 4: num of free backed inodes */ int32 in_nbperiext; /* 4: num of blocks per inode extent */ int32 in_l2nbperiext; /* 4: l2 of in_nbperiext */ int32 in_diskblock; /* 4: defunct */ int32 in_maxag; /* 4: defunct */ int32 in_iaglisthead; /* 4: dataset iag list anchor */ int32 in_iaglisttail; /* 4: dataset iag list anchor */ }; /* 40: */ }; /* 384: per imap extension */ union { uint8 extension1[384]; /* dataset management info per fsimap */ struct { /* global filesystem wide values */ int32 in_fsfreeiag; /* 4: fs free iag list anchor */ int32 in_fsnextiag; /* 4: fs next free iag number */ int32 in_fsnuminos; /* 4: fs # backed inodes */ int32 in_fsnumfree; /* 4: fs # free backed inodes */ /* dataset */ int32 in_dsDirectory; /* 4: Dataset directory inum */ int32 in_dsZombieList; /* 4: dsZombieList anchor */ /* snapshot */ int32 in_numDSSnap; /* 4: # dataset snapshots */ int32 in_snapDir; /* 4: Snapshot directory inum */ int32 in_snapZombieList; /* 4: snapshotZombieList anchor */ } fsImap; /* 36: */ /* dataset management info per dsimap */ struct { uint64 in_uuid[2]; /* 16: 128-bit UUID */ uint64 in_auxuuid[2]; /* 16: 128-bit AUXUUID */ char in_dscname[J2DSET_CNAME_MAX]; /* 64: Dataset descriptive name */ char in_dslname[J2_NAME_MAX+1]; /* 256: local name */ int32 in_dsZombieNext; /* 4: dsZombieList link */ int32 in_dsZombiePrev; /* 4: dsZombieList link */ /* snapshotZombieList link fields are defined * in sMapObject */ uint32 in_dsflags; /* 4: ds flags */ } dsImap; /* 364 */ }; /* 384: */ /* 576: internal snapshot extension */ union { uint8 extension2[576]; struct { int32 in_snapType; /* 4: Type of snapshot */ isSeries_t in_pSeries; /* 528: Internal snapshot series */ }; }; uint8 pad[1024]; /* 1024: pad to 2048 */ /* 2048: AG control information */ iagctl_t in_agctl[MAXAG]; } dinomap_t; /* (4096) */ RAS_FILE_ASSERT(dinomap_t, sizeof(dinomap_t) == PSIZE); /* slot size for logging purposes, in bytes log2 */ #define L2IMAPCTLSLOTSIZE 2 #ifdef _KERNEL #include #include /* IP_HKWD_INUM: Used to get inode number for trace points * This uses the high-order 32-bits for the fileset number since the inode * number trace points are already passing the maximum number of params. * If MAXIAGS changes to allow inodes with > 32 bit inode number, this * approach will have to be modified. */ #define IP_HKWD_INUM(IP) (((IP)->i_fileset == FILESYSTEM_I) ? (IP)->i_number \ : ((((uint64)(IP)->i_fileset) << 32) | (IP)->i_number)) /* * In-core inode map control page */ typedef struct inomap { dinomap_t im_imap; /* 4096: on-disk inode allocation control */ inode_t *im_ipimap; /* 8: ptr to inode for imap */ MUTEXLOCK_T im_freelock; /* 8: iag free list lock */ MUTEXLOCK_T im_aglock[MAXAG]; /* 512: per AG locks */ uint32 *im_DBGdimap; MUTEXLOCK_T im_fsfreelock; /* 8: fs iag free list lock */ MUTEXLOCK_T im_imapctllock; /* 8: serialize ctl page txns: * * taken before bmRead(imapctl), * * and txLock(IMAPCTL) * * released after txEnd() */ RDWRLOCK_T im_snapserieslock;/* 16: serialize access to snapshot * * series */ /* dataset extension */ union { /* dataset management info per fsimap */ struct { int32 im_dsnacount; /* active dataset/dsnode count */ /* list of instantiated datasets of the filesystem */ inode_t *im_fsdsList; /* 8: anchor in fsimap */ }; /* dataset management info per dsimap */ struct { dsnode_t *im_dsnp; /* 8: dsnode */ int32 im_dsncount; /* dsnode reference count */ /* list of instantiated datasets of the filesystem */ inode_t *im_fsdsNext; /* 8: link in dsimap */ inode_t *im_fsdsPrev; /* 8: link in dsimap */ }; }; /* snapshot extension */ struct wisSeries *im_wisSeries; /* 8: ptr to snapshot series */ /* array of instantiated snapshot generations bound with * the filesystem/dataset is specified by snapshotSeries */ } imap_t; #define im_freeiag im_imap.in_freeiag #define im_nextiag im_imap.in_nextiag #define im_numinos im_imap.in_numinos #define im_numfree im_imap.in_numfree #define im_nbperiext im_imap.in_nbperiext #define im_l2nbperiext im_imap.in_l2nbperiext #define im_agctl im_imap.in_agctl #define im_iaglisthead im_imap.in_iaglisthead #define im_iaglisttail im_imap.in_iaglisttail #define im_fsfreeiag im_imap.fsImap.in_fsfreeiag #define im_fsnextiag im_imap.fsImap.in_fsnextiag #define im_fsnuminos im_imap.fsImap.in_fsnuminos #define im_fsnumfree im_imap.fsImap.in_fsnumfree #define im_dsDirectory im_imap.fsImap.in_dsDirectory #define im_dsZombieList im_imap.fsImap.in_dsZombieList #define im_numDSSnap im_imap.fsImap.in_numDSSnap #define im_snapDir im_imap.fsImap.in_snapDir #define im_snapZombieList im_imap.fsImap.in_snapZombieList #define im_dsflags im_imap.dsImap.in_dsflags #define im_uuid im_imap.dsImap.in_uuid #define im_auxuuid im_imap.dsImap.in_auxuuid #define im_dscname im_imap.dsImap.in_dscname #define im_dslname im_imap.dsImap.in_dslname #define im_pSeries im_imap.in_pSeries #define im_snapType im_imap.in_snapType /* disk inode allocation request structure */ typedef struct ialloc { ino64_t ino; /* inode number */ pxd_t ixpxd; /* inode extent address */ pxd_t iagpxd; /* iag page address */ int64 agstart; /* start of ag holding inode */ inode_t *ip; /* for diFree(), diAlloc() */ } ialloc_t; /* for standalone testdriver */ #define im_diskblock im_imap.in_diskblock #define im_maxag im_imap.in_maxag /* external references */ int32 diMount(inode_t *ipimap); int32 diUnmount(inode_t *ipimap, uint32 mounterror, uint32 free_imap); int32 diAlloc(inode_t *ipimap, boolean_t dir, struct ialloc *iad); int32 diCOWIAG(struct inode *ipimap, struct inode *ipSnapshot, struct inode *ip); int32 diFree(struct inode *ipimap, struct ialloc *iad, int32 flag); int32 diFreeExtent(int32 tid, inode_t *ipimap, pbuf_t *bp, int32 ixn); int32 diRead(inode_t *ip); int32 diReadSpecial(inode_t *ip, int64 offset); int32 diWrite(int32 tid, inode_t *ip, struct txLock *ditlck, boolean_t updateData); int32 diIAGRead(imap_t *imap, int32 iagno, jbuf_t **bpp); int32 diUpdatePMap(inode_t *ip, ino64_t inum, boolean_t is_free, tblock_t *tblk); int32 diSync(inode_t *ipimap); int32 diRelocateIMap(inode_t *ipimap, int64 xfence, int64 *nBlockToMove, int64 *nBlockMoved); int32 diRelocateIAG(int32 tid, inode_t *ipimap, int64 pbn, int32 index, int64 dxaddr); int32 diRelocateExtent(inode_t *ipimap, int32 iagn, int32 ixn, inode_t **ixipList, int64 xfence); int32 diResizeFS(inode_t *ipimap, inode_t *ipbmap, int64 xfence, int32 flag); int32 diAllocSpecial(inode_t *ipmnt, ino64_t fsno, ino64_t inum, ialloc_t *iap); #endif /* _KERNEL */ #endif /* _H_J2_IMAP */