VFS的初始化 内核启动的时候,调用vfsinit()完成虚拟文件系统的初始化: vfsinit() { struct vfsconf *vfsp; int i, maxtypenum;
/* * Initialize the vnode table */ vntblinit(); /* * Initialize the vnode name cache */ nchinit(); /* * Build vnode operation vectors. */ vfs_op_init(); vfs_opv_init(); /* finish the job */ /* * Initialize each file system type. */ vattr_null(&va_null); maxtypenum = 0; for (vfsp = vfsconf, i = 1; i <= maxvfsconf; i++, vfsp++) { if (i < maxvfsconf) vfsp->vfc_next = vfsp + 1; if (maxtypenum <= vfsp->vfc_typenum) maxtypenum = vfsp->vfc_typenum + 1; (*vfsp->vfc_vfsops->vfs_init)(vfsp); } /* next vfc_typenum to be used */ maxvfsconf = maxtypenum; } 这段代码注释得很清楚,不多说。这里面有个struct vfsconf结构,它描述文件系统的配置信息。定义为: struct vfsconf { struct vfsops *vfc_vfsops; /* filesystem operations vector */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ int (*vfc_mountroot)(void); /* if != NULL, routine to mount root */ struct vfsconf *vfc_next; /* next in list */ };注释很清楚,不多说。 vfsinit就是初始化vnode表,name cache,和vnode operation vectors,最后对每种文件系统调用对应的vfsp->vfc_vfsops->vfs_init完成初始化。 struct vfsops定义vfs上的操作函数: struct vfsops { int (*vfs_mount) __P((struct mount *mp, char *path, caddr_t data, struct nameidata *ndp, struct proc *p)); int (*vfs_start) __P((struct mount *mp, int flags, struct proc *p)); int (*vfs_unmount) __P((struct mount *mp, int mntflags, struct proc *p)); int (*vfs_root) __P((struct mount *mp, struct vnode **vpp)); int (*vfs_quotactl) __P((struct mount *mp, int cmds, uid_t uid, caddr_t arg, struct proc *p)); int (*vfs_statfs) __P((struct mount *mp, struct statfs *sbp, struct proc *p)); int (*vfs_sync) __P((struct mount *mp, int waitfor, struct ucred *cred, struct proc *p)); int (*vfs_vget) __P((struct mount *mp, ino_t ino, struct vnode **vpp)); int (*vfs_fhtovp) __P((struct mount *mp, struct fid *fhp, struct mbuf *nam, struct vnode **vpp, int *exflagsp, struct ucred **credanonp)); int (*vfs_vptofh) __P((struct vnode *vp, struct fid *fhp)); 
int (*vfs_init) __P((struct vfsconf *)); int (*vfs_sysctl) __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *)); }; 下面对这段代码中调用的几个主要函数进行说明: void vntblinit() { simple_lock_init(&mntvnode_slock); simple_lock_init(&mntid_slock); simple_lock_init(&spechash_slock); TAILQ_INIT(&vnode_free_list); simple_lock_init(&vnode_free_list_slock); CIRCLEQ_INIT(&mountlist); } 这个函数完成vnode管理相关数据结构的初始化,分别初始化mntvnode_slock,mntid_slock,spechash_slock等simple lock,这些都是系统全局的lock,主要用来保护mount文件系统的操作,但spechash_slock这个lock干什么用我还不清楚。 接下来初始化系统全局的vnode_free_list,所有free的vnode均挂在vnode_free_list上。再初始化vnode_free_list_slock,这个lock用来同步对vnode_free_list的访问。最后初始化mountlist,这是系统所有mounted fs的链表。 void nchinit() { TAILQ_INIT(&nclruhead); nchashtbl = hashinit(desiredvnodes, M_CACHE, &nchash); } 这个函数调用hashinit初始化了name cache的hash table。name cache的目的是加速通过文件名查找对应vnode的操作。每次一个lookup操作结束后系统都将查找到的name和对应的vnode以及vnode的capability(即vnode数据结构中的v_id域)copy到name cache中。这样下次lookup的时候,首先会查找这个name cache,如果找到相同名称的文件,则马上比较capability,如果与name cache中保存的值相同则成功返回。同样如果一次lookup在某目录下没找到对应文件,则也会在name cache 中保存一个所谓的negative cache,即把下面结构的nc_name赋值为该文件名,但把nc_vp设为null,这样下次lookup这个文件的时候,内核通过name cache中的这个negative cache就知道在某某目录下没有这个文件,就可以直接跳过。 struct namecache { LIST_ENTRY(namecache) nc_hash; /* hash chain */ TAILQ_ENTRY(namecache) nc_lru; /* LRU chain */ struct vnode *nc_dvp; /* vnode of parent of name */ u_long nc_dvpid; /* capability number of nc_dvp */ struct vnode *nc_vp; /* vnode the name refers to */ u_long nc_vpid; /* capability number of nc_vp */ char nc_nlen; /* length of name */ char nc_name[NCHNAMLEN]; /* segment name */ }; 然后是vfs_op_init():它完成vnode operations vectors的初始化,但这里仅仅做了将系统全局的vfs vnode op vector数据结构清零,并没有真正设定每个文件系统对应的操作函数。 void vfs_op_init() { int i; /* Set all vnode vectors to a well known value. */ for (i = 0; vfs_opv_descs[i]; i++) *(vfs_opv_descs[i]->opv_desc_vector_p) = NULL; /* * Figure out how many ops there are by counting the table, * and assign each its offset. 
*/ for (vfs_opv_numops = 0, i = 0; vfs_op_descs[i]; i++) { vfs_op_descs[i]->vdesc_offset = vfs_opv_numops; vfs_opv_numops++; } } 系统中每个文件系统都有一个vnodeopv_desc来描述它的vnode操作函数: /* This structure is used to configure the new vnodeops vector. */ struct vnodeopv_entry_desc { struct vnodeop_desc *opve_op; /* which operation this is */ int (*opve_impl)(); /* code implementing this operation */ }; struct vnodeopv_desc { /* ptr to the ptr to the vector where op should go */ int (***opv_desc_vector_p)(); struct vnodeopv_entry_desc *opv_desc_ops; /* null terminated list */ }; 第一个opv_desc_vector_p很奇怪,好像没什么用处,不知道什么意思,好像唯一的作用就是定位每个文件系统的vnodeopv_entry_desc结构地址。主要还是第2个域opv_desc_ops,它定义了一个文件系统支持的所有操作,对每个operation都有一个描述它的vnodeopv_entry_desc。比如对于Berkeley的ffs文件系统,对应的op vector就是: int (**ffs_vnodeop_p)(); struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { { &vop_default_desc, vn_default_error }, { &vop_lookup_desc, ufs_lookup }, /* lookup */ { &vop_create_desc, ufs_create }, /* create */ { &vop_whiteout_desc, ufs_whiteout }, /* whiteout */ { &vop_mknod_desc, ufs_mknod }, /* mknod */ { &vop_open_desc, ufs_open }, /* open */ { &vop_close_desc, ufs_close }, /* close */ { &vop_access_desc, ufs_access }, /* access */ { &vop_getattr_desc, ufs_getattr }, /* getattr */ { &vop_setattr_desc, ufs_setattr }, /* setattr */ { &vop_read_desc, ffs_read }, /* read */ { &vop_write_desc, ffs_write }, /* write */ { &vop_lease_desc, ufs_lease_check }, /* lease */ { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */ { &vop_select_desc, ufs_select }, /* select */ { &vop_revoke_desc, ufs_revoke }, /* revoke */ { &vop_mmap_desc, ufs_mmap }, /* mmap */ { &vop_fsync_desc, ffs_fsync }, /* fsync */ { &vop_seek_desc, ufs_seek }, /* seek */ { &vop_remove_desc, ufs_remove }, /* remove */ { &vop_link_desc, ufs_link }, /* link */ { &vop_rename_desc, ufs_rename }, /* rename */ { &vop_mkdir_desc, ufs_mkdir }, /* mkdir */ { &vop_rmdir_desc, ufs_rmdir }, /* rmdir */ { &vop_symlink_desc, ufs_symlink }, /* symlink */ { &vop_readdir_desc, 
ufs_readdir }, /* readdir */ { &vop_readlink_desc, ufs_readlink }, /* readlink */ { &vop_abortop_desc, ufs_abortop }, /* abortop */ { &vop_inactive_desc, ufs_inactive }, /* inactive */ { &vop_reclaim_desc, ffs_reclaim }, /* reclaim */ { &vop_lock_desc, ufs_lock }, /* lock */ { &vop_unlock_desc, ufs_unlock }, /* unlock */ { &vop_bmap_desc, ufs_bmap }, /* bmap */ { &vop_strategy_desc, ufs_strategy }, /* strategy */ { &vop_print_desc, ufs_print }, /* print */ { &vop_islocked_desc, ufs_islocked }, /* islocked */ { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */ { &vop_advlock_desc, ufs_advlock }, /* advlock */ { &vop_blkatoff_desc, ffs_blkatoff }, /* blkatoff */ { &vop_valloc_desc, ffs_valloc }, /* valloc */ { &vop_reallocblks_desc, ffs_reallocblks }, /* reallocblks */ { &vop_vfree_desc, ffs_vfree }, /* vfree */ { &vop_truncate_desc, ffs_truncate }, /* truncate */ { &vop_update_desc, ffs_update }, /* update */ { &vop_bwrite_desc, vn_bwrite }, { (struct vnodeop_desc*)NULL, (int(*)())NULL } }; struct vnodeopv_desc ffs_vnodeop_opv_desc = { &ffs_vnodeop_p, ffs_vnodeop_entries }; vfs_op_init()首先将系统所有文件系统的那个opv_desc_vector_p设为null,然后设置每个fs的struct vnodeop_desc 结构中的vdesc_offset,这个结构是一个辅助的数据结构,用来保存一些文件系统相关的参数。定义如下: struct vnodeop_desc { int vdesc_offset; /* offset in vector--first for speed */ char *vdesc_name; /* a readable name for debugging */ int vdesc_flags; /* VDESC_* flags */
/* * These ops are used by bypass routines to map and locate arguments. * Creds and procs are not needed in bypass routines, but sometimes * they are useful to (for example) transport layers. * Nameidata is useful because it has a cred in it. */ int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */ int vdesc_vpp_offset; /* return vpp location */ int vdesc_cred_offset; /* cred location, if any */ int vdesc_proc_offset; /* proc location, if any */ int vdesc_componentname_offset; /* if any */ /* * Finally, we've got a list of private data (about each operation) * for each transport layer. (Support to manage this list is not * yet part of BSD.) */ caddr_t *vdesc_transports; };我对于这个结构每个成员的作用还不清楚,先放在这里。 vfs_op_init()做的事情很少,主要的初始化工作在vfs_opv_init()中完成,它初始化每个文件系统的操作函数(op vector)。 void vfs_opv_init() { int i, j, k; int (***opv_desc_vector_p)(); int (**opv_desc_vector)(); struct vnodeopv_entry_desc *opve_descp; /* Allocate the dynamic vectors and fill them in. */ for (i=0; vfs_opv_descs[i]; i++) { opv_desc_vector_p = vfs_opv_descs[i]->opv_desc_vector_p; /* * Allocate and init the vector, if it needs it. * Also handle backwards compatibility. */ A. if (*opv_desc_vector_p == NULL) { /* XXX - shouldn't be M_VNODE */ MALLOC(*opv_desc_vector_p, PFI*, vfs_opv_numops*sizeof(PFI), M_VNODE, M_WAITOK); bzero (*opv_desc_vector_p, vfs_opv_numops*sizeof(PFI)); DODEBUG(printf("vector at %x allocated\n", opv_desc_vector_p)); } B. opv_desc_vector = *opv_desc_vector_p; for (j=0; vfs_opv_descs[i]->opv_desc_ops[j].opve_op; j++) { opve_descp = &(vfs_opv_descs[i]->opv_desc_ops[j]);
/* * Sanity check: is this operation listed * in the list of operations? We check this * by seeing if its offest is zero. Since * the default routine should always be listed * first, it should be the only one with a zero * offset. Any other operation with a zero * offset is probably not listed in * vfs_op_descs, and so is probably an error. * * A panic here means the layer programmer * has committed the all-too common bug * of adding a new operation to the layer's * list of vnode operations but * not adding the operation to the system-wide * list of supported operations. */ if (opve_descp->opve_op->vdesc_offset == 0 && opve_descp->opve_op->vdesc_offset !=
VOFFSET(vop_default)) { printf("operation %s not listed in %s.\n", opve_descp->opve_op->vdesc_name, "vfs_op_descs"); panic ("vfs_opv_init: bad operation"); } /* Fill in this entry. */ C. opv_desc_vector[opve_descp->opve_op->vdesc_offset] = opve_descp->opve_impl; } } /* * Finally, go back and replace unfilled routines * with their default. (Sigh, an O(n^3) algorithm. I * could make it better, but that'd be work, and n is small.) */ D. for (i = 0; vfs_opv_descs[i]; i++) { opv_desc_vector = *(vfs_opv_descs[i]->opv_desc_vector_p); /* * Force every operations vector to have a default routine. */ if (opv_desc_vector[VOFFSET(vop_default)]==NULL) { panic("vfs_opv_init: operation vector without default routine."); } for (k = 0; k<vfs_opv_numops; k++) if (opv_desc_vector[k] == NULL) opv_desc_vector[k] = opv_desc_vector[VOFFSET(vop_default)]; E. } } vfs_opv_descs是系统定义的全局变量,是个数组,每个文件系统占一个元素,我们刚刚在vfs_op_init()中把每个opv_desc的opv_desc_vector_p设成NULL,这里我们为每个opv_desc分配内存(A~B段代码)。B~C段做一些检查,C那里把对应的操作函数地址填到op vector中,而对于那些没有提供文件系统特定(filesystem specific)的操作则在D~E段设定default的函数。 这段涉及到两种op vector,一个是所谓的vnode的操作vector,即vfs_opv_descs描述的,它是对vnode来说的,还有上面的struct vfsops 定义的,这是与文件系统相关的操作,两个是不同的。
用户1064776 2012-1-16 12:47
用户122571 2008-4-3 12:50