linux文件系统之文件的打开与关闭

来源:百度文库 编辑:九乡新闻网 时间:2024/04/30 22:21:20
------------------------------------------ 本文系本站原创,欢迎转载!转载请注明出处:http://ericxiao.cublog.cn/------------------------------------------一:前言文件的操作主要包括了文件的打开关闭和读写.在这节中主要分析了linux内核中的文件操作的实现.还是同前两节一样,涉及到块设备与页面缓存的部份先放一边.后续有会有专题分析与此相关的内容.二:文件的打开在用户空间的,打开文件常用的api是open().它的系统调用入口是sys_open():. asmlinkage long sys_open(const char __user * filename, int flags, int mode){     char * tmp;     int fd, error; #if BITS_PER_LONG != 32     flags |= O_LARGEFILE;#endif     //从用户空间copy值     tmp = getname(filename);     fd = PTR_ERR(tmp);     if (!IS_ERR(tmp)) {         //分配一个没有被使用的fd         fd = get_unused_fd();         if (fd >= 0) {              //取得与文件相关的file结构              struct file *f = filp_open(tmp, flags, mode);              error = PTR_ERR(f);              if (IS_ERR(f))                   goto out_error;              //将file 添加file_struct中的fd数组的相应项              fd_install(fd, f);         }out:         //释放分配的内存空间         putname(tmp);     }     return fd; out_error:     put_unused_fd(fd);     fd = error;     goto out;}与进程相关的文件系统结构在<>已经分析过了.如有不太清楚的可以自行参阅这篇文章.首先在进程中取得一个没有被使用的文件描述符.这是在get_unused_fd()中完成的.它的代码如下:int get_unused_fd(void){     struct files_struct * files = current->files;     int fd, error;      error = -EMFILE;     spin_lock(&files->file_lock); repeat:     //取得files->open_fds->fds_bits中下一个没有使用的位     fd = find_next_zero_bit(files->open_fds->fds_bits,                    files->max_fdset,                    files->next_fd);      /*      * N.B. For clone tasks sharing a files structure, this test      * will limit the total number of files that can be opened.      */      //超过了文件描述符的最大值限制     if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur)         goto out;      /* Do we need to expand the fdset array? 
*/     //max_fdset: 位图位的总数      //如果超过了位图的总数     if (fd >= files->max_fdset) {         error = expand_fdset(files, fd);         if (!error) {              error = -EMFILE;              goto repeat;         }         goto out;     }          /*       * Check whether we need to expand the fd array.      */      //如果超过了所描述对象的总数     if (fd >= files->max_fds) {         //扩充文件描述对象数组         error = expand_fd_array(files, fd);         if (!error) {              error = -EMFILE;              goto repeat;         }         goto out;     }      //在open_fds置该位     FD_SET(fd, files->open_fds);     //在close_on_exec中清除该位.表示如果调用exec()执行一个新程序的时候不需要关闭这个     //文件描述符     FD_CLR(fd, files->close_on_exec);     files->next_fd = fd + 1;#if 1     /* Sanity check */     //如果在fd中的相应项不为NULL 将其置NULL     if (files->fd[fd] != NULL) {         printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);         files->fd[fd] = NULL;     }#endif     error = fd; out:     spin_unlock(&files->file_lock);     return error;}如果文件描述符位图空间不够或者文件对象描述符数组空间不够.就会调用expand_fdset()和expand_fd_array()进行空间的扩展.代码分别如下所示:int expand_fdset(struct files_struct *files, int nr){     fd_set *new_openset = NULL, *new_execset = NULL;     int error, nfds = 0;      error = -EMFILE;     //超过了总限制     if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN)         goto out;      //现在文件描述符的最大值     nfds = files->max_fdset;     spin_unlock(&files->file_lock);      /* Expand to the max in easy steps */     //如果现在的文件描述符数目少于8个page大小,则扩展到8个page     //否则将其扩大两倍.其值不能超过规定的最大值     do {         if (nfds < (PAGE_SIZE * 8))              nfds = PAGE_SIZE * 8;         else {              nfds = nfds * 2;              if (nfds > NR_OPEN)                   nfds = NR_OPEN;         }     } while (nfds <= nr);      //分新配大小分配存储空间     error = -ENOMEM;     new_openset = alloc_fdset(nfds);     new_execset = alloc_fdset(nfds);     spin_lock(&files->file_lock);     if (!new_openset || !new_execset)         goto out;      error = 0;          /* Copy the existing 
tables and install the new pointers */     //将旧值copy到新分配的空间内.并将剩余空间置为0     //新新空间挂载到进程的file中.并释放旧空间     if (nfds > files->max_fdset) {         int i = files->max_fdset / (sizeof(unsigned long) * 8);         int count = (nfds - files->max_fdset) / 8;                  /*           * Don't copy the entire array if the current fdset is          * not yet initialised.            */          //copy和剩余段置零的过程         if (i) {              memcpy (new_openset, files->open_fds, files->max_fdset/8);              memcpy (new_execset, files->close_on_exec, files->max_fdset/8);              memset (&new_openset->fds_bits[i], 0, count);              memset (&new_execset->fds_bits[i], 0, count);         }          //交换新旧空晨         nfds = xchg(&files->max_fdset, nfds);         new_openset = xchg(&files->open_fds, new_openset);         new_execset = xchg(&files->close_on_exec, new_execset);         spin_unlock(&files->file_lock);         //将旧空间释放掉         free_fdset (new_openset, nfds);         free_fdset (new_execset, nfds);         spin_lock(&files->file_lock);         return 0;     }      /* Somebody expanded the array while we slept ... */ out:     spin_unlock(&files->file_lock);     if (new_openset)         free_fdset(new_openset, nfds);     if (new_execset)         free_fdset(new_execset, nfds);     spin_lock(&files->file_lock);     return error;}expand_fd_array()的代码如下:int expand_fd_array(struct files_struct *files, int nr){     struct file **new_fds;     int error, nfds;           error = -EMFILE;     if (files->max_fds >= NR_OPEN || nr >= NR_OPEN)         goto out;      //取得现在的文件描述对象数     nfds = files->max_fds;     spin_unlock(&files->file_lock);      /*       * Expand to the max in easy steps, and keep expanding it until      * we have enough for the requested fd array size.       
*/      //设置新的描述对象数值     do {#if NR_OPEN_DEFAULT < 256         if (nfds < 256)              nfds = 256;         else #endif         if (nfds < (PAGE_SIZE / sizeof(struct file *)))              nfds = PAGE_SIZE / sizeof(struct file *);         else {              nfds = nfds * 2;              if (nfds > NR_OPEN)                   nfds = NR_OPEN;         }     } while (nfds <= nr);      error = -ENOMEM;     new_fds = alloc_fd_array(nfds);     spin_lock(&files->file_lock);     if (!new_fds)         goto out;      /* Copy the existing array and install the new pointer */     //copy和设置剩余空间的过程,并将新旧空间交换.操作完成过后,释放旧空间     if (nfds > files->max_fds) {         struct file **old_fds;         int i;                  old_fds = xchg(&files->fd, new_fds);         i = xchg(&files->max_fds, nfds);          /* Don't copy/clear the array if we are creating a new            fd array for fork() */         if (i) {              memcpy(new_fds, old_fds, i * sizeof(struct file *));              /* clear the remainder of the array */              memset(&new_fds[i], 0,                     (nfds-i) * sizeof(struct file *));                spin_unlock(&files->file_lock);              free_fd_array(old_fds, i);              spin_lock(&files->file_lock);         }     } else {         /* Somebody expanded the array while we slept ... 
*/         spin_unlock(&files->file_lock);         free_fd_array(new_fds, nfds);         spin_lock(&files->file_lock);     }     error = 0;out:     return error;}取得空闲文件描述符之后,将取得与文件对应的file.将file与文件描述符关联起来的操作是在fd_install()中完成的.它的代码如下:void fastcall fd_install(unsigned int fd, struct file * file){     struct files_struct *files = current->files;     spin_lock(&files->file_lock);     //如果相应项已经有对象了.则是一个BUG     if (unlikely(files->fd[fd] != NULL))         BUG();     //将file添加至对象描述符数组     files->fd[fd] = file;     spin_unlock(&files->file_lock);}Sys_open()核心的操作是取得与文件相对应的file.这个操作是在filp_open()中完成的.它的代码如下:/* * Note that while the flag value (low two bits) for sys_open means: *   00 - read-only *   01 - write-only *   10 - read-write *   11 - special * it is changed into *   00 - no permissions needed *   01 - read-permission *   10 - write-permission *   11 - read-write * for the internal routines (ie open_namei()/follow_link() etc). 00 is * used by symlinks. */struct file *filp_open(const char * filename, int flags, int mode){     int namei_flags, error;     struct nameidata nd;      //因为在sys_open对flag的定义如filp_open的定义不相同。因此要把两者的flag进行转换     namei_flags = flags;     //转换低两位     if ((namei_flags+1) & O_ACCMODE)         namei_flags++;     //O_TRUNC:表示需要截尾,因此如果O_TRUNC被置是需要写权限的     if (namei_flags & O_TRUNC)         namei_flags |= 2;      //取得文件结点对应的nameidata.如果节点不存在,则新建之     error = open_namei(filename, namei_flags, mode, &nd);     if (!error)         //将文件节点对应的nameidata转换为file         return dentry_open(nd.dentry, nd.mnt, flags);      return ERR_PTR(error);}这段代码要注意作者附加的注释.在sys_open与filp_open()中标志位定义有些不相同.所以必须对标志进行相应的转换.转进去看一下open_namei()的操作.代码如下:int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd){     int acc_mode, error = 0;     struct dentry *dentry;     struct dentry *dir;     int count = 0;      acc_mode = ACC_MODE(flag);      /* Allow the LSM permission hook to distinguish append         access from general write access. 
*/        //附加模式     if (flag & O_APPEND)         acc_mode |= MAY_APPEND;      /* Fill in the open() intent data */     nd->intent.open.flags = flag;     nd->intent.open.create_mode = mode;      /*      * The simplest case - just a plain lookup.      */      //O_CREAT:如果文件不存在.则新建之      //如果没有定义O_CREAT标志.只要查找文件系统中结点是否存在就可以了     if (!(flag & O_CREAT)) {         error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd);         if (error)              return error;         goto ok;     }      /*      * Create - we need to know the parent.      */      //如果定义了O_CREAT标志.则先查找父结点     error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);     if (error)         return error;      /*      * We have the parent and last component. First of all, check      * that we are not asked to creat(2) an obvious directory - that      * will not do.      */     error = -EISDIR;     //判断查找是否成功     if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])         goto exit;      dir = nd->dentry;     nd->flags &= ~LOOKUP_PARENT;     down(&dir->d_inode->i_sem);      //到父目录中查找是否有该结点.如果没有该结点就会创建相应的dentry但dentry->d_inode为空     dentry = __lookup_hash(&nd->last, nd->dentry, nd); do_last:     error = PTR_ERR(dentry);     //查找错误,出错返回     if (IS_ERR(dentry)) {         up(&dir->d_inode->i_sem);         goto exit;     }      /* Negative dentry, just create the file */          //dentry->d_inode为空.说明这个结点是新建的     if (!dentry->d_inode) {         if (!IS_POSIXACL(dir->d_inode))              mode &= ~current->fs->umask;         error = vfs_create(dir->d_inode, dentry, mode, nd);         up(&dir->d_inode->i_sem);         dput(nd->dentry);         nd->dentry = dentry;         if (error)              goto exit;         /* Don't check for write permission, don't truncate */         acc_mode = 0;         flag &= ~O_TRUNC;         goto ok;     }      /*      * It already exists.      
*/      //结点原本就存在的情况     up(&dir->d_inode->i_sem);      error = -EEXIST;     if (flag & O_EXCL)         goto exit_dput;      //如果是挂载目录.则跳转到挂载文件系统的根目录     if (d_mountpoint(dentry)) {         error = -ELOOP;         if (flag & O_NOFOLLOW)              goto exit_dput;         while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));     }     error = -ENOENT;     //对异常情况的排除     if (!dentry->d_inode)         goto exit_dput;     //如果结点是一个符号链接     if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)         goto do_link;      dput(nd->dentry);     nd->dentry = dentry;     error = -EISDIR;      //如果结点是一个目录,出错退出     if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))         goto exit;ok:     //对打开文件进行的各项统一处理     error = may_open(nd, acc_mode, flag);     if (error)         goto exit;     return 0; exit_dput:     dput(dentry);exit:     path_release(nd);     return error; do_link:     error = -ELOOP;     if (flag & O_NOFOLLOW)         goto exit_dput;     /*      * This is subtle. Instead of calling do_follow_link() we do the      * thing by hands. The reason is that this way we have zero link_count      * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.      * After that we have the parent and last component, i.e.      * we are in the same situation as after the first path_walk().      * Well, almost - if the last component is normal we get its copy      * stored in nd->last.name and we will have to putname() it when we      * are done. Procfs-like symlinks just set LAST_BIND.      
*/     nd->flags |= LOOKUP_PARENT;     error = security_inode_follow_link(dentry, nd);     if (error)         goto exit_dput;     touch_atime(nd->mnt, dentry);     nd_set_link(nd, NULL);     error = dentry->d_inode->i_op->follow_link(dentry, nd);     if (!error) {         char *s = nd_get_link(nd);         if (s)              error = __vfs_follow_link(nd, s);         if (dentry->d_inode->i_op->put_link)              dentry->d_inode->i_op->put_link(dentry, nd);     }     dput(dentry);     if (error)         return error;     nd->flags &= ~LOOKUP_PARENT;     if (nd->last_type == LAST_BIND) {         dentry = nd->dentry;         goto ok;     }     error = -EISDIR;     if (nd->last_type != LAST_NORM)         goto exit;     if (nd->last.name[nd->last.len]) {         putname(nd->last.name);         goto exit;     }     error = -ELOOP;     if (count++==32) {         putname(nd->last.name);         goto exit;     }     dir = nd->dentry;     down(&dir->d_inode->i_sem);     dentry = __lookup_hash(&nd->last, nd->dentry, nd);     putname(nd->last.name);     goto do_last;}在这里忽略了结点为符号链接的情况,这种情况下就是找到符号链接的路径,然后重新进行一次相同的操作而已经.我们把注意力主要放在一般的文件操上.在这里,对于已存在文件和要新建的文件有着不同的处理,只要是新创建文件会调用vfs_create()处理.其代码如下:int vfs_create(struct inode *dir, struct dentry *dentry, int mode,         struct nameidata *nd){     //创建文件之前的检查.(在sys_mkdir()的时候已经分析过个函数)     int error = may_create(dir, dentry, nd);      if (error)         return error;      //如果文件系统不允许creat     if (!dir->i_op || !dir->i_op->create)         return -EACCES;    /* shouldn't it be ENOSYS? 
*/     mode &= S_IALLUGO;     mode |= S_IFREG;     error = security_inode_create(dir, dentry, mode);     if (error)         return error;     DQUOT_INIT(dir);     //调用父结点对应的create操作     error = dir->i_op->create(dir, dentry, mode, nd);     if (!error) {         //如果创建成功,则发出通知         inode_dir_notify(dir, DN_CREATE);         security_inode_post_create(dir, dentry, mode);     }     return error;}在这里,我们可以看到,它会调用父目录结点的create操作来创建结点.等分析完sys_open()操作之后,再转入具体的文件系统进行分析.不管是新建的结点还是已经建立的结点,都会进入到may_open()中进行处理.其代码如下所示:int may_open(struct nameidata *nd, int acc_mode, int flag){     struct dentry *dentry = nd->dentry;     struct inode *inode = dentry->d_inode;     int error;      //结点所对应的inode不存在     if (!inode)         return -ENOENT;      //是一个链接或者是目录的情况     if (S_ISLNK(inode->i_mode))         return -ELOOP;          if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))         return -EISDIR;      //检查是否有相应的权限     error = permission(inode, acc_mode, nd);     if (error)         return error;      /*      * FIFO's, sockets and device files are special: they don't      * actually live on the filesystem itself, and as such you      * can write to them even if the filesystem is read-only.      */      //如果是FIFO和SOCK文件,则将O_TRUNC标志去掉     if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {         flag &= ~O_TRUNC;     } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {         //如果是一个块设备文件或者是一个字符设备文件,却挂载选项带有MNT_NODEV         //标志.出错退出         if (nd->mnt->mnt_flags & MNT_NODEV)              return -EACCES;          flag &= ~O_TRUNC;     } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE))         //操作是可写出,但结点又是只读的.出错退出         return -EROFS;     /*      * An append-only file must be opened in append mode for writing.      
*/      //如果节点是append模式的,则必须要以append模式打开     if (IS_APPEND(inode)) {         if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))              return -EPERM;         if (flag & O_TRUNC)              return -EPERM;     }      /* O_NOATIME can only be set by the owner or superuser */     //如果操作带有O_NOATIME标志,则只允许文件的所有者或者是root用户操作     if (flag & O_NOATIME)         if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))              return -EPERM;      /*      * Ensure there are no outstanding leases on the file.      */     error = break_lease(inode, flag);     if (error)         return error;      if (flag & O_TRUNC) {         error = get_write_access(inode);         if (error)              return error;          /*          * Refuse to truncate files with mandatory locks held on them.          */          //检查文件系统是否使用了强制锁且已经加上了强制锁         error = locks_verify_locked(inode);         if (!error) {              DQUOT_INIT(inode);              //对文件进行截尾              error = do_truncate(dentry, 0);         }         put_write_access(inode);         if (error)              return error;     } else         if (flag & FMODE_WRITE)              DQUOT_INIT(inode);      return 0;}在这里,涉及到了两种锁.文件租借锁与强制锁.简单介绍如下:文件租借锁:当一个进程试图打开被租借锁保护的文件时,它会阻塞.同时,拥有这个租借锁的所有进程都会收到一个相应的信号.拥有进程会更新文件的内容,使文件保持一致.如果拥有租借锁的进程没有在规定时间内完成.则内核将租借锁删除,因租借锁阻塞的时候进程继续执行.强制锁:系统默认是劝告锁,当挂载文件系统时指定MS_MANDLOCK安装标志时,强制锁被打开.文件的组设置位为1且组执行位为0的进程都是强制锁的候选者.break_lease()用来判断文件是否有租借锁.被对租借锁的相应处理.代码如下:static inline int break_lease(struct inode *inode, unsigned int mode){     //当前节点有锁     if (inode->i_flock)         return __break_lease(inode, mode);     //没有锁直接返回     return 0;}int __break_lease(struct inode *inode, unsigned int mode){     int error = 0, future;     struct file_lock *new_fl, *flock;     struct file_lock *fl;     int alloc_err;     unsigned long break_time;     int i_have_this_lease = 0;      //申请一个租借锁     alloc_err = lease_alloc(NULL, mode & FMODE_WRITE ? 
F_WRLCK : F_RDLCK,              &new_fl);      lock_kernel();      //对文件中现有租借锁的延时进行处理     time_out_leases(inode);      flock = inode->i_flock;      //如果没有锁,或者锁不为租借锁,退出     //租借锁都会存放在inode->i_flock的头部     if ((flock == NULL) || !IS_LEASE(flock))         goto out;      //如果进程本身是租借锁的拥有者,i_have_this_lease为1     for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next)         if (fl->fl_owner == current->files)              i_have_this_lease = 1;      if (mode & FMODE_WRITE) {         /* If we want write access, we have to revoke any lease. */         //如果是带有写属性的open`需要将租借锁全部都移除         future = F_UNLCK | F_INPROGRESS;     } else if (flock->fl_type & F_INPROGRESS) {         /* If the lease is already being broken, we just leave it */         //操作正在进行         future = flock->fl_type;     } else if (flock->fl_type & F_WRLCK) {         /* Downgrade the exclusive lease to a read-only lease. */         future = F_RDLCK | F_INPROGRESS;     } else {         /* the existing lease was read-only, so we can read too. 
*/         goto out;     }      //如果分配内存失败且本进程不允许强制锁且不允许阻塞.退出     if (alloc_err && !i_have_this_lease && ((mode & O_NONBLOCK) == 0)) {         error = alloc_err;         goto out;     }      //设置break_time     break_time = 0;     if (lease_break_time > 0) {         break_time = jiffies + lease_break_time * HZ;         if (break_time == 0)              break_time++; /* so that 0 means no break time */     }      //因为进程要获得此租用锁了,将其类型更将,指定延时到达时间为初始化时间     //且向其它拥有租用锁的进程发送信号     for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {         if (fl->fl_type != future) {              fl->fl_type = future;              fl->fl_break_time = break_time;              kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);         }     }      //如果进程本身就是锁的拥有者,或者不允许阻塞,退出      if (i_have_this_lease || (mode & O_NONBLOCK)) {         error = -EWOULDBLOCK;         goto out;     } restart:     //计算剩余的延时到达时间     break_time = flock->fl_break_time;     if (break_time != 0) {         break_time -= jiffies;         if (break_time == 0)              break_time++;     }      //将新分配的租用锁插入到链表。直接break time到达,或者是被其它拥有者唤醒     error = locks_block_on_timeout(flock, new_fl, break_time);     if (error >= 0) {         //如果正常返回,更新结点中的租借锁状态         if (error == 0)              time_out_leases(inode);         /* Wait for the next lease that has not been broken yet */         //如果还有租用锁没有被处理,继续前述的处理过程         for (flock = inode->i_flock; flock && IS_LEASE(flock);                   flock = flock->fl_next) {              if (flock->fl_type & F_INPROGRESS)                   goto restart;         }         error = 0;     } out:     unlock_kernel();     if (!alloc_err)         locks_free_lock(new_fl);     return error;}对强制锁的检查是在locks_verify_locked()中完成的.代码如下:static inline int locks_verify_locked(struct inode *inode){     //强制锁的初始条件     //即:1:挂载文件系统的类型为MS_MANDLOCK 且文件的组设置位为1且组执行位为0     if (MANDATORY_LOCK(inode))         //判断文件中是否有强制锁         return locks_mandatory_locked(inode);     return 0;}int 
locks_mandatory_locked(struct inode *inode){     fl_owner_t owner = current->files;     struct file_lock *fl;      /*      * Search the lock list for this inode for any POSIX locks.      */     lock_kernel();     for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {         //判断是否为强制锁         if (!IS_POSIX(fl))              continue;         //不是进程的强制锁.说明被其它的进程置了强制锁了         if (fl->fl_owner != owner)              break;     }     unlock_kernel();     return fl ? -EAGAIN : 0;}另外,还有一个很重要的过程,即对文件截短的操作.因为这个过程涉及到i_mapping的东东.以后再专题分析.回到filp_open().找到文件对应的结点之后,要将inode结构与file结构关联起来.这里在dentry_open()中处理的.它的代码如下:struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags){     struct file * f;     struct inode *inode;     int error;      error = -ENFILE;     f = get_empty_filp();     if (!f)         goto cleanup_dentry;     f->f_flags = flags;     f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;     inode = dentry->d_inode;     if (f->f_mode & FMODE_WRITE) {         error = get_write_access(inode);         if (error)              goto cleanup_file;     }      f->f_mapping = inode->i_mapping;     //file所对应的dentry与vfsmount     f->f_dentry = dentry;     f->f_vfsmnt = mnt;     f->f_pos = 0;     //将文件的操作指向inode->i_fop     f->f_op = fops_get(inode->i_fop);     file_move(f, &inode->i_sb->s_files);      //如果file结构中指定了文件的open函数,调用它     if (f->f_op && f->f_op->open) {         error = f->f_op->open(inode,f);         if (error)              goto cleanup_all;     }     f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);      file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);      /* NB: we're sure to have correct a_ops only after f_op->open */     if (f->f_flags & O_DIRECT) {         if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) {              fput(f);              f = ERR_PTR(-EINVAL);         }     }      return f; cleanup_all:     fops_put(f->f_op);     if (f->f_mode & FMODE_WRITE)         
put_write_access(inode);     file_kill(f);     f->f_dentry = NULL;     f->f_vfsmnt = NULL;cleanup_file:     put_filp(f);cleanup_dentry:     dput(dentry);     mntput(mnt);     return ERR_PTR(error);}从上面的代码中可以看出.对file的各种操作,都会对应到inode的f_op中.在上面的代码曾分析到,对不存在的文件会调用vfs_create().继续会调用目录结点的create()方法.下面分析一下rootfs和ext2中的create实现. 2.1: rootfs中的文件创建经过以前的分析,可得知rootfs中inode对应的操作如下:static struct inode_operations ramfs_dir_inode_operations = {     .create       = ramfs_create,     .lookup       = simple_lookup,     .link         = simple_link,     .unlink       = simple_unlink,     .symlink = ramfs_symlink,     .mkdir        = ramfs_mkdir,     .rmdir        = simple_rmdir,     .mknod        = ramfs_mknod,     .rename       = simple_rename,}对应的create为ramfs_create.代码如下:static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd){     //S_IFREG模式     return ramfs_mknod(dir, dentry, mode | S_IFREG, 0);}从上面可以看到.上面的过程与rootfs中目录的建立大体相同.只是文件的模式改为了S_IFREG.即一般的文件. 2.2:ext2中的文件创建经过前面的分析我们可以得知,ext2中目录对应的操作为:struct inode_operations ext2_dir_inode_operations = {     .create       = ext2_create,     .lookup       = ext2_lookup,     .link         = ext2_link,     .unlink       = ext2_unlink,     .symlink = ext2_symlink,     .mkdir        = ext2_mkdir,     .rmdir        = ext2_rmdir,     .mknod        = ext2_mknod,     .rename       = ext2_rename,#ifdef CONFIG_EXT2_FS_XATTR     .setxattr = generic_setxattr,     .getxattr = generic_getxattr,     .listxattr    = ext2_listxattr,     .removexattr  = generic_removexattr,#endif     .setattr = ext2_setattr,     .permission   = ext2_permission,}其create函数的入口为ext2_create().代码如下:static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd){     //分配一个新的结点     struct inode * inode = ext2_new_inode (dir, mode);     int err = PTR_ERR(inode);     //指定i_op和i_fop.页面缓存的操作方式     if (!IS_ERR(inode)) {         inode->i_op = &ext2_file_inode_operations;         inode->i_fop = 
&ext2_file_operations;         if (test_opt(inode->i_sb, NOBH))              inode->i_mapping->a_ops = &ext2_nobh_aops;         else              inode->i_mapping->a_ops = &ext2_aops;         //将inode置脏         mark_inode_dirty(inode);         err = ext2_add_nondir(dentry, inode);     }     return err;}ext2_new_inode()的代码在前面的分析中已经讨论过.这里不再赘述. 三:文件的关闭关闭文件在用户空间的api接口为close().它在内核中的系统调用入口是sys_close().代码如下:asmlinkage long sys_close(unsigned int fd){     struct file * filp;     struct files_struct *files = current->files;      spin_lock(&files->file_lock);     //参数有效性判断     if (fd >= files->max_fds)         goto out_unlock;     //取得文件描述符对应的file     filp = files->fd[fd];     if (!filp)         goto out_unlock;     //将文件描述符对应的file置空     files->fd[fd] = NULL;     //清除close_on_exec的标志位,表示进程结束时不应该关闭对应位的文件描述对象     FD_CLR(fd, files->close_on_exec);     //清除文件描述的分配位图     __put_unused_fd(files, fd);     spin_unlock(&files->file_lock);     return filp_close(filp, files); out_unlock:     spin_unlock(&files->file_lock);     return -EBADF;}转到filp_close():int filp_close(struct file *filp, fl_owner_t id){     int retval;      /* Report and clear outstanding errors */     retval = filp->f_error;     if (retval)         filp->f_error = 0;      //file引用计数为零.已经无效了     if (!file_count(filp)) {         printk(KERN_ERR "VFS: Close: file count is 0\n");         return retval;     }      //如果文件对象有flush()操作,调用之     if (filp->f_op && filp->f_op->flush) {         int err = filp->f_op->flush(filp);         if (!retval)              retval = err;     }      //发出flush通告     dnotify_flush(filp, id);     //文件要关闭了,将进程拥有的文件的强制锁清除掉     locks_remove_posix(filp, id);     //释放file对象     fput(filp);     return retval;}下面以具体的文件为例,讨论file的flush过程. 
3.1:rootfs的flush()Rootfs格式的一般文件的i_fop对应为:struct file_operations ramfs_file_operations = {     .read         = generic_file_read,     .write        = generic_file_write,     .mmap         = generic_file_mmap,     .fsync        = simple_sync_file,     .sendfile = generic_file_sendfile,     .llseek       = generic_file_llseek,}可以看到里面并没有flush()操作,对文件的关闭无需进行特殊的操作. 3.2:ext2的flush()Ext2类型的文件系统对应的普通文件的i_fop为:struct file_operations ext2_file_operations = {     .llseek       = generic_file_llseek,     .read         = generic_file_read,     .write        = generic_file_write,     .aio_read = generic_file_aio_read,     .aio_write    = generic_file_aio_write,     .ioctl        = ext2_ioctl,     .mmap         = generic_file_mmap,     .open         = generic_file_open,     .release = ext2_release_file,     .fsync        = ext2_sync_file,     .readv        = generic_file_readv,     .writev       = generic_file_writev,     .sendfile = generic_file_sendfile,}可以看到,里面也没有定义flush操作. 四:小结在本节里,主要概述了文件的打开与关闭操作.其中文件的关闭操作对大部分文件系统来说,只要处理好进程本身的文件描述符映射就可以了.无需进行其它特殊的操作.