写点什么

C++ 学习 ---_IO_new_fdopen 函数原理分析学习

作者:桑榆
  • 2022 年 10 月 04 日
    广东
  • 本文字数:7582 字

    阅读完需:约 25 分钟

引言

_IO_new_fdopen 是 Glibc 中 fdopen 函数的内部实现,接受 fd 和打开 mode,返回文件流 FILE 指针。是 stdio.h 中比较重要的函数,我们来一起看看它的源码实现。

入参说明

接受两个参数,fd 是已经打开的文件描述符,mode 是对应的模式,包括如下的组合(以下说明沿用 fopen 的语义;对 fdopen 而言 fd 已经打开,因此不会创建文件,"w"开头的模式也不会截断文件):

  • "r":read,文件必须存在;

  • "w":write,文件存在则从头开始覆盖写,不存在则创建新文件写入;

  • "a":append,文件存在则从末尾开始追加写入,不存在则创建新文件写入;

  • "r+":read/update,文件必须存在,可读可写;

  • "w+":write/update,文件存在则从头开始读写,不存在则创建新文件读写;

  • "a+":append/update,文件存在则从末尾开始追加读写,不存在则创建新文件读写。

//glibc/include/stdio.h185 extern FILE *_IO_new_fdopen (int, const char*);186 #   define fdopen(fd, mode) _IO_new_fdopen (fd, mode)
复制代码

对于二进制文件读写则使用"b",组合之后有"rb", "wb", "ab", "r+b", "w+b", "a+b", "rb+", "wb+", "ab+"。C11 标准中增加了一个新的"x",与"w"组合使用("wx", "wbx", "w+x", "w+bx"/"wb+x"):如果当前文件已存在,则函数直接返回失败,避免覆盖已有文件。注意从后文的源码可以看到,_IO_new_fdopen 对"b"和"x"都是直接忽略的。

1.常用变量定义

定义了 locked_FILE 指针 new_f,里面包含了_IO_FILE_plus,_IO_lock_t,_IO_wide_data,主要是_IO_FILE_plus 包含了主要的记录内容。do_seek 表明当前默认是不进行文件读写指针移动的,除非 mode 指定需要 append 模式。

//glibc/libio/iofdopen.c 33 FILE * 34 _IO_new_fdopen (int fd, const char *mode) 35 { 36   int read_write; 37   struct locked_FILE 38   { 39     struct _IO_FILE_plus fp; 40 #ifdef _IO_MTSAFE_IO 41     _IO_lock_t lock; 42 #endif 43     struct _IO_wide_data wd; 44   } *new_f; 45   int i; 46   int use_mmap = 0; 47  48   /* Decide whether we modify the offset of the file we attach to and seek to 49      the end of file.  We only do this if the mode is 'a' and if the file 50      descriptor did not have O_APPEND in its flags already.  */ 51   bool do_seek = false;
复制代码

_IO_FILE_plus

其中实际上保存了我们最后要返回的 FILE file,再加上针对 C++ streambuf 做的兼容虚函数表。

293 struct _IO_jump_t294 {295     JUMP_FIELD(size_t, __dummy);296     JUMP_FIELD(size_t, __dummy2);297     JUMP_FIELD(_IO_finish_t, __finish);                                                                                                              298     JUMP_FIELD(_IO_overflow_t, __overflow);299     JUMP_FIELD(_IO_underflow_t, __underflow);300     JUMP_FIELD(_IO_underflow_t, __uflow);301     JUMP_FIELD(_IO_pbackfail_t, __pbackfail);302     /* showmany */303     JUMP_FIELD(_IO_xsputn_t, __xsputn);304     JUMP_FIELD(_IO_xsgetn_t, __xsgetn);305     JUMP_FIELD(_IO_seekoff_t, __seekoff);306     JUMP_FIELD(_IO_seekpos_t, __seekpos);307     JUMP_FIELD(_IO_setbuf_t, __setbuf);308     JUMP_FIELD(_IO_sync_t, __sync);309     JUMP_FIELD(_IO_doallocate_t, __doallocate);310     JUMP_FIELD(_IO_read_t, __read);311     JUMP_FIELD(_IO_write_t, __write);312     JUMP_FIELD(_IO_seek_t, __seek);313     JUMP_FIELD(_IO_close_t, __close);314     JUMP_FIELD(_IO_stat_t, __stat);315     JUMP_FIELD(_IO_showmanyc_t, __showmanyc);316     JUMP_FIELD(_IO_imbue_t, __imbue);317 };318 319 /* We always allocate an extra word following an _IO_FILE.320    This contains a pointer to the function jump table used.321    This is for compatibility with C++ streambuf; the word can322    be used to smash to a pointer to a virtual function table. */323 324 struct _IO_FILE_plus325 {326   FILE file;327   const struct _IO_jump_t *vtable;328 }
复制代码

FILE 结构体里面包含的内容参考https://xie.infoq.cn/article/e7f8771d4f0ed770105041f52的说明

_IO_lock_t

参考https://xie.infoq.cn/article/53aced666a17f52c15b40d0c2

_IO_wide_data

宽字节数据的读写控制信息

120 /* Extra data for wide character streams.  */121 struct _IO_wide_data122 {123   wchar_t *_IO_read_ptr;    /* Current read pointer */124   wchar_t *_IO_read_end;    /* End of get area. */125   wchar_t *_IO_read_base;   /* Start of putback+get area. */126   wchar_t *_IO_write_base;  /* Start of put area. */127   wchar_t *_IO_write_ptr;   /* Current put pointer. */128   wchar_t *_IO_write_end;   /* End of put area. */129   wchar_t *_IO_buf_base;    /* Start of reserve area. */130   wchar_t *_IO_buf_end;     /* End of reserve area. */131   /* The following fields are used to support backing up and undo. */132   wchar_t *_IO_save_base;   /* Pointer to start of non-current get area. */133   wchar_t *_IO_backup_base; /* Pointer to first valid character of134                    backup area */135   wchar_t *_IO_save_end;    /* Pointer to end of non-current get area. */136 137   __mbstate_t _IO_state;138   __mbstate_t _IO_last_state;139   struct _IO_codecvt _codecvt;140 141   wchar_t _shortbuf[1];142 143   const struct _IO_jump_t *_wide_vtable; 144 };
复制代码

2.mode 信息解析

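通过解析传入的 mode 字符串,对 int read_write 进行赋值,基本逻辑与上面提到的字符组合一致。首先查看第一个字符,只能是"r"/"w"/"a"中的一个,分别赋值为 _IO_NO_WRITES、_IO_NO_READS、_IO_NO_READS|_IO_IS_APPENDING,其他字符则将 errno 置为 EINVAL 并返回 NULL。相关宏定义如下:70 #define _IO_NO_READS 0x0004 /* Reading not allowed. */ 71 #define _IO_NO_WRITES 0x0008 /* Writing not allowed. */ 80 #define _IO_IS_APPENDING 0x1000。然后最多再检查后面的 4 个字符(最长的情况如"wb+x"),遇到 \0 或"+"就结束循环。注意这里只针对"+"和"m"做了处理:遇到"+"时执行 read_write &= _IO_IS_APPENDING(注意这里是按位与,会清掉 _IO_NO_READS 和 _IO_NO_WRITES,使流可读可写;只有"a+"会保留 0x1000,"r+"和"w+"的结果为 0);遇到"m"时将 use_mmap 标记为 1;"b"、"x"以及其他字符都被忽略。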

 53   switch (*mode) 54     { 55     case 'r': 56       read_write = _IO_NO_WRITES; 57       break; 58     case 'w': 59       read_write = _IO_NO_READS; 60       break; 61     case 'a': 62       read_write = _IO_NO_READS|_IO_IS_APPENDING; 63       break; 64     default: 65       __set_errno (EINVAL); 66       return NULL; 67   } 68   for (i = 1; i < 5; ++i) 69     { 70       switch (*++mode) 71     { 72     case '\0': 73       break; 74     case '+': 75       read_write &= _IO_IS_APPENDING; 76       break; 77     case 'm': 78       use_mmap = 1; 79       continue; 80     case 'x': 81     case 'b': 82     default:                                                                                                                                          83       /* Ignore */ 84       continue; 85     } 86       break; 87     }
复制代码

3.调用__fcntl 获取 FD 的状态信息

这里获取 FD 的状态信息(获取信息失败也返回 NULL)后进行检查,以下两种情况都视为 EINVAL,参数异常,返回 NULL。

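  • 如果当前 fd 的访问模式为只读(O_RDONLY),而 read_write 中没有 _IO_NO_WRITES(即 mode 不是不带"+"的"r",要求了写权限),说明想要写或追加一个只读的 fd;

  • 如果当前 fd 的访问模式为只写(O_WRONLY),而 read_write 中没有 _IO_NO_READS(即 mode 不是不带"+"的"w"或"a",要求了读权限),说明想要读一个只写的 fd。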

 88   int fd_flags = __fcntl (fd, F_GETFL); 89   if (fd_flags == -1) 90     return NULL; 91  92   if (((fd_flags & O_ACCMODE) == O_RDONLY && !(read_write & _IO_NO_WRITES)) 93       || ((fd_flags & O_ACCMODE) == O_WRONLY && !(read_write & _IO_NO_READS))) 94     { 95       __set_errno (EINVAL); 96       return NULL; 97     }  86 #define F_GETFL     3   /* Get file status flags.  */  24 /* File access modes for `open' and `fcntl'.  */ 25 #define O_RDONLY    0   /* Open read-only.  */ 26 #define O_WRONLY    1   /* Open write-only.  */ 27 #define O_RDWR      2   /* Open read/write.  */  78 /* Mask for file access modes.  This is system-dependent in case 79    some system ever wants to define some other flavor of access.  */ 80 #define O_ACCMODE   (O_RDONLY|O_WRONLY|O_RDWR)
复制代码

4.解析 append 信息并更新 fd 的 status flags

这里判断 read_write 是否有设定"a",即追加模式,而且当前查询的文件 status flags 没有设置 append,那么将 do_seek(进行文件读写指针移动)置为 true,

同时调用__fcntl,F_SETFL 模式将文件的 status flags 设置为原有 flags 与 O_APPEND 的或。

115   if ((read_write & _IO_IS_APPENDING) && !(fd_flags & O_APPEND))116     {117       do_seek = true;118       if (__fcntl (fd, F_SETFL, fd_flags | O_APPEND) == -1)119     return NULL;120     }
87 #define F_SETFL 4 /* Set file status flags. */
复制代码

5.为 new_f 分配堆内存空间

分配大小为 sizeof (struct locked_FILE),如果分配失败,则返回 NULL。

122   new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));123   if (new_f == NULL)124     return NULL;
复制代码

6._IO_no_init 初始化

使用 &new_f->lock 初始化 new_f->fp.file._lock,即_IO_FILE 中的_lock;

125 #ifdef _IO_MTSAFE_IO126   new_f->fp.file._lock = &new_f->lock;127 #endif128   _IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd,129 #if _G_HAVE_MMAP130            (use_mmap && (read_write & _IO_NO_WRITES))131            ? &_IO_wfile_jumps_maybe_mmap :132 #endif133            &_IO_wfile_jumps);
复制代码

调用_IO_no_init 初始化 &new_f->fp.file 指针,其中 flags 和 orientation 置 0,其余参数基本都是置空,注意,这里还调用了_IO_old_init 对一些基础参数做了置空操作,同时,值得关注的一点:_IO_lock_init 初始化了*fp->_lock,便于后续的使用。

其中跳转表(_IO_jump_t)的选择取决于_G_HAVE_MMAP 宏和当前模式:如果_G_HAVE_MMAP 为真,且模式中有"m"并且是不可写的"r",则使用_IO_wfile_jumps_maybe_mmap,否则使用_IO_wfile_jumps,两者区别就在于 mmap 和 munmap 在读写文件时的使用,这里就不细展开了。

//glibc/libio/genops.c 560 void 561 _IO_no_init (FILE *fp, int flags, int orientation,                          562          struct _IO_wide_data *wd, const struct _IO_jump_t *jmp) 563 { 564   _IO_old_init (fp, flags); 565   fp->_mode = orientation; 566   if (orientation >= 0) 567     { 568       fp->_wide_data = wd; 569       fp->_wide_data->_IO_buf_base = NULL; 570       fp->_wide_data->_IO_buf_end = NULL; 571       fp->_wide_data->_IO_read_base = NULL; 572       fp->_wide_data->_IO_read_ptr = NULL; 573       fp->_wide_data->_IO_read_end = NULL; 574       fp->_wide_data->_IO_write_base = NULL; 575       fp->_wide_data->_IO_write_ptr = NULL; 576       fp->_wide_data->_IO_write_end = NULL; 577       fp->_wide_data->_IO_save_base = NULL; 578       fp->_wide_data->_IO_backup_base = NULL; 579       fp->_wide_data->_IO_save_end = NULL; 580  581       fp->_wide_data->_wide_vtable = jmp; 582     } 583   else 584     /* Cause predictable crash when a wide function is called on a byte 585        stream.  */ 586     fp->_wide_data = (struct _IO_wide_data *) -1L; 587   fp->_freeres_list = NULL; 588 }  529 void 530 _IO_old_init (FILE *fp, int flags) 531 { 532   fp->_flags = _IO_MAGIC|flags; 533   fp->_flags2 = 0; 534   if (stdio_needs_locking) 535     fp->_flags2 |= _IO_FLAGS2_NEED_LOCK; 536   fp->_IO_buf_base = NULL; 537   fp->_IO_buf_end = NULL; 538   fp->_IO_read_base = NULL; 539   fp->_IO_read_ptr = NULL; 540   fp->_IO_read_end = NULL; 541   fp->_IO_write_base = NULL; 542   fp->_IO_write_ptr = NULL; 543   fp->_IO_write_end = NULL; 544   fp->_chain = NULL; /* Not necessary. */ 545  546   fp->_IO_save_base = NULL; 547   fp->_IO_backup_base = NULL; 548   fp->_IO_save_end = NULL; 549   fp->_markers = NULL; 550   fp->_cur_column = 0; 551 #if _IO_JUMPS_OFFSET 552   fp->_vtable_offset = 0; 553 #endif 554 #ifdef _IO_MTSAFE_IO 555   if (fp->_lock != NULL)                                          556     _IO_lock_init (*fp->_lock); 557 #endif 558 }
复制代码

7._IO_JUMPS 初始化 &new_f->fp

宏展开后,可以看到,实际上是初始化 (&new_f->fp)->vtable,也就是 new_f->fp.vtable,即前文提到的兼容 C++ streambuf 的虚函数表:如果当前模式中有"m"且为"r",则置为_IO_file_jumps_maybe_mmap,否则为_IO_file_jumps。

 98 #define _IO_JUMPS(THIS) (THIS)->vtable 134   _IO_JUMPS (&new_f->fp) =135 #if _G_HAVE_MMAP136     (use_mmap && (read_write & _IO_NO_WRITES)) ? &_IO_file_jumps_maybe_mmap :137 #endif138       &_IO_file_jumps;
复制代码

8._IO_new_file_init_internal 初始化

将_offset 初始化为-1(_IO_pos_BAD);_flags 或上 CLOSED_FILEBUF_FLAGS(_IO_IS_FILEBUF+_IO_NO_READS+_IO_NO_WRITES+_IO_TIED_PUT_GET),即:文件 buffer + 不可读 + 不可写(此时流还处于"关闭"状态,既不能读也不能写)+ put/get 指针一致移动、指向同一个位置;调用_IO_link_in 链接 fp;_fileno 初始化为-1;

139   _IO_new_file_init_internal (&new_f->fp);
//glibc/libio/fileops.c 104 void 105 _IO_new_file_init_internal (struct _IO_FILE_plus *fp) 106 { 107 /* POSIX.1 allows another file handle to be used to change the position 108 of our file descriptor. Hence we actually don't know the actual 109 position before we do the first fseek (and until a following fflush). */ 110 fp->file._offset = _IO_pos_BAD; 111 fp->file._flags |= CLOSED_FILEBUF_FLAGS; 112 113 _IO_link_in (fp); 114 fp->file._fileno = -1; 115 } 93 /* _IO_pos_BAD is an off64_t value indicating error, unknown, or EOF. */ 94 #define _IO_pos_BAD ((off64_t) -1) 100 #define CLOSED_FILEBUF_FLAGS \ 101 (_IO_IS_FILEBUF+_IO_NO_READS+_IO_NO_WRITES+_IO_TIED_PUT_GET) 78 #define _IO_TIED_PUT_GET 0x0400 /* Put and get pointer move in unison. */
复制代码

_IO_link_in (fp)

这里实际上就是链接文件的过程,_IO_lock_lock 加锁解锁都是配对的,这里主要是要将_chain 赋值为_IO_list_all,然后将_IO_list_all 赋值为当前的 fp,代表当前要打开的文件链接到_IO_list_all 上了。

  35 #ifdef _IO_MTSAFE_IO  36 static _IO_lock_t list_all_lock = _IO_lock_initializer;  37 #endif  85 void  86 _IO_link_in (struct _IO_FILE_plus *fp)  87 {  88   if ((fp->file._flags & _IO_LINKED) == 0)  89     {  90       fp->file._flags |= _IO_LINKED;  91 #ifdef _IO_MTSAFE_IO  92       _IO_cleanup_region_start_noarg (flush_cleanup);  93       _IO_lock_lock (list_all_lock);  94       run_fp = (FILE *) fp;  95       _IO_flockfile ((FILE *) fp);  96 #endif  97       fp->file._chain = (FILE *) _IO_list_all;  98       _IO_list_all = fp;  99 #ifdef _IO_MTSAFE_IO 100       _IO_funlockfile ((FILE *) fp); 101       run_fp = NULL; 102       _IO_lock_unlock (list_all_lock); 103       _IO_cleanup_region_end (0); 104 #endif 105     } 106 }
复制代码

9._IO_mask_flags 设置 mask

将在_IO_new_file_init_internal 中置为-1 的_fileno 置为 fd 自身;

_flags 先与上~_IO_DELETE_DONT_CLOSE,表示后续关闭流时可以调用 close(_fileno);然后通过_IO_mask_flags 只把_flags 中_IO_NO_READS、_IO_NO_WRITES、_IO_IS_APPENDING 这三个位清零,再置为 read_write 中对应位的值,其余标志位保持不变。

140   /* We only need to record the fd because _IO_file_init_internal will141      have unset the offset.  It is important to unset the cached142      offset because the real offset in the file could change between143      now and when the handle is activated and we would then mislead144      ftell into believing that we have a valid offset.  */145   new_f->fp.file._fileno = fd;146   new_f->fp.file._flags &= ~_IO_DELETE_DONT_CLOSE;147 148   _IO_mask_flags (&new_f->fp.file, read_write,149           _IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING);
74 #define _IO_DELETE_DONT_CLOSE 0x0040 /* Don't call close(_fileno) on close. */ 518 #define _IO_mask_flags(fp, f, mask) \ 519 ((fp)->_flags = ((fp)->_flags & ~(mask)) | ((f) & (mask)))
复制代码

10.针对 append 模式特殊处理

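如果是纯追加模式(read_write 同时带有_IO_IS_APPENDING 和_IO_NO_READS,即"a"而不是"a+"),并且前面为 fd 新增了 O_APPEND 标志(do_seek 为 true),那么我们要将对应的读写位置移动到文件末尾。如果 seek 失败且 errno 不是 ESPIPE,则返回 NULL;errno 为 ESPIPE 时的失败会被忽略,函数继续执行。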

151   /* For append mode, set the file offset to the end of the file if we added152      O_APPEND to the file descriptor flags.  Don't update the offset cache153      though, since the file handle is not active.  */154   if (do_seek && ((read_write & (_IO_IS_APPENDING | _IO_NO_READS))155           == (_IO_IS_APPENDING | _IO_NO_READS)))156     {157       off64_t new_pos = _IO_SYSSEEK (&new_f->fp.file, 0, _IO_seek_end);158       if (new_pos == _IO_pos_BAD && errno != ESPIPE)159     return NULL;160     }
复制代码

_IO_SYSSEEK

_IO_SYSSEEK (&new_f->fp.file, 0, _IO_seek_end)即相对当前文件末尾,跳动 0,即移动到末尾。

257 #define _IO_SYSSEEK(FP, OFFSET, MODE) JUMP2 (__seek, FP, OFFSET, MODE)125 #define JUMP2(FUNC, THIS, X1, X2) (_IO_JUMPS_FUNC(THIS)->FUNC) (THIS, X1, X2)
252 /* The 'sysseek' hook is used to re-position an external file.253 It generalizes the Unix lseek(2) function.254 It matches the streambuf::sys_seek virtual function, which is255 specific to this implementation. */256 typedef off64_t (*_IO_seek_t) (FILE *, off64_t, int); 57 #define _IO_seek_end 2
复制代码

11.最后返回文件对象

161   return &new_f->fp.file;
复制代码


发布于: 刚刚阅读数: 4
用户头像

桑榆

关注

北海虽赊,扶摇可接;东隅已逝,桑榆非晚! 2020.02.29 加入

Android手机厂商-相机软件系统工程师 爬山/徒步/Coding

评论

发布
暂无评论
C++学习---_IO_new_fdopen函数原理分析学习_c++_桑榆_InfoQ写作社区