文件分块和缓冲？ | 文件分块和缓冲

本文介绍了文件分块和缓冲？的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

归根结底，我只是想把一个二进制文件切分成大小不超过 X 的若干块，除此之外不做任何处理。如果输入文件是 21MB，我希望得到 3 个 7MB 的分块，之后可以用 cat 之类的工具把它们重新拼接起来。在下面这个可以正常运行的简单示例中，我使用了一个 7MB 大小的缓冲区。要得到 7MB 的文件块，我是否一定要用这么大的缓冲区？如果块大小是 2GB，我显然不想把这么多数据都放进内存。那么，我到底需不需要创建缓冲区？

我确实读过本站和其他网站上的几篇相关帖子，但它们似乎都使用了由 malloc 或数组创建的某种缓冲区；而查找不使用缓冲区的方法时，又把我引向了套接字和 TCP/IP 等远远超出我目前知识范围的主题。

难道我注定要写一大堆 if/while 语句吗？

P.S. 哪里可以找到讲解 C 语言 I/O 流的书籍？我能找到很多关于 C++ 的，但找不到关于 C 的。

ifp = fopen(ifile, "rb"); // ifile is a 25MB sound file
ofp = fopen(ofile, "w");  // Omitted error checking.

setvbuf( ifp, NULL, _IOFBF, 1024); // Are these on
setvbuf( ofp, NULL, _IOFBF, 1024); // by default?

size_t CHUNK = 7000000;  // 7MB Chunk sizes
size_t result = 0;
size_t *buffer = malloc(CHUNK);

if (buffer == NULL) {fputs ("Could not allocate memory",stderr); exit (1);}
// Read 1 btye at a time?
result = fread(buffer, 1, CHUNK, ifp);
if (result != CHUNK) {fputs ("ERROR: Buffer/read mismatch.",stderr); exit (1);}

fwrite(buffer, CHUNK, 1, ofp);

free(buffer);

解决方案

下面是一个名为 bsplit 的程序，我最初写于 1991 年。它可以把文件分割成任意大小的块；默认大小以千字节为单位指定（准确地说是 KiB，即 1024 字节）。

/*
@(#)File:           $RCSfile: bsplit.c,v $
@(#)Version:        $Revision: 1.11 $
@(#)Last changed:   $Date: 2008/08/09 05:54:55 $
@(#)Purpose:        Split file into blocks -- binary
@(#)Author:         J Leffler
*/

#if __STDC_VERSION__ >= 199901L
#define _XOPEN_SOURCE 600
#else
#define _XOPEN_SOURCE 500
#endif /* __STDC_VERSION__ */

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include "stderr.h"
#include "filter.h"

#define MAXFILENAMELEN  256
#define KILOBYTE 1024
#define MEGABYTE (KILOBYTE*KILOBYTE)
#define GIGABYTE (MEGABYTE*KILOBYTE)
#define NIL(x)  ((x)0)
#define MIN(a,b)    (((a) < (b)) ? (a) : (b))

char    *prefix = "bsplit.";
size_t   blocksize = 64;
size_t   nblocks = 0;
size_t   skipblocks = 0;
char     buffer[64*KILOBYTE];
long     counter = 0;

static int  bsplit(FILE *ifp, const char *fn)
{
    size_t   n;         /* Bytes read this time */
    size_t   bsize;     /* Size written for current block */
    size_t   tsize;     /* Size written for current file */
    size_t   rsize;     /* Amount to read */
    FILE    *op;        /* Output file stream */
    char     file[MAXFILENAMELEN];  /* Output file name */

    tsize = 0;
    bsize = 0;
    op = NIL(FILE *);
    rsize = MIN(sizeof(buffer), blocksize);
    while ((n = fread(buffer, sizeof(char), rsize, ifp)) > 0)
    {
        tsize += n;
        if (tsize > skipblocks)
        {
            if (bsize == 0)
            {
                sprintf(file, "%s%03ld", prefix, counter++);
                if ((op = fopen(file, "w")) == NIL(FILE *))
                {
                    err_sysrem2("failed to open file", file);
                    return(-1);
                }
                printf("%s\n", file);
            }
            bsize += n;
            if (fwrite(buffer, sizeof(char), n, op) != n)
            {
                err_sysrem2("failed to write to file", file);
                return(-1);
            }
            if (bsize >= blocksize)
            {
                fclose(op);
                bsize = 0;
            }
            if (nblocks > 0 && tsize >= nblocks)
                break;
        }
    }
    return 0;
}

int main(int argc, char **argv)
{
    int opt;
    size_t multiplier = KILOBYTE;
    char *p;
    char  c;
    int   rc;

    opterr = 0;
    err_setarg0(argv[0]);

    while ((opt = getopt(argc, argv, "s:n:p:b:V")) != -1)
    {
        switch (opt)
        {
        case 'p':
            prefix = optarg;
            if (strlen(prefix) > MAXFILENAMELEN - sizeof("000"))
                err_error("file name prefix (%s) is too long (max %d)", prefix,
                          (int)(MAXFILENAMELEN-sizeof("000")));
            break;
        case 's':
            skipblocks = atoi(optarg);
            break;
        case 'n':
            nblocks = atoi(optarg);
            break;
        case 'b':
            blocksize = atoi(optarg);
            p = optarg + strspn(optarg, "0123456789");
            if (*p != '\0')
            {
                c = tolower((unsigned char)*p);
                if (c == 'c')
                    multiplier = 1;
                else if (c == 'b')
                    multiplier = KILOBYTE/2;
                else if (c == 'k')
                    multiplier = KILOBYTE;
                else if (c == 'm')
                    multiplier = MEGABYTE;
                else if (c == 'g')
                    multiplier = GIGABYTE;
                else
                    err_error("unknown size multiplier suffix %s\n", p);
                if (p[1] != '\0')
                    err_error("unknown size multiplier suffix %s\n", p);
            }
            break;
        case 'V':
            err_version("BSPLIT", &"@(#)$Revision: 1.11 $ ($Date: 2008/08/09 05:54:55 $)"[4]);
            break;
        default:
            err_usage("[-b blocksize][-p prefix][-s skipblocks][-n blocks][file [...]]");
            break;
        }
    }

    /* Convert sizes to bytes */
    blocksize  *= multiplier;
    skipblocks *= blocksize;
    if (nblocks > 0)
        nblocks = skipblocks + nblocks * blocksize;

    rc = filter_stdout(argc, argv, optind, bsplit);
    return(rc);
}

头文件 stderr.h 声明了一系列错误报告函数；我的大多数程序都会用到它。头文件 filter.h 声明了函数 filter_stdout()，它会遍历参数列表，打开各个文件以供读取，并调用一个函数（本例中为 bsplit()）依次处理每个文件。它还会自动处理“没有参数即表示读取标准输入”等情况。（如需代码请与我联系——参见我的个人资料。）

请注意，乘数后缀 c 表示“字符”，b 表示 512 字节的块，而 k、m 和 g 分别表示 KiB、MiB 和 GiB。

Ultimately I'm just looking to cut a binary file into pieces no greater than X in size, and do nothing else with it. If the input file is 21MB, I want 3 pieces of 7MB that I could join with cat or whatnot. In the simple example below, which works, I'm using a 7MB chunk-size buffer. Do I have to use that at all to get 7MB file chunks? If the chunk size were, say, 2GB, that is obviously not something I want to put in memory. So do I need to create a buffer at all?

I did read several posts on here and on other sites about this, but they all seem to use some sort of buffer created by malloc or arrays, and looking up non-buffered ways led me too far ahead of my knowledge, into sockets and TCP/IP-related topics.

Am I doomed to a whole lot of if/while statements?

P.S. Where can I find books on I/O streams in C? I can find a LOT for C++, but not C.

ifp = fopen(ifile, "rb"); // ifile is a 25MB sound file
ofp = fopen(ofile, "w");  // Omitted error checking.

setvbuf( ifp, NULL, _IOFBF, 1024); // Are these on
setvbuf( ofp, NULL, _IOFBF, 1024); // by default?

size_t CHUNK = 7000000;  // 7MB Chunk sizes
size_t result = 0;
size_t *buffer = malloc(CHUNK);

if (buffer == NULL) {fputs ("Could not allocate memory",stderr); exit (1);}
// Read 1 btye at a time?
result = fread(buffer, 1, CHUNK, ifp);
if (result != CHUNK) {fputs ("ERROR: Buffer/read mismatch.",stderr); exit (1);}

fwrite(buffer, CHUNK, 1, ofp);

free(buffer);

解决方案

Here's a program, bsplit, that I originally wrote in 1991. It splits a file up into arbitrary sized chunks; the default size is specified in kilobytes (well, kibibytes — 1024 bytes).

/*
@(#)File:           $RCSfile: bsplit.c,v $
@(#)Version:        $Revision: 1.11 $
@(#)Last changed:   $Date: 2008/08/09 05:54:55 $
@(#)Purpose:        Split file into blocks -- binary
@(#)Author:         J Leffler
*/

/*
 * Define _XOPEN_SOURCE before any header is included: 600 when the
 * compiler reports C99 or later, 500 otherwise.
 * NOTE(review): presumably required so that <unistd.h> declares getopt()
 * and friends under strict compiler modes -- confirm on the target platform.
 */
#if __STDC_VERSION__ >= 199901L
#define _XOPEN_SOURCE 600
#else
#define _XOPEN_SOURCE 500
#endif /* __STDC_VERSION__ */

#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "stderr.h"
#include "filter.h"

/* Size of the output file name buffer in bsplit(); main() also uses it to
 * limit the length of the -p prefix. */
#define MAXFILENAMELEN  256
/* Size multipliers, in bytes, selected by the -b suffix letters. */
#define KILOBYTE 1024
#define MEGABYTE (KILOBYTE*KILOBYTE)
#define GIGABYTE (MEGABYTE*KILOBYTE)
/* Null pointer of pointer type x, e.g. NIL(FILE *). */
#define NIL(x)  ((x)0)
/* Smaller of a and b; whichever argument is selected is evaluated twice,
 * so pass expressions without side effects. */
#define MIN(a,b)    (((a) < (b)) ? (a) : (b))

/* Settings filled in by main() from the command line and read by bsplit(). */
/* Output file name prefix (-p); a three-digit-minimum number is appended. */
char    *prefix = "bsplit.";
/* Size of each output file (-b); main() multiplies it by the size
 * multiplier (KILOBYTE by default) so bsplit() sees a byte count. */
size_t   blocksize = 64;
/* Number of blocks to write (-n); 0 means no limit.  main() converts it to
 * the input byte offset at which copying stops. */
size_t   nblocks = 0;
/* Number of leading blocks to skip (-s); main() converts it to bytes. */
size_t   skipblocks = 0;
/* I/O buffer used by bsplit() for each read/write step. */
char     buffer[64*KILOBYTE];
/* Number used in the next output file name; never reset, so numbering
 * continues across successive calls to bsplit(). */
long     counter = 0;

/*
 * Split the data read from ifp into consecutive output files named
 * "<prefix>NNN", each holding at most blocksize bytes, and print each
 * output file name on standard output as it is created.
 *
 * The first skipblocks bytes of input are read and discarded.  When
 * nblocks is non-zero, copying stops once nblocks bytes of input (skipped
 * bytes included) have been consumed.  Both limits are byte counts by the
 * time this runs because main() scales them beforehand.  The file number
 * comes from the global counter, so numbering continues from one input
 * file to the next.
 *
 * ifp - input stream, already open for reading
 * fn  - name of the input; unused here, but part of the callback
 *       signature handed to filter_stdout()
 *
 * Returns 0 on success, or -1 if an output file cannot be named, opened,
 * written or closed.
 */
static int  bsplit(FILE *ifp, const char *fn)
{
    size_t   n;             /* Bytes read this time */
    size_t   bsize = 0;     /* Bytes written to the current output file */
    size_t   tsize = 0;     /* Bytes read from the input so far */
    size_t   rsize;         /* Amount to read */
    FILE    *op = NIL(FILE *);      /* Output file stream, NIL when none is open */
    char     file[MAXFILENAMELEN];  /* Output file name */
    int      skipping;      /* True while the current read is inside the skipped region */
    int      len;           /* Length reported by snprintf() */

    (void)fn;

    while (nblocks == 0 || tsize < nblocks)
    {
        /*
         * Never read across the end of the skipped region or of the
         * current output file: otherwise skipped bytes would be written,
         * or a file would grow past blocksize whenever blocksize is not a
         * multiple of the buffer size.
         */
        skipping = (tsize < skipblocks);
        rsize = skipping ? skipblocks - tsize : blocksize - bsize;
        if (nblocks > 0)
            rsize = MIN(rsize, nblocks - tsize);
        rsize = MIN(sizeof(buffer), rsize);

        /* A zero return covers end of input, read errors and blocksize == 0 */
        if ((n = fread(buffer, sizeof(char), rsize, ifp)) == 0)
            break;
        tsize += n;
        if (skipping)
            continue;

        if (bsize == 0)
        {
            /* Start of a new block: create the next numbered output file */
            len = snprintf(file, sizeof(file), "%s%03ld", prefix, counter++);
            if (len < 0 || (size_t)len >= sizeof(file))
            {
                fprintf(stderr, "output file name too long for prefix %s\n", prefix);
                return(-1);
            }
            /* "wb": the data is binary and must not undergo text translation */
            if ((op = fopen(file, "wb")) == NIL(FILE *))
            {
                err_sysrem2("failed to open file", file);
                return(-1);
            }
            printf("%s\n", file);
        }
        bsize += n;
        if (fwrite(buffer, sizeof(char), n, op) != n)
        {
            /* Report before closing so the close cannot disturb errno */
            err_sysrem2("failed to write to file", file);
            fclose(op);
            return(-1);
        }
        if (bsize >= blocksize)
        {
            /* fclose() flushes buffered data, so its failure is a write failure */
            if (fclose(op) != 0)
            {
                err_sysrem2("failed to write to file", file);
                return(-1);
            }
            op = NIL(FILE *);
            bsize = 0;
        }
    }

    /* Close the final, partially filled output file, if there is one */
    if (op != NIL(FILE *) && fclose(op) != 0)
    {
        err_sysrem2("failed to write to file", file);
        return(-1);
    }
    return 0;
}

/*
 * Parse the unsigned decimal number at the start of arg.
 *
 * arg  - option argument text
 * what - description of the value, used in error messages
 * endp - if not NULL, receives a pointer to the first character after the
 *        digits (so the caller can examine a suffix); if NULL, anything
 *        after the digits is reported as an error
 *
 * Returns the parsed value.  Invalid input is reported through err_error().
 * NOTE(review): err_error() is presumed not to return -- confirm against
 * stderr.h; 0 is returned as a fallback in case it does.
 */
static size_t parse_count(const char *arg, const char *what, char **endp)
{
    unsigned long value;
    char *end;

    /* strtoul() quietly accepts leading white space and a sign; insist on a digit */
    if (!isdigit((unsigned char)arg[0]))
    {
        err_error("invalid %s '%s': expected a non-negative number\n", what, arg);
        return 0;
    }
    errno = 0;
    value = strtoul(arg, &end, 10);
    if (errno == ERANGE || value > (size_t)-1)
    {
        err_error("%s '%s' is too large\n", what, arg);
        return 0;
    }
    if (endp != NULL)
        *endp = end;
    else if (*end != '\0')
    {
        err_error("invalid %s '%s': unexpected trailing characters\n", what, arg);
        return 0;
    }
    return (size_t)value;
}

/*
 * Return value * factor, reporting an error through err_error() instead of
 * silently wrapping around when the product does not fit in a size_t.
 */
static size_t checked_mul(size_t value, size_t factor, const char *what)
{
    if (factor != 0 && value > (size_t)-1 / factor)
    {
        err_error("%s is too large\n", what);
        return 0;
    }
    return value * factor;
}

/*
 * Entry point: parse the options, convert the block-based settings into
 * byte counts and hand the remaining arguments to filter_stdout(), which
 * calls bsplit() for each input.
 *
 * Options:
 *   -b size    block size; optional suffix c (x1), b (x512), k, m or g;
 *              without a suffix the value is multiplied by KILOBYTE
 *   -p prefix  output file name prefix
 *   -s count   number of leading blocks to skip
 *   -n count   number of blocks to write (0 means no limit)
 *   -V         print version information
 *
 * Returns the value returned by filter_stdout().
 */
int main(int argc, char **argv)
{
    int opt;
    size_t multiplier = KILOBYTE;
    char *p;
    char  c;
    int   rc;

    opterr = 0;
    err_setarg0(argv[0]);

    while ((opt = getopt(argc, argv, "s:n:p:b:V")) != -1)
    {
        switch (opt)
        {
        case 'p':
            prefix = optarg;
            if (strlen(prefix) > MAXFILENAMELEN - sizeof("000"))
                err_error("file name prefix (%s) is too long (max %d)", prefix,
                          (int)(MAXFILENAMELEN-sizeof("000")));
            break;
        case 's':
            skipblocks = parse_count(optarg, "skip count", NULL);
            break;
        case 'n':
            nblocks = parse_count(optarg, "block count", NULL);
            break;
        case 'b':
            blocksize = parse_count(optarg, "block size", &p);
            /* A suffix given with an earlier -b must not apply to this one */
            multiplier = KILOBYTE;
            if (*p != '\0')
            {
                c = tolower((unsigned char)*p);
                if (c == 'c')
                    multiplier = 1;
                else if (c == 'b')
                    multiplier = KILOBYTE/2;
                else if (c == 'k')
                    multiplier = KILOBYTE;
                else if (c == 'm')
                    multiplier = MEGABYTE;
                else if (c == 'g')
                    multiplier = GIGABYTE;
                else
                    err_error("unknown size multiplier suffix %s\n", p);
                /* Only a single suffix letter is allowed */
                if (p[1] != '\0')
                    err_error("unknown size multiplier suffix %s\n", p);
            }
            break;
        case 'V':
            err_version("BSPLIT", &"@(#)$Revision: 1.11 $ ($Date: 2008/08/09 05:54:55 $)"[4]);
            break;
        default:
            err_usage("[-b blocksize][-p prefix][-s skipblocks][-n blocks][file [...]]");
            break;
        }
    }

    /* A zero block size would make bsplit() read nothing and produce no output */
    if (blocksize == 0)
        err_error("block size must be greater than zero\n");

    /* Convert sizes to bytes, refusing values that would overflow size_t */
    blocksize  = checked_mul(blocksize, multiplier, "block size");
    skipblocks = checked_mul(skipblocks, blocksize, "skip count");
    if (nblocks > 0)
    {
        /* nblocks becomes the input offset at which copying stops */
        nblocks = checked_mul(nblocks, blocksize, "block count");
        if (nblocks > (size_t)-1 - skipblocks)
            err_error("block count is too large\n");
        nblocks += skipblocks;
    }

    rc = filter_stdout(argc, argv, optind, bsplit);
    return(rc);
}

The header stderr.h declares a series of error reporting routines; I use it in most of my programs. The header filter.h declares the function filter_stdout() which steps through an argument list, opening the files for reading and calling a function — in this case bsplit() — to process each file in turn. It handles 'no arguments means read standard input' etc automatically. (Contact me for the code — see my profile.)

Note that the multiplier c means 'characters', b means 512-byte blocks, and k, m, and g mean KiB, MiB and GiB respectively.

这篇关于文件分块和缓冲？的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持！