问题描述
归根结底,我只是想把一个二进制文件切分成大小不超过 X 的若干块,除此之外不做任何处理。如果输入文件是 21MB,我想得到 3 个 7MB 的文件块,之后可以用 cat 之类的工具重新拼接。在下面这个可以运行的简单示例中,我使用了一个 7MB 大小的缓冲区。要得到 7MB 的文件块,就一定要用这么大的缓冲区吗?如果块大小是 2GB,我显然不想把它全部放进内存。那么,我到底需不需要创建缓冲区呢?
我确实读过这里和其他网站上的几篇相关帖子,但它们似乎都使用了由 malloc 或数组创建的某种缓冲区;而查找不使用缓冲区的做法,又把我引向了套接字、TCP/IP 等远远超出我目前知识范围的主题。
难道我注定要写一大堆 if/while 语句吗?
P.S. 哪里可以找到讲解 C 语言 I/O 流的书籍?关于 C++ 的我能找到很多,但找不到关于 C 的。
ifp = fopen(ifile, "rb"); // ifile is a 25MB sound file
ofp = fopen(ofile, "w"); // Omitted error checking.
setvbuf( ifp, NULL, _IOFBF, 1024); // Are these on
setvbuf( ofp, NULL, _IOFBF, 1024); // by default?
size_t CHUNK = 7000000; // 7MB Chunk sizes
size_t result = 0;
size_t *buffer = malloc(CHUNK);
if (buffer == NULL) {fputs ("Could not allocate memory",stderr); exit (1);}
// Read 1 btye at a time?
result = fread(buffer, 1, CHUNK, ifp);
if (result != CHUNK) {fputs ("ERROR: Buffer/read mismatch.",stderr); exit (1);}
fwrite(buffer, CHUNK, 1, ofp);
free(buffer);
下面是一个名为 bsplit 的程序,我最初写于 1991 年。它可以把文件分割成任意大小的块;默认大小以千字节为单位指定(准确地说是 KiB,即 1024 字节)。
/*
@(#)File: $RCSfile: bsplit.c,v $
@(#)Version: $Revision: 1.11 $
@(#)Last changed: $Date: 2008/08/09 05:54:55 $
@(#)Purpose: Split file into blocks -- binary
@(#)Author: J Leffler
*/
#if __STDC_VERSION__ >= 199901L
#define _XOPEN_SOURCE 600
#else
#define _XOPEN_SOURCE 500
#endif /* __STDC_VERSION__ */
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include "stderr.h"
#include "filter.h"
#define MAXFILENAMELEN 256
#define KILOBYTE 1024
#define MEGABYTE (KILOBYTE*KILOBYTE)
#define GIGABYTE (MEGABYTE*KILOBYTE)
#define NIL(x) ((x)0)
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
char *prefix = "bsplit.";
size_t blocksize = 64;
size_t nblocks = 0;
size_t skipblocks = 0;
char buffer[64*KILOBYTE];
long counter = 0;
static int bsplit(FILE *ifp, const char *fn)
{
size_t n; /* Bytes read this time */
size_t bsize; /* Size written for current block */
size_t tsize; /* Size written for current file */
size_t rsize; /* Amount to read */
FILE *op; /* Output file stream */
char file[MAXFILENAMELEN]; /* Output file name */
tsize = 0;
bsize = 0;
op = NIL(FILE *);
rsize = MIN(sizeof(buffer), blocksize);
while ((n = fread(buffer, sizeof(char), rsize, ifp)) > 0)
{
tsize += n;
if (tsize > skipblocks)
{
if (bsize == 0)
{
sprintf(file, "%s%03ld", prefix, counter++);
if ((op = fopen(file, "w")) == NIL(FILE *))
{
err_sysrem2("failed to open file", file);
return(-1);
}
printf("%s\n", file);
}
bsize += n;
if (fwrite(buffer, sizeof(char), n, op) != n)
{
err_sysrem2("failed to write to file", file);
return(-1);
}
if (bsize >= blocksize)
{
fclose(op);
bsize = 0;
}
if (nblocks > 0 && tsize >= nblocks)
break;
}
}
return 0;
}
int main(int argc, char **argv)
{
int opt;
size_t multiplier = KILOBYTE;
char *p;
char c;
int rc;
opterr = 0;
err_setarg0(argv[0]);
while ((opt = getopt(argc, argv, "s:n:p:b:V")) != -1)
{
switch (opt)
{
case 'p':
prefix = optarg;
if (strlen(prefix) > MAXFILENAMELEN - sizeof("000"))
err_error("file name prefix (%s) is too long (max %d)", prefix,
(int)(MAXFILENAMELEN-sizeof("000")));
break;
case 's':
skipblocks = atoi(optarg);
break;
case 'n':
nblocks = atoi(optarg);
break;
case 'b':
blocksize = atoi(optarg);
p = optarg + strspn(optarg, "0123456789");
if (*p != '\0')
{
c = tolower((unsigned char)*p);
if (c == 'c')
multiplier = 1;
else if (c == 'b')
multiplier = KILOBYTE/2;
else if (c == 'k')
multiplier = KILOBYTE;
else if (c == 'm')
multiplier = MEGABYTE;
else if (c == 'g')
multiplier = GIGABYTE;
else
err_error("unknown size multiplier suffix %s\n", p);
if (p[1] != '\0')
err_error("unknown size multiplier suffix %s\n", p);
}
break;
case 'V':
err_version("BSPLIT", &"@(#)$Revision: 1.11 $ ($Date: 2008/08/09 05:54:55 $)"[4]);
break;
default:
err_usage("[-b blocksize][-p prefix][-s skipblocks][-n blocks][file [...]]");
break;
}
}
/* Convert sizes to bytes */
blocksize *= multiplier;
skipblocks *= blocksize;
if (nblocks > 0)
nblocks = skipblocks + nblocks * blocksize;
rc = filter_stdout(argc, argv, optind, bsplit);
return(rc);
}
头文件 stderr.h 声明了一系列错误报告例程;我在自己的大多数程序中都使用它。头文件 filter.h 声明了函数 filter_stdout(),它遍历参数列表,打开各个文件进行读取,并调用一个函数(此处为 bsplit())依次处理每个文件。它还会自动处理“没有参数就表示读取标准输入”等情况。(如需代码请联系我——参见我的个人资料。)
请注意,乘数后缀 c 表示“字符”,b 表示 512 字节的块,而 k、m 和 g 分别表示 KiB、MiB 和 GiB。
Ultimately I'm just looking to cut a binary file into pieces no greater than X in size, and do nothing else with it. If the input file is 21MB, I want 3 pieces of 7MB that I could join with cat or whatnot. In the simple example below, which works, I'm using a 7MB chunk-size buffer. Do I have to use that at all to get 7MB file chunks? If the chunk size were, say, 2GB, that is obviously not something I want to put in memory. So do I need to create a buffer at all?
I did read several posts on here and on other sites about this, but they all seem to use some sort of buffer created by malloc or arrays, and looking up non-buffered ways led me too far ahead of my knowledge, into sockets and TCP/IP-related topics.
Am I doomed to a whole lot of if/while statements?
P.S. Where can I find books on I/O streams in C? I can find a LOT for C++, but not C.
ifp = fopen(ifile, "rb"); // ifile is a 25MB sound file
ofp = fopen(ofile, "w"); // Omitted error checking.
setvbuf( ifp, NULL, _IOFBF, 1024); // Are these on
setvbuf( ofp, NULL, _IOFBF, 1024); // by default?
size_t CHUNK = 7000000; // 7MB Chunk sizes
size_t result = 0;
size_t *buffer = malloc(CHUNK);
if (buffer == NULL) {fputs ("Could not allocate memory",stderr); exit (1);}
// Read 1 btye at a time?
result = fread(buffer, 1, CHUNK, ifp);
if (result != CHUNK) {fputs ("ERROR: Buffer/read mismatch.",stderr); exit (1);}
fwrite(buffer, CHUNK, 1, ofp);
free(buffer);
Here's a program, bsplit
, that I originally wrote in 1991. It splits a file up into arbitrary sized chunks; the default size is specified in kilobytes (well, kibibytes — 1024 bytes).
/*
@(#)File: $RCSfile: bsplit.c,v $
@(#)Version: $Revision: 1.11 $
@(#)Last changed: $Date: 2008/08/09 05:54:55 $
@(#)Purpose: Split file into blocks -- binary
@(#)Author: J Leffler
*/
#if __STDC_VERSION__ >= 199901L
#define _XOPEN_SOURCE 600
#else
#define _XOPEN_SOURCE 500
#endif /* __STDC_VERSION__ */
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "stderr.h"
#include "filter.h"
/* Size of the array that holds each generated output file name in bsplit(). */
#define MAXFILENAMELEN 256
/* Size units in bytes; main() uses them as -b suffix multipliers. */
#define KILOBYTE 1024
#define MEGABYTE (KILOBYTE*KILOBYTE)
#define GIGABYTE (MEGABYTE*KILOBYTE)
/* Null pointer of type x, e.g. NIL(FILE *). */
#define NIL(x) ((x)0)
/* Smaller of a and b; each argument may be evaluated more than once. */
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
/* Output file name prefix (-p); bsplit() appends a zero-padded sequence number. */
char *prefix = "bsplit.";
/* Block size (-b); main() multiplies it by the multiplier to get bytes before bsplit() runs. */
size_t blocksize = 64;
/* Block limit (-n); 0 means no limit. main() converts a non-zero value to an end offset in bytes. */
size_t nblocks = 0;
/* Leading blocks to skip (-s); main() converts it to a byte count. */
size_t skipblocks = 0;
/* I/O buffer that bsplit() reads into and writes from. */
char buffer[64*KILOBYTE];
/* Sequence number for the next output file; bsplit() only ever increments it. */
long counter = 0;
/*
** Split the input stream into numbered output files of at most blocksize bytes.
**
** The first skipblocks bytes of the input are read and discarded.  After that,
** data is copied into files named <prefix><counter> (counter is zero-padded to
** at least three digits and incremented for every file created).  Each file
** name is printed on standard output when the file is created.  If nblocks is
** non-zero, copying stops once that many input bytes have been consumed.
**
** ifp - stream to read from
** fn  - name of the input; not used by this function
**
** Returns 0 on success, or -1 after reporting a failure to name, open, write
** or close an output file.
*/
static int bsplit(FILE *ifp, const char *fn)
{
    size_t n;                   /* Bytes read this time */
    size_t want;                /* Bytes to request from the next read */
    size_t bsize = 0;           /* Size written for current block */
    size_t tsize = 0;           /* Bytes consumed from the input so far */
    FILE *op = NIL(FILE *);     /* Output file stream; null while no block is open */
    char file[MAXFILENAMELEN];  /* Output file name */
    int len;                    /* Length of the formatted output file name */

    (void)fn;

    for (;;)
    {
        /*
        ** Never read across the end of the skipped region or the end of the
        ** current block, so that no output file exceeds blocksize bytes even
        ** when blocksize is not a multiple of the buffer size.
        */
        if (tsize < skipblocks)
            want = MIN(sizeof(buffer), skipblocks - tsize);
        else
            want = MIN(sizeof(buffer), blocksize - bsize);
        if ((n = fread(buffer, sizeof(char), want, ifp)) == 0)
            break;
        tsize += n;
        if (tsize <= skipblocks)
            continue;           /* Still inside the skipped region */

        if (op == NIL(FILE *))
        {
            len = snprintf(file, sizeof(file), "%s%03ld", prefix, counter++);
            if (len < 0 || (size_t)len >= sizeof(file))
            {
                /* NOTE(review): assumes err_sysrem2() reports errno - confirm */
                errno = ENAMETOOLONG;
                err_sysrem2("output file name is too long", file);
                return(-1);
            }
            /* Binary mode: the data must be copied byte for byte */
            if ((op = fopen(file, "wb")) == NIL(FILE *))
            {
                err_sysrem2("failed to open file", file);
                return(-1);
            }
            printf("%s\n", file);
        }

        if (fwrite(buffer, sizeof(char), n, op) != n)
        {
            /* Report first so that fclose() cannot disturb errno */
            err_sysrem2("failed to write to file", file);
            (void)fclose(op);
            return(-1);
        }
        bsize += n;

        if (bsize >= blocksize)
        {
            /* fclose() flushes buffered data, so its failure is a write failure */
            if (fclose(op) != 0)
            {
                err_sysrem2("failed to write to file", file);
                return(-1);
            }
            op = NIL(FILE *);
            bsize = 0;
        }
        if (nblocks > 0 && tsize >= nblocks)
            break;
    }

    /* Close a final, partially filled block */
    if (op != NIL(FILE *) && fclose(op) != 0)
    {
        err_sysrem2("failed to write to file", file);
        return(-1);
    }
    return 0;
}
int main(int argc, char **argv)
{
int opt;
size_t multiplier = KILOBYTE;
char *p;
char c;
int rc;
opterr = 0;
err_setarg0(argv[0]);
while ((opt = getopt(argc, argv, "s:n:p:b:V")) != -1)
{
switch (opt)
{
case 'p':
prefix = optarg;
if (strlen(prefix) > MAXFILENAMELEN - sizeof("000"))
err_error("file name prefix (%s) is too long (max %d)", prefix,
(int)(MAXFILENAMELEN-sizeof("000")));
break;
case 's':
skipblocks = atoi(optarg);
break;
case 'n':
nblocks = atoi(optarg);
break;
case 'b':
blocksize = atoi(optarg);
p = optarg + strspn(optarg, "0123456789");
if (*p != '\0')
{
c = tolower((unsigned char)*p);
if (c == 'c')
multiplier = 1;
else if (c == 'b')
multiplier = KILOBYTE/2;
else if (c == 'k')
multiplier = KILOBYTE;
else if (c == 'm')
multiplier = MEGABYTE;
else if (c == 'g')
multiplier = GIGABYTE;
else
err_error("unknown size multiplier suffix %s\n", p);
if (p[1] != '\0')
err_error("unknown size multiplier suffix %s\n", p);
}
break;
case 'V':
err_version("BSPLIT", &"@(#)$Revision: 1.11 $ ($Date: 2008/08/09 05:54:55 $)"[4]);
break;
default:
err_usage("[-b blocksize][-p prefix][-s skipblocks][-n blocks][file [...]]");
break;
}
}
/* Convert sizes to bytes */
blocksize *= multiplier;
skipblocks *= blocksize;
if (nblocks > 0)
nblocks = skipblocks + nblocks * blocksize;
rc = filter_stdout(argc, argv, optind, bsplit);
return(rc);
}
The header stderr.h
declares a series of error reporting routines; I use it in most of my programs. The header filter.h
declares the function filter_stdout()
which steps through an argument list, opening the files for reading and calling a function — in this case bsplit()
— to process each file in turn. It handles 'no arguments means read standard input' etc automatically. (Contact me for the code — see my profile.)
Note that the multiplier c
means 'characters', b
means 512-byte blocks, and k
, m
, and g
mean KiB, MiB and GiB respectively.
这篇关于“文件分块和缓冲?”的文章就介绍到这里了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!