From c1a890715d4791b75b9512c1d9b02cb691c5de39 Mon Sep 17 00:00:00 2001 From: iovar Date: Sat, 27 Jan 2007 17:18:58 +0000 Subject: Changed cache format to utilize better interframe encoding and thus require significantly less hard disk space. Instead of the old (plane_width/16)*(plane_height/16) blocksize, we are now using 16x16 blocks for the y plane and 8x8 blocks for the u,v planes. Also, this byte alignment allows the comparing of blocks to happen by first casting into a larger datatype (u_int64_t if available, u_int32_t otherwise). This way we do as little as 1/8 or 1/4 of the comparisons. This essentially offsets the higher CPU cost required to check the now increased number of blocks, and even provides a slight performance boost over the previous way of caching. Also, the load cache loop has been debloated, by moving much of the functionality that previously resided in an if evaluation statement into a new function (ReadFrame). For further abstraction, ReadFrame utilizes ReadZF to do the actual reading. ReadZF determines if a gzread or an fread has to be done. Another improvement in the program is that write operations on the disk now happen in 4-kbyte pages, thus minimizing the overall cost of access. 
The last change in this set is that the indexes in the CachedFrame and FrameHeader datatypes are now u_int32_t's, to accommodate the possibly large number of blocks (could have been 16 bit types but that would give 4096*4096 max resolution, which might have caused problems in the future) git-svn-id: https://recordmydesktop.svn.sourceforge.net/svnroot/recordmydesktop/trunk@266 f606c939-3180-4ac9-a4b8-4b8779d57d0a --- recordmydesktop/include/rmdmacro.h | 47 ++++++++--- recordmydesktop/include/rmdtypes.h | 26 ++++-- recordmydesktop/src/cache_frame.c | 166 +++++++++++++++++++++---------------- recordmydesktop/src/load_cache.c | 159 +++++++++++++++++++++-------------- 4 files changed, 246 insertions(+), 152 deletions(-) diff --git a/recordmydesktop/include/rmdmacro.h b/recordmydesktop/include/rmdmacro.h index 87ac39f..c14785f 100644 --- a/recordmydesktop/include/rmdmacro.h +++ b/recordmydesktop/include/rmdmacro.h @@ -69,8 +69,35 @@ //avoid problems (amd64 has 8byte ulong) #define RMD_ULONG_SIZE_T (sizeof(unsigned long)) +//size of stride when comparing planes(depending on type) +//this is just to avoid thousands of sizeof's +#ifdef HAVE_U_INT64_T + #define COMPARE_STRIDE 8 +#else + #define COMPARE_STRIDE 4 +#endif + //500 mb file size #define CACHE_FILE_SIZE_LIMIT (500*1<<20) +//minimize hard disk access +#define CACHE_OUT_BUFFER_SIZE 4096 + + +//The width, in bytes, of the blocks +//on which the y,u and v planes are broken. +//These blocks are square. +#define Y_UNIT_WIDTH 0x0010 +#define UV_UNIT_WIDTH 0x0008 + +//The number of bytes for every +//sub-block of the y,u and v planes. +//Since the blocks are square +//these are obviously the squares +//of the widths(specified above), +//but the definitions below are only +//for convenience anyway. 
+#define Y_UNIT_BYTES 0x0100 +#define UV_UNIT_BYTES 0x0040 #define CLIP_EVENT_AREA(e,brwin,wgeom){\ @@ -251,10 +278,10 @@ __copy_type,\ __bit_depth__){ \ int k,i;\ - register RMD_TYPE_##__bit_depth__ t_val;\ + register u_int##__bit_depth__##_t t_val;\ register unsigned char *yuv_y=yuv->y+x_tm+y_tm*yuv->y_width,\ *_yr=Yr,*_yg=Yg,*_yb=Yb;\ - register RMD_TYPE_##__bit_depth__ *datapi=(RMD_TYPE_##__bit_depth__ *)data\ + register u_int##__bit_depth__##_t *datapi=(u_int##__bit_depth__##_t *)data\ +((__copy_type==__X_SHARED)?\ (x_tm+y_tm*yuv->y_width):0);\ for(k=0;ku+x_tm/2+(y_tm*yuv->uv_width)/2,\ *yuv_v=yuv->v+x_tm/2+(y_tm*yuv->uv_width)/2,\ *_ur=Ur,*_ug=Ug,*_ub=Ub,\ *_vr=Vr,*_vg=Vg,*_vb=Vb;\ - register RMD_TYPE_##__bit_depth__ *datapi=(RMD_TYPE_##__bit_depth__ *)data\ + register u_int##__bit_depth__##_t *datapi=(u_int##__bit_depth__##_t *)data\ +((__copy_type==__X_SHARED)?\ (x_tm+y_tm*yuv->y_width):0),\ *datapi_next=NULL;\ @@ -445,20 +472,18 @@ free(t_buf);\ };\ -#define INIT_FRAME(frame_t,fheader_t,yuv_t){\ +#define INIT_FRAME(frame_t,fheader_t,yuv_t,\ + YBlocks_t,UBlocks_t,VBlocks_t){\ (frame_t)->header=(fheader_t);\ - (frame_t)->YBlocks=malloc(256);\ - (frame_t)->UBlocks=malloc(64);\ - (frame_t)->VBlocks=malloc(64);\ + (frame_t)->YBlocks=YBlocks_t;\ + (frame_t)->UBlocks=UBlocks_t;\ + (frame_t)->VBlocks=VBlocks_t;\ (frame_t)->YData=malloc((yuv_t)->y_width*(yuv_t)->y_height);\ (frame_t)->UData=malloc((yuv_t)->uv_width*(yuv_t)->uv_height);\ (frame_t)->VData=malloc((yuv_t)->uv_width*(yuv_t)->uv_height);\ }; #define CLEAR_FRAME(frame_t){\ - free((frame_t)->YBlocks);\ - free((frame_t)->UBlocks);\ - free((frame_t)->VBlocks);\ free((frame_t)->YData);\ free((frame_t)->UData);\ free((frame_t)->VData);\ diff --git a/recordmydesktop/include/rmdtypes.h b/recordmydesktop/include/rmdtypes.h index ea1e517..7da30e9 100644 --- a/recordmydesktop/include/rmdtypes.h +++ b/recordmydesktop/include/rmdtypes.h @@ -31,6 +31,7 @@ #include #endif + #include #include #include @@ -61,8 +62,19 @@ 
#include #include -typedef u_int16_t RMD_TYPE_16; -typedef u_int32_t RMD_TYPE_32; +//this type exists only +//for comparing the planes at caching. +//u_int64_t might not be available everywhere. +//The performance gain comes from casting the unsigned char +//buffers to this type before comparing the two blocks. +//This is made possible by the fact that blocks +//for the Y plane are 16 bytes in width and blocks +//for the U,V planes are 8 bytes in width +#ifdef HAVE_U_INT64_T +typedef u_int64_t cmp_int_t; +#else +typedef u_int32_t cmp_int_t; +#endif //how we obtained the image we are converting to yuv enum{ @@ -293,11 +305,9 @@ typedef struct _FrameHeader{ u_int32_t frameno, //number of frame(cached frames) current_total; //number of frames that should have been //taken at time of caching this one - u_int16_t Ynum, //number of changed blocks in the Y plane + u_int32_t Ynum, //number of changed blocks in the Y plane Unum, //number of changed blocks in the U plane Vnum; //number of changed blocks in the V plane - u_int16_t pad; //always zero - }FrameHeader; //The frame after retrieval. 
@@ -307,9 +317,9 @@ typedef struct _FrameHeader{ typedef struct _CachedFrame{ FrameHeader *header; - unsigned char *YBlocks; //identifying number on the grid, - unsigned char *UBlocks; //starting at top left - unsigned char *VBlocks; // >> >> + u_int32_t *YBlocks; //identifying number on the grid, + u_int32_t *UBlocks; //starting at top left + u_int32_t *VBlocks; // >> >> unsigned char *YData; //pointer to data for the blocks that have changed, unsigned char *UData; //which have to be remapped unsigned char *VData; //on the buffer when reading diff --git a/recordmydesktop/src/cache_frame.c b/recordmydesktop/src/cache_frame.c index 07c7c70..4258932 100644 --- a/recordmydesktop/src/cache_frame.c +++ b/recordmydesktop/src/cache_frame.c @@ -32,23 +32,24 @@ int CompareBlocks(unsigned char *incoming, int blockno, int width, int height, - int divisor){ + int blockwidth){ int j,i, - block_i=blockno/divisor,//place on the grid - block_k=blockno%divisor; - register unsigned char *incoming_reg=&(incoming[block_i* - (width*height/divisor)+ - block_k*width/divisor]), - *old_reg=&(old[block_i*(width*height/divisor)+ - block_k*width/divisor]); - - for(j=0;j=CACHE_OUT_BUFFER_SIZE || + (flush && out_buffer_bytes)){ + if(ucfp==NULL) + gzwrite(fp,(void *)out_buffer,out_buffer_bytes); + else + fwrite((void *)out_buffer,1,out_buffer_bytes,ucfp); + bytes_written=out_buffer_bytes; + out_buffer_bytes=0; } - else{ - for(j=0;jenc_data->yuv.y_width/Y_UNIT_WIDTH, + blocknum_y=pdata->enc_data->yuv.y_height/Y_UNIT_WIDTH, firstrun=1, frameno=0, nbytes=0, nth_cache=1; - + u_int32_t ynum,unum,vnum; + u_int32_t yblocks[blocknum_x*blocknum_y], + ublocks[blocknum_x*blocknum_y], + vblocks[blocknum_x*blocknum_y]; if(!pdata->args.zerocompression){ fp=pdata->cache_data->ifp; if(fp==NULL)exit(13); @@ -121,8 +140,6 @@ void *CacheImageBuffer(ProgData *pdata){ while(pdata->running){ int prev; int j; - unsigned short ynum,unum,vnum; - unsigned char yblocks[256],ublocks[64],vblocks[64]; FrameHeader fheader; 
ynum=unum=vnum=0; @@ -151,41 +168,41 @@ void *CacheImageBuffer(ProgData *pdata){ //find and flush different blocks if(firstrun){ firstrun=0; - for(j=0;jargs.zerocompression){ - if(ynum+unum+vnum>(pow(divisor,2)+pow(divisor/2,2)*2)/10) + if(ynum*4+unum+vnum>(blocknum_x*blocknum_y*6)/10) gzsetparams (fp,1,Z_FILTERED); else gzsetparams (fp,0,Z_FILTERED); @@ -203,52 +220,60 @@ void *CacheImageBuffer(ProgData *pdata){ strncpy(fheader.frame_prefix,"FRAM",4); fheader.frameno=++frameno; fheader.current_total=frames_total; + fheader.Ynum=ynum; fheader.Unum=unum; fheader.Vnum=vnum; - fheader.pad=0; if(!pdata->args.zerocompression){ nbytes+=gzwrite(fp,(void*)&fheader,sizeof(FrameHeader)); //flush indexes - if(ynum)nbytes+=gzwrite(fp,yblocks,ynum); - if(unum)nbytes+=gzwrite(fp,ublocks,unum); - if(vnum)nbytes+=gzwrite(fp,vblocks,vnum); + if(ynum)nbytes+=gzwrite(fp,(void*)yblocks,ynum*index_entry_size); + if(unum)nbytes+=gzwrite(fp,(void*)ublocks,unum*index_entry_size); + if(vnum)nbytes+=gzwrite(fp,(void*)vblocks,vnum*index_entry_size); } else{ nbytes+=sizeof(FrameHeader)* fwrite((void*)&fheader,sizeof(FrameHeader),1,ucfp); //flush indexes - if(ynum)nbytes+=ynum*fwrite(yblocks,ynum,1,ucfp); - if(unum)nbytes+=unum*fwrite(ublocks,unum,1,ucfp); - if(vnum)nbytes+=vnum*fwrite(vblocks,vnum,1,ucfp); + if(ynum)nbytes+=index_entry_size* + fwrite(yblocks,index_entry_size,ynum,ucfp); + if(unum)nbytes+=index_entry_size* + fwrite(ublocks,index_entry_size,unum,ucfp); + if(vnum)nbytes+=index_entry_size* + fwrite(vblocks,index_entry_size,vnum,ucfp); } //flush the blocks for each buffer - if(ynum) + if(ynum){ for(j=0;javd+=pdata->frametime; if(nbytes>CACHE_FILE_SIZE_LIMIT){ @@ -274,7 +299,6 @@ void *CacheImageBuffer(ProgData *pdata){ nbytes=0; } } - //clean up since we're not finished for(i=0;i<2;i++){ free(yuv[i].y); diff --git a/recordmydesktop/src/load_cache.c b/recordmydesktop/src/load_cache.c index c9fc96b..6098bc7 100644 --- a/recordmydesktop/src/load_cache.c +++ 
b/recordmydesktop/src/load_cache.c @@ -32,19 +32,86 @@ void LoadBlock(unsigned char *dest, int blockno, int width, int height, - int divisor){ + int blockwidth){ int j, - block_i=blockno/divisor,//place on the grid - block_k=blockno%divisor; - - for(j=0;jheader->Ynum>0){ + if(ReadZF(frame->YBlocks, + index_entry_size, + frame->header->Ynum, + ucfp, + ifp)!=index_entry_size*frame->header->Ynum){ + return -1; + } + } + if(frame->header->Unum>0){ + if(ReadZF(frame->UBlocks, + index_entry_size, + frame->header->Unum, + ucfp, + ifp)!=index_entry_size*frame->header->Unum){ + return -1; + } + } + if(frame->header->Vnum>0){ + if(ReadZF(frame->VBlocks, + index_entry_size, + frame->header->Vnum, + ucfp, + ifp)!=index_entry_size*frame->header->Vnum){ + return -1; + } + } + if(frame->header->Ynum>0){ + if(ReadZF(frame->YData, + Y_UNIT_BYTES, + frame->header->Ynum, + ucfp, + ifp)!=Y_UNIT_BYTES*frame->header->Ynum){ + return -2; + } + } + if(frame->header->Unum>0){ + if(ReadZF(frame->UData, + UV_UNIT_BYTES, + frame->header->Unum, + ucfp, + ifp)!=UV_UNIT_BYTES*frame->header->Unum){ + return -2; + } + } + if(frame->header->Vnum>0){ + if(ReadZF(frame->VData, + UV_UNIT_BYTES, + frame->header->Vnum, + ucfp, + ifp)!=UV_UNIT_BYTES*frame->header->Vnum){ + return -2; + } + } + return 0; +} void *LoadCache(ProgData *pdata){ @@ -61,15 +128,19 @@ void *LoadCache(ProgData *pdata){ missing_frames=0,//if this is found >0 current run will not load //a frame but it will proccess the previous thread_exit=0,//0 success, -1 couldn't find files,1 couldn't remove - divisor=16, - blockszy=0,//size of y plane block in bytes - blockszuv=0,//size of u,v plane blocks in bytes + blocknum_x=pdata->enc_data->yuv.y_width/Y_UNIT_WIDTH, + blocknum_y=pdata->enc_data->yuv.y_height/Y_UNIT_WIDTH, + blockszy=Y_UNIT_BYTES,//size of y plane block in bytes + blockszuv=UV_UNIT_BYTES,//size of u,v plane blocks in bytes framesize=((snd_pcm_format_width(SND_PCM_FORMAT_S16_LE))/8)* pdata->args.channels;//audio frame size 
signed char *sound_data=(signed char *)malloc(pdata->periodsize*framesize); - + u_int32_t YBlocks[(yuv->y_width*yuv->y_height)/Y_UNIT_BYTES], + UBlocks[(yuv->uv_width*yuv->uv_height)/UV_UNIT_BYTES], + VBlocks[(yuv->uv_width*yuv->uv_height)/UV_UNIT_BYTES]; //we allocate the frame that we will use - INIT_FRAME(&frame,&fheader,yuv) + INIT_FRAME(&frame,&fheader,yuv, + YBlocks,UBlocks,VBlocks) //and the we open our files if(!pdata->args.zerocompression){ ifp=gzopen(pdata->cache_data->imgdata,"rb"); @@ -94,10 +165,6 @@ void *LoadCache(ProgData *pdata){ pthread_exit(&thread_exit); } } - //these two are likely to be the same, but not guaranteed, especially on - //low resolutions - blockszy=(yuv->y_width*yuv->y_height )/pow(divisor,2); - blockszuv=(yuv->uv_width*yuv->uv_height)/pow(divisor/2,2); //this will be used now to define if we proccess audio or video //on any given loop. @@ -113,8 +180,8 @@ void *LoadCache(ProgData *pdata){ SyncEncodeImageBuffer(pdata); } else if(((!pdata->args.zerocompression)&& - (gzread(ifp,frame.header,sizeof(FrameHeader))== - sizeof(FrameHeader) ))|| + (gzread(ifp,frame.header,sizeof(FrameHeader))== + sizeof(FrameHeader) ))|| ((pdata->args.zerocompression)&& (fread(frame.header,sizeof(FrameHeader),1,ucfp)==1))){ //sync @@ -122,44 +189,13 @@ void *LoadCache(ProgData *pdata){ (extra_frames+frame.header->frameno); fprintf(stdout,"\r[%d%%] ", ((frame.header->frameno+extra_frames)*100)/frames_total); - fflush(stdout); - if( (frame.header->Ynum<=pow(divisor,2)) && - (frame.header->Unum<=pow(divisor/2,2)) && - (frame.header->Vnum<=pow(divisor/2,2)) && - - ( - ((!pdata->args.zerocompression)&& - ((gzread(ifp,frame.YBlocks,frame.header->Ynum)== - frame.header->Ynum) && - (gzread(ifp,frame.UBlocks,frame.header->Unum)== - frame.header->Unum) && - (gzread(ifp,frame.VBlocks,frame.header->Vnum)== - frame.header->Vnum) && - (gzread(ifp,frame.YData,blockszy*frame.header->Ynum)== - blockszy*frame.header->Ynum) && - 
(gzread(ifp,frame.UData,(blockszuv*frame.header->Unum))== - (blockszuv*frame.header->Unum)) && - (gzread(ifp,frame.VData,(blockszuv*frame.header->Vnum))== - (blockszuv*frame.header->Vnum)))) || - - ((pdata->args.zerocompression)&& - ((fread(frame.YBlocks,1,frame.header->Ynum,ucfp)== - frame.header->Ynum) && - (fread(frame.UBlocks,1,frame.header->Unum,ucfp)== - frame.header->Unum) && - (fread(frame.VBlocks,1,frame.header->Vnum,ucfp)== - frame.header->Vnum) && - (frame.header->Ynum==0 || - fread(frame.YData,blockszy,frame.header->Ynum,ucfp)== - frame.header->Ynum) && - (frame.header->Unum==0 || - fread(frame.UData,blockszuv,frame.header->Unum,ucfp)== - frame.header->Unum) && - (frame.header->Vnum==0 || - fread(frame.VData,blockszuv,frame.header->Vnum,ucfp)== - frame.header->Vnum))) - ) + if( (frame.header->Ynum<=blocknum_x*blocknum_y) && + (frame.header->Unum<=blocknum_x*blocknum_y) && + (frame.header->Vnum<=blocknum_x*blocknum_y) && + (!ReadFrame(&frame, + ((pdata->args.zerocompression)?ucfp:NULL), + ((pdata->args.zerocompression)?NULL:ifp))) ){ //load the blocks for each buffer if(frame.header->Ynum) @@ -169,7 +205,7 @@ void *LoadCache(ProgData *pdata){ frame.YBlocks[j], yuv->y_width, yuv->y_height, - divisor); + Y_UNIT_WIDTH); if(frame.header->Unum) for(j=0;jUnum;j++) LoadBlock( yuv->u, @@ -177,7 +213,7 @@ void *LoadCache(ProgData *pdata){ frame.UBlocks[j], yuv->uv_width, yuv->uv_height, - divisor/2); + UV_UNIT_WIDTH); if(frame.header->Vnum) for(j=0;jVnum;j++) LoadBlock( yuv->v, @@ -185,7 +221,7 @@ void *LoadCache(ProgData *pdata){ frame.VBlocks[j], yuv->uv_width, yuv->uv_height, - divisor/2); + UV_UNIT_WIDTH); //encode. 
This is not made in a thread since //now blocking is not a problem //and this way sync problems @@ -222,8 +258,7 @@ void *LoadCache(ProgData *pdata){ } } } -// SyncEncodeImageBuffer(pdata); -// SyncEncodeSoundBuffer(pdata,sound_data); + pdata->v_encoding_clean=pdata->th_encoding_clean=1; pthread_cond_signal(&pdata->theora_lib_clean); pthread_cond_signal(&pdata->vorbis_lib_clean); -- cgit v1.2.3