Fixed <dst_dev> size requirements.
[badblock-guess.git] / badblock-guess.c
1 /* $Id$ */
2
3 /*
4  * badblock-guess: Quickly recover most of the data from a damaged disk
5  * Copyright (C) 2002  Jan Kratochvil <short@ucw.cz>
6  * 
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; exactly version 2 of the License required
10  * 
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  * 
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20
21
22 #define _LARGEFILE_SOURCE
23 #define _LARGEFILE64_SOURCE
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <time.h>
30 #include <sys/stat.h>
31 #include <string.h>
32 #include <limits.h>
33 #include <ctype.h>
34 #include <glib.h>
35 #include <ext2fs/ext2fs.h>
36
37
38 /* configuation */
39
40 #define BUF_ZERO_BLOCKS 0x100   /* *BLOCK, =128KB, in BSS section */
41 #define xDEBUG 1
42
43
44 /* implementation */
45
46 #ifndef O_BINARY
47 #define O_BINARY 0
48 #endif
49 #ifndef LINE_MAX
50 #define LINE_MAX 0x1000
51 #endif
52
53 #define BLOCK (0x200)   /* 512 */
54
55 static void finish(void) G_GNUC_NORETURN;
56
57 static int src_fd=-1,dst_fd=-1;
58 static const char *src_name,*dst_name;
59 static ext2_loff_t src_len,dst_len;     /* in blocks! */
60 static blk_t src_lenblk,dst_lenblk;     /* ==src_len,==dst_len, just a different type */
61
62 static ext2_loff_t stat_lastread,stat_todo,stat_bads,stat_largest,stat_todo_hunks;
63
64 struct range {
65                 ext2_loff_t start,end;
66                 };
67 static GTree *todo_tree;        /* struct range *key, NULL value */
68 static GTree *bad_tree; /* struct range *key, NULL value */
69 static GMemChunk *node_memchunk;        /* struct range */
70
71 static gint tree_linearize_traverse(struct range *key,gpointer value/*unused*/,struct range **r_fillp/*data*/)
72 {
73         *(*r_fillp)++=*key;
74         g_mem_chunk_free(node_memchunk,key);
75         return FALSE;
76 }
77
78 /* mallocated result, data-terminated, input "tree" destroyed */
79 static struct range *tree_linearize(GTree *tree)
80 {
81 struct range *r=g_new(struct range,g_tree_nnodes(tree)+1);
82 struct range *r_fill=r;
83
84         g_tree_traverse(tree,(GTraverseFunc)tree_linearize_traverse,G_IN_ORDER,&r_fill/*data*/);
85         g_assert(r_fill==r+g_tree_nnodes(tree));
86         r_fill->start=r_fill->end=src_len+1;    /* terminator */
87         g_tree_destroy(tree);
88         return r;
89 }
90
91 /* returns terminator! */
92 static struct range *linear_join(struct range *linear,GCompareFunc comparefunc)
93 {
94 struct range *linear_term,*dst,*src;
95
96         for (linear_term=linear;linear_term->start<=src_len;linear_term++);
97         qsort(linear,linear_term-linear,sizeof(*linear),(int (*)(const void *, const void *))comparefunc);
98         for (dst=src=linear;src->start<=src_len;src++) {
99                 if (dst>linear && dst[-1].end==src->start)
100                         dst[-1].end=src->end;
101                 else
102                         *dst++=*src;
103                 }
104         *dst=*src;      /* terminator */
105         return dst;
106 }
107
108 static gint range_compare(const struct range *range1,const struct range *range2)
109 {
110 ext2_loff_t len1,len2;
111 int r;
112
113         g_return_val_if_fail(range1->end>range1->start,0);
114         g_return_val_if_fail(range2->end>range2->start,0);
115
116         len1=range1->end-range1->start;
117         len2=range2->end-range2->start;
118
119         /* big..small (start with large blocks) */
120         if ((r=(len2 > len1) - (len2 < len1)))
121                 return r;
122         /* big..small (reversed processing order) */
123         if ((r=(range2->start > range1->start) - (range2->start < range1->start)))
124                 return r;
125         return 0;
126 }
127
128 static gint range_compare_diskorder(const struct range *range1,const struct range *range2)
129 {
130 int r;
131
132         g_return_val_if_fail(range1->end>range1->start,0);
133         g_return_val_if_fail(range2->end>range2->start,0);
134
135         /* small..big (linear disk processing order) */
136         if ((r=(range2->start < range1->start) - (range2->start > range1->start)))
137                 return r;
138         /* small..big (shouldn't have effect, just to get valid compare for ==) */
139         if ((r=(range2->end   < range1->end  ) - (range2->end   > range1->end  )))
140                 return r;
141         return 0;
142 }
143
144 static struct range *node_build(ext2_loff_t start,ext2_loff_t end)
145 {
146 struct range *r=g_mem_chunk_alloc(node_memchunk);
147
148         r->start=start;
149         r->end=end;
150         return r;
151 }
152
153 static gint search_end(struct range *key,ext2_loff_t *endp/*data*/)
154 {
155 int r;
156
157         if ((r=(*endp < key->end) - (*endp > key->end)))
158                 return r;
159         return 0;
160 }
161
162 static gint search_start(struct range *key,ext2_loff_t *startp/*data*/)
163 {
164 int r;
165
166         if ((r=(*startp < key->start) - (*startp > key->start)))
167                 return r;
168         return 0;
169 }
170
171 static void node_add_bad(ext2_loff_t start,ext2_loff_t end)
172 {
173 struct range *key,*neigh_left,*neigh_right;
174
175         if (start>=end)
176                 return;
177         key=node_build(start,end);
178         if ((neigh_left =g_tree_search(bad_tree,(GCompareFunc)search_end  ,&start))) {
179                 g_assert(neigh_left->end==key->start);
180                 key->start=neigh_left->start;
181                 g_tree_remove(bad_tree,neigh_left);
182                 g_mem_chunk_free(node_memchunk,neigh_left);
183                 }
184         if ((neigh_right=g_tree_search(bad_tree,(GCompareFunc)search_start,&end  ))) {
185                 g_assert(neigh_right->start==key->end);
186                 key->end=neigh_right->end;
187                 g_tree_remove(bad_tree,neigh_right);
188                 g_mem_chunk_free(node_memchunk,neigh_right);
189                 }
190         g_tree_insert(bad_tree,key/*key*/,NULL/*value*/);
191         stat_bads+=(end-start);
192 }
193
194 static void node_add_todo(ext2_loff_t start,ext2_loff_t end)
195 {
196         if (start>=end)
197                 return;
198         g_tree_insert(todo_tree,node_build(start,end)/*key*/,NULL/*value*/);
199         stat_todo_hunks++;
200 }
201
202 static gint traverse_fetchleftkey(gpointer key,gpointer value/*unused*/,gpointer *keyp/*data*/)
203 {
204         *keyp=key;
205         return TRUE;
206 }
207
208 static time_t stat_timelast;
209
210 static void stat_print(void)
211 {
212 time_t now;
213
214         if (stat_timelast==(now=time(NULL)))
215                 return;
216         stat_timelast=now;
217         fprintf(stderr,"@%llu/%llu,TODO=%llu,bad=%llu,largest=%llu,hunks=%llu %10s\r",
218                         (unsigned long long)stat_lastread,
219                         (unsigned long long)src_len,
220                         (unsigned long long)stat_todo,
221                         (unsigned long long)stat_bads,
222                         (unsigned long long)stat_largest,
223                         (unsigned long long)stat_todo_hunks,
224                         "");
225         fflush(stderr);
226 }
227
228 static ext2_loff_t simulate_bads=0;
229
230 static void user_input(ext2_loff_t pending_start,ext2_loff_t pending_end)
231 {
232 fd_set rfds;
233 struct timeval tv;
234 char buf[LINE_MAX];
235
236         FD_ZERO(&rfds);
237         FD_SET(0,&rfds);
238         tv.tv_sec=0;
239         tv.tv_usec=0;
240         if (select(1,&rfds,NULL,NULL,&tv)<=0)
241                 return;
242         if (buf!=fgets(buf,sizeof(buf),stdin)) {
243                 fprintf(stderr,"Weird stdin in user_input, assumed line \"f\"\n");
244                 strcpy(buf,"f");
245                 }
246         stat_timelast=0;        /* reprint statistics line immeadiately */
247         switch (*buf) {
248                 case 'f':
249                         node_add_todo(pending_start,pending_end);
250                         finish();       /* finish(): G_GNUC_NORETURN */
251                         /* NOTREACHED */
252                 case 'b': {
253 int bads=atoi(buf+1);
254
255                         simulate_bads+=(bads>=1 ? bads : 1);
256                         } break;
257                 default:
258                         fprintf(stderr,"Unrecognized key '%c': 'f'=finish(), 'b[<num>]'=simulate bads\n",
259                                         (isprint(*buf) ? *buf : '?'));
260                 }
261 }
262
263 static void write_dst(ext2_loff_t start,ext2_loff_t end,const void *buf)
264 {
265 size_t len=(end-start)*BLOCK;
266 ssize_t gotssize;
267 ext2_loff_t gotext2_loff;
268
269         if (dst_fd==-1)
270                 return;
271         if (start*BLOCK!=(gotext2_loff=ext2fs_llseek(dst_fd,start*BLOCK,SEEK_SET))) {
272                 fprintf(stderr,"ext2fs_llseek(\"%s\",%llu)=%lld: %m\n",dst_name,(unsigned long long)(start*BLOCK),
273                                 (signed long long)gotext2_loff);
274                 exit(EXIT_FAILURE);
275                 }
276         if (len!=(gotssize=write(dst_fd,buf,len))) {
277                 /* see README/"Linux kernel flaw" */
278                 /* no "start==src_len-1" checking here, we may write larger blocks! */
279                 if (gotssize==((src_len&~1)-start)*BLOCK
280                                 /* -1 instead of 0 is given when no bytes were written */
281                                 || (gotssize==-1 && start==src_len-1 && end==src_len))
282                         return; /* suppress error message */
283
284                 fprintf(stderr,"write(\"%s\",@%llu-@%llu=%llu=%lluB)=%lld: %m\n",dst_name,
285                                 (unsigned long long)start,
286                                 (unsigned long long)end,
287                                 (unsigned long long)(end-start),
288                                 (unsigned long long)((end-start)*BLOCK),
289                                 (  signed long long)gotssize);
290                 exit(EXIT_FAILURE);
291                 }
292 }
293
294 static void range_zero(ext2_loff_t start,ext2_loff_t end)
295 {
296 static const unsigned char buf_zero[BUF_ZERO_BLOCKS*BLOCK];
297
298         g_return_if_fail(start<end);
299
300         while (start<end) {
301                 ext2_loff_t mid=((end-start)>BUF_ZERO_BLOCKS ? start+BUF_ZERO_BLOCKS : end);
302                 g_assert(mid>=start);
303                 g_assert(mid<=end);
304
305                 write_dst(start,mid,buf_zero);
306
307                 start=mid;
308                 }
309 }
310
311 static void process(ext2_loff_t start,ext2_loff_t end)
312 {
313 unsigned char block_buf[BLOCK];
314 ext2_loff_t bads;
315 ext2_loff_t gotext2_loff;
316 ssize_t gotssize;
317
318         g_return_if_fail(start<end);
319         g_return_if_fail(end<=src_len);
320
321 #ifdef DEBUG
322         fprintf(stderr,"process([%llu,%llu])\n",(unsigned long long)start,(unsigned long long)end);
323 #endif
324
325 restart:        /* continues when the block was successfuly read */
326         stat_print();
327         user_input(start,end);
328
329         stat_lastread=start;
330         if (start*BLOCK!=(gotext2_loff=ext2fs_llseek(src_fd,start*BLOCK,SEEK_SET))) {
331                 fprintf(stderr,"ext2fs_llseek(\"%s\",%llu)=%lld: %m\n",src_name,(unsigned long long)(start*BLOCK),
332                                 (signed long long)gotext2_loff);
333                 exit(EXIT_FAILURE);
334                 }
335         stat_todo--;    /* for the forthcoming read() */
336         if (!simulate_bads && (sizeof(block_buf)==(gotssize=read(src_fd,block_buf,sizeof(block_buf)))
337                                         /* see README/"Linux kernel flaw" */
338                                         || (start==src_len-1 && gotssize==((src_len&~1)-start)*BLOCK))
339                         ) {
340                 /* success read */
341                 if (gotssize==sizeof(block_buf))
342                         write_dst(start,start+1,block_buf);
343                 else {
344                         /* handle README/"Linux kernel flaw" */
345                         fprintf(stderr,"WARNING: Ignored disk-last sector read failure; see README/\"Linux kernel flaw\"\n");
346                         range_zero(start,start+1);
347                         }
348                 start++;
349                 if (start>=end) /* finished */
350                         return;
351                 goto restart;
352                 }
353
354         /* failed read */
355         if (!simulate_bads)
356                 bads=1; /* one block was detected */
357         else {
358                 bads=simulate_bads;
359                 simulate_bads=0;
360                 }
361         if (start+bads>src_len)
362                 bads=src_len-start;
363         node_add_bad(start,start+bads);
364         start+=bads;
365         while (start<end) {
366 ext2_loff_t mid=(start+end)/2;
367
368                 g_assert(mid<end);
369                 node_add_todo(mid,end);
370                 end=mid;
371                 }
372 }
373
374 static void finish(void)
375 {
376 struct range *todo_linear=tree_linearize(todo_tree),*todol;
377 struct range *bad_linear =tree_linearize(bad_tree ),*badl ;
378
379         linear_join(todo_linear,(GCompareFunc)range_compare_diskorder);
380
381         fputc('\n',stderr);
382         for (todol=todo_linear,badl=bad_linear;
383                         todol->start<=src_len || badl->start<=src_len;) {
384 ext2_loff_t start,end;
385
386                 g_assert(badl->start!=todol->start);
387                 if (badl->start<todol->start) {
388                         g_assert(todol->start>=badl->end);
389                         if (todol->start==badl->end) {
390                                 printf("%llu-%llu\t;BAD=%llu, TODO(@%llu-@%llu)=%llu",
391                                                 (unsigned long long)(start=badl->start),
392                                                 (unsigned long long)(end=todol->end),
393                                                 (unsigned long long)(badl->end-badl->start),
394                                                 (unsigned long long)todol->start,
395                                                 (unsigned long long)todol->end,
396                                                 (unsigned long long)(todol->end-todol->start));
397                                 todol++;
398                                 }
399                         else
400                                 printf("%llu-%llu\t;BAD=%llu",
401                                                 (unsigned long long)(start=badl->start),
402                                                 (unsigned long long)(end=badl->end),
403                                                 (unsigned long long)(badl->end-badl->start));
404                         badl++;
405                         }
406                 else {
407                         g_assert(badl->start>todol->start);
408                         printf("%llu-%llu\t;TODO=%llu",
409                                         (unsigned long long)(start=todol->start),
410                                         (unsigned long long)(end=todol->end),
411                                         (unsigned long long)(todol->end-todol->start));
412                         todol++;
413                         }
414                 fflush(stdout);
415                 range_zero(start,end);
416                 putchar('\n');
417                 }
418         exit(EXIT_SUCCESS);
419 }
420
421 int main(int argc,char **argv)
422 {
423         /* prevent block-buffering such as: badblock-guess.c|tee x.log */
424         setlinebuf(stdout);
425         setlinebuf(stderr);
426
427         if (argc!=2 && argc!=3) {
428                 fprintf(stderr,"\
429 badblock-guess, Copyright (C) 2002 Jan Kratochvil <short@ucw.cz>\n\
430 $Id$\n\
431 badblock-guess comes with ABSOLUTELY NO WARRANTY.\n\
432 This is free software, and you are welcome to redistribute it\n\
433 under certain conditions.\n\
434 \n\
435 Syntax: badblock-guess <src_dev> [<dst_dev (OVERWRITTEN & DESTROYED!!!)>]\n\
436 \n");
437                 exit(EXIT_FAILURE);
438                 }
439         if (-1==(src_fd=open64((src_name=argv[1]),O_RDONLY|O_BINARY))) {
440                 fprintf(stderr,"open64(\"%s\",O_RDONLY|O_BINARY): %m\n",src_name);
441                 exit(EXIT_FAILURE);
442                 }
443         if (argv[2] && -1==(dst_fd=open64((dst_name=argv[2]),O_WRONLY|O_BINARY))) {
444                 fprintf(stderr,"open64(\"%s\",O_WRONLY|O_BINARY): %m\n",dst_name);
445                 exit(EXIT_FAILURE);
446                 }
447         if (dst_fd!=-1) {
448 struct stat src_stat,dst_stat;
449
450                         if (fstat(src_fd,&src_stat)) {
451                                 fprintf(stderr,"fstat(\"%s\"): %m\n",src_name);
452                                 exit(EXIT_FAILURE);
453                                 }
454                         if (fstat(dst_fd,&dst_stat)) {
455                                 fprintf(stderr,"fstat(\"%s\"): %m\n",dst_name);
456                                 exit(EXIT_FAILURE);
457                                 }
458       if ((S_ISBLK(src_stat.st_mode) && S_ISBLK(dst_stat.st_mode) && src_stat.st_rdev==dst_stat.st_rdev)
459                                         || (src_stat.st_dev==dst_stat.st_dev && src_stat.st_ino==dst_stat.st_ino)) {
460                                 fprintf(stderr,"Refusing to operate on two identical devs (src=\"%s\" and dst=\"%s\")\n",src_name,dst_name);
461                                 exit(EXIT_FAILURE);
462                                 }
463                 }
464         if (ext2fs_get_device_size(src_name,BLOCK,&src_lenblk)) {
465                 fprintf(stderr,"ext2fs_get_device_size(\"%s\",%d,...): %m\n",src_name,BLOCK);
466                 exit(EXIT_FAILURE);
467                 }
468         src_len=src_lenblk;
469         if (src_len<=0) {
470                 fprintf(stderr,"\"%s\" length %llu <=0\n",src_name,(unsigned long long)src_len);
471                 exit(EXIT_FAILURE);
472                 }
473         if (dst_fd!=-1) {
474                 if (ext2fs_get_device_size(dst_name,BLOCK,&dst_lenblk)) {
475                         fprintf(stderr,"ext2fs_get_device_size(\"%s\",%d,...): %m\n",dst_name,BLOCK);
476                         exit(EXIT_FAILURE);
477                         }
478                 dst_len=dst_lenblk;
479                 if (dst_len<=0) {
480                         fprintf(stderr,"\"%s\" length %llu <=0\n",dst_name,(unsigned long long)dst_len);
481                         exit(EXIT_FAILURE);
482                         }
483                 if (dst_len<src_len) {
484                         fprintf(stderr,"WARNING: Ignoring exceeding trailing data of \"%s\" from %llu up to its end %llu!\n",
485                                         src_name,(unsigned long long)dst_len,(unsigned long long)src_len);
486                         src_len=dst_len;
487                         src_lenblk=dst_lenblk;
488                         }
489                 }
490
491         node_memchunk=g_mem_chunk_new("node_memchunk",sizeof(struct range)/*atom_size*/,
492                         0x1000 /*are_size: 64KB*/,G_ALLOC_AND_FREE);
493         todo_tree=g_tree_new((GCompareFunc)range_compare);
494         bad_tree=g_tree_new((GCompareFunc)range_compare_diskorder);
495         node_add_todo(0,src_len);
496         stat_todo=src_len;
497
498         for (;;) {
499 struct range *left=NULL;        /* key */
500
501                 g_tree_traverse(todo_tree,(GTraverseFunc)traverse_fetchleftkey,G_IN_ORDER,&left/*data*/);
502                 if (!left)      /* done */
503                                 break;
504 #ifndef G_DISABLE_ASSERT
505                 {
506                         gint nnodes_preremove=g_tree_nnodes(todo_tree);
507 #endif
508                 g_tree_remove(todo_tree,left/*key*/);
509 #ifndef G_DISABLE_ASSERT
510                         g_assert(g_tree_nnodes(todo_tree)==nnodes_preremove-1);
511                         }
512 #endif
513                 stat_largest=(left->end-left->start);
514                 process(left->start,left->end);
515                 g_mem_chunk_free(node_memchunk,left);
516                 stat_todo_hunks--;
517                 }
518         finish();       /* finish(): G_GNUC_NORETURN */
519         /* NOTREACHED */
520 }