'=================================================================
' LISTDUPE.BAS by Brian McLaughlin, compile with PowerBASIC 3.x
'
' Searches C: drive for duplicate files and creates a file listing
' those duplicates, in the root dir of C: drive, named DUPEFILE.LST.
' A previous DUPEFILE.LST is kept as DUPEFILE.BAK.
'
' "Duplicate" here means "same file name in more than one place";
' file contents, sizes and dates are not compared.
'=================================================================
$CPU 80386            'make it faster
$LIB ALL OFF          'make it smaller
$FLOAT PROCEDURE      'if you have a co-pro, use NPX instead

DECLARE SUB GatherNames( DirName$(), File$(), FileDir%())
DECLARE SUB MakeDupeList( DirName$(), File$(), FileDir%())
DECLARE SUB PrintTwirlBar( Counter% )

DIM DirName$( 1 TO 1400 )     'handles up to 1400 directories
DIM FileName$( 1 TO 16384 )   'handles up to 16K file names
DIM FileDir%( 1 TO 16384 )    'indexes file names to dir names
SHARED gFileTotal%            'a global variable used in both SUBs

'Wait for the timer to tick over so the measurement starts on a tick
'boundary.  "<>" rather than ">" so the wait cannot stall when TIMER
'wraps back to zero at midnight.
Synch! = TIMER
DO
  Start! = TIMER
LOOP UNTIL Start! <> Synch!

GatherNames DirName$(), FileName$(), FileDir%()
MakeDupeList DirName$(), FileName$(), FileDir%()

Elapsed! = TIMER - Start!
IF Elapsed! < 0 THEN Elapsed! = Elapsed! + 86400   'run crossed midnight
PRINT: PRINT "Elapsed time:"; Elapsed!
PRINT: PRINT "To find the file DUPEFILE.LST, look in root directory."
END

'=======================================================
' GatherNames
'
' Walks the directory tree breadth-first starting at C:\.
'   DirName$()  receives every directory path found (with trailing "\")
'   File$()     receives every file name found (name only, no path)
'   FileDir%()  receives, for each entry of File$(), the index into
'               DirName$() of the directory that holds the file
' gFileTotal% (shared) is set to the number of entries stored in File$().
' Entries that would not fit in the caller's arrays are dropped and a
' warning is printed.
'=======================================================
SUB GatherNames( DirName$(), File$(), FileDir%()) STATIC

  AllSpec$   = "*.*"
  Slash$     = "\"
  DirAttrib% = &H10
  AllAttrib% = &H10 OR &H2 OR &H4  'attrib for dirs, system and hidden files

  MaxDirs%   = UBOUND( DirName$(1) )   'capacity of the caller's arrays
  MaxFiles%  = UBOUND( File$(1) )
  Truncated% = 0                       'STATIC sub: reset on every call

  gFileTotal% = 0
  DirTotal%   = 1
  DirIndex%   = 1
  DirName$( 1 ) = "C:\"            'to search current drive, just use "\"

  PRINT "Collecting filenames... ";    'give 'em something to look at

  DO                                   'directory-switching loop
    ThisDir$ = DirName$( DirIndex% )   'search next dir
    PrintTwirlBar DirIndex%

    'look for *.* in that dir; includes subdirs, hidden and system files
    ThisName$ = DIR$( ThisDir$ + AllSpec$, AllAttrib% )

    DO WHILE LEN( ThisName$ )          'a null name ends this directory
      'Skip the "." and ".." entries DOS reports inside subdirectories;
      'queuing them as directories would make the walk loop forever.
      IF LEFT$( ThisName$, 1 ) <> "." THEN
        IF ( ATTRIB( ThisDir$ + ThisName$ ) AND DirAttrib% ) THEN
          IF DirTotal% < MaxDirs% THEN
            INCR DirTotal%             'queue subdirectory for a later pass
            DirName$( DirTotal% ) = ThisDir$ + ThisName$ + Slash$
          ELSE
            Truncated% = -1
          END IF
        ELSEIF gFileTotal% < MaxFiles% THEN
          INCR gFileTotal%
          File$( gFileTotal% )    = ThisName$   'saves name of file
          FileDir%( gFileTotal% ) = DirIndex%   'index to pathname
        ELSE
          Truncated% = -1
        END IF
      END IF
      ThisName$ = DIR$                 'next entry in the same directory
    LOOP

    INCR DirIndex%
  LOOP UNTIL DirIndex% > DirTotal%

  IF Truncated% THEN
    PRINT
    PRINT "Warning: too many files or directories; list is incomplete."
  END IF

END SUB

'========================================================
' MakeDupeList
'
' Sorts File$() (carrying FileDir%() along as a tag array) so equal
' names become adjacent, then writes every group of equal names, with
' the directories they were found in, to C:\DUPEFILE.LST.  The first
' line of the file carries the total count, which is patched in after
' the scan because it is not known until then.
'========================================================
SUB MakeDupeList( DirName$(), File$(), FileDir%()) STATIC

  CRLF$   = CHR$( 13, 10 )
  Indent$ = " "
  Found$  = " was found in:" + CRLF$ + Indent$
  Header$ = "Total duplicate files found on C: drive:"
  Spacer$ = SPACE$( 8 )    'room for STR$ of any integer (at most 6 chars)
                           'so the patched-in count cannot hit the CR/LF
  Accum$  = Header$ + Spacer$ + CRLF$ + CRLF$

  PRINT: PRINT "Sorting files..."
  IF gFileTotal% > 1 THEN
    ARRAY SORT File$( 1 ) FOR gFileTotal%, TAGARRAY FileDir%()
  END IF

  OutFile$ = "C:\DUPEFILE.LST"
  BakFile$ = "C:\DUPEFILE.BAK"
  IF LEN( DIR$( BakFile$ ) ) THEN KILL BakFile$
  IF LEN( DIR$( OutFile$ ) ) THEN NAME OutFile$ AS BakFile$
  OPEN OutFile$ FOR BINARY AS #1

  Dupes%    = 0
  ThisFile% = 1
  PRINT "Collecting duplicates... ";

  'Each pass of the outer loop handles one run of equal names that
  'starts at GroupStart%.  With fewer than two names left there is
  'nothing to compare, so the loop ends without reading past the
  'last collected entry.
  DO WHILE ThisFile% < gFileTotal%
    GroupStart% = ThisFile%
    FileOne$    = File$( GroupStart% )
    INCR ThisFile%

    DO WHILE ThisFile% <= gFileTotal%
      PrintTwirlBar ThisFile%          'give the user something to stare at
      IF File$( ThisFile% ) <> FileOne$ THEN EXIT DO

      Dir2$ = DirName$( FileDir%( ThisFile% ) ) + CRLF$
      IF ThisFile% = GroupStart% + 1 THEN
        'first match in this run: emit the name and both locations
        Dir1$  = DirName$( FileDir%( GroupStart% ) ) + CRLF$
        Accum$ = Accum$ + FileOne$ + Found$ + Dir1$ + Indent$ + Dir2$
        INCR Dupes%, 2                 'count both this and previous file
      ELSE
        Accum$ = Accum$ + Indent$ + Dir2$
        INCR Dupes%
      END IF

      IF LEN( Accum$ ) > 16384 THEN    'needed to help avoid running
        PUT$ #1, Accum$                'out of RAM if many dupes found
        Accum$ = ""
      END IF
      INCR ThisFile%
    LOOP

    'ThisFile% now indexes the first name that did not match (or is one
    'past the end).  If the run held at least one match, close the
    'group with a blank line.
    IF ThisFile% > GroupStart% + 1 THEN Accum$ = Accum$ + CRLF$
  LOOP

  TotalDupes$ = STR$( Dupes% )
  PUT$ #1, Accum$                      'write the dupe list
  SEEK #1, LEN( Header$ )              'go back and fill in
  PUT$ #1, TotalDupes$                 'the number of duplicate files
  CLOSE #1

END SUB

'===============================
' PrintTwirlBar
'
' Draws one frame of a four-frame spinner at the current cursor
' position and puts the cursor back, so the next call overdraws the
' same cell.  Counter% selects the frame (Counter% MOD 4).
'===============================
SUB PrintTwirlBar( Counter% )

  Row% = CSRLIN
  Col% = POS
  PRINT MID$( "|/-\", ( Counter% MOD 4 ) + 1, 1 );
  LOCATE Row%, Col%

END SUB