[comp.sources.misc] dusort: size sort output of "du"

conor@goose.STANFORD.EDU (Conor Rafferty) (09/21/87)

Title:
dusort: size sort output of "du"

Description:
This shell script is useful for finding out where most of the
disk space in your account is tied up. It takes the output of du
and sorts the directories by size. No big trick, except that
typically you want the subdirectories of a directory to move
with their common parent.
Compare:

$ du emacs		      | $ du emacs |sort -n
105     emacs/src/port 	      |17      emacs/shortnames
2821    emacs/src      	      |28      emacs/lisp/term
28      emacs/lisp/term	      |68      emacs/lisp/.backup
68      emacs/lisp/.backup    |105     emacs/src/port
1606    emacs/lisp	      |229     emacs/man
855     emacs/etc	      |635     emacs/info
635     emacs/info	      |855     emacs/etc
229     emacs/man	      |1606    emacs/lisp
17      emacs/shortnames      |2821    emacs/src
6193    emacs		      |6193    emacs

with:
$ du emacs|dusort             |$ du emacs |dusort -t
6193    emacs   	      |   /emacs(6193)
2821         /src             |         /src(2821)
105              /port        |             /port(105)
1606         /lisp            |         /lisp(1606)
68                /.backup    |              /.backup(68)
28                /term	      |              /term(28)
855          /etc             |         /etc(855)
635          /info            |         /info(635)
229          /man             |         /man(229)
17           /shortnames      |         /shortnames(17)

The -t option is for Gnumacs' selective-display mode.

BUGS:
Filenames are assumed to have only characters greater than space.
"du a/b/c/d |dusort" prints bogus lines for a,b and c.
Uses an awk-sort-awk sandwich followed by an awk formatter.
300 directories take about 25 seconds on a Sun, of which about
3 seconds is spent setting up the pipeline.

--------------------CUT-HERE--------------------
#!/bin/sh
#
# dusort: sort a "du" listing by directory size, keeping each
# subdirectory grouped under its parent directory.
#
# usage:  du | dusort [-t] [file ...]
#   -t    print each entry as "path(size)" instead of "size<TAB>path"
#   file  read the du listing from these files instead of standard input
#
# Each input line is expected to be "size path"; only the first two
# whitespace-separated fields are used, so paths containing blanks are
# not supported.

dusort() {
    tform=0

    # Separate the -t flag from the file operands.  The operands are
    # re-appended to the positional parameters one by one so that names
    # containing blanks or glob characters reach awk intact.
    for arg
    do
        shift
        case $arg in
        -t) tform=1 ;;
        *)  set -- "$@" "$arg" ;;
        esac
    done

    # Stage 1: build a compound key so that subdirectories move with
    # their parent.  Output line: path, size of every leading prefix of
    # the path (outermost first, ending with the path itself), then the
    # size of the path once more.
    awk '
    # Lines with fewer than two fields carry no size/path pair.
    NF >= 2 { size[$2] = $1 }
    END {
        for (path in size) {
            printf "%s ", path
            len = length(path)
            for (j = 1; j <= len; ) {
                # advance j to the next "/" or one past the end
                while (j <= len && substr(path, j, 1) != "/")
                    j++
                prefix = substr(path, 1, j - 1)
                j++
                # Test membership before reading: a plain lookup of a
                # prefix that is missing from the listing would create
                # a new element in the array being iterated.
                key = (prefix in size) ? size[prefix] : 0
                printf "%d ", key
            }
            # the size of the entry itself once more, so that it
            # compares ahead of its children
            printf "%d\n", size[path]
        }
    }' "$@" |

    # Stage 2: reverse numeric sort on the key columns (fields 2 to 9,
    # so at most eight key columns take part in the ordering).  The -k
    # form replaces the obsolescent "+pos -pos" key syntax.
    LC_ALL=C sort -r -n -k 2,2 -k 3,3 -k 4,4 -k 5,5 -k 6,6 -k 7,7 -k 8,8 -k 9,9 |

    # Stage 3: drop the key columns; print path and size in one of the
    # two output flavors.
    awk -v tform="$tform" '{
        if (tform)
            printf "%s(%d)\n", $1, $NF
        else
            printf "%d\t%s\n", $NF, $1
    }' |

    # Stage 4 (ind): indent directories.  Everything between the last
    # whitespace character of the line and the last "/" after it is
    # replaced by the same number of blanks, limited by the length of
    # the blank string below.
    awk '
    BEGIN { blank = "                                                          " }
    {
        # s: position of the last character that is not greater than
        #    a space (0 when there is none)
        for (s = length($0); s > 0 && substr($0, s, 1) > " "; s--)
            ;
        # e: position of the last "/" after s, or s+1 when there is none
        for (e = length($0); substr($0, e, 1) != "/" && e > s + 1; e--)
            ;
        print substr($0, 1, s) substr(blank, 1, e - s - 1) substr($0, e)
    }
    '
}

dusort "$@"

--------------------CUT-HERE--------------------
            conor rafferty    The command
 conor@sierra.stanford.edu       1,$s/^\([^,]*\), *\(.*\)/\2 \1/
decwrl!shasta!conor@sierra    although hard to read, does the job.
                           --- Brian W. Kernighan "Advanced Editing on Unix"