diskhog-0.8/0040775000317400031740000000000010004237410012734 5ustar kpattonkpattondiskhog-0.8/CHANGELOG0100664000317400031740000000256410002015452014150 0ustar kpattonkpattondiskhog-0.3 Initial release diskhog-0.3.1 Misspelled CATEGORY and the script did not appear in the UNIX/System_administration category as desired diskhog-0.4 (David Kovar) Added "-d 0" option to sort by total disk space used. Added "-t" option to report only the top ten users. Added "-w" to turn on warning messages. Added "-e N" to only report users whose total exceeds N megabytes in the selected period. Added "-s <dir>" to start at <dir> rather than cwd. Turned off output buffering to ensure that output to a file or a pipe appeared in the correct order. diskhog-0.5 Added "--exclude pattern" to skip unwanted directories TODO I think I may allow the user to change the search time from the hard coded mtime to any of mtime, atime or ctime... DCK - Reporting of largest directory based on current sort does not work. Only works for default sort. KP I believe that the issue with the largest directory is by design. Largest directory is printed for specified age category or default of oldest files. Wed Jan 14 15:49:02 PST 2004 KP Changed --exclude to --filter Changed the argument required for -d option to "s" for string from "i" for integer Added reporting on largest directory for "-d t" option Fri Jan 16 09:10:07 PST 2004 DCK Added e=s for Getopts Added print SUMMARY "===============" so | mail looks correct diskhog-0.8/README0100664000317400031740000000022607776575153013650 0ustar kpattonkpattonA utility to recurse a directory and report on per user space usage. Additionally it reports which directory a user has most bytes in of a given age. 
#!/usr/bin/perl
# File: diskhog-0.8/diskhog-0.8.1
#
#Walk a directory tree and report, per user, how many bytes they own, split
#into age buckets by modification time (<=30, <=60, <=90 and >90 days).  For
#every user the directory holding most of their bytes in each age bucket is
#remembered so it can be printed next to the totals.
#
#Each entry's size is charged to the directory visited most recently by
#File::Find.  The parent of every directory is recorded, but nothing in this
#script rolls directory totals up into their parents.

=head1 NAME

diskhog

=head1 SYNOPSIS

Print a nice report of the per user space usage.  Additionally, show
where the most space is used of a given age category.  Current directory
is recursed.

Optional "-h" prints a usage message.

Optional "-t" only shows the top 10 users in the selected sort order.

Optional "-e N" only shows the users whose total for the specified
period exceeds N MB.

Optional "-w" turns on warning messages.

Optional "-s <dir>" will start the process at <dir> rather than at the
current working directory.

Optional "-d num" where num is [t 30 60 90] changes the default sort
order to one of the ages specified.  Ages are in days.

    diskhog -h                 Usage help
    diskhog -d t               Sort by total disk space used
    diskhog -d 30              Sort by 30 day modification time
    diskhog -d 60              Sort by 60 day modification time
    diskhog -d 90              Sort by 90 day modification time
    diskhog --filter pattern   Exclude pattern from search
    diskhog                    Sort by >90 day modification time

=head1 Description

This program will recurse a directory and gather the usage statistics
on a per user basis.  The report generated consists of six columns and
a total of bytes in use for all users.

    Login   The users login ID.
    <=30    Total of bytes owned by the user that were last modified
            within the last 30 days
    <=60    Same as above for files last modified 30 to 60 days ago
    <=90    Same as above for files last modified 60 to 90 days ago
    >90     Byte total of files older than 90 days
    Total   Total of bytes for all ages

By default the report is ordered by the user with the most bytes in the
>90 column.  The ordering can be changed with the -d switch.

As a convenience, the largest directory that contains the oldest amount
of files is also given.  This should save the user or system
administrator from looking up what directory is taking up all the space.

=head1 PREREQUISITES

This program requires Number::Format .

=pod SCRIPT CATEGORIES

UNIX/System_administration

=head1 AUTHOR

Kirk Patton 01/05/03
E-Mail kirk_patton@yahoo.com

=head1 BUGS

The program gathers a lot of statistics and can consume a lot of memory
on large file systems.  Additionally, it was written on a *nix system
and is not portable.  There may be other problems, but it seems to work.

=head1 COPYRIGHT

This program is free software.  You may copy or redistribute it under
the same terms as Perl itself.

=cut

use strict;
use warnings;

#Modules to use
use File::Find;
use Cwd;
use Number::Format qw(:subs);
use Getopt::Long;

#Age buckets, youngest first, and the upper age limit (in seconds) of each
#bounded bucket.  Anything older than the d90 limit is 'ancient'.
my $DAY          = 60 * 60 * 24;
my @AGES         = qw(d30 d60 d90 ancient);
my %age_Limit    = ( d30 => $DAY * 30, d60 => $DAY * 60, d90 => $DAY * 90 );
my $current_Time = time;

#Position of each sort key inside a user_Tally() line ("login d30 d60 d90
#ancient total") and the wording used for it in the report heading.
my %column_Of = ( d30 => 1, d60 => 2, d90 => 3, ancient => 4, total => 5 );
my %heading_Of = (
    ancient => "files older than 90 days.\n\n",
    d30     => "files between 0 and 30 days old.\n\n",
    d60     => "files between 30 and 60 days old.\n\n",
    d90     => "files between 60 and 90 days old.\n\n",
    total   => "files of any age.\n\n",
);

#State shared between the File::Find callback and the report
my $top_Directory;              #Absolute path the scan started from
my $absolute_Path;              #Absolute path of the entry being visited
my $current_Directory_Inode;    #Id of the directory visited most recently
my %dirs;                       #All statistics, see directory_Statistics
my %inodes;                     #Ids of entries already counted
my %uid_To_Login;
my %login_To_Uid;
my @user_Summary;

#Command line options, filled in by process_Arguments
our ( $opt_h, $opt_d, $opt_t, $opt_w, $opt_s, $opt_e );
our @opt_filter;

#Fields read by the formats at the bottom of the file.  They are kept apart
#from the age limits above so that filling in a report line can never
#change how files are classified.
our ( $user, $d30, $d60, $d90, $ancient, $total, $grand_Total );
our ( $directory, $dir30, $dir60, $dir90, $dirold, $dirtotal );
our ( $label, $size, $dir );

#Turn off buffering.  The report is written through duplicates of STDOUT
#that are unbuffered, so STDOUT itself has to be unbuffered as well or the
#heading can come out after the report when writing to a file or a pipe.
$| = 1;

#Process command line arguments
process_Arguments();

if ($opt_s) {
    chdir($opt_s) || die "Error, cannot chdir to $opt_s.\n";
}

$top_Directory = cwd();    #Save our starting point

find( \&wanted, './' );

@user_Summary = user_Tally();

print_Summary();

sub sort_Mode {
    #Translate the -d argument into the name of the column to sort on:
    #'ancient' when -d is absent (or false), 'total' for "t", and 'd30',
    #'d60' or 'd90' for 30, 60 or 90.  Returns undef for anything else.
    #The comparison is done on strings; a numeric comparison would treat
    #every non-numeric argument as equal to "t".
    return 'ancient' unless $opt_d;
    return 'total' if $opt_d eq 't';
    return "d$opt_d" if $opt_d =~ /^(?:30|60|90)$/;
    return;
}

sub wanted {
    #File::Find callback: account for one file system entry.

    #Save the full path without doing an expensive cwd.  /s lets the
    #capture run across a newline embedded in a file name.
    my ($relative) = $File::Find::name =~ /^\.(.*)/s;
    $relative = '' unless defined $relative;
    $absolute_Path = $top_Directory . $relative;

    my @stat = stat($absolute_Path);
    unless (@stat) {
        if ($opt_w) {
            #If we are here, it is because we could not stat the file
            print STDERR "WARNING Could not stat $absolute_Path \n";

            #Run ls without a shell so that blanks and shell
            #metacharacters in the file name are passed through untouched
            my $file_Info = '';
            if ( open( my $ls, '-|', 'ls', '-l', $absolute_Path ) ) {
                local $/;
                $file_Info = <$ls>;
                close($ls);
            }
            $file_Info = '' unless defined $file_Info;
            print STDERR "$file_Info \n";
        }
        return;
    }

    #Remember the file type now, while "_" still refers to the stat above
    my $is_Directory = -d _;

    #Skip excluded pattern
    foreach my $pattern (@opt_filter) {
        return if $File::Find::name =~ m=/$pattern=;
    }

    my ( $dev, $inode, $uid, $size, $mtime ) = @stat[ 0, 1, 4, 7, 9 ];

    #Inode numbers are only unique within one file system, so the device
    #number is made part of the id
    my $file_Id = "$dev:$inode";

    #Make sure hard links are ignored
    return if is_Hard_Link($file_Id);
    update_Visited($file_Id);

    #Maintain a uid to user table.  A uid without a passwd entry is
    #reported under its number instead of being left out of the report.
    unless ( exists $uid_To_Login{$uid} ) {
        my $login = ( getpwuid($uid) )[0];
        $login = $uid unless defined $login && length $login;
        $uid_To_Login{$uid} = $login;
        $login_To_Uid{$login} = $uid unless exists $login_To_Uid{$login};
    }

    #Load hash with file sizes
    directory_Statistics( $file_Id, $uid, $size, $mtime, file_Age($mtime),
        $is_Directory );
    return;
}

sub is_Hard_Link {
    #Check file against list of already visited files.
    #Takes the id built in wanted(); returns true if it was seen before.
    my ($file_Id) = @_;
    return exists $inodes{$file_Id};
}

sub update_Visited {
    #Add id to visited table to help us skip files already seen
    my ($file_Id) = @_;
    $inodes{$file_Id} = 1;
    return;
}

sub directory_Statistics {
    #Add one entry to the statistics kept in %dirs:
    #  $dirs{<dir id>}{name_Tables}   id <-> path of a directory
    #  $dirs{<dir id>}{parent_Inode}  id of the directory's parent
    #  $dirs{<dir id>}{totals}        bytes per age, per user and overall
    #  $dirs{grand_Total}             bytes per user and age, and overall
    #  $dirs{users}{<uid>}{largest_Directory}{<age>}
    #                                 single pair: dir id => the user's bytes
    #
    #Arguments: id, uid, size, mtime and age bucket of the entry, plus an
    #optional flag telling whether the entry is a directory.  Without the
    #flag the result of the most recent stat is consulted.
    my ( $file_Id, $uid, $size, $mtime, $age, $is_Directory ) = @_;
    $is_Directory = -d _ unless defined $is_Directory;

    #Keep track of the actual directory name to id and id to directory
    #name tables
    if ($is_Directory) {
        $dirs{$file_Id}{name_Tables}{$file_Id}       = $absolute_Path;
        $dirs{$file_Id}{name_Tables}{$absolute_Path} = $file_Id;

        #Everything visited from here on is charged to this directory
        $current_Directory_Inode = $file_Id;

        #Save the parent id
        my @parent = stat("${absolute_Path}/..");
        $dirs{$file_Id}{parent_Inode} =
          @parent ? "$parent[0]:$parent[1]" : undef;
    }

    my $dir_Totals = $dirs{$current_Directory_Inode}{totals} ||= {};

    #Keep directory totals by age
    $dirs{grand_Total}{$uid}{$age} += $size;
    $dir_Totals->{$age} += $size;
    my $user_Bytes = ( $dir_Totals->{users}{$uid}{$age} += $size );

    #Keep track of the largest directory: replace the remembered one as
    #soon as the user's bytes in the current directory exceed it
    my $largest = $dirs{users}{$uid}{largest_Directory}{$age} ||= {};
    my ($big_Directory) = keys %$largest;
    my $big_Bytes = defined $big_Directory ? $largest->{$big_Directory} : 0;
    if ( $big_Bytes < $user_Bytes ) {
        %$largest = ( $current_Directory_Inode => $user_Bytes );
    }

    #Keep track of total of all ages for each directory
    $dir_Totals->{total} += $size;

    #Keep track of total of all ages per user per directory
    $dir_Totals->{users}{$uid}{total} += $size;

    #Keep a grand total per user
    $dirs{grand_Total}{$uid}{total} += $size;

    #Keep running total of "all" everything
    $dirs{grand_Total}{total} += $size;
    return;
}

sub file_Age {
    #Categorize the age of a file from its mtime.
    #Returns 'd30', 'd60', 'd90' or 'ancient'.
    my ($mtime) = @_;
    my $seconds_Old = $current_Time - $mtime;

    foreach my $age (qw(d30 d60 d90)) {
        return $age if $seconds_Old <= $age_Limit{$age};
    }
    return 'ancient';
}

sub user_Tally {
    #Build the list of per user summary lines, one string per user:
    #"login d30 d60 d90 ancient total", missing totals given as 0.
    my @summary;

    foreach my $uid ( keys %{ $dirs{grand_Total} || {} } ) {

        #The overall total is stored beside the uids; it has no login and
        #is skipped here
        my $login = $uid_To_Login{$uid};
        next unless defined $login && length $login;

        my $totals = $dirs{grand_Total}{$uid};
        push @summary, join ' ', $login,
          map { $totals->{$_} || 0 } qw(d30 d60 d90 ancient total);
    }
    return @summary;
}

sub print_Summary {
    #Print the report: one line per user in the selected sort order, each
    #followed by the user's largest directory for the sort column, or by
    #the largest directory of every age when sorting by total.
    my $mode = sort_Mode();
    $mode = 'ancient' unless defined $mode;

    my $column = $column_Of{$mode};
    my @sorted_User_Summary =
      sort { ( split ' ', $b )[$column] <=> ( split ' ', $a )[$column] }
      @user_Summary;

    print "\nDisk useage report for $top_Directory sorted by ";
    print $heading_Of{$mode};

    if ($opt_e) {
        print "Reporting only those users with more than ";
        print $opt_e / 1024 / 1024;
        print " MB of space in use.\n\n";
    }

    #Print a sorted summary for each user
    #Alias standard out for report; every alias has a format of its own
    open( SUMMARY, ">&STDOUT" )
      or die "Error, cannot duplicate STDOUT: $!\n";
    open( DIRECTORY_SUMMARY, ">&STDOUT" )
      or die "Error, cannot duplicate STDOUT: $!\n";
    open( BIG_DIR, ">&STDOUT" )
      or die "Error, cannot duplicate STDOUT: $!\n";

    # Turn off output buffering
    foreach my $handle ( \*SUMMARY, \*DIRECTORY_SUMMARY, \*BIG_DIR ) {
        my $old_fh = select($handle);
        $| = 1;
        select($old_fh);
    }

    my $t_count = 0;
    foreach my $line (@sorted_User_Summary) {
        ( $user, $d30, $d60, $d90, $ancient, $total ) = split( /\s+/, $line );

        if ($opt_t) {
            last if ++$t_count > 10;
        }

        if ($opt_e) {
            my %bytes_Of = (
                d30     => $d30,
                d60     => $d60,
                d90     => $d90,
                ancient => $ancient,
                total   => $total,
            );
            next if $bytes_Of{$mode} < $opt_e;
        }

        #Format data in engineering notation
        $d30         = format_bytes($d30);
        $d60         = format_bytes($d60);
        $d90         = format_bytes($d90);
        $ancient     = format_bytes($ancient);
        $total       = format_bytes($total);
        $grand_Total = format_bytes( $dirs{grand_Total}{total} || 0 );

        write SUMMARY;

        #We need to display the biggest directory owned by the user
        my $largest =
          $dirs{users}{ $login_To_Uid{$user} }{largest_Directory} || {};
        my ( %big_Id, %big_Bytes );
        foreach my $age (@AGES) {
            my ($id) = keys %{ $largest->{$age} || {} };
            $big_Id{$age}    = $id;
            $big_Bytes{$age} = defined $id ? $largest->{$age}{$id} : 0;
        }

        if ( $mode eq 'total' ) {
            #There is no tracking of largest directory except by age, so
            #print the largest directory of every age that has data,
            #biggest first.  The order is decided on the byte counts, not
            #on their formatted form.
            my @rows =
              sort { $big_Bytes{$b} <=> $big_Bytes{$a} }
              grep { $big_Bytes{$_} } @AGES;
            foreach my $age (@rows) {
                my $id = $big_Id{$age};
                $label = $age;
                $size  = format_bytes( $big_Bytes{$age} );
                $dir   = $dirs{$id}{name_Tables}{$id};
                write BIG_DIR;
            }
            print SUMMARY "=========================================\n";
            next;
        }

        #Format data
        $dir30  = format_bytes( $big_Bytes{d30} );
        $dir60  = format_bytes( $big_Bytes{d60} );
        $dir90  = format_bytes( $big_Bytes{d90} );
        $dirold = format_bytes( $big_Bytes{ancient} );

        #Total ages
        $dirtotal =
          format_bytes( $big_Bytes{d30} +
              $big_Bytes{d60} +
              $big_Bytes{d90} +
              $big_Bytes{ancient} );

        #Translate the directory id of the sort column to the proper name
        my $id = $big_Id{$mode};
        $directory = defined $id ? $dirs{$id}{name_Tables}{$id} : '';

        write DIRECTORY_SUMMARY;
    }
    return;
}

sub process_Arguments {
    #Process command line arguments
    #Print a usage summary if they are misused
    my $parsed = GetOptions(
        'filter=s@' => \@opt_filter,
        'h+'        => \$opt_h,
        'd=s'       => \$opt_d,
        't+'        => \$opt_t,
        'w+'        => \$opt_w,
        's=s'       => \$opt_s,
        'e=s'       => \$opt_e,
    );

    if ( !$parsed || $opt_h || !defined sort_Mode() ) {
        #NOTE(review): the upper part of this text was rebuilt from the
        #option list in the POD above - confirm the wording against the
        #released script.
        print <<EOF;

Usage: $0 [-h] [-t] [-w] [-e N] [-s <dir>] [-d 30|60|90|t] [--filter pattern]

Optional "-h" prints a usage message.

Optional "-t" only shows the top 10 users in the selected sort order.

Optional "-e N" only shows the users whose total for the specified
period exceeds N MB.

Optional "-w" turns on warning messages.

Optional "-d num" where num is [t 30 60 90] changes the default sort
order to one of the ages specified. Ages are in days.

Optional "--filter pattern" excludes pattern from the search.

Optional "-s <dir>" will start the process at <dir> rather than at the
current working directory.

The default is to sort by file's older than 90 days.

For more information "perldoc $0".

EOF
        exit;
    }

    # Convert megabytes to bytes to agree with stat
    if ($opt_e) {
        $opt_e = $opt_e * 1024 * 1024;
    }
    return;
}

format SUMMARY =
@<<<<<<<< @<<<<<<<< @<<<<<<<< @<<<<<<<< @<<<<<<<< @<<<<<<<<
$user,    $d30,     $d60,     $d90,     $ancient, $total
.

format SUMMARY_TOP =
Total bytes in use = @<<<<<<<<
$grand_Total
Login     <=30      <=60      <=90      >90       Total
.

format DIRECTORY_SUMMARY =
@*
$directory
          @<<<<<<<< @<<<<<<<< @<<<<<<<< @<<<<<<<< @<<<<<<<<
$dir30,   $dir60,   $dir90,   $dirold,  $dirtotal
====================================================================
.

format BIG_DIR =
@<<<<<< @<<<<<<< @*
$label, $size,   $dir
.