#!/usr/dist/bin/perl # # lookbibtex 1.22 # Look in to a bib file. # Comments to . # # Copyright (C) 1990 by John Heidemann # This is distributed under the GNU Public Licence, Version 1 (Feb 89). # See the Perl documentation for a copy of that license. # # 4-Oct-90 it is hacked together. # 19-Nov-90 Now it remembers "'s and join such lines. # It also removes nasty characters like {} from the search string. # 20-Nov-90 Umlaut accents handled correctly. # 28-Nov-90 A simple heuristic to handle multi-line fields with {}'s is added. # In addition, we compress all whitespace to single spaces in the # searched version. lookbibtex 1.1 # 4-Jan-91 Converted the -k option to -f, since field makes more sense than # keyword. lookbibtex 1.11 # 17-Jan-91 Added -s option to pass through strings, instead of ignoring them. # 31-May-91 ficus directory moved # 26-Aug-91 Documentation fixed. The environment variable LOOKBIBTEXFILE # will set the default bibtex file to use. # lookbibtex 1.12 released, posted to anonymous ftp at cs.ucla.edu. # 28-Aug-91 Usage string fixed at suggestion of # Henk P. Penning . # 5-Sep-91 Argument processing re-done (now matches grep, as it always # should have). Changes from Tim Wilson to # handle multiple bib files and select default bib files from BIBINPUTS. # lookbibtex 1.2 # 29-Jan-92 Bug reported by Dana Jacobsen : # "badkeys" are handled in a case sensitive manner. Fixed. # lookbibtex 1.21 # 25-Feb-92 Sigh. Bug fixes always make more bugs. Bug in last fix, fixed. # lookbibtex 1.22 # # This program relies on the convention that the closing } of a # bib entry is the only } in the first non-whitespace column, # and that the opening @ is also there. # $* = 1; # make searches on vars with imbedded newlines work $prog = substr($0,rindex($0,'/')+1); $badkeys = "string"; # keys to ignore (list in lowercase only) # # do argument processing # @files = (); # files to search $passthroughbad = 0; # -s flag undef ($pattern); # will be set below undef ($keyword); # may be set below sub remember_file { local ($file) = @_; local ($dev, $ino) = stat ($file); local ($key) = "$dev,$ino"; if (!defined($files{$key})) { $files{$key} = $file; push (@files, $file); }; # warn ("file $file ($key) remembered.\n"); }; while ($#ARGV >= 0) { if ($ARGV[0] eq "-s") { $passthroughbad = 1; } elsif ($ARGV[0] eq "-f" && $#ARGV >= 1) { $keyword = $ARGV[1]; shift (@ARGV); } elsif (defined($pattern)) { &remember_file ($ARGV[0]); } else { $pattern = $ARGV[0]; }; shift (@ARGV); }; if (!defined($pattern)) { die ("Usage: $prog [-s] [-f field] regexp [bibfile.bib ...]\n" . " Fields restricts the regexp search to that bibtex " . "field entry (author, etc.)\n" . " Default bibfile is $defaultfile, - indicates stdin.\n" . " Regexp is a Perl regexp.\n"); }; # # handle the keyword by modifying the pattern # if (defined($keyword)) { $pattern = "^\\s*${keyword}\\s*=.*${pattern}"; # print "pattern is $pattern\n"; }; # # Handle choosing default bib files: # Select anything from BIBINPUTS. # if ($#files == -1) { $searchpath = ($ENV{'BIBINPUTS'} || "."); foreach $dir (split(/:/, $searchpath)) { opendir(DIR, $dir) || do { warn "$prog: Can't open directory `$dir', skipping\n"; next; }; foreach $file (grep(/\.bib$/, readdir(DIR))) { &remember_file ($dir . "/" . $file); }; closedir(DIR); }; }; die ("$prog: no files on command line or in BIBINPUTS\n") if ($#files == -1); $manyfiles = ($#files > 0); # remember if to show filenames or not # # Certain keys we really want to ignore because # they're not bib entries. They're listed here. # @badkeys = split(/,/, $badkeys); foreach $i (@badkeys) { $badkeys{$i} = "bad"; # just make them defined }; # # To do searches right, we have to make everything # for a field on one line. # This routine does that, and also gets rid of {}'s # which tend to get in the way for searches. In the # same vein, it collapses all whitespace to single spaces. # # To know when to join lines, we use two simple heuristics: # is there are a odd number of "'s on a line, we must enter or exit # multi-line mode. If there are more {'s than }'s, we must enter, # and if there are more }'s than {'s we must exit (anything on # the first line is ignored). # sub printtosearch { local ($print) = @_; local ($search, $mode) = ("", 1); local ($opencurley, $closecurley) = (0,0); @lines = split(/\n/, $print); @lines[0] =~ s/{/ /; foreach $ln (@lines) { # remove and count curley brackets $opencurley = ($ln =~ s/[{]//g); $closecurley = ($ln =~ s/[}]//g); if ($opencurley-$closecurley < 0) { $mode = 1; } elsif ($opencurley-$closecurley > 0) { $mode = 0; } else { # remove umlauts so quote handling works, # and then change modes if required. $ln =~ s/\\"//g; $mode = !$mode if (($ln =~ tr/"/"/) % 2 == 1); }; $search .= $ln; $search .= "\n" if ($mode); }; $search =~ s/[ \t]+/ /g; return $search; } # # looking for beginning of bib entry is state 1, in bib is state 2 # $LOOKING = 1; $INBIB = 2; foreach $file (@files) { open (INF, "<$file") || warn ("cannot open bibfile $file\n"); $state = $LOOKING; while () { # print "line ", $i++, " state=$state: " . "$_\n"; # beware RCS munging $state:...$ if ($state == $LOOKING) { if (/^[ \t]*@(\w+)/) { # beginning of entry ($key = $1) =~ tr/A-Z/a-z/; # case insensitive keywords if (! defined($badkeys{$key})) { $state = $INBIB; $bibentry = $_; } elsif ($passthroughbad) { print "$_"; # a hack for @string }; }; } elsif ($state == $INBIB) { $bibentry .= $_; if (/^[ \t]*}/) { # ending $searchentry = &printtosearch($bibentry); if ($searchentry =~ /$pattern/i) { print "$file:\n" if ($manyfiles); print "$bibentry\n"; }; $state = $LOOKING; } } else { die ("state problem, $state\n"); }; }; };