diff --git a/MANIFEST b/MANIFEST index e0a610d02d1d..13e0db37de63 100644 --- a/MANIFEST +++ b/MANIFEST @@ -5403,6 +5403,7 @@ Porting/checkcfgvar.pl Check that config scripts define all symbols Porting/checkpodencoding.pl Check POD encoding Porting/checkURL.pl Check whether we have working URLs Porting/checkVERSION.pl Check whether we have $VERSIONs +Porting/clean-commit Cleanup whitespace issues in a commit using git blame Porting/cmpVERSION.pl Compare whether two trees have changed modules Porting/config.sh Sample config.sh Porting/config_H Sample config.h diff --git a/Porting/README.pod b/Porting/README.pod index d98487c1c19c..dde9a18c9489 100644 --- a/Porting/README.pod +++ b/Porting/README.pod @@ -82,6 +82,19 @@ Checks that all the URLs in the Perl source are valid. Used by F to ensure changed modules have had their versions updated. +=head2 F + +This tool can be used to intelligently clean up whitespace issue in a +patch. For files already under git control it will only clean lines that +have been modified as part of the commit. It will not modify generated +files which have the marker "do not edit this file" (in uppercase) in +them. It will convert tabs to spaces in files .pl, .pm, .xs, .c and .h +files, and it will remove trailing whitespace from all file types. It will +also remove blank lines at the end of a file should the commit add any. + +By default it will not modify files not under git control, but if new +files are `git add`ed then it will clean then entire file. + =head2 F Compare the current Perl source tree and a given tag for modules that have @@ -394,4 +407,3 @@ leaks. Guide for Vote Administrators for running Steering Council elections. =cut - diff --git a/Porting/clean-commit b/Porting/clean-commit new file mode 100755 index 000000000000..5215d91b5181 --- /dev/null +++ b/Porting/clean-commit @@ -0,0 +1,342 @@ +#!/usr/bin/perl +use Data::Dumper; +use strict; +use warnings; +use Getopt::Long; +use Pod::Usage; + +my $DEBUG=0; + +use constant { + NULL_SHA1 => ("0" x 40), + TAB => " " x 8, + DO_NOT_EDIT => uc("do not edit this file"), +}; + +my $TAB= TAB; # for regexes +my $DO_NOT_EDIT= DO_NOT_EDIT; # for interpolation + +sub read_blame_file { + my ($args, $file, $callback)= @_; + print "executing: git blame -p $args $file\n" if $DEBUG > 1; + open my $fh,"git blame -p $args $file 2>/dev/null |" + or die "Failed to open pipe: $?"; + my ($line_props, $sha1); + my %commit_props; + my $read= 0; + while (<$fh>) { + $read++; + if (/^([0-9a-fA-F]{40}) (\d+) (\d+)(?: (\d+))/) { + $line_props= { sha1=> ($sha1 = $1), src_line => $2, dst_line => $3, group_size => $4 }; + } elsif (/^(\S+)(?: (.*))?/) { + $commit_props{$sha1}{$1}= $2; + } elsif ( s/^\t// ) { + $line_props->{text}= $_; + $callback->($sha1, $line_props, \%commit_props); + } + } + return $read; +} + +sub clean_file { + my $file= shift + or die "Must have a file name to clean\n"; + if (-B $file) { + warn "skipping $file as it is binary\n"; + return; + } + my $full= shift; + print "clean_file($file)\n" if $DEBUG > 1; + my $clean_tabs= $file=~/\.(?:p[lm]|[ch]|xs)\z/; + my $read; + my @out_lines; + unless ( $full ) { + my $noted; + $read= read_blame_file("--since=HEAD", $file, sub { + my ($sha1, $line_props, $commit_props)= @_; + # the following trickery is so if blame fails for an unchecked in file + # we dont print "cleaning using blame" when we will do the full file. + print "\tcleaning using blame '$file'\n" unless $noted++; + my $line= $line_props->{text}; + if ($full or $sha1 eq NULL_SHA1) { + my $modified= 0; + $modified += $line =~ s/\t/$TAB/g if $clean_tabs and $file=~/\./; + $modified += $line =~ s/\s+\z/\n/; + print "\tcleaned line $line_props->{dst_line}\n" + if $DEBUG > 2 and $modified; + } + push @out_lines, $line; + }); + } + if ($read) { + write_file($file, \@out_lines); + } else { + print "\tcleaning new file '$file'\n"; + clean_new($file); + } +} + +sub write_file { + my ($file, $out_lines)= @_; + pop @$out_lines while @$out_lines and $out_lines->[-1] eq "\n"; + my $mode= (stat $file)[2]; + open my $ofh, ">", "$file.out" + or die "Failed to open '$file.out' for write"; + print $ofh @$out_lines; + close $ofh or die "Failed to close '$file.out':$!"; + rename "$file.out", $file or die "Failed to rename '$file.out' to '$file':$!"; + chmod $mode, $file or die sprintf "Failed to chmod '%s' to %3o: %s", $file, $mode, $!; +} + +sub clean_new { + my ($file)= @_; + print "clean_new($file)\n" if $DEBUG > 1; + my $clean_tabs= $file=~/\.(?:p[lm]|[ch]|xs)\z/; + open my $ifh, "<", $file + or die "Failed to open '$file' for read"; + my @out_lines; + while (<$ifh>) { + my $modified= 0; + $modified += s/\t/$TAB/g if $clean_tabs; + $modified += s/\s+\z/\n/; + print "\tcleaned line $.\n" + if $DEBUG > 2 and $modified; + push @out_lines, $_; + } + close $ifh or die "Failed to close '$file':$1"; + write_file($file, \@out_lines); +} + +sub get_modified_files { + my ($autodetect,$status)= @_; + return [] if !defined $autodetect; + $autodetect ||= "MA"; + if ($autodetect and $autodetect=~/([^ MADRCU])/) { + die "Unknown mode '$1' in '$autodetect', must be one of [ MADRCU]\n", + "See git status --help for more information\n"; + } + print "looking for files with mode [$autodetect]...\n" if $DEBUG; + print Dumper($status) if $DEBUG > 1; + $status||= get_status(); + my @files; + foreach my $file (sort keys %$status) { + push @files, $file + if $status->{$file}=~m/[$autodetect]/; + } + return filter_do_not_edit_files(\@files); +} + +sub filter_do_not_edit_files { + my ($files)= @_; + my @ret_files; + # we hide this next line from the check by storing + # it in lower case and then using uc to fix it for the grep + while (@$files) { + my @these_files= splice @$files,0,50; + open my $cmd, "git grep -L '$DO_NOT_EDIT' @these_files |"; + while (<$cmd>) { + chomp; + push @ret_files, $_; + } + } + return \@ret_files; +} + +sub get_status { + open my $cmd, "git status --porcelain |" + or die "No status?"; + my %files; + while (<$cmd>) { + print if $DEBUG > 2; + my ($mode,$file1,$file2)= /(..) (.*?)(?: -> (.*))?$/ + or die "Can't parse: $_"; + $file2 ||= $file1; + if ($mode =~ /[MARC]/) { + $files{$file2}=$mode; + } + } + #die Dumper(\%files); + close $cmd; + return \%files +} + +sub clean_files { + my ($files, $status, $skip, $full)=@_; + my @todo; + FILE: + for my $file (@$files) { + print "\tchecking $file\n" if $DEBUG > 1; + if (!-f $file) { + print "ignoring '$file': not a regular file\n"; + next FILE; + } + for my $pat (@$skip) { + if ($file =~ m/$pat/) { + print "skipping '$file': it matches 'no' pattern $pat\n" if $DEBUG; + next FILE; + } + } + if ($full or $status->{$file}) { + push @todo, $file; + } else { + print "leaving '$file': it is unchanged\n" if $DEBUG; + } + } + if (@todo) { + # might put stuff here + clean_file($_,$full) for @todo; + } +} + +my $full = 0; # if true clean the full file +my $autodetect= 0; # '0' means DWIM (use @ARGV if it has stuff otherwise autodetect) , + # undef means use @ARGV regardless, + # '' or anything else means autdetect regardless. +Getopt::Long::Configure("bundling"); +GetOptions( + 'n|no=s' => \my @no, + 'v|verbose+' => \($DEBUG), + 'h|help|?' => \my $help, + 'man' => \my $man, + 'f|full' => \$full, + 'F|no-full' => sub { undef $full}, + 'a|auto:s' => \$autodetect, + 'A|no-auto' => sub { undef $autodetect }, +) or pod2usage(2); +pod2usage(1) if $help; +pod2usage(-exitstatus => 0, -verbose => 2) if $man; + +warn Data::Dumper->Dump([\@ARGV,$autodetect,$full,\@no,$DEBUG],[qw(*ARGV *autodetect *full *no *DEBUG)]) + if $DEBUG>2; +exit(0) if $DEBUG > 9; + +chomp(my $path= `git rev-parse --git-dir`); +die "Not a git repo" if !$path; +chdir( $path . "/.." ) + or die "Failed to chdir to '$path/..': $!"; + +my %seen; +@no= map { !$seen{$_}++ ? qr/$_/ : () } @no; + +my $status= get_status(); +exit(0) if !%$status && !@ARGV; + +my $files= (!defined($autodetect) || # if autodetect is undef - do NOT use git status + $autodetect eq '0' && @ARGV) # or autodetect eq '0' and @ARGV has stuff in it + ? \@ARGV # then use ARGV + : get_modified_files($autodetect,$status); # otherwise use git status to find the files + +clean_files($files,$status,\@no,$full); + +__END__ + +=head1 NAME + +clean-commit - whitespace clean modified files in a git repository + +=head1 SYNOPSIS + +clean-commit [options] [file ...] + + Options: + --no=REGEX ignore anything matching this + -a --auto=MODE use git to find modified files - MODE can be [MADRCU] + -A --no-auto do not use git to find files if the arg list is empty + -f --full clean the full file, not just the changed bits + --help brief help message + --man full documentation + --verbose print debugging information + +=head1 OPTIONS + +Either processes the provided list of files or if none are provided then +uses C to find the files. You can use the C<--auto> and +C<--no-auto> to fine-tune this behaviour. + +=over 8 + +=item B<--no=REGEX> + +Any file matching this will be ignored. May be used more than once. +REGEX is a perl syntax regular expression. + +=item B<-a> + +=item B<--auto> + +=item B<--auto=MODES> + +Use git status to find modified files. Defaults to 'M', legal values +are as follows (most can be combined). + + MODE Meaning + '0' use @ARGV if its there, otherwise use mode ('M') + '' use default mode 'M' + ' ' unchanged + 'M' modified + 'A' added + 'R' renamed + 'C' copied + 'U' unmerged + +The default behaviour of the tool is C<--auto=0>, which causes the tool +to process any files passed in on the command line, and to otherwise use +C with the default mode ('M') to find the files. Any other use +of this option causes the tool to ignore any file on the command line. +The use of the C<--no-auto> option overrides this behaviour the other way +and causes the tool to process only the files passed in, even if that +means doing nothing. If used together the last used wins. + +See the documentation for the C command, and the C<--porcelain> option +for more details on the mode values. + +=item B<-A> + +=item B<--no-auto> + +Do not use C, only process files passed in on the command line, even if +that means processing nothing. If combined with B<--auto> which is used last wins. +See also the documentation for C<--auto> + +=item B<-v> + +=item B<--verbose> + +Output debugging information. Right now this is not very pretty. + +=item B<-help> + +Print a brief help message and exits. + +=item B<-man> + +Prints the manual page and exits. + +=back + +=head1 DESCRIPTION + +B will read the given input file(s) and use git to +determine which parts have been modified, and then clean any whitespace +issues in the modified parts. Cleaning that is performed is to eliminate +trailing whitespace and convert tabs to spaces. + +The default behaviour is to DWIM, and either process the specified +files, or use C to find them. The C<--no-auto> option means +the tool will only process the explicitly provided files, even if that +means doing nothing. The C<--auto> option can be used to force C to be used to find the files, even if a list of files have been +provided, and can be used to change which types of file modifications +are chosen to be cleaned, for instance C<--mode=MARC> would clean any +modified, added, renamed, or copied files, instead of the normal default +of just cleaning modified files. + +Files which contain the string 'do not edit this file' in *uppercase* +are excluded from cleanups. + +Trailing whitespace is removed from all files, tabs are converted to +spaces in files ending in C<.pl>, C<.pm>, C<.xs>, C<.c> and C<.h>, any +totally blank lines at the end of a file will be removed (either when +the file is new or, when the lines were added to an existing file). + +=cut diff --git a/Porting/exec-bit.txt b/Porting/exec-bit.txt index ac6fc54ee9f6..5821bcbd5e63 100644 --- a/Porting/exec-bit.txt +++ b/Porting/exec-bit.txt @@ -50,6 +50,7 @@ Porting/checkansi.pl Porting/checkcfguse.pl Porting/checkcfgvar.pl Porting/checkpodencoding.pl +Porting/clean-commit Porting/cmpVERSION.pl Porting/config_h.pl Porting/corecpan.pl