30
30
import difflib
31
31
import collections
32
32
import distutils .sysconfig
33
+ import fnmatch
33
34
import io
34
35
import os
35
36
import re
48
49
# Token types treated as atoms: names, numbers and string literals.
ATOMS = frozenset([tokenize.NAME, tokenize.NUMBER, tokenize.STRING])

# Matches a complete "except <types> as <name>:" line.
EXCEPT_REGEX = re.compile(r'^\s*except [\s,()\w]+ as \w+:$')

# Matches a "#!" first line whose last word is "python", "python2" or
# "python3" (optionally followed by trailing whitespace).
PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')

# Upper bound on how much of an extension-less file is read when deciding
# whether it is a Python script.
MAX_PYTHON_FILE_DETECTION_BYTES = 1024
52
55
53
56
try :
54
57
unicode
@@ -549,21 +552,25 @@ def fix_file(filename, args, standard_out):
549
552
standard_out .write ('' .join (diff ))
550
553
551
554
552
def open_with_encoding(filename, encoding, mode='r',
                       limit_byte_check=-1):
    """Return opened file with a specific encoding.

    filename -- path of the file to open.
    encoding -- codec name to open the file with. When it is falsy
        (None or an empty string) the encoding is obtained by calling
        detect_encoding() on the file.
    mode -- mode passed to io.open(); defaults to reading.
    limit_byte_check -- forwarded to detect_encoding() when the encoding
        has to be detected; ignored when an encoding is supplied.

    The file is opened with newline='' so the original line endings are
    handed back untranslated.

    """
    if not encoding:
        encoding = detect_encoding(filename,
                                   limit_byte_check=limit_byte_check)

    return io.open(filename, mode=mode, encoding=encoding,
                   newline='')  # Preserve line endings
556
563
557
564
558
- def detect_encoding (filename ):
565
+ def detect_encoding (filename , limit_byte_check = - 1 ):
559
566
"""Return file encoding."""
560
567
try :
561
568
with open (filename , 'rb' ) as input_file :
562
569
encoding = _detect_encoding (input_file .readline )
563
570
564
571
# Check for correctness of encoding.
565
572
with open_with_encoding (filename , encoding ) as input_file :
566
- input_file .read ()
573
+ input_file .read (limit_byte_check )
567
574
568
575
return encoding
569
576
except (LookupError , SyntaxError , UnicodeDecodeError ):
@@ -600,6 +607,69 @@ def get_diff_text(old, new, filename):
600
607
return text
601
608
602
609
610
+ def _split_comma_separated (string ):
611
+ """Return a set of strings."""
612
+ return set (text .strip () for text in string .split (',' ) if text .strip ())
613
+
614
+
615
def is_python_file(filename):
    """Return True if filename is Python file.

    A name ending in '.py' is accepted without touching the file system.
    Otherwise the start of the file (at most
    MAX_PYTHON_FILE_DETECTION_BYTES characters) is read and the file is
    accepted only when its first line matches PYTHON_SHEBANG_REGEX.
    Empty files and files that raise IOError while being opened or read
    are reported as not Python.

    """
    if filename.endswith('.py'):
        return True

    try:
        # Passing None as the encoding makes open_with_encoding() detect it.
        with open_with_encoding(
                filename,
                None,
                limit_byte_check=MAX_PYTHON_FILE_DETECTION_BYTES) as f:
            text = f.read(MAX_PYTHON_FILE_DETECTION_BYTES)
            if not text:
                return False
            first_line = text.splitlines()[0]
    except (IOError, IndexError):
        return False

    return bool(PYTHON_SHEBANG_REGEX.match(first_line))
636
+
637
+
638
def match_file(filename, exclude):
    """Return True if file is okay for modifying/recursing.

    filename -- path of a file or directory.
    exclude -- iterable of glob patterns; a path is rejected when either
        its base name or the path as given matches one of them.

    Names whose base name starts with '.' are always rejected. Directories
    that survive those checks are accepted; anything else is accepted only
    if is_python_file() says so.

    """
    base_name = os.path.basename(filename)

    if base_name.startswith('.'):
        return False

    for pattern in exclude:
        if (fnmatch.fnmatch(base_name, pattern) or
                fnmatch.fnmatch(filename, pattern)):
            return False

    if not os.path.isdir(filename) and not is_python_file(filename):
        return False

    return True
655
+
656
+
657
def find_files(filenames, recursive, exclude):
    """Yield filenames.

    filenames -- iterable of paths to start from; it is not modified.
    recursive -- when true, directories among the pending names are
        walked and the files inside them that pass match_file() are
        queued; sub-directories that fail match_file() are not descended
        into.
    exclude -- glob patterns handed to match_file().

    Names that are not directories (or any name when recursive is false)
    are yielded as-is, in first-in first-out order.

    """
    # Work on a private queue so the caller's list is left untouched and
    # taking the next name does not shift the remaining entries.
    pending = collections.deque(filenames)
    while pending:
        name = pending.popleft()
        if recursive and os.path.isdir(name):
            for root, directories, children in os.walk(name):
                for child in children:
                    path = os.path.join(root, child)
                    if match_file(path, exclude):
                        pending.append(path)
                # Assign in place so os.walk() skips the pruned directories.
                directories[:] = [d for d in directories
                                  if match_file(os.path.join(root, d),
                                                exclude)]
        else:
            yield name
671
+
672
+
603
673
def _main (argv , standard_out , standard_error ):
604
674
"""Return exit status.
605
675
@@ -630,6 +700,9 @@ def _main(argv, standard_out, standard_error):
630
700
parser .add_argument ('--version' , action = 'version' ,
631
701
version = '%(prog)s ' + __version__ )
632
702
parser .add_argument ('files' , nargs = '+' , help = 'files to format' )
703
+ parser .add_argument ('--exclude' , metavar = 'globs' ,
704
+ help = 'exclude file/directory names that match these '
705
+ 'comma-separated globs' )
633
706
634
707
args = parser .parse_args (argv [1 :])
635
708
@@ -638,21 +711,17 @@ def _main(argv, standard_out, standard_error):
638
711
file = standard_error )
639
712
return 1
640
713
714
+ if args .exclude :
715
+ args .exclude = _split_comma_separated (args .exclude )
716
+ else :
717
+ args .exclude = set ([])
718
+
641
719
filenames = list (set (args .files ))
642
- while filenames :
643
- name = filenames .pop (0 )
644
- if args .recursive and os .path .isdir (name ):
645
- for root , directories , children in os .walk (unicode (name )):
646
- filenames += [os .path .join (root , f ) for f in children
647
- if f .endswith ('.py' ) and
648
- not f .startswith ('.' )]
649
- directories [:] = [d for d in directories
650
- if not d .startswith ('.' )]
651
- else :
652
- try :
653
- fix_file (name , args = args , standard_out = standard_out )
654
- except IOError as exception :
655
- print (unicode (exception ), file = standard_error )
720
+ for name in find_files (filenames , args .recursive , args .exclude ):
721
+ try :
722
+ fix_file (name , args = args , standard_out = standard_out )
723
+ except IOError as exception :
724
+ print (unicode (exception ), file = standard_error )
656
725
657
726
658
727
def main ():
0 commit comments