Skip to content

Commit

Permalink
Merge pull request #53 from Kentzo/check-attr
Browse files Browse the repository at this point in the history
Spawn the git check-attr process only once per repo.
  • Loading branch information
Kentzo authored Oct 31, 2018
2 parents 71103d3 + 61e31d8 commit 9f9196b
Showing 1 changed file with 128 additions and 45 deletions.
173 changes: 128 additions & 45 deletions git_archive_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ def __init__(self, prefix='', exclude=True, force_sub=False, extra=None, main_re
self.force_sub = force_sub
self.main_repo_abspath = main_repo_abspath

self._check_attr_gens = {}

def create(self, output_path, dry_run=False, output_format=None, compresslevel=None):
"""
Create the archive at output_file_path.
Expand Down Expand Up @@ -208,15 +210,9 @@ def is_file_excluded(self, repo_abspath, repo_file_path):
@return: True if file should be excluded. Otherwise False.
@rtype: bool
"""
out = self.run_git_shell(
'git check-attr -z export-ignore -- %s' % quote(repo_file_path),
cwd=repo_abspath
).split('\0')

try:
return out[2] == 'set'
except IndexError:
return False
next(self._check_attr_gens[repo_abspath])
attrs = self._check_attr_gens[repo_abspath].send(repo_file_path)
return attrs['export-ignore'] == 'set'

def archive_all_files(self, archiver):
"""
Expand Down Expand Up @@ -248,55 +244,73 @@ def walk_git_files(self, repo_path=''):
@rtype: Iterable
"""
repo_abspath = path.join(self.main_repo_abspath, repo_path)
repo_file_paths = self.run_git_shell(
'git ls-files -z --cached --full-name --no-empty-directory',
repo_abspath
).split('\0')[:-1]
assert repo_abspath not in self._check_attr_gens
self._check_attr_gens[repo_abspath] = self.check_attr(repo_abspath, ['export-ignore'])

for repo_file_path in repo_file_paths:
repo_file_abspath = path.join(repo_abspath, repo_file_path) # absolute file path
main_repo_file_path = path.join(repo_path, repo_file_path) # file path relative to the main repo
try:
repo_file_paths = self.run_git_shell(
'git ls-files -z --cached --full-name --no-empty-directory',
repo_abspath
).split('\0')[:-1]

# Only list symlinks and files.
if not path.islink(repo_file_abspath) and path.isdir(repo_file_abspath):
continue
for repo_file_path in repo_file_paths:
repo_file_abspath = path.join(repo_abspath, repo_file_path) # absolute file path
main_repo_file_path = path.join(repo_path, repo_file_path) # relative to main_repo_abspath

if self.is_file_excluded(repo_abspath, repo_file_path):
continue
# Only list symlinks and files.
if not path.islink(repo_file_abspath) and path.isdir(repo_file_abspath):
continue

yield main_repo_file_path
if self.is_file_excluded(repo_abspath, repo_file_path):
continue

if self.force_sub:
self.run_git_shell('git submodule init', repo_abspath)
self.run_git_shell('git submodule update', repo_abspath)
yield main_repo_file_path

try:
repo_gitmodules_abspath = path.join(repo_abspath, ".gitmodules")
if self.force_sub:
self.run_git_shell('git submodule init', repo_abspath)
self.run_git_shell('git submodule update', repo_abspath)

with open(repo_gitmodules_abspath) as f:
lines = f.readlines()
try:
repo_gitmodules_abspath = path.join(repo_abspath, ".gitmodules")

for l in lines:
m = re.match("^\\s*path\\s*=\\s*(.*)\\s*$", l)
with open(repo_gitmodules_abspath) as f:
lines = f.readlines()

if m:
repo_submodule_path = m.group(1) # relative to repo_path
main_repo_submodule_path = path.join(repo_path, repo_submodule_path) # relative to main_repo_abspath
for l in lines:
m = re.match("^\\s*path\\s*=\\s*(.*)\\s*$", l)

if self.is_file_excluded(repo_abspath, repo_submodule_path):
continue
if m:
repo_submodule_path = m.group(1) # relative to repo_path
main_repo_submodule_path = path.join(repo_path, repo_submodule_path) # relative to main_repo_abspath

for main_repo_submodule_file_path in self.walk_git_files(main_repo_submodule_path):
repo_submodule_file_path = main_repo_submodule_file_path.replace(repo_path, "", 1).strip("/")
if self.is_file_excluded(repo_abspath, repo_submodule_file_path):
if self.is_file_excluded(repo_abspath, repo_submodule_path):
continue

yield main_repo_submodule_file_path
except IOError:
pass
for main_repo_submodule_file_path in self.walk_git_files(main_repo_submodule_path):
repo_submodule_file_path = main_repo_submodule_file_path.replace(repo_path, "", 1).strip("/") # relative to repo_path
if self.is_file_excluded(repo_abspath, repo_submodule_file_path):
continue

yield main_repo_submodule_file_path
except IOError:
pass
finally:
self._check_attr_gens[repo_abspath].close()
del self._check_attr_gens[repo_abspath]

@classmethod
def decode_git_output(cls, output):
"""
Decode Git's binary output handeling the way it escapes unicode characters.
@type output: bytes
@rtype: str
"""
return output.decode('unicode_escape').encode('raw_unicode_escape').decode('utf-8')

@staticmethod
def run_git_shell(cmd, cwd=None):
@classmethod
def run_git_shell(cls, cmd, cwd=None):
"""
Runs git shell command, reads output and decodes it into unicode string.
Expand All @@ -313,7 +327,7 @@ def run_git_shell(cmd, cwd=None):
"""
p = Popen(cmd, shell=True, stdout=PIPE, cwd=cwd)
output, _ = p.communicate()
output = output.decode('unicode_escape').encode('raw_unicode_escape').decode('utf-8')
output = cls.decode_git_output(output)

if p.returncode:
if sys.version_info > (2, 6):
Expand All @@ -323,6 +337,75 @@ def run_git_shell(cmd, cwd=None):

return output

@classmethod
def check_attr(cls, repo_abspath, attrs):
"""
Generator that returns attributes for given paths relative to repo_abspath.
>>> g = GitArchiver.check_attr('repo_path', ['export-ignore'])
>>> next(g)
>>> attrs = g.send('relative_path')
>>> print(attrs['export-ignore'])
@param repo_abspath: Absolute path to a git repository.
@type repo_abspath: str
@param attrs: Attributes to check.
@type attrs: [str]
@rtype: generator
"""
def make_process():
cmd = 'GIT_FLUSH=1 git check-attr --stdin -z {0}'.format(' '.join(attrs))
return Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, cwd=repo_abspath)

def read_attrs(process, repo_file_path):
process.stdin.write(repo_file_path.encode('utf-8') + b'\0')
process.stdin.flush()

# For every attribute check-attr will output: <path> NUL <attribute> NUL <info> NUL
path, attr, info = b'', b'', b''
nuls_count = 0
nuls_expected = 3 * len(attrs)

while nuls_count != nuls_expected:
b = process.stdout.read(1)

if b == b'' and process.poll() is not None:
raise RuntimeError("check-attr exited prematurely")
elif b == b'\0':
nuls_count += 1

if nuls_count % 3 == 0:
yield map(cls.decode_git_output, (path, attr, info))
path, attr, info = b'', b'', b''
elif nuls_count % 3 == 0:
path += b
elif nuls_count % 3 == 1:
attr += b
elif nuls_count % 3 == 2:
info += b

if not attrs:
return

process = make_process()

try:
while True:
repo_file_path = yield
repo_file_attrs = {}

for path, attr, value in read_attrs(process, repo_file_path):
assert path == repo_file_path
assert attr in attrs
repo_file_attrs[attr] = value

yield repo_file_attrs
finally:
process.stdin.close()
process.wait()


def main():
from optparse import OptionParser, SUPPRESS_HELP
Expand Down

0 comments on commit 9f9196b

Please sign in to comment.