3434DOWNLOAD_TIMEOUT = int (os .getenv ('DOWNLOAD_TIMEOUT' , '1800' ))
3535REPO_SIZE_FILE = os .getenv ('REPO_SIZE_FILE' , '' )
3636
37+ package_info = {}
38+
3739pattern_os_template = re .compile (r"@\{(.+)\}" )
3840pattern_package_name = re .compile (r"^Filename: (.+)$" , re .MULTILINE )
3941pattern_package_size = re .compile (r"^Size: (\d+)$" , re .MULTILINE )
@@ -98,6 +100,39 @@ def mkdir_with_dot_tmp(folder: Path)->Tuple[Path, Path]:
98100 shutil .rmtree (str (tmpdir ))
99101 tmpdir .mkdir (parents = True , exist_ok = True )
100102 return (folder , tmpdir )
103+
def flush_package_info(content):
    """Merge the package stanzas found in *content* into ``package_info``.

    *content* is the decoded text of a ``Packages`` index.  It is split on
    blank lines into stanzas; for every stanza the ``Filename``, ``Size`` and
    SHA-256 fields are extracted with the module-level regular expressions and
    recorded in the module-level ``package_info`` dict, keyed by filename::

        package_info[filename] = {
            'size': <int>,
            'sha256': {'new': <checksum from the latest call>,
                       'old': <checksum from the first earlier call, or None>},
        }

    When a filename is seen again, its size is overwritten, the previous
    ``'new'`` checksum is moved to ``'old'`` (only if ``'old'`` is still
    unset) and ``'new'`` is replaced by the checksum just parsed.

    A stanza that cannot be parsed is reported and skipped; the remaining
    stanzas are still processed so that one malformed entry does not drop
    every package after it.

    Returns:
        0 when every stanza was parsed, 1 when at least one was skipped.
    """
    status = 0
    for stanza in content.split('\n\n'):
        if len(stanza) < 10:  # ignore blanks
            continue
        try:
            pkg_filename = pattern_package_name.search(stanza).group(1)
            pkg_size = int(pattern_package_size.search(stanza).group(1))
            pkg_checksum = pattern_package_sha256.search(stanza).group(1)
        except (AttributeError, ValueError):
            # search() returns None when a field is missing, so .group()
            # raises AttributeError; int() failures surface as ValueError.
            print("Failed to parse one package description", flush=True)
            traceback.print_exc()
            status = 1
            continue

        entry = package_info.get(pkg_filename)
        if entry is None:
            # First time this file is seen: there is no previous checksum yet.
            package_info[pkg_filename] = {
                'size': pkg_size,
                'sha256': {
                    'new': pkg_checksum,
                    'old': None,
                },
            }
            continue

        entry['size'] = pkg_size
        checksums = entry['sha256']
        # Keep the earliest recorded checksum as 'old'; later calls only
        # refresh 'new'.
        if checksums['new'] is not None and checksums['old'] is None:
            checksums['old'] = checksums['new']
        checksums['new'] = pkg_checksum
    return status
101136
102137def move_files_in (src : Path , dst : Path ):
103138 empty = True
@@ -110,6 +145,9 @@ def move_files_in(src: Path, dst: Path):
110145 print (f"{ src } is empty" )
111146
112147def apt_mirror (base_url : str , dist : str , repo : str , arch : str , dest_base_dir : Path , deb_set : Dict [str , int ])-> int :
148+
149+ global package_info
150+
113151 if not dest_base_dir .is_dir ():
114152 print ("Destination directory is empty, cannot continue" )
115153 return 1
@@ -134,6 +172,8 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
134172 pkgidx_dir ,pkgidx_tmp_dir = mkdir_with_dot_tmp (comp_dir / arch_dir )
135173 with open (release_file , "r" ) as fd :
136174 pkgidx_content = None
175+ pkgidx_file_old = None
176+ package_info = {}
137177 cnt_start = False
138178 for line in fd :
139179 if cnt_start :
@@ -164,6 +204,8 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
164204 pkgidx_file .unlink ()
165205 continue
166206 if pkgidx_content is None and pkgidx_file .stem == 'Packages' :
207+ pkgidx_file_old = Path (f'{ dist_dir } /{ filename } .old' )
208+ shutil .copy (pkgidx_file , pkgidx_file_old )
167209 print (f"getting packages index content from { pkgidx_file .name } " , flush = True )
168210 suffix = pkgidx_file .suffix
169211 if suffix == '.xz' :
@@ -176,12 +218,43 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
176218 pkgidx_content = content .decode ('utf-8' )
177219 else :
178220 print ("unsupported format" )
221+ continue
222+
223+ flush_package_info (pkgidx_content )
224+
225+ with pkgidx_file_old .open ('rb' ) as t : content = t .read ()
226+ if len (content ) != int (filesize ):
227+ print (f"Invalid size of { pkgidx_file } , expected { filesize } , skipped" )
228+ pkgidx_file .unlink ()
229+ continue
230+ if hashlib .sha256 (content ).hexdigest () != checksum :
231+ print (f"Invalid checksum of { pkgidx_file } , expected { checksum } , skipped" )
232+ pkgidx_file .unlink ()
233+ continue
234+ if pkgidx_file_old .stem == 'Packages' :
235+ print (f"getting packages index content from { pkgidx_file_old .name } " , flush = True )
236+ suffix = pkgidx_file_old .suffix
237+ if suffix == '.xz.old' :
238+ pkgidx_content_old = lzma .decompress (content ).decode ('utf-8' )
239+ elif suffix == '.bz2.old' :
240+ pkgidx_content_old = bz2 .decompress (content ).decode ('utf-8' )
241+ elif suffix == '.gz.old' :
242+ pkgidx_content_old = gzip .decompress (content ).decode ('utf-8' )
243+ elif suffix == '.old' :
244+ pkgidx_content_old = content .decode ('utf-8' )
245+ else :
246+ print ("unsupported format" )
247+ continue
248+
249+ flush_package_info (pkgidx_content_old )
250+
179251
180252 # Currently only support SHA-256 checksum, because
181253 # "Clients may not use the MD5Sum and SHA1 fields for security purposes, and must require a SHA256 or a SHA512 field."
182254 # from https://wiki.debian.org/DebianRepository/Format#A.22Release.22_files
183255 if line .startswith ('SHA256:' ):
184256 cnt_start = True
257+
185258 if not cnt_start :
186259 print ("Cannot find SHA-256 checksum" )
187260 return 1
@@ -216,18 +289,9 @@ def collect_tmp_dir():
216289 err = 0
217290 deb_count = 0
218291 deb_size = 0
219- for pkg in pkgidx_content .split ('\n \n ' ):
220- if len (pkg ) < 10 : # ignore blanks
221- continue
222- try :
223- pkg_filename = pattern_package_name .search (pkg ).group (1 )
224- pkg_size = int (pattern_package_size .search (pkg ).group (1 ))
225- pkg_checksum = pattern_package_sha256 .search (pkg ).group (1 )
226- except :
227- print ("Failed to parse one package description" , flush = True )
228- traceback .print_exc ()
229- err = 1
230- continue
292+ for pkg_filename , pkg_info in package_info .items ():
293+ pkg_size = pkg_info ['size' ]
294+ pkg_checksum = pkg_info ['sha256' ]
231295 deb_count += 1
232296 deb_size += pkg_size
233297
@@ -237,8 +301,8 @@ def collect_tmp_dir():
237301 dest_dir .mkdir (parents = True , exist_ok = True )
238302 if dest_filename .suffix == '.deb' :
239303 deb_set [str (dest_filename .relative_to (dest_base_dir ))] = pkg_size
240- if dest_filename .is_file () and dest_filename .stat ().st_size == pkg_size :
241- print (f"Skipping { pkg_filename } , size { pkg_size } " )
304+ if dest_filename .is_file () and ( dest_filename .stat ().st_size == pkg_size and pkg_checksum [ 'old' ] == pkg_checksum [ 'new' ]) :
305+ print (f"Skipping { pkg_filename } , size { pkg_size } , old sha256 { pkg_checksum [ 'old' ] } , new sha256 { pkg_checksum [ 'new' ] } " )
242306 continue
243307
244308 pkg_url = f"{ base_url } /{ pkg_filename } "
@@ -253,8 +317,8 @@ def collect_tmp_dir():
253317 with dest_tmp_filename .open ("rb" ) as f :
254318 for block in iter (lambda : f .read (1024 ** 2 ), b"" ):
255319 sha .update (block )
256- if sha .hexdigest () != pkg_checksum :
257- print (f"Invalid checksum of { dest_filename } , expected { pkg_checksum } " )
320+ if sha .hexdigest () != pkg_checksum [ 'new' ] :
321+ print (f"Invalid checksum of { dest_filename } , expected { pkg_checksum [ 'new' ] } " )
258322 dest_tmp_filename .unlink ()
259323 continue
260324 dest_tmp_filename .rename (dest_filename )
0 commit comments