99import traceback
1010from collections import deque
1111from datetime import datetime
12+ from enum import Enum
1213from threading import Condition , Lock
13- from typing import TYPE_CHECKING , Any , Callable , Deque , Dict , Iterable , List , Optional , Tuple
14+ from typing import Any , Callable , Deque , Dict , Iterable , List , Mapping , Optional , TYPE_CHECKING , Tuple
1415from urllib .parse import quote as urlescape
1516
1617import requests
3435)
3536
3637if TYPE_CHECKING :
37- from pyff .parse import PyffParser
38+ from pyff .parse import ParserInfo , PyffParser
3839 from pyff .pipes import PipelineCallback
3940 from pyff .utils import Lambda
4041
@@ -141,14 +142,14 @@ def thing_to_url(self, t: Resource) -> Optional[str]:
141142 def i_handle (self , t : Resource , url = None , response = None , exception = None , last_fetched = None ):
142143 try :
143144 if exception is not None :
144- t .info [ 'Exception' ] = exception
145+ t .info . exception = exception
145146 else :
146147 children = t .parse (lambda u : response )
147148 self .i_schedule (children )
148149 except BaseException as ex :
149150 log .debug (traceback .format_exc ())
150151 log .error (f'Failed handling resource: { ex } ' )
151- t .info [ 'Exception' ] = ex
152+ t .info . exception = ex
152153
153154
154155class ResourceOpts (BaseModel ):
@@ -177,6 +178,50 @@ def to_dict(self) -> Dict[str, Any]:
177178 return res
178179
179180
class ResourceLoadState(str, Enum):
    """Stages recorded on a ResourceInfo while a resource is being loaded.

    The ``str`` mix-in makes every member compare equal to its plain string
    value, so code that still compares against the literal names keeps working.
    """

    # Set at the end of Resource.load_resource(), once data has been obtained.
    Fetched = 'Fetched'
    # Set by Resource.parse() right before the parser is invoked.
    Parsing = 'Parsing'
    # Set by Resource.parse() after parsing and the optional backup save.
    Parsed = 'Parsed'
    # Set by Resource.parse() as its last step before returning the children.
    Ready = 'Ready'
186+
187+
class ResourceInfo(BaseModel):
    """Structured record of one attempt to load (fetch and parse) a resource.

    Replaces the free-form info dict that was previously attached to a
    resource; ``to_dict()`` renders the record with the legacy key names.
    """

    # URL of the resource. Required, but may be None: Resource accepts
    # ``url: Optional[str]`` and passes it here unchanged.
    resource: Optional[str] = Field(...)
    state: Optional[ResourceLoadState] = None
    http_headers: Dict[str, Any] = Field({})
    reason: Optional[str] = None
    status_code: Optional[str] = None  # HTTP status code as string. TODO: change to int
    # NOTE(review): in the visible imports ParserInfo is only imported under
    # TYPE_CHECKING - confirm the forward reference is resolved at runtime.
    parser_info: Optional[ParserInfo] = None
    expired: Optional[bool] = None
    exception: Optional[BaseException] = None

    class Config:
        # Needed because BaseException is not a type pydantic knows how to validate.
        arbitrary_types_allowed = True

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a flat dict using the legacy, human-readable keys.

        Field names are converted to title case ('status_code' becomes
        'Status Code'), except for names with a dedicated legacy spelling.
        When parser info is present its own ``to_dict()`` output is merged
        into the top level. The 'Parser Info' key is never part of the
        result, and 'Exception' is only present when an exception was recorded.
        """
        # Field names whose legacy key is not simply the title-cased name.
        special = {'http_headers': 'HTTP Response Headers'}

        def _format_key(k: str) -> str:
            if k in special:
                return special[k]
            # Turn validation_errors into 'Validation Errors'
            return k.replace('_', ' ').title()

        res = {_format_key(k): v for k, v in self.dict().items()}

        if self.parser_info:
            # Move contents from sub-dict to top of dict, for backwards compatibility
            res.update(self.parser_info.to_dict())
        # The nested entry never existed in the legacy dict; drop it whether or
        # not parser info was available.
        res.pop('Parser Info', None)

        # backwards compat
        # 'Description' is not a model field; it can only arrive via the merged
        # parser info, so it must be looked up defensively (a record without
        # parser info used to raise KeyError here).
        if res.get('Description') == 'SAML Metadata':
            del res['Description']
        if res.get('Exception') is None:
            res.pop('Exception', None)

        return res
223+
224+
180225class Resource (Watchable ):
181226 def __init__ (self , url : Optional [str ], opts : ResourceOpts ):
182227 super ().__init__ ()
@@ -189,7 +234,7 @@ def __init__(self, url: Optional[str], opts: ResourceOpts):
189234 self .never_expires : bool = False
190235 self .last_seen : Optional [datetime ] = None
191236 self .last_parser : Optional ['PyffParser' ] = None # importing PyffParser in this module causes a loop
192- self ._infos : Deque [Dict ] = deque (maxlen = config .info_buffer_size )
237+ self ._infos : Deque [ResourceInfo ] = deque (maxlen = config .info_buffer_size )
193238 self .children : Deque [Resource ] = deque ()
194239 self ._setup ()
195240
@@ -281,10 +326,8 @@ def is_expired(self) -> bool:
281326 def is_valid (self ) -> bool :
282327 return not self .is_expired () and self .last_seen is not None and self .last_parser is not None
283328
284- def add_info (self ) -> Dict [str , Any ]:
285- info : Dict [str , Any ] = dict ()
286- info ['State' ] = None
287- info ['Resource' ] = self .url
329+ def add_info (self ) -> ResourceInfo :
330+ info = ResourceInfo (resource = self .url )
288331 self ._infos .append (info )
289332 return info
290333
@@ -315,19 +358,12 @@ def name(self) -> Optional[str]:
315358 return self .url
316359
317360 @property
318- def info (self ):
361+ def info (self ) -> ResourceInfo :
319362 if self ._infos is None or not self ._infos :
320- return dict ( )
363+ return ResourceInfo ( resource = self . url )
321364 else :
322365 return self ._infos [- 1 ]
323366
324- @property
325- def errors (self ):
326- if 'Validation Errors' in self .info :
327- return self .info ['Validation Errors' ]
328- else :
329- return []
330-
331367 def load_backup (self ) -> Optional [str ]:
332368 if config .local_copy_dir is None :
333369 return None
@@ -352,7 +388,7 @@ def save_backup(self, data: Optional[str]) -> None:
352388 except IOError as ex :
353389 log .warning ("unable to save backup copy of {}: {}" .format (self .url , ex ))
354390
355- def load_resource (self , getter : Callable [[str ], Response ]) -> Tuple [Optional [str ], int , Dict [ str , Any ] ]:
391+ def load_resource (self , getter : Callable [[str ], Response ]) -> Tuple [Optional [str ], int , ResourceInfo ]:
356392 data : Optional [str ] = None
357393 status : int = 500
358394 info = self .add_info ()
@@ -366,14 +402,14 @@ def load_resource(self, getter: Callable[[str], Response]) -> Tuple[Optional[str
366402 try :
367403 r = getter (self .url )
368404
369- info [ 'HTTP Response Headers' ] = r .headers
405+ info . http_headers = dict ( r .headers )
370406 log .debug (
371407 "got status_code={:d}, encoding={} from_cache={} from {}" .format (
372408 r .status_code , r .encoding , getattr (r , "from_cache" , False ), self .url
373409 )
374410 )
375411 status = r .status_code
376- info [ 'Reason' ] = r .reason
412+ info . reason = r .reason
377413
378414 if r .ok :
379415 data = r .text
@@ -389,25 +425,25 @@ def load_resource(self, getter: Callable[[str], Response]) -> Tuple[Optional[str
389425 )
390426 data = self .load_backup ()
391427 if data is not None and len (data ) > 0 :
392- info [ 'Reason' ] = "Retrieved from local cache because status: {} != 200" .format (status )
428+ info . reason = "Retrieved from local cache because status: {} != 200" .format (status )
393429 status = 218
394430
395- info [ 'Status Code' ] = str (status )
431+ info . status_code = str (status )
396432
397433 except IOError as ex :
398434 if self .local_copy_fn is not None :
399435 log .warning ("caught exception from {} - trying local backup: {}" .format (self .url , ex ))
400436 data = self .load_backup ()
401437 if data is not None and len (data ) > 0 :
402- info [ 'Reason' ] = "Retrieved from local cache because exception: {}" .format (ex )
438+ info . reason = "Retrieved from local cache because exception: {}" .format (ex )
403439 status = 218
404440 if data is None or not len (data ) > 0 :
405441 raise ex # propagate exception if we can't find a backup
406442
407443 if data is None or not len (data ) > 0 :
408444 raise ResourceException ("failed to fetch {} (status: {:d})" .format (self .url , status ))
409445
410- info [ 'State' ] = ' Fetched'
446+ info . state = ResourceLoadState . Fetched
411447
412448 return data , status , info
413449
@@ -417,34 +453,33 @@ def parse(self, getter: Callable[[str], Response]) -> Deque[Resource]:
417453 if not data :
418454 raise ResourceException (f'Nothing to parse when loading resource { self } ' )
419455
420- info [ 'State' ] = ' Parsing'
456+ info . state = ResourceLoadState . Parsing
421457 # local import to avoid circular import
422458 from pyff .parse import parse_resource
423459
424- parse_info = parse_resource (self , data )
425- if parse_info is not None :
426- info .update (parse_info )
460+ info .parser_info = parse_resource (self , data )
427461
428462 if status != 218 : # write backup unless we just loaded from backup
429463 self .last_seen = utc_now ().replace (microsecond = 0 )
430464 self .save_backup (data )
431465
432- info [ 'State' ] = ' Parsed'
466+ info . state = ResourceLoadState . Parsed
433467 if self .t is not None :
434468 if self .post :
435469 for cb in self .post :
436470 if self .t is not None :
437471 self .t = cb (self .t , self .opts .dict ())
438472
439473 if self .is_expired ():
440- info [ 'Expired' ] = True
474+ info . expired = True
441475 raise ResourceException ("Resource at {} expired on {}" .format (self .url , self .expire_time ))
442476 else :
443- info [ 'Expired' ] = False
477+ info . expired = False
444478
445- for (eid , error ) in list (info ['Validation Errors' ].items ()):
446- log .error (error )
479+ if info .parser_info :
480+ for (eid , error ) in list (info .parser_info .validation_errors .items ()):
481+ log .error (error )
447482
448- info [ 'State' ] = ' Ready'
483+ info . state = ResourceLoadState . Ready
449484
450485 return self .children
0 commit comments