1111
1212from .config import CacheConfig
1313from .const import SCHEMA_VERSION
14- from .exceptions import (
15- BiocCacheError ,
16- InvalidRnameError ,
17- NoFpathError ,
18- RnameExistsError ,
19- RpathTimeoutError ,
20- )
21- from .models import Base , Resource
14+ from .models import Base , Metadata , Resource
2215from .utils import (
2316 calculate_file_hash ,
2417 copy_or_move ,
2518 create_tmp_dir ,
19+ download_web_file ,
2620 generate_id ,
27- validate_rname ,
21+ generate_uuid ,
2822)
2923
3024__author__ = "Jayaram Kancherla"
@@ -65,7 +59,6 @@ def __init__(self, cache_dir: Optional[Union[str, Path]] = None, config: Optiona
6559 db_schema_version = self ._setup_database ()
6660
6761 if db_schema_version != SCHEMA_VERSION :
68- print (db_schema_version )
6962 raise RuntimeError (f"Database version is not { SCHEMA_VERSION } ." )
7063
7164 self ._last_cleanup = datetime .now ()
@@ -111,13 +104,15 @@ def _setup_database(self) -> None:
111104
112105 return SCHEMA_VERSION
113106
114- def _get_detached_resource (self , session : Session , resource : Resource ) -> Optional [Resource ]:
107+ def _get_detached_resource (
108+ self , session : Session , obj : Union [Resource , Metadata ]
109+ ) -> Optional [Union [Resource , Metadata ]]:
115110 """Get a detached copy of a resource."""
116- if resource is None :
111+ if obj is None :
117112 return None
118- session .refresh (resource )
119- session .expunge (resource )
120- return resource
113+ session .refresh (obj )
114+ session .expunge (obj )
115+ return obj
121116
122117 def __enter__ (self ) -> "BiocFileCache" :
123118 return self
@@ -142,10 +137,10 @@ def get_session(self) -> Iterator[Session]:
142137 finally :
143138 session .close ()
144139
145- def _validate_rname (self , rname : str ) -> None :
146- """Validate resource name format."""
147- if not validate_rname (rname , self .config .rname_pattern ):
148- raise InvalidRnameError (f"Resource name '{ rname } ' doesn't match pattern " f"'{ self .config .rname_pattern } '" )
140+ # def _validate_rname(self, rname: str) -> None:
141+ # """Validate resource name format."""
142+ # if not validate_rname(rname, self.config.rname_pattern):
143+ # raise Exception (f"Resource name '{rname}' doesn't match pattern " f"'{self.config.rname_pattern}'")
149144
150145 def _should_cleanup (self ) -> bool :
151146 """Check if cache cleanup should be performed.
@@ -196,24 +191,33 @@ def cleanup(self) -> int:
196191 self ._last_cleanup = datetime .now ()
197192 return removed
198193
199- def get (self , rname : str ) -> Optional [Resource ]:
194+ def get (self , rname : str = None , rid : str = None ) -> Optional [Resource ]:
200195 """Get resource by name from cache.
201196
202197 Args:
203198 rname:
204199 Name to identify the resource in cache.
205200
201+ rid:
202+ Resource id to search by.
203+
206204 """
205+ if rname is None and rid is None :
206+ raise ValueError ("either 'rname' or 'rid' must be provided." )
207+
207208 with self .get_session () as session :
208- resource = session .query (Resource ).filter (Resource .rname == rname ).first ()
209+ if rname is not None :
210+ resource = session .query (Resource ).filter (Resource .rname == rname ).first ()
211+ elif rid is not None :
212+ resource = session .query (Resource ).filter (Resource .rid == rid ).first ()
209213
210214 if resource is not None :
211215 # Check if path exists with timeout
212216 start = time ()
213217 timeout = 30
214218 while not Path (str (resource .rpath )).exists ():
215219 if time () - start >= timeout :
216- raise RpathTimeoutError (
220+ raise TimeoutError (
217221 f"For resource: '{ rname } ' the rpath does not exist " f"after { timeout } seconds."
218222 )
219223 sleep (0.1 )
@@ -229,10 +233,11 @@ def add(
229233 self ,
230234 rname : str ,
231235 fpath : Union [str , Path ],
232- rtype : Literal ["local" , "web" , "relative" ] = "local " ,
236+ rtype : Literal ["local" , "web" , "relative" ] = "relative " ,
233237 action : Literal ["copy" , "move" , "asis" ] = "copy" ,
234238 expires : Optional [datetime ] = None ,
235- ext : bool = False ,
239+ download : bool = True ,
240+ ext : bool = True ,
236241 ) -> Resource :
237242 """Add a resource to the cache.
238243
@@ -252,29 +257,41 @@ def add(
252257 How to handle the file ("copy", "move", or "asis").
253258 Defaults to ``copy``.
254259
260+ download:
261+ Whether to download the resource.
262+ Only used if 'rtype' is "web".
263+
255264 expires:
256265 Optional expiration datetime.
257266 If None, resource never expires.
258267
259268 ext:
260269 Whether to use filepath extension when storing in cache.
261- Defaults to `False `.
270+ Defaults to `True `.
262271
263272 Returns:
264273 The `Resource` object added to the cache.
265274 """
266275 # self._validate_rname(rname)
267- fpath = Path (fpath )
268-
269- if not fpath .exists ():
270- raise NoFpathError (f"Resource at '{ fpath } ' does not exist" )
276+ fpath = Path (fpath ) if rtype != "web" else fpath
271277
272278 if self .get (rname ) is not None :
273- raise RnameExistsError (f"Resource '{ rname } ' already exists" )
279+ raise FileExistsError (f"Resource '{ rname } ' already exists" )
280+
281+ if rtype == "web" :
282+ outpath = download_web_file (fpath , Path (fpath ).name , download )
283+ action = "copy"
284+ else :
285+ outpath = Path (fpath )
286+
287+ if action == "asis" :
288+ logger .warning ("If action='asis', rtype must be 'local'." )
289+ rtype = "local"
274290
275291 # Generate paths and check size
276292 rid = generate_id (size = len (self ))
277- rpath = self .config .cache_dir / f"{ rid } { fpath .suffix if ext else '' } " if action != "asis" else fpath
293+ uuid = generate_uuid ()
294+ rpath = self .config .cache_dir / f"{ uuid } _{ outpath .name if ext else outpath .stem } " if action != "asis" else fpath
278295
279296 # Create resource record
280297 resource = Resource (
@@ -292,7 +309,7 @@ def add(
292309 session .commit ()
293310
294311 try :
295- copy_or_move (fpath , rpath , rname , action , False )
312+ copy_or_move (outpath , rpath , rname , action , False )
296313
297314 # Calculate and store checksum
298315 resource .etag = calculate_file_hash (rpath , self .config .hash_algorithm )
@@ -303,7 +320,7 @@ def add(
303320 except Exception as e :
304321 session .delete (resource )
305322 session .commit ()
306- raise BiocCacheError ("Failed to add resource" ) from e
323+ raise Exception ("Failed to add resource" ) from e
307324
308325 def add_batch (self , resources : List [Dict [str , Any ]]) -> List [Resource ]:
309326 """Add multiple resources in a single transaction.
@@ -349,7 +366,7 @@ def update(
349366 """
350367 fpath = Path (fpath )
351368 if not fpath .exists ():
352- raise NoFpathError (f"File '{ fpath } ' does not exist" )
369+ raise FileNotFoundError (f"File '{ fpath } ' does not exist" )
353370
354371 with self .get_session () as session :
355372 resource = session .query (Resource ).filter (Resource .rname == rname ).first ()
@@ -369,7 +386,7 @@ def update(
369386
370387 except Exception as e :
371388 session .rollback ()
372- raise BiocCacheError ("Failed to update resource" ) from e
389+ raise Exception ("Failed to update resource" ) from e
373390
374391 def remove (self , rname : str ) -> None :
375392 """Remove a resource from cache by name.
@@ -381,7 +398,7 @@ def remove(self, rname: str) -> None:
381398 Name to identify the resource in cache.
382399
383400 Raises:
384- BiocCacheError : If resource removal fails
401+ Exception : If resource removal fails
385402 """
386403 with self .get_session () as session :
387404 resource = session .query (Resource ).filter (Resource .rname == rname ).first ()
@@ -399,7 +416,7 @@ def remove(self, rname: str) -> None:
399416
400417 except Exception as e :
401418 session .rollback ()
402- raise BiocCacheError (f"Failed to remove resource '{ rname } '" ) from e
419+ raise Exception (f"Failed to remove resource '{ rname } '" ) from e
403420
404421 def list_resources (self , rtype : Optional [str ] = None , expired : Optional [bool ] = None ) -> List [Resource ]:
405422 """List resources in the cache with optional filtering.
@@ -564,7 +581,7 @@ def purge(self, force: bool = False) -> bool:
564581 True if purge was successful, False otherwise.
565582
566583 Raises:
567- BiocCacheError : If purge fails and force=False.
584+ Exception : If purge fails and force=False.
568585 """
569586 try :
570587 with self .get_session () as session :
@@ -577,7 +594,7 @@ def purge(self, force: bool = False) -> bool:
577594 except Exception as e :
578595 if not force :
579596 session .rollback ()
580- raise BiocCacheError (f"Failed to remove file for resource '{ resource .rname } '" ) from e
597+ raise Exception (f"Failed to remove file for resource '{ resource .rname } '" ) from e
581598 logger .warning (f"Failed to remove file for resource '{ resource .rname } ': { e } " )
582599
583600 session .commit ()
@@ -598,7 +615,7 @@ def purge(self, force: bool = False) -> bool:
598615
599616 except Exception as e :
600617 if not force :
601- raise BiocCacheError ("Failed to purge cache" ) from e
618+ raise Exception ("Failed to purge cache" ) from e
602619
603620 logger .error ("Database cleanup failed, forcing file removal" , exc_info = e )
604621 for file in self .config .cache_dir .iterdir ():
@@ -616,3 +633,58 @@ def purge(self, force: bool = False) -> bool:
def __len__(self):
    """Return the number of ``Resource`` rows found by a count query."""
    with self.get_session() as session:
        total = session.query(Resource).count()
        return total
636+
def check_metadata_key(self, key: str) -> bool:
    """Report whether the metadata table holds an entry for a key.

    Args:
        key:
            Metadata key to look up.

    Returns:
        True when at least one row matches ``key``, otherwise False.
    """
    with self.get_session() as session:
        matches = session.query(Metadata).filter(Metadata.key == key).count()
        return matches != 0
649+
def get_metadata(self, key: str):
    """Fetch the metadata entry stored under a key.

    Args:
        key:
            Metadata key to look up.

    Returns:
        The matching ``Metadata`` record, refreshed and expunged from the
        session used to load it, or None when no entry matches ``key``.
    """
    with self.get_session() as session:
        record = session.query(Metadata).filter(Metadata.key == key).first()
        if record is None:
            return None

        return self._get_detached_resource(session, record)
658+
def add_metadata(self, key: str, value: str):
    """Add a new metadata key.

    Args:
        key:
            Metadata key to create. Must not already be present.

        value:
            Value to store under ``key``.

    Returns:
        The newly stored ``Metadata`` record, refreshed and expunged from
        the session that inserted it.

    Raises:
        Exception: If ``key`` already exists or the insert fails.
    """
    if self.get_metadata(key=key) is not None:
        raise Exception(f"'key'={key} already exists in metadata.")

    meta = Metadata(key=key, value=value)

    with self.get_session() as session:
        try:
            session.add(meta)
            session.commit()
        except Exception as e:
            # The insert never succeeded, so there is no row to delete.
            # Roll back so the session is usable again; deleting and
            # committing here would raise and hide the error below.
            session.rollback()
            raise Exception("Failed to add metadata") from e

        # Detach only after a successful commit.
        return self._get_detached_resource(session, meta)
677+
def remove_metadata(self, key: str) -> None:
    """Delete the metadata entry stored under a key, if one exists.

    Args:
        key:
            Metadata key to remove. A key with no entry is ignored.

    Raises:
        Exception: If deleting the entry or committing the change fails.
    """
    with self.get_session() as session:
        entry = session.query(Metadata).filter(Metadata.key == key).first()

        # Nothing stored under this key, so there is nothing to delete.
        if entry is None:
            return

        try:
            session.delete(entry)
            session.commit()
        except Exception as e:
            session.rollback()
            raise Exception(f"Failed to remove key '{key}'") from e
0 commit comments