Skip to content

Commit c1fa09d

Browse files
authored
Minor enhancements and support for web links (#29)
* Remove unneeded custom exceptions * Get a resource either by rname or rid * Add metadata CRUD methods * Update tests
1 parent 59e19b6 commit c1fa09d

6 files changed

Lines changed: 204 additions & 81 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
# Changelog
22

3-
## Version 0.6.1
3+
## Version 0.6.1 - 0.6.2
44

55
- Generate rids that match R's cache.
6-
- remove rname pattern checks.
6+
- Remove rname pattern checks.
7+
- Add functions to access metadata table.
8+
- Add function to add web URLs and download them if needed.
79
- Rename GitHub actions for consistency with the rest of the packages.
810

911
## Version 0.6.0

src/pybiocfilecache/cache.py

Lines changed: 112 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,14 @@
1111

1212
from .config import CacheConfig
1313
from .const import SCHEMA_VERSION
14-
from .exceptions import (
15-
BiocCacheError,
16-
InvalidRnameError,
17-
NoFpathError,
18-
RnameExistsError,
19-
RpathTimeoutError,
20-
)
21-
from .models import Base, Resource
14+
from .models import Base, Metadata, Resource
2215
from .utils import (
2316
calculate_file_hash,
2417
copy_or_move,
2518
create_tmp_dir,
19+
download_web_file,
2620
generate_id,
27-
validate_rname,
21+
generate_uuid,
2822
)
2923

3024
__author__ = "Jayaram Kancherla"
@@ -65,7 +59,6 @@ def __init__(self, cache_dir: Optional[Union[str, Path]] = None, config: Optiona
6559
db_schema_version = self._setup_database()
6660

6761
if db_schema_version != SCHEMA_VERSION:
68-
print(db_schema_version)
6962
raise RuntimeError(f"Database version is not {SCHEMA_VERSION}.")
7063

7164
self._last_cleanup = datetime.now()
@@ -111,13 +104,15 @@ def _setup_database(self) -> None:
111104

112105
return SCHEMA_VERSION
113106

114-
def _get_detached_resource(self, session: Session, resource: Resource) -> Optional[Resource]:
107+
def _get_detached_resource(
108+
self, session: Session, obj: Union[Resource, Metadata]
109+
) -> Optional[Union[Resource, Metadata]]:
115110
"""Get a detached copy of a resource."""
116-
if resource is None:
111+
if obj is None:
117112
return None
118-
session.refresh(resource)
119-
session.expunge(resource)
120-
return resource
113+
session.refresh(obj)
114+
session.expunge(obj)
115+
return obj
121116

122117
def __enter__(self) -> "BiocFileCache":
123118
return self
@@ -142,10 +137,10 @@ def get_session(self) -> Iterator[Session]:
142137
finally:
143138
session.close()
144139

145-
def _validate_rname(self, rname: str) -> None:
146-
"""Validate resource name format."""
147-
if not validate_rname(rname, self.config.rname_pattern):
148-
raise InvalidRnameError(f"Resource name '{rname}' doesn't match pattern " f"'{self.config.rname_pattern}'")
140+
# def _validate_rname(self, rname: str) -> None:
141+
# """Validate resource name format."""
142+
# if not validate_rname(rname, self.config.rname_pattern):
143+
# raise Exception(f"Resource name '{rname}' doesn't match pattern " f"'{self.config.rname_pattern}'")
149144

150145
def _should_cleanup(self) -> bool:
151146
"""Check if cache cleanup should be performed.
@@ -196,24 +191,33 @@ def cleanup(self) -> int:
196191
self._last_cleanup = datetime.now()
197192
return removed
198193

199-
def get(self, rname: str) -> Optional[Resource]:
194+
def get(self, rname: str = None, rid: str = None) -> Optional[Resource]:
200195
"""Get resource by name or id from cache.
201196
202197
Args:
203198
rname:
204199
Name to identify the resource in cache.
205200
201+
rid:
202+
Resource id to search by.
203+
206204
"""
205+
if rname is None and rid is None:
206+
raise ValueError("either 'rname' or 'rid' must be provided.")
207+
207208
with self.get_session() as session:
208-
resource = session.query(Resource).filter(Resource.rname == rname).first()
209+
if rname is not None:
210+
resource = session.query(Resource).filter(Resource.rname == rname).first()
211+
elif rid is not None:
212+
resource = session.query(Resource).filter(Resource.rid == rid).first()
209213

210214
if resource is not None:
211215
# Check if path exists with timeout
212216
start = time()
213217
timeout = 30
214218
while not Path(str(resource.rpath)).exists():
215219
if time() - start >= timeout:
216-
raise RpathTimeoutError(
220+
raise TimeoutError(
217221
f"For resource: '{rname}' the rpath does not exist " f"after {timeout} seconds."
218222
)
219223
sleep(0.1)
@@ -229,10 +233,11 @@ def add(
229233
self,
230234
rname: str,
231235
fpath: Union[str, Path],
232-
rtype: Literal["local", "web", "relative"] = "local",
236+
rtype: Literal["local", "web", "relative"] = "relative",
233237
action: Literal["copy", "move", "asis"] = "copy",
234238
expires: Optional[datetime] = None,
235-
ext: bool = False,
239+
download: bool = True,
240+
ext: bool = True,
236241
) -> Resource:
237242
"""Add a resource to the cache.
238243
@@ -252,29 +257,41 @@ def add(
252257
How to handle the file ("copy", "move", or "asis").
253258
Defaults to ``copy``.
254259
260+
download:
261+
Whether to download the resource.
262+
Only used if 'rtype' is "web".
263+
255264
expires:
256265
Optional expiration datetime.
257266
If None, resource never expires.
258267
259268
ext:
260269
Whether to use filepath extension when storing in cache.
261-
Defaults to `False`.
270+
Defaults to `True`.
262271
263272
Returns:
264273
The `Resource` object added to the cache.
265274
"""
266275
# self._validate_rname(rname)
267-
fpath = Path(fpath)
268-
269-
if not fpath.exists():
270-
raise NoFpathError(f"Resource at '{fpath}' does not exist")
276+
fpath = Path(fpath) if rtype != "web" else fpath
271277

272278
if self.get(rname) is not None:
273-
raise RnameExistsError(f"Resource '{rname}' already exists")
279+
raise FileExistsError(f"Resource '{rname}' already exists")
280+
281+
if rtype == "web":
282+
outpath = download_web_file(fpath, Path(fpath).name, download)
283+
action = "copy"
284+
else:
285+
outpath = Path(fpath)
286+
287+
if action == "asis":
288+
logger.warning("If action='asis', rtype must be 'local'.")
289+
rtype = "local"
274290

275291
# Generate paths and check size
276292
rid = generate_id(size=len(self))
277-
rpath = self.config.cache_dir / f"{rid}{fpath.suffix if ext else ''}" if action != "asis" else fpath
293+
uuid = generate_uuid()
294+
rpath = self.config.cache_dir / f"{uuid}_{outpath.name if ext else outpath.stem}" if action != "asis" else fpath
278295

279296
# Create resource record
280297
resource = Resource(
@@ -292,7 +309,7 @@ def add(
292309
session.commit()
293310

294311
try:
295-
copy_or_move(fpath, rpath, rname, action, False)
312+
copy_or_move(outpath, rpath, rname, action, False)
296313

297314
# Calculate and store checksum
298315
resource.etag = calculate_file_hash(rpath, self.config.hash_algorithm)
@@ -303,7 +320,7 @@ def add(
303320
except Exception as e:
304321
session.delete(resource)
305322
session.commit()
306-
raise BiocCacheError("Failed to add resource") from e
323+
raise Exception("Failed to add resource") from e
307324

308325
def add_batch(self, resources: List[Dict[str, Any]]) -> List[Resource]:
309326
"""Add multiple resources in a single transaction.
@@ -349,7 +366,7 @@ def update(
349366
"""
350367
fpath = Path(fpath)
351368
if not fpath.exists():
352-
raise NoFpathError(f"File '{fpath}' does not exist")
369+
raise FileNotFoundError(f"File '{fpath}' does not exist")
353370

354371
with self.get_session() as session:
355372
resource = session.query(Resource).filter(Resource.rname == rname).first()
@@ -369,7 +386,7 @@ def update(
369386

370387
except Exception as e:
371388
session.rollback()
372-
raise BiocCacheError("Failed to update resource") from e
389+
raise Exception("Failed to update resource") from e
373390

374391
def remove(self, rname: str) -> None:
375392
"""Remove a resource from cache by name.
@@ -381,7 +398,7 @@ def remove(self, rname: str) -> None:
381398
Name to identify the resource in cache.
382399
383400
Raises:
384-
BiocCacheError: If resource removal fails
401+
Exception: If resource removal fails
385402
"""
386403
with self.get_session() as session:
387404
resource = session.query(Resource).filter(Resource.rname == rname).first()
@@ -399,7 +416,7 @@ def remove(self, rname: str) -> None:
399416

400417
except Exception as e:
401418
session.rollback()
402-
raise BiocCacheError(f"Failed to remove resource '{rname}'") from e
419+
raise Exception(f"Failed to remove resource '{rname}'") from e
403420

404421
def list_resources(self, rtype: Optional[str] = None, expired: Optional[bool] = None) -> List[Resource]:
405422
"""List resources in the cache with optional filtering.
@@ -564,7 +581,7 @@ def purge(self, force: bool = False) -> bool:
564581
True if purge was successful, False otherwise.
565582
566583
Raises:
567-
BiocCacheError: If purge fails and force=False.
584+
Exception: If purge fails and force=False.
568585
"""
569586
try:
570587
with self.get_session() as session:
@@ -577,7 +594,7 @@ def purge(self, force: bool = False) -> bool:
577594
except Exception as e:
578595
if not force:
579596
session.rollback()
580-
raise BiocCacheError(f"Failed to remove file for resource '{resource.rname}'") from e
597+
raise Exception(f"Failed to remove file for resource '{resource.rname}'") from e
581598
logger.warning(f"Failed to remove file for resource '{resource.rname}': {e}")
582599

583600
session.commit()
@@ -598,7 +615,7 @@ def purge(self, force: bool = False) -> bool:
598615

599616
except Exception as e:
600617
if not force:
601-
raise BiocCacheError("Failed to purge cache") from e
618+
raise Exception("Failed to purge cache") from e
602619

603620
logger.error("Database cleanup failed, forcing file removal", exc_info=e)
604621
for file in self.config.cache_dir.iterdir():
@@ -616,3 +633,58 @@ def purge(self, force: bool = False) -> bool:
616633
def __len__(self):
617634
with self.get_session() as session:
618635
return session.query(Resource).count()
636+
637+
def check_metadata_key(self, key: str) -> bool:
638+
"""Check if a key exists in the metadata table.
639+
640+
Args:
641+
key:
642+
Key to search.
643+
644+
Returns:
645+
True if the key exists, else False.
646+
"""
647+
with self.get_session() as session:
648+
return session.query(Metadata).filter(Metadata.key == key).count() != 0
649+
650+
def get_metadata(self, key: str):
651+
"""Get the metadata entry for a key, or None if the key does not exist."""
652+
with self.get_session() as session:
653+
meta = session.query(Metadata).filter(Metadata.key == key).first()
654+
if meta is not None:
655+
return self._get_detached_resource(session, meta)
656+
657+
return None
658+
659+
def add_metadata(self, key: str, value: str):
660+
"""Add a new metadata key"""
661+
exists = self.get_metadata(key=key)
662+
663+
if exists is None:
664+
meta = Metadata(key=key, value=value)
665+
666+
with self.get_session() as session:
667+
try:
668+
session.add(meta)
669+
session.commit()
670+
return self._get_detached_resource(session, meta)
671+
except Exception as e:
672+
session.delete(meta)
673+
session.commit()
674+
raise Exception("Failed to add metadata") from e
675+
else:
676+
raise Exception(f"'key'={key} already exists in metadata.")
677+
678+
def remove_metadata(self, key: str) -> None:
679+
"""Remove a metadata key."""
680+
with self.get_session() as session:
681+
meta = session.query(Metadata).filter(Metadata.key == key).first()
682+
683+
if meta is not None:
684+
try:
685+
session.delete(meta)
686+
session.commit()
687+
688+
except Exception as e:
689+
session.rollback()
690+
raise Exception(f"Failed to remove key '{key}'") from e

src/pybiocfilecache/exceptions.py

Lines changed: 0 additions & 31 deletions
This file was deleted.

src/pybiocfilecache/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,4 @@ class Resource(Base):
7373
expires = Column(DateTime, default=None)
7474

7575
def __repr__(self) -> str:
76-
return f"<Resource(rid='{self.rid}', rname='{self.rname}')>"
76+
return f"<Resource(rid='{self.rid}', rname='{self.rname}', rpath='{self.rpath}')>"

src/pybiocfilecache/utils.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,13 @@
22
import logging
33
import re
44
import tempfile
5+
import urllib.request
56
import uuid
67
import zlib
78
from pathlib import Path
89
from shutil import copy2, move
910
from typing import Literal
1011

11-
from .exceptions import BiocCacheError
12-
1312
__author__ = "Jayaram Kancherla"
1413
__copyright__ = "Jayaram Kancherla"
1514
__license__ = "MIT"
@@ -86,4 +85,15 @@ def copy_or_move(
8685
elif action == "asis":
8786
pass
8887
except Exception as e:
89-
raise BiocCacheError(f"Failed to store resource '{rname}' from '{source}' to '{target}'") from e
88+
raise Exception(f"Failed to store resource '{rname}' from '{source}' to '{target}'") from e
89+
90+
91+
def download_web_file(url: str, filename: str, download: bool):
92+
tmp_dir = create_tmp_dir()
93+
outpath = tmp_dir / filename
94+
if download:
95+
urllib.request.urlretrieve(str(url), str(outpath))
96+
else:
97+
open(str(outpath), "a").close()
98+
99+
return outpath

0 commit comments

Comments
 (0)