From e958f48da4e9e4ac5aafa59afabfa85f2e34c358 Mon Sep 17 00:00:00 2001 From: Robert Tonsing Date: Tue, 9 Jun 2026 10:45:46 -0500 Subject: [PATCH 1/5] also process .json files --- puller.py | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/puller.py b/puller.py index 1c05cf7..a873f89 100644 --- a/puller.py +++ b/puller.py @@ -42,11 +42,19 @@ # These are where .zip.trig files go on ibiblio : DOPULL_LOG_DIR = os.path.join(PRIVATE, 'logs', 'dopull') DOPUSH_LOG_DIR = os.path.join(PRIVATE, 'logs', 'dopush') +JSON_LOG_DIR = os.path.join(PRIVATE, 'logs', 'json') def scan_dopull_log(): - """ - Scan the dopull log directory for new files. + """ + Scan DOPULL_LOG_DIR for new files. + Note: this does 3 things: + 1. For all trigger files, it pulls the latest files from the upstream repo into the FILES directory. + 2. Copies .json files to JSON_LOG_DIR for database processing, and renames them as .info.txt trigger files. + 3. Copies .zip.trig (all) files to DOPUSH_LOG_DIR for database updates. + Both directories are processed by FileInfo.py. In the future, it should be updated to do the appropriate + processing for each file type, but for now this is a simple way to get the files where they need to go + without needing to change FileInfo.py. """ for filename in sorted(os.listdir(DOPULL_LOG_DIR)): mode = os.stat(os.path.join(DOPULL_LOG_DIR, filename))[stat.ST_MODE] @@ -55,19 +63,37 @@ def scan_dopull_log(): continue ebook_num = 0 - m = re.match(r'^(\d+)\.zip\.trig$', filename) + m = re.match(r'^(\d+)\.(zip\.trig|json)$', filename) if m: ebook_num = int(m.group(1)) logging.info(ebook_num) origin = f'{UPSTREAM_REPO_DIR}{ebook_num}.git/' target_path = os.path.join(FILES, str(ebook_num)) logging.info(f'origin: {origin}, target_path: {target_path}') - - if update_folder(origin, target_path): + + # Get the latest files from the upstream repo + if not update_folder(origin, target_path): + logging.error(f'failed to get files for {ebook_num}') + continue + + try: + if filename.endswith('.json'): + # For .json files, copy them to the JSON_LOG_DIR to add to the database + shutil.copy(os.path.join(DOPULL_LOG_DIR, filename), + os.path.join(JSON_LOG_DIR, filename)) + logging.info(f'copied {filename} to JSON log directory for processing.') + # Rename it as a trigger file + newfilename = os.path.splitext(filename)[0] + '.info.txt' + os.rename(os.path.join(DOPULL_LOG_DIR, filename), + os.path.join(DOPULL_LOG_DIR, newfilename)) + filename = newfilename + + # Move all files to the DOPUSH_LOG_DIR to trigger updating shutil.move(os.path.join(DOPULL_LOG_DIR, filename), - os.path.join(DOPUSH_LOG_DIR, filename)) - else: - logging.error(f'failed to update {ebook_num}') + os.path.join(DOPUSH_LOG_DIR, filename)) + except Exception as e: + logging.error(f'failed to trigger update for {ebook_num}: {e}') + return def main(): From 2ded76c6adaf882166a0aac34de8df59b8639fe0 Mon Sep 17 00:00:00 2001 From: Robert Tonsing Date: Tue, 9 Jun 2026 10:51:52 -0500 Subject: [PATCH 2/5] remove .json handling from dopull --- dopull/dopull.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/dopull/dopull.py b/dopull/dopull.py index 50d13e6..66293d1 100644 --- a/dopull/dopull.py +++ b/dopull/dopull.py @@ -19,7 +19,7 @@ except ImportError: pwd = None -VERSION = "2026.05.29" +VERSION = "2026.06.09" SCRIPT_DIR = Path(__file__).resolve().parent # Parent directory of where to look for files to push out. @@ -33,11 +33,9 @@ LOGFILE = Path(os.getenv("LOGFILE", str(SCRIPT_DIR / "logs/dopull.log"))) # Lock file to prevent multiple dopulls running at the same time. PULLRUNNING = Path(os.getenv("PULLRUNNING", str(SCRIPT_DIR / ".dopull-running"))) -# Trigger directory for JSON processing on ibiblio (kept for compatibility with shell config). IBIBLIO = "gutenberg.login.ibiblio.org" PRIVATE = os.getenv('PRIVATE') or '' IBIBLIO_DOPULL_DIR = os.path.join(PRIVATE, 'logs', 'dopull') -IBIBLIO_JSON_DIR = os.path.join(PRIVATE, 'logs', 'json') # Email address to send trouble reports to. BOSS = os.getenv("BOSS", "pterodactyl@fastmail.com") LOGGER = logging.getLogger("dopull") @@ -125,7 +123,6 @@ def main() -> int: • For each trigger file found in "push" directory, ◦ Get owner of file (user) ◦ Trigger ebook update by copying it to the ibiblio dopull dir. - ◦ If file is .json, trigger ebook indexing by copying it to the ibiblio JSON dir. ◦ Move file to DONE archive ◦ Send success/fail email to user """ @@ -179,16 +176,6 @@ def process_trigger_file(trigger_file: Path) -> str: append_out(f"Failed to trigger ibiblio update for {filename}: {e}") return "failure" - # Handle .json files for ebook indexing. - if trigger_file.suffix.lower() == ".json": - try: - dest = f"{IBIBLIO}:{IBIBLIO_JSON_DIR}/{filename}" - subprocess.run(["scp", str(trigger_file), dest], check=True) - append_out(f"Copied {filename} to ibiblio to trigger ebook indexing.") - except Exception as e: - append_out(f"Failed to trigger ebook indexing for {filename}: {e}") - return "failure" - # If we got to here, all is OK, move trigger file to the DONE directory, # otherwise, it will be retried on the next run. try: From 3df314fa6e01be3e1ad0d90a85a44b9d5e5db71b Mon Sep 17 00:00:00 2001 From: Robert Tonsing Date: Wed, 10 Jun 2026 12:38:53 -0500 Subject: [PATCH 3/5] handle multiple trigger files, & file name correction --- puller.py | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/puller.py b/puller.py index a873f89..87201ab 100644 --- a/puller.py +++ b/puller.py @@ -50,11 +50,16 @@ def scan_dopull_log(): Scan DOPULL_LOG_DIR for new files. Note: this does 3 things: 1. For all trigger files, it pulls the latest files from the upstream repo into the FILES directory. - 2. Copies .json files to JSON_LOG_DIR for database processing, and renames them as .info.txt trigger files. - 3. Copies .zip.trig (all) files to DOPUSH_LOG_DIR for database updates. + 2. Moves .json files to JSON_LOG_DIR for database processing, and creates an .info.txt trigger file. + 3. Moves .zip.trig files to DOPUSH_LOG_DIR for database updates. Both directories are processed by FileInfo.py. In the future, it should be updated to do the appropriate processing for each file type, but for now this is a simple way to get the files where they need to go without needing to change FileInfo.py. + + If both .zip.trig and .json files are present for the same ebook number: + (Workflow creates a .json, file, then Errata Workbench creates a .zip.trig file) + should be OK, the repo has all the changes, and we need the trigger file in any case. + Repo pull will occur twice, but the second will have no changes, snd this should be too rare to worry about. """ for filename in sorted(os.listdir(DOPULL_LOG_DIR)): mode = os.stat(os.path.join(DOPULL_LOG_DIR, filename))[stat.ST_MODE] @@ -65,10 +70,13 @@ def scan_dopull_log(): ebook_num = 0 m = re.match(r'^(\d+)\.(zip\.trig|json)$', filename) if m: - ebook_num = int(m.group(1)) + ebook_num = m.group(1) + if not ebook_num.isdigit(): + logging.error(f'Skipping invalid filename (non-numeric book number): {filename}') + continue logging.info(ebook_num) origin = f'{UPSTREAM_REPO_DIR}{ebook_num}.git/' - target_path = os.path.join(FILES, str(ebook_num)) + target_path = os.path.join(FILES, ebook_num) logging.info(f'origin: {origin}, target_path: {target_path}') # Get the latest files from the upstream repo @@ -76,19 +84,21 @@ def scan_dopull_log(): logging.error(f'failed to get files for {ebook_num}') continue + # Now trigger database/catalog update try: if filename.endswith('.json'): - # For .json files, copy them to the JSON_LOG_DIR to add to the database - shutil.copy(os.path.join(DOPULL_LOG_DIR, filename), + # For .json files, move them to the JSON_LOG_DIR to add to the database + shutil.move(os.path.join(DOPULL_LOG_DIR, filename), os.path.join(JSON_LOG_DIR, filename)) - logging.info(f'copied {filename} to JSON log directory for processing.') - # Rename it as a trigger file - newfilename = os.path.splitext(filename)[0] + '.info.txt' - os.rename(os.path.join(DOPULL_LOG_DIR, filename), - os.path.join(DOPULL_LOG_DIR, newfilename)) - filename = newfilename - - # Move all files to the DOPUSH_LOG_DIR to trigger updating + logging.info(f'moved {filename} to JSON log directory for processing.') + + # Create a corresponding .zip.trig trigger file + trigger_file = os.path.join(DOPULL_LOG_DIR, ebook_num + '.zip.trig') + if not os.path.exists(trigger_file): + with open(trigger_file, 'w') as file: + pass + + # Move file to the DOPUSH_LOG_DIR to trigger updating shutil.move(os.path.join(DOPULL_LOG_DIR, filename), os.path.join(DOPUSH_LOG_DIR, filename)) except Exception as e: @@ -96,6 +106,7 @@ def scan_dopull_log(): return + def main(): sys.exit(scan_dopull_log()) From c8cfeb84c1e5ac28b63a5b922cf8819e5bb15af9 Mon Sep 17 00:00:00 2001 From: Robert Tonsing Date: Thu, 11 Jun 2026 11:38:34 -0500 Subject: [PATCH 4/5] typo --- puller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/puller.py b/puller.py index 87201ab..ef52665 100644 --- a/puller.py +++ b/puller.py @@ -59,7 +59,7 @@ def scan_dopull_log(): If both .zip.trig and .json files are present for the same ebook number: (Workflow creates a .json, file, then Errata Workbench creates a .zip.trig file) should be OK, the repo has all the changes, and we need the trigger file in any case. - Repo pull will occur twice, but the second will have no changes, snd this should be too rare to worry about. + Repo pull will occur twice, but the second will have no changes, and this should be too rare to worry about. """ for filename in sorted(os.listdir(DOPULL_LOG_DIR)): mode = os.stat(os.path.join(DOPULL_LOG_DIR, filename))[stat.ST_MODE] From 1ea6035a78619918c30351bb90f58b88daf4256c Mon Sep 17 00:00:00 2001 From: Robert Tonsing Date: Thu, 11 Jun 2026 20:38:07 -0500 Subject: [PATCH 5/5] add check for existing trigger file --- puller.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/puller.py b/puller.py index ef52665..91e4cee 100644 --- a/puller.py +++ b/puller.py @@ -93,14 +93,15 @@ def scan_dopull_log(): logging.info(f'moved {filename} to JSON log directory for processing.') # Create a corresponding .zip.trig trigger file - trigger_file = os.path.join(DOPULL_LOG_DIR, ebook_num + '.zip.trig') + trigger_file = os.path.join(DOPUSH_LOG_DIR, ebook_num + '.zip.trig') if not os.path.exists(trigger_file): with open(trigger_file, 'w') as file: pass - - # Move file to the DOPUSH_LOG_DIR to trigger updating - shutil.move(os.path.join(DOPULL_LOG_DIR, filename), - os.path.join(DOPUSH_LOG_DIR, filename)) + else: + # Move file to the DOPUSH_LOG_DIR to trigger updating + trigger_push = os.path.join(DOPUSH_LOG_DIR, filename) + if not os.path.exists(trigger_push): + shutil.move(os.path.join(DOPULL_LOG_DIR, filename), trigger_push) except Exception as e: logging.error(f'failed to trigger update for {ebook_num}: {e}')