diff --git a/xExtension-LlmClassification/README.md b/xExtension-LlmClassification/README.md
index 0b7fd21b..4e774798 100644
--- a/xExtension-LlmClassification/README.md
+++ b/xExtension-LlmClassification/README.md
@@ -11,6 +11,7 @@ It classifies articles on insertion using a customizable prompt, and applies tag
- Entry filtering via FreshRSS Boolean search syntax
- Response caching to avoid redundant API calls
- Content truncation to control token usage
+- Skips re-classifying articles that the feed republishes without any prompt-relevant change, avoiding duplicate LLM API calls
## Requirements
@@ -63,13 +64,21 @@ The **user prompt** is an editable template. The following placeholders are repl
**Search filters** (one per line): Only entries matching at least one filter are classified. Uses [FreshRSS Boolean search syntax](https://freshrss.github.io/FreshRSS/en/users/10_filter.html). Leave empty to classify all entries.
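+**Re-classify when content changes** (default: on): When a feed republishes an existing article, FreshRSS treats it as updated even if nothing semantically changed (e.g. a tweaked date, a reformatted author, a new enclosure attribute). To avoid wasted API calls, the extension stores, for each classified entry, a hash of the full prompt it sent (system prompt plus rendered user prompt) together with the tags it assigned. Because the system prompt is part of the hash, editing it also counts as a prompt change. On a feed update of a previously classified entry: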
+
+- If the prompt would be identical, the previous tags are restored and the LLM is **not** called.
+- If the prompt has actually changed and this option is **on**, the LLM is called again to refresh the tags.
+- If the prompt has changed and this option is **off**, the previous tags are kept and the LLM is **not** called.
+
## How it works
1. A new article arrives in FreshRSS
2. The extension checks if tag classification is enabled and the article matches the configured search filters
3. The user prompt is built by replacing placeholders with article data
-4. The LLM API is called with the system prompt and the user prompt
-5. The returned tags are validated (prefix prepended, whitelist enforced) and applied to the article
+4. For updates of previously classified articles, the hash of the new prompt is compared against the stored hash; if it is unchanged, or if **Re-classify when content changes** is off, the previous tags are restored and no API call is made
+5. Otherwise, the LLM API is called with the system prompt and the user prompt
+6. The returned tags are validated (prefix prepended, whitelist enforced) and applied to the article
+7. The prompt hash and the list of LLM-assigned tags are stored on the entry so future updates can be deduplicated
## Changelog
diff --git a/xExtension-LlmClassification/configure.phtml b/xExtension-LlmClassification/configure.phtml
index 17f26194..34c56834 100644
--- a/xExtension-LlmClassification/configure.phtml
+++ b/xExtension-LlmClassification/configure.phtml
@@ -132,6 +132,15 @@
+
= _t('gen.action.submit') ?>
diff --git a/xExtension-LlmClassification/extension.php b/xExtension-LlmClassification/extension.php
index 66662af2..566190e8 100644
--- a/xExtension-LlmClassification/extension.php
+++ b/xExtension-LlmClassification/extension.php
@@ -10,6 +10,9 @@ final class LlmClassificationExtension extends Minz_Extension {
private const DEFAULT_MAX_RETRIES = 2;
private const RETRYABLE_HTTP_STATUSES = [429, 500, 502, 503, 504];
private const PROMPT_FILENAME = 'prompt.md';
+ private const ATTRIBUTE_NAMESPACE = 'llm_classification';
+ private const ATTRIBUTE_KEY_PROMPT_HASH = 'prompt_hash';
+ private const ATTRIBUTE_KEY_TAGS = 'tags';
public string $user_prompt = '';
@@ -57,6 +60,9 @@ public function init(): void {
if ($this->getUserConfigurationString('search_filter') === null) {
$this->setUserConfigurationValue('search_filter', '');
}
+ if ($this->getUserConfigurationBool('reclassify_on_change') === null) {
+ $this->setUserConfigurationValue('reclassify_on_change', true);
+ }
}
#[\Override]
@@ -89,6 +95,9 @@ public function handleConfigureAction(): void {
$this->setUserConfigurationValue('search_filter',
trim(Minz_Request::paramString('search_filter', plaintext: true)));
+
+ $this->setUserConfigurationValue('reclassify_on_change',
+ Minz_Request::paramBoolean('reclassify_on_change'));
}
$this->user_prompt = '';
@@ -337,48 +346,134 @@ private function callLlm(string $systemPrompt, string $userPrompt): ?array {
/**
* Apply classification results to an entry.
+ *
* @param array $classification
+ * @param list|null $previousLlmTags Exact list of tags previously assigned by this extension.
+ * When provided, these tags are removed from the entry
+ * before re-adding new ones (used on the update path).
+ * When null, falls back to prefix-based removal for
+ * backwards compatibility on the insert path.
+ * @return array{entry: FreshRSS_Entry, llm_tags: list} The entry with merged tags and the
+ * new list of LLM-assigned tags.
*/
- private function applyClassification(FreshRSS_Entry $entry, array $classification, bool $removeOldTags): FreshRSS_Entry {
- if (is_array($classification['tags'] ?? null)) {
- $prefix = $this->getUserConfigurationString('tag_prefix') ?? '';
- $allowedTagsStr = $this->getUserConfigurationString('allowed_tags') ?? '';
- $allowedTags = $allowedTagsStr !== ''
- ? array_filter(array_map('trim', explode("\n", $allowedTagsStr)), static fn(string $tag) => $tag !== '')
- : [];
+ private function applyClassification(FreshRSS_Entry $entry, array $classification, ?array $previousLlmTags = null): array {
+ $llmTags = [];
+ if (!is_array($classification['tags'] ?? null)) {
+ return ['entry' => $entry, 'llm_tags' => $llmTags];
+ }
- $existingTags = $entry->tags();
+ $prefix = $this->getUserConfigurationString('tag_prefix') ?? '';
+ $allowedTagsStr = $this->getUserConfigurationString('allowed_tags') ?? '';
+ $allowedTags = $allowedTagsStr !== ''
+ ? array_filter(array_map('trim', explode("\n", $allowedTagsStr)), static fn(string $tag) => $tag !== '')
+ : [];
- if ($removeOldTags && $prefix !== '') {
- $existingTags = array_values(array_filter(
- $existingTags,
- static fn(string $tag) => !str_starts_with($tag, $prefix)
- ));
- }
+ $existingTags = $entry->tags();
- $newTags = [];
- foreach ($classification['tags'] as $tag) {
- if (!is_string($tag)) {
- continue;
- }
- $tag = trim($tag);
- if ($tag === '') {
- continue;
- }
- if (!empty($allowedTags) && !in_array($tag, $allowedTags, true)) {
- continue;
- }
- $newTags[] = htmlspecialchars($prefix . $tag, ENT_COMPAT, 'UTF-8');
+ if ($previousLlmTags !== null && $previousLlmTags !== []) {
+ $existingTags = array_values(array_filter(
+ $existingTags,
+ static fn(string $tag) => !in_array($tag, $previousLlmTags, true)
+ ));
+ } elseif ($previousLlmTags === null && $prefix !== '') {
+ $existingTags = array_values(array_filter(
+ $existingTags,
+ static fn(string $tag) => !str_starts_with($tag, $prefix)
+ ));
+ }
+
+ foreach ($classification['tags'] as $tag) {
+ if (!is_string($tag)) {
+ continue;
+ }
+ $tag = trim($tag);
+ if ($tag === '') {
+ continue;
+ }
+ if (!empty($allowedTags) && !in_array($tag, $allowedTags, true)) {
+ continue;
}
+ $llmTags[] = htmlspecialchars($prefix . $tag, ENT_COMPAT, 'UTF-8');
+ }
+ $llmTags = array_values(array_unique($llmTags));
+
+ $entry->_tags(array_values(array_unique(array_merge($existingTags, $llmTags))));
+
+ return ['entry' => $entry, 'llm_tags' => $llmTags];
+ }
+
+ /**
+ * Look up the previously stored classification for an entry being updated.
+ *
+ * Returns the prompt hash and the exact list of tags this extension assigned the last time
+ * the entry was classified. Returns null if the entry was never classified by this extension
+ * (e.g., it predates the extension being enabled, or the namespaced attribute is missing).
+ *
+ * @return array{hash: string, llm_tags: list}|null
+ * @throws Minz_ConfigurationNamespaceException
+ * @throws Minz_PDOConnectionException
+ */
+ private function loadPreviousClassification(FreshRSS_Entry $entry): ?array {
+ $feedId = $entry->feedId();
+ $guid = $entry->guid();
+ if ($feedId <= 0 || $guid === '') {
+ return null;
+ }
+
+ $entryDAO = FreshRSS_Factory::createEntryDao();
+ $previous = $entryDAO->searchByGuid($feedId, $guid);
+ if ($previous === null) {
+ return null;
+ }
- $entry->_tags(array_values(array_unique(array_merge($existingTags, $newTags))));
+ $namespaced = $previous->attributeArray(self::ATTRIBUTE_NAMESPACE);
+ if ($namespaced === null) {
+ return null;
+ }
+
+ $previousHash = $namespaced[self::ATTRIBUTE_KEY_PROMPT_HASH] ?? null;
+ if (!is_string($previousHash) || $previousHash === '') {
+ return null;
}
+ $storedTags = $namespaced[self::ATTRIBUTE_KEY_TAGS] ?? null;
+ $llmTags = is_array($storedTags)
+ ? array_values(array_filter(
+ array_map(static fn($t) => is_string($t) ? $t : '', $storedTags),
+ static fn(string $t) => $t !== ''
+ ))
+ : [];
+
+ return ['hash' => $previousHash, 'llm_tags' => $llmTags];
+ }
+
+ /**
+ * Reuse a previous classification on an updated entry: merge the prior LLM tags back into the
+ * (otherwise fresh) entry and re-attach the namespaced classification attribute so it survives
+ * the upcoming `updateEntry` write.
+ *
+ * @param list $previousLlmTags
+ */
+ private function reusePreviousClassification(FreshRSS_Entry $entry, array $previousLlmTags, string $previousHash): FreshRSS_Entry {
+ if ($previousLlmTags !== []) {
+ $existingTags = $entry->tags();
+ $entry->_tags(array_values(array_unique(array_merge($existingTags, $previousLlmTags))));
+ }
+ $entry->_attribute(self::ATTRIBUTE_NAMESPACE, [
+ self::ATTRIBUTE_KEY_PROMPT_HASH => $previousHash,
+ self::ATTRIBUTE_KEY_TAGS => $previousLlmTags,
+ ]);
return $entry;
}
/**
- * Hook for EntryBeforeInsert: classify a new entry.
+ * Hook for EntryBeforeInsert: classify a new entry, or reuse the prior classification when an
+ * existing entry is detected as updated by FreshRSS but its classification-relevant inputs have
+ * not changed (or the user opted out of re-classifying updates). This avoids hitting the LLM
+ * every time a feed publishes a no-op refresh of an article.
+ *
+ * @throws Minz_ConfigurationNamespaceException
+ * @throws Minz_PDOConnectionException
* @throws Minz_PermissionDeniedException
*/
public function classifyEntry(FreshRSS_Entry $entry): FreshRSS_Entry {
@@ -398,11 +493,32 @@ public function classifyEntry(FreshRSS_Entry $entry): FreshRSS_Entry {
return $entry;
}
+ $promptHash = sha1($systemPrompt . "\n" . $userPrompt);
+
+ $previous = null;
+ if ($entry->isUpdated() === true) {
+ $previous = $this->loadPreviousClassification($entry);
+ if ($previous !== null) {
+ $reclassifyOnChange = $this->getUserConfigurationBool('reclassify_on_change') ?? true;
+ $promptUnchanged = hash_equals($previous['hash'], $promptHash);
+ if ($promptUnchanged || !$reclassifyOnChange) {
+ return $this->reusePreviousClassification($entry, $previous['llm_tags'], $previous['hash']);
+ }
+ }
+ }
+
$classification = $this->callLlm($systemPrompt, $userPrompt);
if ($classification === null) {
return $entry;
}
- return $this->applyClassification($entry, $classification, removeOldTags: true);
+ $previousLlmTags = $previous !== null ? $previous['llm_tags'] : null;
+ $result = $this->applyClassification($entry, $classification, $previousLlmTags);
+ $entry = $result['entry'];
+ $entry->_attribute(self::ATTRIBUTE_NAMESPACE, [
+ self::ATTRIBUTE_KEY_PROMPT_HASH => $promptHash,
+ self::ATTRIBUTE_KEY_TAGS => $result['llm_tags'],
+ ]);
+ return $entry;
}
}
diff --git a/xExtension-LlmClassification/i18n/en/ext.php b/xExtension-LlmClassification/i18n/en/ext.php
index c78a467a..4c690362 100644
--- a/xExtension-LlmClassification/i18n/en/ext.php
+++ b/xExtension-LlmClassification/i18n/en/ext.php
@@ -36,6 +36,8 @@
'title' => 'Conditions for tagging',
'search' => 'Search filters',
'search_help' => 'Only classify entries matching at least one of these filters. Leave empty to classify all entries.',
+ 'reclassify_on_change' => 'Re-classify when content changes',
+ 'reclassify_on_change_help' => 'When a feed re-publishes an existing article, call the LLM again only if the prompt (title, content, etc.) has actually changed. When disabled, previously-classified articles are never re-classified on update. Articles updated without prompt changes always reuse their previous tags without calling the LLM, regardless of this setting.',
),
'default_prompt' => 'Classify the following article.
diff --git a/xExtension-LlmClassification/i18n/fr/ext.php b/xExtension-LlmClassification/i18n/fr/ext.php
index 4c13bc2e..87bfe1ca 100644
--- a/xExtension-LlmClassification/i18n/fr/ext.php
+++ b/xExtension-LlmClassification/i18n/fr/ext.php
@@ -36,6 +36,8 @@
'title' => 'Conditions pour l’étiquetage',
'search' => 'Filtres de recherche',
'search_help' => 'Classifier uniquement les articles correspondant à au moins un de ces filtres. Laisser vide pour classifier tous les articles.',
+ 'reclassify_on_change' => 'Re-classifier lorsque le contenu change',
+ 'reclassify_on_change_help' => 'Lorsqu’un flux republie un article existant, ne rappeler le LLM que si l’invite (titre, contenu, etc.) a réellement changé. Si désactivé, les articles déjà classifiés ne sont jamais re-classifiés lors d’une mise à jour. Les articles mis à jour sans changement d’invite réutilisent toujours leurs tags précédents sans appeler le LLM, indépendamment de ce réglage.',
),
'default_prompt' => 'Classifie l’article suivant.