From cc4f37cb7a618dc44c4a63086a6936a55e8d0ed5 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Mon, 25 May 2026 19:44:16 -0500 Subject: [PATCH 1/3] Update tests to use the ImportedUrl model --- .../api/v2/api_works_search_spec.rb | 5 ++++- spec/controllers/api/v2/api_works_spec.rb | 3 ++- spec/lib/tasks/opendoors.rake_spec.rb | 11 +++++++---- spec/models/story_parser_spec.rb | 9 ++++++--- spec/models/work_spec.rb | 18 ++++++++++++------ test/fixtures/imported_urls.yml | 15 +++++++++++++++ 6 files changed, 46 insertions(+), 15 deletions(-) create mode 100644 test/fixtures/imported_urls.yml diff --git a/spec/controllers/api/v2/api_works_search_spec.rb b/spec/controllers/api/v2/api_works_search_spec.rb index d2365172510..b8349881fb2 100644 --- a/spec/controllers/api/v2/api_works_search_spec.rb +++ b/spec/controllers/api/v2/api_works_search_spec.rb @@ -11,7 +11,10 @@ def post_search_result(valid_params) describe "API v2 WorksController - Search", type: :request, work_search: true do describe "valid work URL request" do - let!(:work) { create(:work, imported_from_url: "foo") } + let!(:work) { + create(:work) + :work.imported_url = ImportedUrl.new(original: "foo") + } it "returns 200 OK" do valid_params = { works: [{ original_urls: %w(bar foo) }] } diff --git a/spec/controllers/api/v2/api_works_spec.rb b/spec/controllers/api/v2/api_works_spec.rb index a515cc20cba..93a99715769 100644 --- a/spec/controllers/api/v2/api_works_spec.rb +++ b/spec/controllers/api/v2/api_works_spec.rb @@ -514,7 +514,8 @@ end it "work_url_from_external returns the work url when a work is found" do - work1 = create(:work, imported_from_url: "http://foo") + work1 = create(:work) + work1.imported_url = ImportedUrl.new(original: "http://foo") work_url_response = @under_test.instance_eval { find_work_by_import_url("http://foo") } expect(work_url_response[:works].first).to eq work1 end diff --git a/spec/lib/tasks/opendoors.rake_spec.rb b/spec/lib/tasks/opendoors.rake_spec.rb index 380d71bc08c..744d0764a42 100644 --- a/spec/lib/tasks/opendoors.rake_spec.rb +++ b/spec/lib/tasks/opendoors.rake_spec.rb @@ -8,8 +8,11 @@ original_url = "http://another/1" original_url2 = "http://another/2" - let!(:work_with_temp_url) { create(:work, id: id1, imported_from_url: temp_url) } - let!(:work_with_no_url) { create(:work, id: id2, imported_from_url: nil) } + let!(:work_with_temp_url) { + create(:work, id: id1) + :work.imported_url = ImportedUrl.new(original: temp_url) + } + let!(:work_with_no_url) { create(:work, id: id2) } after do File.delete("opendoors_result.txt") if File.exist?("opendoors_result.txt") @@ -19,8 +22,8 @@ it "takes a path to a CSV and updates works" do path = file_fixture("opendoors_import_url_mapping.csv") subject.invoke(path) - expect(Work.find(work_with_temp_url.id).imported_from_url).to eq(original_url) - expect(Work.find(work_with_no_url.id).imported_from_url).to eq(original_url2) + expect(Work.find(work_with_temp_url.id).imported_url.original).to eq(original_url) + expect(Work.find(work_with_no_url.id).imported_url.original).to eq(original_url2) end it "fails if the CSV path is invalid" do expect { subject.invoke("not a path") }.to raise_error Errno::ENOENT diff --git a/spec/models/story_parser_spec.rb b/spec/models/story_parser_spec.rb index a0269a3b2dc..4100cfd2e9c 100644 --- a/spec/models/story_parser_spec.rb +++ b/spec/models/story_parser_spec.rb @@ -117,19 +117,22 @@ class StoryParser let(:location_partial_match) { "http://testme.org/welcome_to_test_vale/12345" } it "should recognise previously imported www. works" do - @work = FactoryBot.create(:work, imported_from_url: location_with_www) + @work = FactoryBot.create(:work) + @work.imported_url = ImportedUrl.new(original: location_with_www) expect { @sp.check_for_previous_import(location_no_www) }.to raise_exception(StoryParser::Error) end it "should recognise previously imported non-www. works" do - @work = FactoryBot.create(:work, imported_from_url: location_no_www) + @work = FactoryBot.create(:work) + @work.imported_url = ImportedUrl.new(original: location_no_www) expect { @sp.check_for_previous_import(location_with_www) }.to raise_exception(StoryParser::Error) end it "should not perform a partial match on work import locations" do - @work = create(:work, imported_from_url: location_partial_match) + @work = create(:work) + @work.imported_url = ImportedUrl.new(original: location_partial_match) expect { @sp.check_for_previous_import("http://testme.org/welcome_to_test_vale/123") }.to_not raise_exception end diff --git a/spec/models/work_spec.rb b/spec/models/work_spec.rb index ad4a102125f..63cdf502aee 100644 --- a/spec/models/work_spec.rb +++ b/spec/models/work_spec.rb @@ -360,7 +360,8 @@ http://lj-site.com/bar/foo?color=blue https://www.lj-site.com/bar/foo?color=blue http://www.foo.com/bar https://www.foo.com/bar].each do |url| - work = create(:work, imported_from_url: url) + work = create(:work) + work.imported_url = ImportedUrl.create(original: url) expect(Work.find_by_url(url)).to eq(work) work.destroy end @@ -372,33 +373,38 @@ "http://efiction-site.com/viewstory.php?sid=123" => "http://efiction-site.com/viewstory.php?sid=456", "http://www.foo.com/i-am-something" => "http://foo.com/i-am-something/else" }.each do |import_url, find_url| - work = create(:work, imported_from_url: import_url) + work = create(:work) + work.imported_url = ImportedUrl.create(original: import_url) expect(Work.find_by_url(find_url)).to_not eq(work) work.destroy end end it "should find works imported with irrelevant query parameters" do - work = create(:work, imported_from_url: "http://lj-site.com/thing1?style=mine") + work = create(:work) + work.imported_url = ImportedUrl.create(original: "http://lj-site.com/thing1?style=mine") expect(Work.find_by_url("http://lj-site.com/thing1?style=other")).to eq(work) work.destroy end it "finds works imported with HTTP protocol and irrelevant query parameters" do - work = create(:work, imported_from_url: "http://lj-site.com/thing1?style=mine") + work = create(:work) + work.imported_url = ImportedUrl.create(original: "http://lj-site.com/thing1?style=mine") expect(Work.find_by_url("https://lj-site.com/thing1?style=other")).to eq(work) work.destroy end it "finds works imported with HTTPS protocol and irrelevant query parameters" do - work = create(:work, imported_from_url: "https://lj-site.com/thing1?style=mine") + work = create(:work) + work.imported_url = ImportedUrl.create(original: "https://lj-site.com/thing1?style=mine") expect(Work.find_by_url("http://lj-site.com/thing1?style=other")).to eq(work) work.destroy end it "gets the work from cache when searching for an imported work by URL" do url = "http://lj-site.com/thing2" - work = create(:work, imported_from_url: url) + work = create(:work) + work.imported_url = ImportedUrl.new(original: url) expect(Rails.cache.read(Work.find_by_url_cache_key(url))).to be_nil expect(Work.find_by_url(url)).to eq(work) expect(Rails.cache.read(Work.find_by_url_cache_key(url))).to eq(work) diff --git a/test/fixtures/imported_urls.yml b/test/fixtures/imported_urls.yml new file mode 100644 index 00000000000..5e08485005f --- /dev/null +++ b/test/fixtures/imported_urls.yml @@ -0,0 +1,15 @@ +--- +imported_url_00001: + created_at: 2008-12-06 22:56:52 Z + updated_at: 2008-12-06 22:56:52 Z + id: 1 + work_id: 82 + original: http://www.trickster.org/llwyden/misc/cracked.html + minimal: + minimal_no_protocol_no_www: + no_www: + with_www: + with_http: + with_https: + encoded: + decoded: From 6e21246f603801c130b35c22c0212aae275c0b17 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Mon, 25 May 2026 19:45:17 -0500 Subject: [PATCH 2/3] Update references to imported_from_url where they use the work column --- app/controllers/api/v2/works_controller.rb | 2 +- app/controllers/opendoors/tools_controller.rb | 4 +-- app/models/search/work_indexer.rb | 3 --- app/models/story_parser.rb | 3 +-- app/models/work.rb | 27 +++++++++++-------- lib/tasks/opendoors.rake | 11 +++++--- 6 files changed, 27 insertions(+), 23 deletions(-) diff --git a/app/controllers/api/v2/works_controller.rb b/app/controllers/api/v2/works_controller.rb index 3dee15833ef..724a2715cc6 100644 --- a/app/controllers/api/v2/works_controller.rb +++ b/app/controllers/api/v2/works_controller.rb @@ -127,7 +127,7 @@ def find_work_by_import_url(original_url) message = "Please provide the original URL for the work." else # We know the url will be identical no need for a call to find_by_url - works = Work.where(imported_from_url: original_url) + works = Work.joins(:imported_url).where(imported_url: { original: @imported_from_url }) message = if works.empty? "No work has been imported from \"" + original_url + "\"." else diff --git a/app/controllers/opendoors/tools_controller.rb b/app/controllers/opendoors/tools_controller.rb index 1f3cb805a64..60d526474d6 100644 --- a/app/controllers/opendoors/tools_controller.rb +++ b/app/controllers/opendoors/tools_controller.rb @@ -46,12 +46,12 @@ def url_update flash[:error] = ts("The imported-from url you are trying to set doesn't seem valid.") else # check for any other works - works = Work.where(imported_from_url: @imported_from_url) + works = Work.joins(:imported_url).where(imported_url: { original: @imported_from_url }) if works.count > 0 flash[:error] = ts("There is already a work imported from the url %{url}.", url: @imported_from_url) else # ok let's try to update - @work.update_attribute(:imported_from_url, @imported_from_url) + @work.imported_url&.update(original: @imported_from_url) flash[:notice] = "Updated imported-from url for #{@work.title} to #{@imported_from_url}" end end diff --git a/app/models/search/work_indexer.rb b/app/models/search/work_indexer.rb index f7c4c260143..c3a0dbcb6d6 100644 --- a/app/models/search/work_indexer.rb +++ b/app/models/search/work_indexer.rb @@ -55,9 +55,6 @@ def self.mapping title_to_sort_on: { type: "keyword" }, - imported_from_url: { - type: "keyword" - }, work_types: { type: "keyword" }, diff --git a/app/models/story_parser.rb b/app/models/story_parser.rb index c9e869a21f5..74fea824db4 100644 --- a/app/models/story_parser.rb +++ b/app/models/story_parser.rb @@ -300,8 +300,7 @@ def set_work_attributes(work, location = "", options = {}) raise Error, "Work could not be downloaded" if work.nil? @options = options - work.imported_from_url = location # @todo remove this as part of AO3-6979 - work.imported_url = ImportedUrl.new(original: work.imported_from_url) + work.imported_url = ImportedUrl.new(original: location) work.ip_address = options[:ip_address] work.expected_number_of_chapters = work.chapters.length diff --git a/app/models/work.rb b/app/models/work.rb index 9a10e1dd0b6..6084b121f73 100755 --- a/app/models/work.rb +++ b/app/models/work.rb @@ -298,7 +298,7 @@ def expire_caches series.each(&:expire_caches) Work.expire_work_blurb_version(id) - Work.flush_find_by_url_cache unless imported_from_url.blank? + Work.flush_find_by_url_cache unless imported_url == nil end def update_pseud_and_collection_indexes @@ -419,16 +419,21 @@ def self.find_by_url_cache_key(url) def self.find_by_url_uncached(url) url = UrlFormatter.new(url) - Work.where(imported_from_url: url.original).first || - Work.where(imported_from_url: [url.minimal, - url.with_http, url.with_https, - url.no_www, url.with_www, - url.encoded, url.decoded, - url.minimal_no_protocol_no_www]).first || - Work.where("imported_from_url LIKE ? or imported_from_url LIKE ?", - "http://#{url.minimal_no_protocol_no_www}%", - "https://#{url.minimal_no_protocol_no_www}%").select do |w| - work_url = UrlFormatter.new(w.imported_from_url) + + Work.joins(:imported_url).where(imported_url: { original: @imported_from_url }).first || + Work.joins(:imported_url).where(imported_url: { minimal: url.minimal }).first || + Work.joins(:imported_url).where(imported_url: { with_http: url.with_http }).first || + Work.joins(:imported_url).where(imported_url: { with_https: url.with_https }).first || + Work.joins(:imported_url).where(imported_url: { no_www: url.no_www }).first || + Work.joins(:imported_url).where(imported_url: { with_www: url.with_www }).first || + Work.joins(:imported_url).where(imported_url: { encoded: url.encoded }).first || + Work.joins(:imported_url).where(imported_url: { decoded: url.decoded }).first || + Work.joins(:imported_url).where(imported_url: { minimal_no_protocol_no_www: url.minimal_no_protocol_no_www }).first || + Work.joins(:imported_url).where(imported_url: { minimal: url.minimal }).first || + Work.joins(:imported_url).where("imported_urls.original LIKE ? or imported_urls.original LIKE ?", + "http://#{url.minimal_no_protocol_no_www}%", + "https://#{url.minimal_no_protocol_no_www}%").select do |w| + work_url = UrlFormatter.new(w.imported_url&.original) %w[original minimal no_www with_www with_http with_https encoded decoded].any? do |method| work_url.send(method) == url.send(method) end diff --git a/lib/tasks/opendoors.rake b/lib/tasks/opendoors.rake index da057d2303f..3101c8d872c 100644 --- a/lib/tasks/opendoors.rake +++ b/lib/tasks/opendoors.rake @@ -4,12 +4,15 @@ namespace :opendoors do def update_work(row) begin work = Work.find(row["AO3 id"]) - if work&.imported_from_url.blank? || work&.imported_from_url == row["URL Imported From"] - work.imported_from_url = row["Original URL"] + if work&.imported_url.nil? + work.imported_url = ImportedUrl.new() + end + if work&.imported_url.original.blank? || work&.imported_url.original == row["URL Imported From"] + work.imported_url.original = row["Original URL"] work.save! - "#{work.id}\twas updated: its import url is now #{work.imported_from_url}" + "#{work.id}\twas updated: its import url is now #{work.imported_url.original}" else - "#{work.id}\twas not changed: its import url is #{work.imported_from_url}" + "#{work.id}\twas not changed: its import url is #{work.imported_url.original}" end rescue StandardError => e "#{row["AO3 id"]}\twas not changed: #{e}" From 9e3a6785603e77c32772477679ece8d18f2e35b2 Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Mon, 25 May 2026 19:45:41 -0500 Subject: [PATCH 3/3] Add migration for getting rid of the imported_from_url index and column --- ...60519171051_remove_imported_from_url_field_from_works.rb | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb diff --git a/db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb b/db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb new file mode 100644 index 00000000000..89526889411 --- /dev/null +++ b/db/migrate/20260519171051_remove_imported_from_url_field_from_works.rb @@ -0,0 +1,6 @@ +class RemoveImportedFromUrlFieldFromWorks < ActiveRecord::Migration[8.1] + def change + remove_index :works, :imported_from_url + remove_column :works, :imported_from_url + end +end