From d239d0bf27090db7456767897fc94a7de0834b69 Mon Sep 17 00:00:00 2001
From: cerredz <422michaelcerreto@gmail.com>
Date: Fri, 15 May 2026 21:06:39 -0400
Subject: [PATCH] Add Cycle.js documentation scraper

---
 lib/docs/filters/cyclejs/clean_html.rb | 30 ++++++++
 lib/docs/filters/cyclejs/entries.rb    | 32 +++++++++
 lib/docs/scrapers/cyclejs.rb           | 95 ++++++++++++++++++++++++++
 3 files changed, 157 insertions(+)
 create mode 100644 lib/docs/filters/cyclejs/clean_html.rb
 create mode 100644 lib/docs/filters/cyclejs/entries.rb
 create mode 100644 lib/docs/scrapers/cyclejs.rb

diff --git a/lib/docs/filters/cyclejs/clean_html.rb b/lib/docs/filters/cyclejs/clean_html.rb
new file mode 100644
index 0000000000..b09f079a4f
--- /dev/null
+++ b/lib/docs/filters/cyclejs/clean_html.rb
@@ -0,0 +1,30 @@
+module Docs
+  class Cyclejs
+    # Tidies the page markup: removes <br> elements, flattens <pre><code>
+    # blocks into plain-text <pre> elements, strips inline styles from table
+    # elements and makes sure every image has an alt attribute.
+    class CleanHtmlFilter < Filter
+      # Mutates the document in place and returns it (`doc`).
+      def call
+        css('br').remove
+
+        css('pre > code').each do |node|
+          parent = node.parent
+          # Copy the language onto the <pre> before the <code> wrapper is
+          # replaced by its text content below.
+          language = node['class'].to_s[/language-(\w+)/, 1]
+          parent['data-language'] = language if language
+          parent.content = node.content.strip
+        end
+
+        css('table[style]', 'tr[style]', 'td[style]', 'th[style]').remove_attr('style')
+
+        css('img').each do |node|
+          node['alt'] = node['alt'].presence || ''
+        end
+
+        doc
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/cyclejs/entries.rb b/lib/docs/filters/cyclejs/entries.rb
new file mode 100644
index 0000000000..5317a9c616
--- /dev/null
+++ b/lib/docs/filters/cyclejs/entries.rb
@@ -0,0 +1,32 @@
+module Docs
+  class Cyclejs
+    # Derives the entry name, the entry type and the per-heading entries for
+    # a Cycle.js page.
+    class EntriesFilter < Docs::EntriesFilter
+      # Page name: the first <h1>, else a titleized form of the subpath.
+      # The API index and the root page get fixed names.
+      def get_name
+        return 'API Reference' if slug == 'api/index'
+        return 'Cycle.js' if root_page?
+
+        title = at_css('h1')
+        title ? title.content.strip : subpath.sub(/\.html\z/, '').titleize
+      end
+
+      # "API" for slugs under api/, "Guide" for everything else.
+      def get_type
+        slug.start_with?('api/') ? 'API' : 'Guide'
+      end
+
+      # One [name, fragment id] pair per h2/h3 element that carries an id.
+      def additional_entries
+        css('h2[id], h3[id]').each_with_object([]) do |node, entries|
+          # Drop a "#" permalink marker from either end of the heading text.
+          name = node.content.strip.sub(/\A#\s*/, '').sub(/\s+#\z/, '')
+          # A heading without text would produce a nameless entry; skip it.
+          entries << [name, node['id']] unless name.empty?
+        end
+      end
+    end
+  end
+end
diff --git a/lib/docs/scrapers/cyclejs.rb b/lib/docs/scrapers/cyclejs.rb
new file mode 100644
index 0000000000..df27d7c147
--- /dev/null
+++ b/lib/docs/scrapers/cyclejs.rb
@@ -0,0 +1,95 @@
+require 'redcarpet'
+
+module Docs
+  # Scraper for the Cycle.js documentation site. Pages that carry their body
+  # as Markdown inside a <script id="markdown"> element are rendered to HTML
+  # in #parse.
+  class Cyclejs < UrlScraper
+    self.name = 'Cycle.js'
+    self.slug = 'cyclejs'
+    self.type = 'cyclejs'
+    self.release = '23.1.0'
+    self.base_url = 'https://cycle.js.org/'
+    self.root_path = 'index.html'
+    self.initial_paths = %w(
+      getting-started.html
+      model-view-intent.html
+      streams.html
+      drivers.html
+      components.html
+      basic-examples.html
+      dialogue.html
+      releases.html
+      api/index.html
+      api/run.html
+      api/rxjs-run.html
+      api/most-run.html
+      api/dom.html
+      api/html.html
+      api/http.html
+      api/history.html
+      api/isolate.html
+      api/state.html
+    )
+
+    self.links = {
+      home: 'https://cycle.js.org/',
+      code: 'https://github.com/cyclejs/cyclejs'
+    }
+
+    html_filters.push 'cyclejs/clean_html', 'cyclejs/entries'
+
+    options[:only_patterns] = [
+      /\Aindex\.html\z/,
+      /\Agetting-started\.html\z/,
+      /\Amodel-view-intent\.html\z/,
+      /\Astreams\.html\z/,
+      /\Adrivers\.html\z/,
+      /\Acomponents\.html\z/,
+      /\Abasic-examples\.html\z/,
+      /\Adialogue\.html\z/,
+      /\Areleases\.html\z/,
+      /\Aapi\//
+    ]
+
+    options[:attribution] = <<-HTML
+      © 2014–present Cycle.js contributors.<br>
+      Licensed under the MIT License.
+    HTML
+
+    # The latest version is looked up from the @cycle/dom npm package.
+    def get_latest_version(opts)
+      get_npm_version('@cycle/dom', opts)
+    end
+
+    private
+
+    # Returns [html, title] for a response. When the page contains a
+    # <script id="markdown"> element, its Markdown is rendered and the
+    # <title> text is prepended as an <h1>; otherwise the call is passed on
+    # to super.
+    def parse(response)
+      document = Parser.new(response.body).html
+      markdown = document.at_css('script#markdown')
+      return super unless markdown
+
+      html = markdown_renderer.render(markdown.content.strip)
+      title = document.at_css('title')&.content&.strip
+      # Wrap the title in a heading so EntriesFilter#get_name can find it via
+      # at_css('h1'); emit nothing when the page has no usable title.
+      heading = title.present? ? "<h1>#{title}</h1>" : ''
+      [Parser.new("#{heading}#{html}").html, title]
+    end
+
+    # Memoized Markdown renderer, built once per scraper instance.
+    def markdown_renderer
+      @markdown_renderer ||= Redcarpet::Markdown.new(
+        Redcarpet::Render::HTML.new(with_toc_data: true),
+        autolink: true,
+        fenced_code_blocks: true,
+        no_intra_emphasis: true,
+        tables: true
+      )
+    end
+  end
+end