From 144dd8b9e8ec4d99674bd7dc7fc008f4457bbced Mon Sep 17 00:00:00 2001
From: Kirk Wang
Date: Mon, 2 Mar 2026 15:30:07 -0800
Subject: [PATCH] WIP: Implement transcription support in Hyrax

---
 .../hyrax/works_controller_behavior.rb        |  11 +-
 .../hyrax/transcriptions_controller.rb        |  24 ++++
 app/presenters/hyrax/annotates_content.rb     |  76 ++++++++++++
 .../hyrax/iiif_manifest_presenter.rb          |  12 ++
 config/metadata/file_set_metadata.yaml        |   2 +
 config/routes.rb                              |   2 +
 .../hyrax/transcriptions_controller_spec.rb   |  28 +++++
 spec/fixtures/sample.vtt                      |   9 ++
 .../hyrax/iiif_manifest_presenter_spec.rb     | 110 ++++++++++++++++++
 9 files changed, 270 insertions(+), 4 deletions(-)
 create mode 100644 app/controllers/hyrax/transcriptions_controller.rb
 create mode 100644 app/presenters/hyrax/annotates_content.rb
 create mode 100644 spec/controllers/hyrax/transcriptions_controller_spec.rb
 create mode 100644 spec/fixtures/sample.vtt

diff --git a/app/controllers/concerns/hyrax/works_controller_behavior.rb b/app/controllers/concerns/hyrax/works_controller_behavior.rb
index 8f69c7829b..e0a7d68c14 100644
--- a/app/controllers/concerns/hyrax/works_controller_behavior.rb
+++ b/app/controllers/concerns/hyrax/works_controller_behavior.rb
@@ -139,12 +139,15 @@ def inspect_work
     end
 
     def manifest
-      headers['Access-Control-Allow-Origin'] = '*'
+      locale = params[:locale] || current_user&.preferred_locale || I18n.default_locale
+      I18n.with_locale(locale) do
+        headers['Access-Control-Allow-Origin'] = '*'
 
-      json = iiif_manifest_builder.manifest_for(presenter: iiif_manifest_presenter)
+        json = iiif_manifest_builder.manifest_for(presenter: iiif_manifest_presenter)
 
-      respond_to do |wants|
-        wants.any { render json: json }
+        respond_to do |wants|
+          wants.any { render json: json }
+        end
       end
     end
 
diff --git a/app/controllers/hyrax/transcriptions_controller.rb b/app/controllers/hyrax/transcriptions_controller.rb
new file mode 100644
index 0000000000..0f24467426
--- /dev/null
+++ b/app/controllers/hyrax/transcriptions_controller.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Hyrax
+  ##
+  # Serves a stored file as inline WebVTT (`text/vtt`) with a permissive
+  # CORS header.
+  class TranscriptionsController < ApplicationController
+    ##
+    # Looks up the file metadata named by `params[:id]`, reads the matching
+    # file from the storage adapter and sends its content inline.
+    #
+    # TODO(review): nothing in this action checks the requester's ability or
+    # the stored MIME type, so any file whose metadata id is known is returned
+    # as `text/vtt`. Confirm the intended access rules before this leaves WIP.
+    def show
+      file_metadata = Hyrax.query_service.find_by(id: params[:id])
+      file_object = Hyrax.storage_adapter.find_by(id: file_metadata.file_identifier)
+      transcription = file_object.read
+
+      response.headers['Access-Control-Allow-Origin'] = '*'
+      send_data transcription, type: 'text/vtt; charset=utf-8', disposition: 'inline'
+    end
+  end
+end
diff --git a/app/presenters/hyrax/annotates_content.rb b/app/presenters/hyrax/annotates_content.rb
new file mode 100644
index 0000000000..e42e79f39f
--- /dev/null
+++ b/app/presenters/hyrax/annotates_content.rb
@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+
+module Hyrax
+  ##
+  # Presenter mixin that lists the WebVTT transcriptions stored beside an
+  # audio or video file set as IIIF `supplementing` annotations.
+  #
+  # The including class must respond to `#id`, `#hostname`, `#video?` and
+  # `#audio?`.
+  module AnnotatesContent
+    extend ActiveSupport::Concern
+
+    ##
+    # @return [Array<IIIFManifest::V3::AnnotationContent>, nil] one annotation
+    #   per transcription, or nil when the content is neither video nor audio
+    def annotation_content
+      transcription_content if video? || audio?
+    end
+
+    private
+
+    # Builds one annotation per transcription document. Documents without an
+    # indexed file id are skipped because no caption URL can be built for them.
+    def transcription_content
+      transcriptions.map do |hash|
+        file_id = Array(hash['file_ids_ssim']).first
+        next if file_id.nil?
+
+        IIIFManifest::V3::AnnotationContent.new(
+          type: 'Annotation',
+          motivation: 'supplementing',
+          body_id: captions_url(file_id),
+          format: 'text/vtt',
+          label: hash['title_tesim']&.first || 'Captions',
+          language: hash['language_tesim']&.first || 'en'
+        )
+      end.compact
+    end
+
+    # Memoized transcription documents, ordered for the current locale.
+    def transcriptions
+      @transcriptions ||= sort_transcriptions_by_language(transcription_documents)
+    end
+
+    # Queries Solr for the `text/vtt` members of the object that lists this id
+    # in `member_ids_ssim`.
+    #
+    # @return [Array] empty when no such parent is indexed or it has no members
+    def transcription_documents
+      parent = Hyrax::SolrService.query(%(member_ids_ssim:"#{id}"), rows: 1, fl: 'member_ids_ssim').first
+      member_ids = Array(parent && parent['member_ids_ssim'])
+      return [] if member_ids.empty?
+
+      # Values are quoted so the `/` in the MIME type and any punctuation in an
+      # id are matched literally instead of being read as query syntax.
+      id_clause = member_ids.map { |member_id| %("#{member_id}") }.join(' OR ')
+      query = %(id:(#{id_clause}) AND mime_type_ssi:"text/vtt")
+      Hyrax::SolrService.query(query, rows: 100, fl: 'title_tesim,language_tesim,file_ids_ssim')
+    end
+
+    def captions_url(file_id)
+      Hyrax::Engine.routes.url_helpers.transcription_url(file_id, host: hostname)
+    end
+
+    # Orders documents alphabetically by language code, then moves the ones
+    # matching the current I18n locale to the front.
+    def sort_transcriptions_by_language(results)
+      current_locale = I18n.locale.to_s
+      language_of = ->(hash) { Array(hash['language_tesim']).first.to_s }
+
+      preferred, others = results.sort_by { |hash| language_of.call(hash) }
+                                 .partition { |hash| language_of.call(hash) == current_locale }
+      preferred + others
+    end
+  end
+end
diff --git a/app/presenters/hyrax/iiif_manifest_presenter.rb b/app/presenters/hyrax/iiif_manifest_presenter.rb
index 68dbe5b8fd..6b10d4a4c2 100644
--- a/app/presenters/hyrax/iiif_manifest_presenter.rb
+++ b/app/presenters/hyrax/iiif_manifest_presenter.rb
@@ -129,6 +129,17 @@ def sequence_rendering
       end
     end
 
+    ##
+    # @return [Array<Hash{String => String, Hash}>]
+    def homepage
+      [{
+        'id' => Rails.application.routes.url_helpers.polymorphic_url(model, host: hostname),
+        'type' => 'Text',
+        'format' => 'text/html',
+        'label' => { 'none' => [Array(title).first || ''] }
+      }]
+    end
+
     ##
     # @return [Boolean]
     def work?
@@ -189,6 +200,7 @@ class DisplayImagePresenter < Draper::Decorator
 
       include Hyrax::DisplaysImage
       include Hyrax::DisplaysContent
+      include Hyrax::AnnotatesContent
 
       ##
       # @!attribute [w] ability
diff --git a/config/metadata/file_set_metadata.yaml b/config/metadata/file_set_metadata.yaml
index 932d44520e..875fc81618 100644
--- a/config/metadata/file_set_metadata.yaml
+++ b/config/metadata/file_set_metadata.yaml
@@ -93,6 +93,8 @@ attributes:
     form:
       primary: false
     predicate: http://purl.org/dc/elements/1.1/language
+    index_keys:
+      - "language_tesim"
   # required: license
   publisher:
     type: string
diff --git a/config/routes.rb b/config/routes.rb
index b8280798dd..0dcdbd17f3 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -14,6 +14,8 @@
 
   resources :downloads, only: :show
 
+  get 'transcriptions/:id.vtt', to: 'transcriptions#show', as: :transcription
+
   # ResourceSync routes
   get '/.well-known/resourcesync' => 'resource_sync#source_description', as: :source_description
   get '/capabilitylist' => 'resource_sync#capability_list', as: :capability_list
diff --git a/spec/controllers/hyrax/transcriptions_controller_spec.rb b/spec/controllers/hyrax/transcriptions_controller_spec.rb
new file mode 100644
index 0000000000..a50bd9c943
--- /dev/null
+++ b/spec/controllers/hyrax/transcriptions_controller_spec.rb
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+RSpec.describe Hyrax::TranscriptionsController do
+  routes { Hyrax::Engine.routes }
+
+  describe '#show' do
+    let(:vtt_path) { File.join(fixture_path, 'sample.vtt') }
+    let(:vtt_file) { File.open(vtt_path) }
+    let(:vtt_content) { File.binread(vtt_path) } # raw bytes the response body is compared against
+    let(:uploaded_file) { FactoryBot.create(:uploaded_file, file: vtt_file) }
+
+    let(:vtt_file_metadata) do
+      FactoryBot.valkyrie_create(:hyrax_file_metadata, :with_file, :original_file,
+                                 original_filename: 'sample.vtt',
+                                 mime_type: 'text/vtt',
+                                 file: uploaded_file)
+    end
+
+    it 'sends the vtt file with CORS headers' do
+      get :show, params: { id: vtt_file_metadata.id }
+      expect(response).to be_successful
+      expect(response.body).to eq vtt_content
+      expect(response.headers['Content-Type']).to start_with 'text/vtt' # the controller also declares a charset
+      expect(response.headers['Access-Control-Allow-Origin']).to eq '*'
+      expect(response.headers['Content-Disposition']).to include 'inline'
+    end
+  end
+end
diff --git a/spec/fixtures/sample.vtt b/spec/fixtures/sample.vtt
new file mode 100644
index 0000000000..4cd5afbc21
--- /dev/null
+++ b/spec/fixtures/sample.vtt
@@ -0,0 +1,9 @@
+WEBVTT
+
+1
+00:00:00.500 --> 00:00:02.000
+Hello
+
+2
+00:00:02.500 --> 00:00:04.000
+World!
diff --git a/spec/presenters/hyrax/iiif_manifest_presenter_spec.rb b/spec/presenters/hyrax/iiif_manifest_presenter_spec.rb
index 3397828e23..2ee408d51e 100644
--- a/spec/presenters/hyrax/iiif_manifest_presenter_spec.rb
+++ b/spec/presenters/hyrax/iiif_manifest_presenter_spec.rb
@@ -380,6 +380,99 @@
         end
       end
     end
+
+    describe '#annotation_content' do
+      let(:ability) { Ability.new(user) }
+      let(:user) { create(:user) }
+      let(:file_set) { FactoryBot.valkyrie_create(:hyrax_file_set, :public) }
+      let(:work) { valkyrie_create(:monograph, members: [file_set, vtt_file_set]) }
+
+      let(:video_file_metadata) do
+        valkyrie_create(:hyrax_file_metadata, :original_file, :with_file,
+                        file_set: file_set,
+                        mime_type: 'video/mp4',
+                        duration: ['120'])
+      end
+
+      let(:vtt_file_path) { fixture_path + '/sample.vtt' }
+      let(:vtt_file) { File.open(vtt_file_path) }
+      let(:vtt_uploaded_file) { FactoryBot.create(:uploaded_file, file: vtt_file) }
+
+      let(:vtt_file_metadata) do
+        valkyrie_create(:hyrax_file_metadata, :original_file, :with_file,
+                        file_set: vtt_file_set,
+                        original_filename: 'sample.vtt',
+                        mime_type: 'text/vtt',
+                        file: vtt_uploaded_file)
+      end
+
+      let(:vtt_file_set) do
+        FactoryBot.valkyrie_create(:hyrax_file_set,
+                                   :public,
+                                   title: ['English Captions'],
+                                   language: ['en'])
+      end
+
+      let(:solr_doc) do
+        video_file_metadata # referenced only to evaluate the lazy `let` above
+        vtt_file_metadata # likewise; this also evaluates `vtt_file_set`
+        work # ensure work is created with members
+
+        solr_hash = Hyrax::Indexers::ResourceIndexer.for(resource: file_set).to_solr
+        solr_hash['mime_type_ssi'] = 'video/mp4' # set by hand on the generated hash
+        solr_hash['duration_tesim'] = ['120']
+        SolrDocument.new(solr_hash)
+      end
+
+      subject(:presenter) { described_class.new(solr_doc) }
+
+      before do
+        presenter.hostname = 'samvera.org'
+        presenter.ability = ability
+      end
+
+      context 'with video file and vtt file set' do
+        it 'returns transcription annotations' do
+          annotations = presenter.annotation_content
+
+          expect(annotations).to be_an(Array)
+          expect(annotations.first).to be_a(IIIFManifest::V3::AnnotationContent)
+          expect(annotations.first.type).to eq('Annotation')
+          expect(annotations.first.motivation).to eq('supplementing')
+          expect(annotations.first.format).to eq('text/vtt')
+          expect(annotations.first.language).to eq('en')
+          expect(annotations.first.label).to eq('English Captions')
+          expect(annotations.first.body_id).to include('transcriptions')
+          expect(annotations.first.body_id).to end_with('.vtt')
+        end
+      end
+
+      context 'with video file but no vtt file sets' do
+        let(:work) { valkyrie_create(:monograph, members: [file_set]) } # overrides the outer `work`; the vtt file set is left out
+
+        it 'returns empty array' do
+          annotations = presenter.annotation_content
+
+          expect(annotations).to eq([])
+        end
+      end
+
+      context 'with non-video/audio file' do
+        let(:image_file_metadata) do
+          valkyrie_create(:hyrax_file_metadata, :original_file, :image, :with_file,
+                          file_set: file_set)
+        end
+
+        let(:solr_doc) do
+          image_file_metadata # referenced only to evaluate the lazy `let` above
+          SolrDocument.new(Hyrax::Indexers::ResourceIndexer.for(resource: file_set).to_solr)
+        end
+
+        it 'returns nil' do
+          expect(presenter.annotation_content).to be_nil
+        end
+      end
+    end
   end
 
   describe '#description' do
@@ -512,6 +605,23 @@
     end
   end
 
+  describe '#homepage' do
+    let(:work) { valkyrie_create(:monograph) }
+
+    before { presenter.hostname = 'example.com' }
+
+    it 'returns a homepage hash with the work show url' do
+      expect(presenter.homepage).to contain_exactly(
+        hash_including(
+          'id' => include("concern/monographs/#{work.id}"),
+          'type' => 'Text',
+          'format' => 'text/html',
+          'label' => be_a(Hash) # only the type of the label is checked here
+        )
+      )
+    end
+  end
+
   describe '#work_presenters' do
     it('is empty') { expect(presenter.work_presenters).to be_empty }
 