@@ -48,11 +48,83 @@ class Format
4848 ##
4949 # Enumerates known RDF serialization format classes.
5050 #
51+ # Given options from {Format.for}, it returns just those formats that match the specified criteria.
52+ #
53+ # @example finding all formats that have a writer supporting text/html
54+ # RDF::Format.each(content_type: 'text/html', has_writer: true).to_a
55+ # #=> [RDF::RDFa::Format]
56+ #
57+ # @param [String, #to_s] file_name (nil)
58+ # @param [Symbol, #to_sym] file_extension (nil)
59+ # @param [String, #to_s] content_type (nil)
60+ # Content type may include wildcard characters, which will select among matching formats.
61+ # Note that content_type will be taken from a URL opened using {RDF::Util::File.open_file}.
62+ # @param [Boolean] has_reader (false)
63+ # Only return a format having a reader.
64+ # @param [Boolean] has_writer (false)
65+ # Only return a format having a writer.
66+ # @param [String, Proc] sample (nil)
67+ # A sample of input used for performing format detection. If the other criteria select no formats, or more than one, and a sample is available, format detection is run against the sample to narrow the candidates. A Proc may be given to retrieve the sample lazily.
5168 # @yield [klass]
5269 # @yieldparam [Class]
5370 # @return [Enumerator]
def self.each(file_name: nil,
              file_extension: nil,
              content_type: nil,
              has_reader: false,
              has_writer: false,
              sample: nil,
              **options,
              &block)
  # Select candidate formats from the first criterion supplied, in priority
  # order: content type, then file name, then file extension. With no
  # criterion at all, every registered format is a candidate.
  candidates =
    if content_type
      # Drop media type parameters (e.g. "; charset=utf-8") and surrounding
      # whitespace. The trailing `to_s` covers an empty content type, for
      # which `split(';').first` is nil.
      # @see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
      # @see http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7
      mime_type = content_type.to_s.split(';').first.to_s.strip

      # Ignore text/plain, a historical encoding for N-Triples, which is
      # problematic in format detection, as many web servers will serve
      # content by default text/plain.
      if (mime_type == 'text/plain' && sample) || mime_type == '*/*'
        # All content types
        @@subclasses
      elsif mime_type.end_with?('/*')
        # Keep the slash in the prefix ("text/") so that "text/*" cannot
        # match a different top-level type that merely starts with "text".
        prefix = mime_type.chomp('*')
        content_types.flat_map { |ct, fmts| ct.start_with?(prefix) ? fmts : [] }.uniq
      else
        content_types[mime_type]
      end
    elsif file_name
      ext = File.extname(RDF::URI(file_name).path.to_s)[1..-1].to_s
      file_extensions[ext.to_sym]
    elsif file_extension
      file_extensions[file_extension.to_sym]
    else
      @@subclasses
    end

  # A lookup miss yields nil. If we can sample, check all classes instead.
  candidates ||= sample ? @@subclasses : []

  # Subset by available reader and/or writer. Both constraints apply when
  # both flags are set.
  candidates = candidates.select do |f|
    (!has_reader || f.reader) && (!has_writer || f.writer)
  end

  # If the criteria did not settle on exactly one format and a sample is
  # available, use it for format detection.
  if candidates.length != 1 && sample
    # `String#b` returns a binary-encoded copy, so the caller's string is
    # never mutated and a frozen sample does not raise.
    raw = (sample.is_a?(Proc) ? sample.call : sample).to_s.b
    detected = candidates.select { |f| f.detect(raw) }
    # Keep the undetected candidates when nothing matches the sample, so a
    # failed detection does not discard an otherwise valid selection.
    candidates = detected unless detected.empty?
  end

  candidates.each(&block)
end
57129
58130 ##
@@ -77,6 +149,7 @@ def self.each(&block)
77149 # @option options [String, #to_s] :file_name (nil)
78150 # @option options [Symbol, #to_sym] :file_extension (nil)
79151 # @option options [String, #to_s] :content_type (nil)
152+ # Content type may include wildcard characters, which will select among matching formats.
80153 # Note that content_type will be taken from a URL opened using {RDF::Util::File.open_file}.
81154 # @option options [Boolean] :has_reader (false)
82155 # Only return a format having a reader.
@@ -88,73 +161,31 @@ def self.each(&block)
88161 # @yieldreturn [String] another way to provide a sample, allows lazy for retrieving the sample.
89162 #
90163 # @return [Class]
def self.for(*args, **options, &block)
  criterion = args.first

  # Accept an explicit options Hash passed positionally. On Ruby 3 a Hash
  # argument is no longer converted to keywords, so without this the
  # caller's options would be silently dropped.
  options = criterion.merge(options) if criterion.is_a?(Hash)

  # A block is a lazy way of supplying the sample; an explicit :sample wins.
  options = { sample: block }.merge(options) if block_given?

  formats = case criterion
  when String, RDF::URI
    # Find a format based on the file name
    each(file_name: criterion, **options).to_a
  when Symbol
    # Try to find a match based on the full class name
    # We want this to work even if autoloading fails
    classes = each(**options).select { |f| f.symbols.include?(criterion) }
    if classes.empty?
      classes = case criterion
      when :ntriples then [RDF::NTriples::Format]
      when :nquads then [RDF::NQuads::Format]
      else []
      end
    end
    classes
  else
    # Only enumerate when there is something to select on. With no
    # criteria, `each` yields every registered format, and returning the
    # last of those would be an arbitrary answer rather than "no match".
    selective = options[:file_name] || options[:file_extension] ||
                options[:content_type] || options[:sample]
    selective ? each(**options).to_a : []
  end

  # Return the last detected format (nil when nothing matched)
  formats.last
end
159190
160191 ##
0 commit comments