3030class Tiny_Picture extends Tiny_WP_Base {
3131
3232
33+
3334 /** @var string */
3435 private $ base_dir ;
3536
@@ -100,18 +101,8 @@ private function replace_img_sources( $content ) {
100101 */
101102 private function filter_pictures ( $ content ) {
102103 $ matches = array ();
103- /*
104- * Match <picture> blocks that contain one or more <source> tags.
105- *
106- * Pattern parts:
107- * - (?:<picture[^>]*?>\s*): opening <picture> with optional attributes and
108- * trailing whitespace.
109- * - (?:<source[^>]*?>)+: one or more <source> tags inside the picture.
110- * - (?:.*?</picture>)?: optionally include everything up to the closing
111- * </picture>.
112- */
113104 if ( ! preg_match_all (
114- '#(?: <picture[^>]*?>\s*)(?:<source [^>]*?>)+(?: .*?</picture>)? #is ' ,
105+ '#<picture\b [^>]*> .*?<\ /picture>#is ' ,
115106 $ content ,
116107 $ matches
117108 ) ) {
@@ -197,6 +188,7 @@ private function filter_images( $content ) {
197188
198189abstract class Tiny_Source_Base {
199190
191+
200192 public $ raw_html ;
201193 protected $ base_dir ;
202194 protected $ allowed_domains ;
@@ -210,14 +202,53 @@ public function __construct( $html, $base_dir, $domains ) {
210202 }
211203
/**
 * Reads the value of a single attribute from the HTML of one tag.
 *
 * The attribute list is tokenized from left to right, so $name is only ever
 * compared against real attribute names. Text that merely looks like
 * `name="..."` inside another attribute's quoted value (an alt text, for
 * instance) and longer names such as `data-media` are never mistaken for it.
 * Names are compared case-insensitively.
 *
 * @param string $element HTML of a single tag, e.g. `<img src="a.jpg">`.
 * @param string $name    Attribute name to look up.
 * @return string|null Value of the first attribute with that name (quoted or
 *                     unquoted); null when it is absent or has no value.
 */
protected static function get_attribute_value( $element, $name ) {
	if ( ! is_string( $element ) || ! is_string( $name ) || '' === $name ) {
		return null;
	}

	// Drop the leading "<tagname" so the tag name is not read as an attribute.
	$attributes = preg_replace( '~^\s*<[a-z][^\s/>]*~i', '', $element, 1 );
	if ( null === $attributes ) {
		return null;
	}

	// Group 1 is the attribute name. The branch-reset group (?|...) puts the
	// value in group 2 whichever quoting style was used; group 2 is missing
	// for boolean attributes that carry no value.
	$pattern = '~([^\s"\'<>/=]+)(?:\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'=<>`]+)))?~';
	if ( ! preg_match_all( $pattern, $attributes, $tokens, PREG_SET_ORDER ) ) {
		return null;
	}

	foreach ( $tokens as $token ) {
		if ( 0 === strcasecmp( $token[1], $name ) ) {
			// Only the first occurrence counts; later duplicates are ignored.
			return isset( $token[2] ) ? $token[2] : null;
		}
	}

	return null;
}
220216
/**
 * Extract elements by tag name from an HTML string (regex-based).
 *
 * Every element is returned exactly once, in document order:
 * - void elements (img, source, br, ...) are returned as their tag only;
 * - other elements are returned from the opening tag up to the first
 *   matching closing tag, or as the opening tag alone when they are
 *   self-closed or never closed.
 *
 * This is not a real HTML parser: nested elements of the same tag name end
 * at the first closing tag that is found.
 *
 * @param string $html    The HTML string to search in.
 * @param string $tagname The tag name (e.g., 'div', 'source', 'img').
 * @return array Array of matched elements as strings.
 */
protected function get_element_by_tag( $html, $tagname ) {
	if ( ! is_string( $html ) || '' === $html || ! is_string( $tagname ) || '' === $tagname ) {
		return array();
	}

	// Elements that never have a closing tag.
	$void_elements = array(
		'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
		'input', 'link', 'meta', 'param', 'source', 'track', 'wbr',
	);

	$tag = preg_quote( $tagname, '~' );

	// Opening tag: the name must not continue into a longer one (e.g.
	// <source-set>), followed by attributes whose quoted values may contain
	// '>'. Possessive quantifiers keep malformed input from triggering
	// runaway backtracking.
	$open_tag = '<' . $tag . '(?![\w:-])(?:[^>"\']++|"[^"]*+"|\'[^\']*+\')*+>';

	if ( in_array( strtolower( $tagname ), $void_elements, true ) ) {
		$pattern = '~' . $open_tag . '~i';
	} else {
		// Extend to the closing tag unless the opening tag was self-closed.
		$pattern = '~' . $open_tag . '(?:(?<!/>).*?</' . $tag . '\s*>)?~is';
	}

	if ( ! preg_match_all( $pattern, $html, $found ) ) {
		return array();
	}

	return $found[0];
}
251+
221252 protected function get_local_path ( $ url ) {
222253 if ( strpos ( $ url , 'http ' ) === 0 ) {
223254 $ matched_domain = null ;
@@ -258,134 +289,14 @@ protected function get_formatted_source( $image_source_data, $mimetype ) {
258289 return null ;
259290 }
260291
261- protected function build_alternative_sources_for_url ( $ url , $ size = '' ) {
262- $ sources = array ();
263- foreach ( $ this ->valid_mimetypes as $ mimetype ) {
264- $ formatted = $ this ->get_formatted_source ( array (
265- 'path ' => $ url ,
266- 'size ' => $ size ,
267- ), $ mimetype );
268- if ( $ formatted ) {
269- $ srcset = trim ( $ formatted ['src ' ] . ' ' . $ formatted ['size ' ] );
270- $ sources [] = '<source srcset=" ' .
271- $ srcset . '" type=" ' .
272- $ formatted ['type ' ] . '" /> ' ;
273- break ;
274- }
275- }
276- return $ sources ;
277- }
278-
279- /**
280- * Will parse the srcset attribute
281- *
282- * @param string $srcset
283- * @return array{ path: string, size: string } srcset parts
284- */
285- protected static function parse_srcset_list ( $ srcset ) {
286- $ out = [];
287- foreach ( explode ( ', ' , $ srcset ) as $ entry ) {
288- $ entry = trim ( $ entry );
289- if ( '' === $ entry ) {
290- continue ;
291- }
292- $ parts = preg_split ( '/\s+/ ' , $ entry , 2 );
293- $ out [] = [
294- 'path ' => $ parts [0 ],
295- 'size ' => $ parts [1 ] ?? '' ,
296- ];
297- }
298- return $ out ;
299- }
300- }
301-
302- class Tiny_Picture_Source extends Tiny_Source_Base {
303-
304-
305- /**
306- * Adds alternative format sources (e.g., image/webp, image/avif) to an existing
307- * <picture> element based on locally available converted files.
308- *
309- *
310- * @return string The augmented <picture> HTML or the original if no additions.
311- */
312- public function augment_picture_element () {
313- $ new_sources = array ();
314-
315- // Find existing <source> tags inside the <picture>.
316- if ( preg_match_all ( '#<source\b[^>]*>#i ' , $ this ->raw_html , $ source_tag_matches ) ) {
317- foreach ( $ source_tag_matches [0 ] as $ source_tag_html ) {
318- // Extract srcset="..."
319- if ( ! preg_match ( '#\bsrcset\s*=\s*([\" \'])(.*?)\1#i ' , $ source_tag_html , $ m ) ) { continue ;
320- }
321- $ media = '' ;
322- // Extract optional media="..." to preserve any media query
323- if ( preg_match ( '#\bmedia\s*=\s*([\" \'])(.*?)\1#i ' , $ source_tag_html , $ mm ) ) {
324- $ media = $ mm [2 ];
325- }
326- foreach ( self ::parse_srcset_list ( $ m [2 ] ) as $ entry ) {
327- foreach ( $ this ->valid_mimetypes as $ mimetype ) {
328- $ formatted = $ this ->get_formatted_source ( $ entry , $ mimetype );
329- if ( $ formatted ) {
330- $ srcset = trim ( $ formatted ['src ' ] . ' ' . $ formatted ['size ' ] );
331- $ tag = '<source srcset=" ' . $ srcset . '" type=" ' . $ formatted ['type ' ] . '" ' ;
332- if ( $ media ) { $ tag .= ' media=" ' . $ media . '" ' ;
333- }
334- $ new_sources [] = $ tag . ' /> ' ;
335- break ;
336- }
337- }
338- }
339- }
340- }
341-
342- // inner <img>
343- if ( preg_match ( '#<img\b[^>]*>#i ' , $ this ->raw_html , $ img_tag_match ) ) {
344- $ img_tag = $ img_tag_match [0 ];
345- $ candidates = [];
346- // Extract srcset="..."
347- if ( preg_match ( '#\bsrcset\s*=\s*([\" \'])(.*?)\1#i ' , $ img_tag , $ m ) ) {
348- $ candidates = array_merge ( $ candidates , self ::parse_srcset_list ( $ m [2 ] ) );
349- }
350- // Extract fallback src="..."
351- if ( preg_match ( '#\bsrc\s*=\s*([\" \'])(.*?)\1#i ' , $ img_tag , $ m ) ) {
352- $ candidates [] = [
353- 'path ' => $ m [2 ],
354- 'size ' => '' ,
355- ];
356- }
357- foreach ( $ candidates as $ entry ) {
358- foreach ( $ this ->valid_mimetypes as $ mimetype ) {
359- $ formatted = $ this ->get_formatted_source ( $ entry , $ mimetype );
360- if ( $ formatted ) {
361- $ srcset = trim ( $ formatted ['src ' ] . ' ' . $ formatted ['size ' ] );
362- $ new_sources [] = '<source srcset=" ' . $ srcset . '" type=" ' . $ formatted ['type ' ] . '" /> ' ;
363- break ;
364- }
365- }
366- }
367- }
368- if ( empty ( $ new_sources ) ) {
369- return $ this ->raw_html ;
370- }
371-
372- $ insertion = implode ( '' , $ new_sources );
373-
374- // Insert newly built <source> elements immediately before the first <img>
375- return preg_replace ( '#(<img\b)#i ' , $ insertion . '$1 ' , $ this ->raw_html , 1 );
376- }
377- }
378-
379- class Tiny_Image_Source extends Tiny_Source_Base {
380-
381292 /**
382- * Retrieves the image sources from the img element
293+ * Retrieves the sources from the < img> or <source> element
383294 *
384295 * @return array{path: string, size: string}[] The image sources
385296 */
386- private function get_image_srcsets () {
297+ protected function get_image_srcsets ( $ html ) {
387298 $ result = array ();
388- $ srcset = $ this ::get_attribute_value ( $ this -> raw_html , 'srcset ' );
299+ $ srcset = $ this ::get_attribute_value ( $ html , 'srcset ' );
389300
390301 if ( $ srcset ) {
391302 // Split the srcset to get individual entries
@@ -414,7 +325,7 @@ private function get_image_srcsets() {
414325 }
415326 }
416327
417- $ source = $ this ::get_attribute_value ( $ this -> raw_html , 'src ' );
328+ $ source = $ this ::get_attribute_value ( $ html , 'src ' );
418329 if ( ! empty ( $ source ) ) {
419330 // No srcset, but we have a src attribute
420331 $ result [] = array (
@@ -427,20 +338,20 @@ private function get_image_srcsets() {
427338
428339
429340 /**
430- * Generates a formatted image source array if the corresponding local file exists.
341+ * Creates one or more <source> elements if alternative formats
342+ * are available.
431343 *
432- * Attempts to replace the file extension of the provided image path with the
433- * specified MIME type, resolves the local path of the resulting file, and returns
434- * the `srcset` and `type` if the file exists.
435- *
436- * @return string a <picture> element contain additional sources
344+ * @param string $original_source_html, either <source> or <img>
345+ * @return array{string} array of <source> html
437346 */
438- public function create_picture_elements ( ) {
439- $ srcsets = $ this ->get_image_srcsets ();
347+ protected function create_alternative_sources ( $ original_source_html ) {
348+ $ srcsets = $ this ->get_image_srcsets ( $ original_source_html );
440349 if ( empty ( $ srcsets ) ) {
441- return $ this -> raw_html ;
350+ return array () ;
442351 }
443352
353+ $ is_source_tag = (bool ) preg_match ( '#<source\b#i ' , $ original_source_html );
354+
444355 $ sources = array ();
445356 foreach ( $ this ->valid_mimetypes as $ mimetype ) {
446357 $ srcset_parts = [];
@@ -453,16 +364,96 @@ public function create_picture_elements() {
453364 }
454365
455366 if ( ! empty ( $ srcset_parts ) ) {
367+ $ source_attr_parts = array ();
368+
456369 $ srcset_attr = implode ( ', ' , $ srcset_parts );
457- $ mimetype_source = '<source srcset=" ' . $ srcset_attr . '" type=" ' . $ mimetype . '" /> ' ;
458- $ sources [] = $ mimetype_source ;
370+ $ source_attr_parts ['srcset ' ] = $ srcset_attr ;
371+
372+ if ( $ is_source_tag ) {
373+ foreach ( array ( 'sizes ' , 'media ' , 'width ' , 'height ' ) as $ attr ) {
374+ $ attr_value = $ this ->get_attribute_value ( $ original_source_html , $ attr );
375+ if ( $ attr_value ) {
376+ $ source_attr_parts [ $ attr ] = $ attr_value ;
377+ }
378+ }
379+ }
380+
381+ $ source_attr_parts ['type ' ] = $ mimetype ;
382+ $ source_parts [] = '<source ' ;
383+ foreach ( $ source_attr_parts as $ source_attr_name => $ source_attr_val ) {
384+ $ source_parts [] = $ source_attr_name . '=" ' . $ source_attr_val . '" ' ;
385+ }
386+ $ source_parts [] = '/> ' ;
387+ $ sources [] = implode ( ' ' , $ source_parts );
459388 }
460389 }
461390
391+ return $ sources ;
392+ }
393+ }
394+
/**
 * Source handler for markup that already is a <picture> element.
 */
class Tiny_Picture_Source extends Tiny_Source_Base {

	/**
	 * Adds alternative format sources (e.g., image/webp, image/avif) to an existing
	 * <picture> element based on locally available converted files.
	 *
	 * Alternatives are built for every <source> that is not already declared as
	 * webp/avif, and for every inner <img>. They are inserted together,
	 * immediately before the first <img>.
	 *
	 * @return string The augmented <picture> HTML or the original if no additions.
	 */
	public function augment_picture_element() {
		// <source> elements already declared with one of these types are left alone.
		$optimized_types = array( 'image/webp', 'image/avif' );
		$new_sources     = array();

		// Existing <source> elements.
		foreach ( $this->get_element_by_tag( $this->raw_html, 'source' ) as $source_tag_html ) {
			$type_attr = self::get_attribute_value( $source_tag_html, 'type' );
			$type_attr = null !== $type_attr ? strtolower( trim( $type_attr ) ) : '';

			if ( in_array( $type_attr, $optimized_types, true ) ) {
				continue;
			}

			foreach ( (array) $this->create_alternative_sources( $source_tag_html ) as $alternative ) {
				$new_sources[] = $alternative;
			}
		}

		// Inner <img> element(s).
		foreach ( $this->get_element_by_tag( $this->raw_html, 'img' ) as $img_tag_html ) {
			foreach ( (array) $this->create_alternative_sources( $img_tag_html ) as $alternative ) {
				$new_sources[] = $alternative;
			}
		}

		if ( empty( $new_sources ) ) {
			return $this->raw_html;
		}

		$insertion = implode( '', $new_sources );

		// NOTE(review): the generated sources land after the existing <source>
		// elements. Browsers are expected to use the first <source> that
		// matches, so confirm this order lets the alternatives win.

		// A callback is used instead of a replacement string so that "$1" or
		// "\1"-like sequences inside the generated markup (e.g. in a URL) are
		// inserted literally rather than read as back-references.
		$augmented = preg_replace_callback(
			'#<img\b#i',
			function ( $match ) use ( $insertion ) {
				return $insertion . $match[0];
			},
			$this->raw_html,
			1
		);

		// preg_replace_callback() returns null on a PCRE error; keep the original markup.
		return null === $augmented ? $this->raw_html : $augmented;
	}
}
440+
441+ class Tiny_Image_Source extends Tiny_Source_Base {
442+
443+ /**
 * Wraps the image in a <picture> element when alternative formats exist.
 *
 * Builds <source> elements for the image via create_alternative_sources().
 * When none are available the original HTML is returned unchanged;
 * otherwise they are placed inside a <picture> ahead of the original markup.
 *
 * @return string a <picture> element containing the additional sources, or the original HTML
451+ */
452+ public function create_picture_elements () {
453+ $ sources = $ this ->create_alternative_sources ( $ this ->raw_html );
462454 if ( empty ( $ sources ) ) {
463455 return $ this ->raw_html ;
464456 }
465-
466457 $ picture_element = array ( '<picture> ' );
467458 $ picture_element [] = implode ( '' , $ sources );
468459 $ picture_element [] = $ this ->raw_html ;
0 commit comments