Skip to content

Commit 39ab835

Browse files
feat: handle srcsets
1 parent 8d63ca0 commit 39ab835

2 files changed

Lines changed: 157 additions & 155 deletions

File tree

src/class-tiny-picture.php

Lines changed: 142 additions & 151 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
class Tiny_Picture extends Tiny_WP_Base {
3131

3232

33+
3334
/** @var string */
3435
private $base_dir;
3536

@@ -100,18 +101,8 @@ private function replace_img_sources( $content ) {
100101
*/
101102
private function filter_pictures( $content ) {
102103
$matches = array();
103-
/*
104-
* Match <picture> blocks that contain one or more <source> tags.
105-
*
106-
* Pattern parts:
107-
* - (?:<picture[^>]*?>\s*): opening <picture> with optional attributes and
108-
* trailing whitespace.
109-
* - (?:<source[^>]*?>)+: one or more <source> tags inside the picture.
110-
* - (?:.*?</picture>)?: optionally include everything up to the closing
111-
* </picture>.
112-
*/
113104
if ( ! preg_match_all(
114-
'#(?:<picture[^>]*?>\s*)(?:<source[^>]*?>)+(?:.*?</picture>)?#is',
105+
'#<picture\b[^>]*>.*?<\/picture>#is',
115106
$content,
116107
$matches
117108
) ) {
@@ -197,6 +188,7 @@ private function filter_images( $content ) {
197188

198189
abstract class Tiny_Source_Base {
199190

191+
200192
public $raw_html;
201193
protected $base_dir;
202194
protected $allowed_domains;
@@ -210,14 +202,53 @@ public function __construct( $html, $base_dir, $domains ) {
210202
}
211203

212204
/**
 * Reads the value of a quoted attribute from an HTML element string.
 *
 * The name must match as a whole attribute: a preceding word character,
 * ':' or '-' disqualifies the match, so asking for "media" does not pick
 * up "data-media". Matching is case-insensitive and only single- or
 * double-quoted values are recognised.
 *
 * @param string $element HTML of the element to inspect.
 * @param string $name    Attribute name to look for.
 * @return string|null The raw attribute value, or null when not found.
 */
protected static function get_attribute_value( $element, $name ) {
	$pattern = '~(?<![\w:-])' . preg_quote( $name, '~' ) . '\s*=\s*(["\'])(.*?)\1~is';

	$found = preg_match( $pattern, $element, $captures );

	// Group 1 is the opening quote, group 2 the text between the quotes.
	return $found ? $captures[2] : null;
}
220216

217+
/**
 * Extract elements by tag name from an HTML string (regex-based).
 *
 * Two passes are made over $html: one for the paired form including its
 * content and closing tag (e.g. <div>…</div>), and one for the bare or
 * self-closing tag form (e.g. <source />, <img>). An opening tag that
 * starts a paired match is reported once, as the full element, rather
 * than a second time on its own.
 *
 * @param string $html    The HTML string to search in.
 * @param string $tagname The tag name (e.g., 'div', 'source', 'img').
 * @return array Array of matched elements as strings; bare tags first,
 *               then paired elements.
 */
protected function get_element_by_tag( $html, $tagname ) {
	$tag = preg_quote( $tagname, '~' );

	// Attribute run: unquoted characters or complete quoted values, so a
	// '>' inside a quoted value does not end the tag. The quantifiers are
	// possessive because the alternatives are mutually exclusive; without
	// that, malformed markup makes the nested repetition backtrack
	// exponentially and preg_match_all() gives up on the whole input.
	$attributes = '(?:[^>"\']++|"[^"]*+"|\'[^\']*+\')*+';

	// Normal paired tags (e.g. <div>…</div>).
	$paired        = array();
	$paired_starts = array();
	if ( preg_match_all(
		'~<' . $tag . '\b' . $attributes . '>.*?</' . $tag . '>~is',
		$html,
		$matches,
		PREG_OFFSET_CAPTURE
	) ) {
		foreach ( $matches[0] as $match ) {
			$paired[]                   = $match[0];
			$paired_starts[ $match[1] ] = true;
		}
	}

	// Self-closing / void tags (e.g. <source />, <img />, <br />).
	$tags = array();
	if ( preg_match_all(
		'~<' . $tag . '\b' . $attributes . '/?>~i',
		$html,
		$matches,
		PREG_OFFSET_CAPTURE
	) ) {
		foreach ( $matches[0] as $match ) {
			// Skip opening tags already covered by a paired element.
			if ( ! isset( $paired_starts[ $match[1] ] ) ) {
				$tags[] = $match[0];
			}
		}
	}

	return array_merge( $tags, $paired );
}
251+
221252
protected function get_local_path( $url ) {
222253
if ( strpos( $url, 'http' ) === 0 ) {
223254
$matched_domain = null;
@@ -258,134 +289,14 @@ protected function get_formatted_source( $image_source_data, $mimetype ) {
258289
return null;
259290
}
260291

261-
protected function build_alternative_sources_for_url( $url, $size = '' ) {
262-
$sources = array();
263-
foreach ( $this->valid_mimetypes as $mimetype ) {
264-
$formatted = $this->get_formatted_source( array(
265-
'path' => $url,
266-
'size' => $size,
267-
), $mimetype );
268-
if ( $formatted ) {
269-
$srcset = trim( $formatted['src'] . ' ' . $formatted['size'] );
270-
$sources[] = '<source srcset="' .
271-
$srcset . '" type="' .
272-
$formatted['type'] . '" />';
273-
break;
274-
}
275-
}
276-
return $sources;
277-
}
278-
279-
/**
280-
* Will parse the srcset attribute
281-
*
282-
* @param string $srcset
283-
* @return array{ path: string, size: string } srcset parts
284-
*/
285-
protected static function parse_srcset_list( $srcset ) {
286-
$out = [];
287-
foreach ( explode( ',', $srcset ) as $entry ) {
288-
$entry = trim( $entry );
289-
if ( '' === $entry ) {
290-
continue;
291-
}
292-
$parts = preg_split( '/\s+/', $entry, 2 );
293-
$out[] = [
294-
'path' => $parts[0],
295-
'size' => $parts[1] ?? '',
296-
];
297-
}
298-
return $out;
299-
}
300-
}
301-
302-
class Tiny_Picture_Source extends Tiny_Source_Base {
303-
304-
305-
/**
306-
* Adds alternative format sources (e.g., image/webp, image/avif) to an existing
307-
* <picture> element based on locally available converted files.
308-
*
309-
*
310-
* @return string The augmented <picture> HTML or the original if no additions.
311-
*/
312-
public function augment_picture_element() {
313-
$new_sources = array();
314-
315-
// Find existing <source> tags inside the <picture>.
316-
if ( preg_match_all( '#<source\b[^>]*>#i', $this->raw_html, $source_tag_matches ) ) {
317-
foreach ( $source_tag_matches[0] as $source_tag_html ) {
318-
// Extract srcset="..."
319-
if ( ! preg_match( '#\bsrcset\s*=\s*([\"\'])(.*?)\1#i', $source_tag_html, $m ) ) { continue;
320-
}
321-
$media = '';
322-
// Extract optional media="..." to preserve any media query
323-
if ( preg_match( '#\bmedia\s*=\s*([\"\'])(.*?)\1#i', $source_tag_html, $mm ) ) {
324-
$media = $mm[2];
325-
}
326-
foreach ( self::parse_srcset_list( $m[2] ) as $entry ) {
327-
foreach ( $this->valid_mimetypes as $mimetype ) {
328-
$formatted = $this->get_formatted_source( $entry, $mimetype );
329-
if ( $formatted ) {
330-
$srcset = trim( $formatted['src'] . ' ' . $formatted['size'] );
331-
$tag = '<source srcset="' . $srcset . '" type="' . $formatted['type'] . '"';
332-
if ( $media ) { $tag .= ' media="' . $media . '"';
333-
}
334-
$new_sources[] = $tag . ' />';
335-
break;
336-
}
337-
}
338-
}
339-
}
340-
}
341-
342-
// inner <img>
343-
if ( preg_match( '#<img\b[^>]*>#i', $this->raw_html, $img_tag_match ) ) {
344-
$img_tag = $img_tag_match[0];
345-
$candidates = [];
346-
// Extract srcset="..."
347-
if ( preg_match( '#\bsrcset\s*=\s*([\"\'])(.*?)\1#i', $img_tag, $m ) ) {
348-
$candidates = array_merge( $candidates, self::parse_srcset_list( $m[2] ) );
349-
}
350-
// Extract fallback src="..."
351-
if ( preg_match( '#\bsrc\s*=\s*([\"\'])(.*?)\1#i', $img_tag, $m ) ) {
352-
$candidates[] = [
353-
'path' => $m[2],
354-
'size' => '',
355-
];
356-
}
357-
foreach ( $candidates as $entry ) {
358-
foreach ( $this->valid_mimetypes as $mimetype ) {
359-
$formatted = $this->get_formatted_source( $entry, $mimetype );
360-
if ( $formatted ) {
361-
$srcset = trim( $formatted['src'] . ' ' . $formatted['size'] );
362-
$new_sources[] = '<source srcset="' . $srcset . '" type="' . $formatted['type'] . '" />';
363-
break;
364-
}
365-
}
366-
}
367-
}
368-
if ( empty( $new_sources ) ) {
369-
return $this->raw_html;
370-
}
371-
372-
$insertion = implode( '', $new_sources );
373-
374-
// Insert newly built <source> elements immediately before the first <img>
375-
return preg_replace( '#(<img\b)#i', $insertion . '$1', $this->raw_html, 1 );
376-
}
377-
}
378-
379-
class Tiny_Image_Source extends Tiny_Source_Base {
380-
381292
/**
382-
* Retrieves the image sources from the img element
293+
* Retrieves the sources from the <img> or <source> element
383294
*
384295
* @return array{path: string, size: string}[] The image sources
385296
*/
386-
private function get_image_srcsets() {
297+
protected function get_image_srcsets( $html ) {
387298
$result = array();
388-
$srcset = $this::get_attribute_value( $this->raw_html, 'srcset' );
299+
$srcset = $this::get_attribute_value( $html, 'srcset' );
389300

390301
if ( $srcset ) {
391302
// Split the srcset to get individual entries
@@ -414,7 +325,7 @@ private function get_image_srcsets() {
414325
}
415326
}
416327

417-
$source = $this::get_attribute_value( $this->raw_html, 'src' );
328+
$source = $this::get_attribute_value( $html, 'src' );
418329
if ( ! empty( $source ) ) {
419330
// No srcset, but we have a src attribute
420331
$result[] = array(
@@ -427,20 +338,20 @@ private function get_image_srcsets() {
427338

428339

429340
/**
430-
* Generates a formatted image source array if the corresponding local file exists.
341+
* Creates one or more <source> elements if alternative formats
342+
* are available.
431343
*
432-
* Attempts to replace the file extension of the provided image path with the
433-
* specified MIME type, resolves the local path of the resulting file, and returns
434-
* the `srcset` and `type` if the file exists.
435-
*
436-
* @return string a <picture> element contain additional sources
344+
* @param string $original_source_html Either a <source> or an <img> element.
345+
* @return string[] Array of <source> HTML strings.
437346
*/
438-
public function create_picture_elements() {
439-
$srcsets = $this->get_image_srcsets();
347+
protected function create_alternative_sources( $original_source_html ) {
348+
$srcsets = $this->get_image_srcsets( $original_source_html );
440349
if ( empty( $srcsets ) ) {
441-
return $this->raw_html;
350+
return array();
442351
}
443352

353+
$is_source_tag = (bool) preg_match( '#<source\b#i', $original_source_html );
354+
444355
$sources = array();
445356
foreach ( $this->valid_mimetypes as $mimetype ) {
446357
$srcset_parts = [];
@@ -453,16 +364,96 @@ public function create_picture_elements() {
453364
}
454365

455366
if ( ! empty( $srcset_parts ) ) {
	// Attributes are emitted in insertion order: srcset, any attributes
	// carried over from the original <source>, then type.
	$source_attr_parts = array(
		'srcset' => implode( ', ', $srcset_parts ),
	);

	if ( $is_source_tag ) {
		// Carry over the attributes that decide when and how the browser
		// uses the original <source>, so the alternative applies under
		// the same conditions.
		foreach ( array( 'sizes', 'media', 'width', 'height' ) as $attr ) {
			$attr_value = self::get_attribute_value( $original_source_html, $attr );
			if ( $attr_value ) {
				$source_attr_parts[ $attr ] = $attr_value;
			}
		}
	}

	$source_attr_parts['type'] = $mimetype;

	// Start a fresh list for every MIME type. Appending to a list that is
	// never reset would make each later <source> string also contain all
	// the tags built in earlier iterations.
	$source_parts = array( '<source' );
	foreach ( $source_attr_parts as $source_attr_name => $source_attr_val ) {
		$source_parts[] = $source_attr_name . '="' . $source_attr_val . '"';
	}
	$source_parts[] = '/>';

	$sources[] = implode( ' ', $source_parts );
}
460389
}
461390

391+
return $sources;
392+
}
393+
}
394+
395+
class Tiny_Picture_Source extends Tiny_Source_Base {

	/**
	 * Adds alternative format sources (e.g., image/webp, image/avif) to an existing
	 * <picture> element based on locally available converted files.
	 *
	 * Every <source> that is not already typed as WebP/AVIF, and every inner
	 * <img>, is handed to create_alternative_sources(). The resulting
	 * <source> tags are inserted immediately before the first <img>.
	 *
	 * @return string The augmented <picture> HTML or the original if no additions.
	 */
	public function augment_picture_element() {
		$modified_sources = array();

		// Sources already declared in one of these formats are left alone.
		$optimized_types = array( 'image/webp', 'image/avif' );

		// Handle existing sources.
		foreach ( $this->get_element_by_tag( $this->raw_html, 'source' ) as $source_tag_html ) {
			$type_attr = self::get_attribute_value( $source_tag_html, 'type' );
			$type_attr = null !== $type_attr ? strtolower( trim( $type_attr ) ) : '';

			// Skip if already optimized.
			if ( in_array( $type_attr, $optimized_types, true ) ) {
				continue;
			}

			foreach ( $this->create_alternative_sources( $source_tag_html ) as $alt ) {
				$modified_sources[] = $alt;
			}
		}

		// Handle the inner image.
		foreach ( $this->get_element_by_tag( $this->raw_html, 'img' ) as $img_tag_html ) {
			foreach ( $this->create_alternative_sources( $img_tag_html ) as $alt ) {
				$modified_sources[] = $alt;
			}
		}

		if ( empty( $modified_sources ) ) {
			return $this->raw_html;
		}

		$insertion = implode( '', $modified_sources );

		// Insert the new <source> elements immediately before the first <img>.
		// A callback is used so the generated markup is inserted literally:
		// in a plain replacement string, "$1" or "\1" inside a URL would be
		// interpreted as a back-reference.
		$augmented = preg_replace_callback(
			'#<img\b#i',
			function ( $match ) use ( $insertion ) {
				return $insertion . $match[0];
			},
			$this->raw_html,
			1
		);

		// preg_replace_callback() returns null on a PCRE error; keep the
		// original markup rather than returning nothing.
		return null === $augmented ? $this->raw_html : $augmented;
	}
}
440+
441+
class Tiny_Image_Source extends Tiny_Source_Base {
442+
443+
/**
444+
* Generates a formatted image source array if the corresponding local file exists.
445+
*
446+
* Attempts to replace the file extension of the provided image path with the
447+
* specified MIME type, resolves the local path of the resulting file, and returns
448+
* the `srcset` and `type` if the file exists.
449+
*
450+
* @return string a <picture> element containing additional sources
451+
*/
452+
public function create_picture_elements() {
453+
$sources = $this->create_alternative_sources( $this->raw_html );
462454
if ( empty( $sources ) ) {
463455
return $this->raw_html;
464456
}
465-
466457
$picture_element = array( '<picture>' );
467458
$picture_element[] = implode( '', $sources );
468459
$picture_element[] = $this->raw_html;

0 commit comments

Comments
 (0)