import { notEmpty } from '../../typeValidators';

/**
 * Returns `url` unchanged unless it is missing, empty, or a `data:` URL.
 *
 * Data URLs are already inlined, so we don't need to replace them.
 *
 * @param url - Candidate URL captured from the HTML, possibly `undefined`.
 * @returns The original `url` string, or `undefined` when it should be skipped.
 */
const filterDataUrl = (url: string | undefined) => {
  // URL schemes are case-insensitive and leading whitespace is ignored when a
  // URL is parsed, so `DATA:...` and ` data:...` are data URLs as well.
  if (url && !/^\s*data:/i.test(url)) {
    return url;
  }
  return undefined;
};

/**
 * Returns the URL of every image candidate in a `srcset` attribute value.
 *
 * A candidate's URL is a run of non-whitespace characters, so a comma only
 * separates candidates when it ends the URL (followed by whitespace or the end
 * of the string) or when it follows the descriptors. Splitting on every comma
 * would cut `data:image/png;base64,AAAA` (and any URL containing a comma) in
 * two and yield a bogus fragment.
 *
 * @param srcset - Raw value of a `srcset` attribute.
 * @returns The candidate URLs in document order, descriptors removed.
 */
const parseSrcsetUrls = (srcset: string): string[] =>
  Array.from(
    // Skip leading separators, lazily take the URL, then consume either its
    // trailing commas, or whitespace plus descriptors up to the next comma.
    srcset.matchAll(/[\s,]*([^\s,]\S*?)(?:,+(?=\s|$)|\s[^,]*|$)/g),
    (candidate) => candidate[1],
  );

/**
 * Patterns to extract image URLs from an HTML string.
 *
 * Each entry pairs a global, case-insensitive regular expression with a
 * selector that turns one match into a URL, a list of URLs, or `undefined`
 * when the match is a data URL and must be skipped.
 *
 * Each pattern matches the image URL with either single or double quotes.
 */
const patterns: [RegExp, (match: RegExpMatchArray) => string | string[] | undefined][] = [
  // Image src attributes (also matches attributes ending in "src", e.g. data-src)
  [
    /<img\s+[^>]*src=["']([^"'>]+)["']/gi,
    (match) => filterDataUrl(match[1]),
  ],
  // CSS url() patterns
  [
    /url\(\s*["']?([^"')]+)["']?\s*\)/gi,
    (match) => filterDataUrl(match[1]),
  ],
  // Inline style background images
  [
    /style=["'][^"']*background-image:\s*url\(\s*["']?([^"')]+)["']?\s*\)[^"']*/gi,
    (match) => filterDataUrl(match[1]),
  ],
  // Favicon and other icon links, with href before rel (group 1) or after it
  // (group 2). The gap between the two attributes is `[^>]*?` so a match can
  // never run past the end of the current <link> tag into a following one.
  [
    /<link\s+[^>]*(?:(?:href=["']?([^"'\s>]+)[^>]*?rel=["'](?:icon|shortcut icon|apple-touch-icon|mask-icon)["'])|(?:rel=["'](?:icon|shortcut icon|apple-touch-icon|mask-icon)["'][^>]*?href=["']?([^"'\s>]+)))[^>]*>/gi,
    (match) => filterDataUrl(match[1] || match[2]),
  ],
  // srcset attributes: one match can yield several URLs
  [
    /<img\s+[^>]*srcset=["']([^"']+)["']/gi,
    (match) => parseSrcsetUrls(match[1])
      .map(filterDataUrl)
      .filter((url): url is string => url !== undefined),
  ],
];

/**
 * Collects the image URLs referenced by an HTML string.
 *
 * Every entry of `patterns` is run over the whole input. The selector of each
 * entry converts a match into zero, one or several URLs; empty results are
 * discarded with `notEmpty`. URLs are reported once, in order of first
 * appearance (pattern by pattern, then match by match).
 *
 * @param html - The HTML string to extract image URLs from.
 * @returns An array of unique image URLs.
 */
export function extractImageUrls(html: string): string[] {
  // A Set keeps insertion order, which gives de-duplication for free.
  const seen = new Set<string>();

  for (const [regex, selector] of patterns) {
    const found = Array.from(html.matchAll(regex), (match) => selector(match))
      .flat()
      .filter(notEmpty);

    for (const url of found) {
      seen.add(url);
    }
  }

  return Array.from(seen);
}
