import { type Element as HastElement, type Root as HastRoot } from 'hast';
import fromParse5 from 'hast-util-from-parse5';
import hastSanitize from 'hast-util-sanitize';
import hastUtilToHtml from 'hast-util-to-html';
import hastIsWhitespace from 'hast-util-whitespace';
import htmlVoidElements from 'html-void-elements';
import { isEmpty, last, omit } from 'lodash';
import parse5 from 'parse5';

import { CUSTOM_HTML } from '@eversity/domain/constants';
import { ensureArray, filterTree, mapTree } from '@eversity/utils/misc';

import { type SanitizeSchema } from './schemas';

/**
 * Serialize a hast tree to html by delegating to hast-util-to-html.
 *
 * @param hast - Hast root or element to serialize.
 * @returns The value produced by hast-util-to-html for the given tree.
 */
export function transformHastToHtmlString(hast: HastRoot | HastElement) {
  const html = hastUtilToHtml(hast);

  return html;
}
// Custom elements that are treated like void elements.
const customVoidElements = [
  CUSTOM_HTML.VIDEO,
  CUSTOM_HTML.LINKEDIN_EMBEDDED,
  CUSTOM_HTML.CAROUSEL_SLIDE,
  CUSTOM_HTML.ATTACHMENT,
  CUSTOM_HTML.AUDIO,
];

// Standard html void elements, followed by our custom void elements.
const voidElements = [...htmlVoidElements, ...customVoidElements];

/**
 * Turn html content into a hast tree.
 * When the argument is already a hast tree it is returned untouched, unless `forceParsing`
 * is set: in that case the tree is serialized back to html and parsed again.
 *
 * @param htmlOrHast - Html string or hast tree.
 * @param [options] - Options.
 * @param [options.forceParsing=false] - Serialize and reparse the content when it is a hast tree.
 * @returns Hast tree.
 */
export function parseHtml(
  htmlOrHast: string | HastElement | HastRoot,
  {
    forceParsing,
  }: {
    forceParsing?: boolean;
  } = {},
): HastElement | HastRoot {
  const isAlreadyHast = Boolean(htmlOrHast) && typeof htmlOrHast === 'object';

  // Nothing to parse: hand the tree back as is.
  if (isAlreadyHast && !forceParsing) {
    return htmlOrHast as HastElement | HastRoot;
  }

  // Either a plain html string, or a tree that has to go through parse5 again.
  const html = isAlreadyHast
    ? transformHastToHtmlString(htmlOrHast as HastElement | HastRoot)
    : (htmlOrHast as string);

  // Empty / missing content is parsed as an empty document.
  return fromParse5(parse5.parse(html || '')) as HastRoot;
}

/**
 * The sanitizer accepts any url that has no protocol at all (e.g. relative urls) for the
 * attributes listed in the `protocols` property of the schema.
 * This method makes sure that the value of each of those attributes starts with one of the
 * protocols allowed by the schema, and removes the attribute from the node otherwise.
 *
 * A value is considered valid when it starts (case-insensitively) with `<protocol>:`.
 * The `//` part is deliberately not required, so that schemes without an authority
 * (`mailto:`, `tel:`, ...) are kept when the schema allows them.
 * When the attribute holds a list of values, one valid value is enough to keep it.
 *
 * If there is no schema, or the schema declares no protocol attributes, the input is
 * returned as HAST without being walked.
 *
 * @param htmlOrHast - HTML string or HAST.
 * @param options - Options.
 * @param options.schema - Schema to use to sanitize the HAST. See ./schemas.
 * @returns Filtered HAST.
 */
export function filterInvalidProtocols(
  htmlOrHast: string | HastElement | HastRoot,
  {
    schema,
  }: {
    schema?: SanitizeSchema;
  } = {},
): HastElement | HastRoot {
  // Parse if argument is not already an HAST.
  const hast = parseHtml(htmlOrHast);

  const protocolsByAttribute = schema?.protocols || {};
  const protocolAttributes = Object.keys(protocolsByAttribute);

  // Nothing to validate.
  if (!protocolAttributes.length) {
    return hast;
  }

  // Walk the tree and remove the attributes whose value has an invalid protocol.
  return mapTree(hast, (node) => {
    if (node.type !== 'element') {
      return node;
    }

    const invalidAttributes = protocolAttributes.filter((attribute) => {
      const attributeValue = node.properties?.[attribute];

      // Node does not have the attribute: nothing to remove.
      if (!attributeValue) {
        return false;
      }

      // Properties can hold either a single value or a list of values.
      const propertyValues = Array.isArray(attributeValue)
        ? attributeValue
        : [attributeValue];
      const allowedProtocols = protocolsByAttribute[attribute] || [];

      // Invalid if no value starts with one of the allowed protocols.
      return !propertyValues.some(
        (propertyValue) =>
          typeof propertyValue === 'string' &&
          allowedProtocols.some((protocol) =>
            propertyValue
              .toLowerCase()
              .startsWith(`${protocol.toLowerCase()}:`),
          ),
      );
    });

    return invalidAttributes.length
      ? {
          ...node,
          properties: omit(node.properties, invalidAttributes),
        }
      : node;
  });
}

/**
 * Remove from a HAST tree every element that has an attribute whose url hostname is not
 * listed in the allowedDomains of the schema for that element / attribute.
 *
 * Elements without rules, and attributes that are missing or empty on the element, are left
 * alone. An attribute value that cannot be parsed as a url causes the element to be removed.
 *
 * @example
 * const schema = {
 *   allowedDomains: {
 *     'some-element': {
 *       'some-attribute': 'www.valid-url.com',
 *     },
 *   },
 * };
 *
 * const html = '<some-element some-attribute="www.invalid-url.com"></some-element>';
 *
 * sanitizeInvalidDomains(html, { schema }) // -> { type: 'root', children: [] };
 *
 * @param htmlOrHast - HTML string or HAST.
 * @param options - Options.
 * @param options.schema - Schema to use to sanitize the HAST. See ./schemas.
 * @returns Filtered HAST.
 */
export const sanitizeInvalidDomains = (
  htmlOrHast: string | HastElement | HastRoot,
  {
    schema: { allowedDomains = {} } = {},
  }: {
    schema?: SanitizeSchema;
  } = {},
): HastElement | HastRoot => {
  // Parse if argument is not already an HAST.
  const hast = parseHtml(htmlOrHast);

  // Without any domain rule there is nothing to filter.
  if (isEmpty(allowedDomains)) {
    return hast;
  }

  return filterTree(hast, (node) => {
    // Only elements carry attributes to validate.
    if (node.type !== 'element') {
      return true;
    }

    // Rules for this tag, keyed by attribute name.
    // For example, eversity-linkedin-embedded only allows www.linkedin.com urls as src.
    const domainsByAttribute = allowedDomains[node.tagName];

    for (const attributeName of Object.keys(domainsByAttribute || {})) {
      try {
        const attributeValue = node.properties[attributeName] as string;

        // The node does not have the attribute: this rule does not apply.
        if (!attributeValue) {
          continue;
        }

        // Throws if the value is not a valid absolute url.
        const { hostname } = new URL(attributeValue);

        // Drop the node as soon as one hostname is not explicitly allowed.
        const allowedHostnames = ensureArray(domainsByAttribute[attributeName]);
        if (!allowedHostnames.includes(hostname)) {
          return false;
        }
      } catch {
        // Any failure while validating the attribute drops the node.
        return false;
      }
    }

    // No rule was violated (or the tag has no rules at all).
    return true;
  });
};

/**
 * Sanitize content against a schema.
 *
 * Steps, in order: parse the input, optionally strip attributes with an invalid protocol,
 * strip nodes with an invalid domain when the schema declares allowedDomains, then run
 * hast-util-sanitize with the schema.
 *
 * @param htmlOrHast - HTML or HAST content.
 * @param options - Options.
 * @param options.schema - Schema to use to sanitize the HAST. See ./schemas.
 * @param options.forceParsing - Force to reparse content if not html string. Defaults to false.
 * @param options.strictProtocol - Remove attributes not starting with a valid protocol.
 * Defaults to true.
 * @returns HAST.
 */
export const sanitizeHast = (
  htmlOrHast: string | HastElement | HastRoot,
  {
    schema,
    forceParsing = false,
    strictProtocol = true,
  }: {
    schema: SanitizeSchema;
    forceParsing?: boolean;
    strictProtocol?: boolean;
  },
): HastElement | HastRoot => {
  // Stage 1: parse (no-op for a hast input unless forceParsing is set).
  const parsed = parseHtml(htmlOrHast, { forceParsing });

  // Stage 2: attributes with invalid protocols.
  const withValidProtocols = strictProtocol
    ? filterInvalidProtocols(parsed, { schema })
    : parsed;

  // Stage 3: nodes with invalid domains.
  const withValidDomains = schema.allowedDomains
    ? sanitizeInvalidDomains(withValidProtocols, { schema })
    : withValidProtocols;

  // Stage 4: regular schema-based sanitization.
  return hastSanitize(withValidDomains, schema) as HastElement;
};

/**
 * Strip whitespace nodes and empty elements from html content.
 * If the argument is already a hast tree, it is not parsed again.
 *
 * Void elements (standard and custom), `td` elements and elements that have children are
 * always kept. An empty element is also kept when it is the first child of a `td` that has
 * no other content, so that empty table cells do not break the table layout.
 *
 * @param htmlOrHast - Html string or hast tree.
 * @returns Hast tree.
 */
export const minifyHast = (
  htmlOrHast: string | HastElement | HastRoot,
): HastElement | HastRoot => {
  // Parse if argument is not already an HAST.
  const hast = parseHtml(htmlOrHast);

  return filterTree(hast, (node, nodePath, parent) => {
    // Non-element nodes are kept unless they are reported as whitespace.
    if (node.type !== 'element') {
      return !hastIsWhitespace(node);
    }

    // Cells, void elements and elements with children are never removed.
    if (
      node.tagName === 'td' ||
      voidElements.includes(node.tagName) ||
      node.children.length > 0
    ) {
      return true;
    }

    // From here on the element is empty. It only survives as the first child of a td...
    if (
      parent?.type !== 'element' ||
      parent.tagName !== 'td' ||
      last(nodePath) !== 0
    ) {
      return false;
    }

    // ...and only if nothing else in that cell has content.
    // AKA always keep an empty <p> inside a <td> if the cell is empty.
    const cellHasContent = parent.children.some((sibling) => {
      if (sibling.type === 'element') {
        return (
          voidElements.includes(sibling.tagName) || sibling.children.length > 0
        );
      }

      return !hastIsWhitespace(sibling);
    });

    return !cellHasContent;
  });
};

/**
 * Parse HTML content, minify it if requested, sanitize it and return it as a HAST.
 *
 * @param html - HTML content.
 * @param options - Options.
 * @param options.sanitizeSchema - Schema to use to sanitize the HAST. See ./schemas.
 * @param options.minify - Should minify the HAST before sanitizing it. Defaults to true.
 * @returns HAST.
 */
export const transformHtmlToHast = (
  html: string,
  {
    sanitizeSchema,
    minify = true,
  }: {
    sanitizeSchema: SanitizeSchema;
    minify?: boolean;
  },
): HastRoot => {
  const parsed = parseHtml(html);

  // Empty nodes are removed before sanitizing, when asked to.
  const prepared = minify ? minifyHast(parsed) : parsed;

  // Sanitize the content based on the given schema.
  const sanitized = sanitizeHast(prepared, { schema: sanitizeSchema });

  return sanitized as HastRoot;
};
