
/**
 * Utilities for extracting content from nested HTML structures
 */
import { extractTextFromHtml } from "@/utils/blog/excerptUtils";

/**
 * Extract the meaningful content from HTML that wraps a nested HTML document.
 *
 * Input that does not contain both an opening `<html` and a closing `</html>`
 * tag (matched case-insensitively, since HTML tag names are case-insensitive)
 * is returned unchanged. Otherwise the markup is parsed with `DOMParser` and
 * the `innerHTML` of the first match among the following is returned, in
 * priority order: `.content`, `article`, `main`, any `div` whose class or id
 * contains "content", and finally `<body>`.
 *
 * The original string is returned untouched when `DOMParser` is not defined
 * in the current runtime, or when parsing/extraction throws.
 *
 * @param html - The HTML content that might contain nested HTML documents
 * @returns Cleaned HTML content, or the original input when nothing can be extracted
 */
export const extractFromNestedHtml = (html: string): string => {
  // Skip processing if the content doesn't appear to have a nested HTML document.
  if (!/<html/i.test(html) || !/<\/html>/i.test(html)) {
    return html;
  }

  // Without a DOM implementation there is nothing we can parse with; bail out
  // quietly instead of throwing a ReferenceError and logging it on every call.
  if (typeof DOMParser === 'undefined') {
    return html;
  }

  // Ordered from most to least specific; the first selector that matches wins.
  const candidates: ReadonlyArray<{ selector: string; message: string }> = [
    // Deep nesting: html inside a content div inside html.
    { selector: '.content', message: 'Found content div in nested HTML, extracting content' },
    { selector: 'article', message: 'Found article element in HTML, extracting content' },
    { selector: 'main', message: 'Found main element in HTML, extracting content' },
    {
      selector: 'div[class*="content"], div[id*="content"]',
      message: 'Found div with content in class/id, extracting content',
    },
  ];

  try {
    // Parsing into a detached document does not execute scripts or touch the live page.
    const doc = new DOMParser().parseFromString(html, 'text/html');

    for (const { selector, message } of candidates) {
      // querySelector returns the first match in document order, or null.
      const match = doc.querySelector(selector);
      if (match) {
        console.log(message);
        return match.innerHTML;
      }
    }

    // No dedicated content container found: fall back to the whole body.
    const body = doc.body;
    if (body) {
      console.log('Extracting body content from HTML document');
      return body.innerHTML;
    }

    return html;
  } catch (error) {
    console.error('Error processing nested HTML:', error);
    return html; // Return original content if processing fails
  }
};

/**
 * Backward-compatibility alias: exposes `extractTextFromHtml` (imported from
 * "@/utils/blog/excerptUtils") under the name `extractExcerptFromHtml`, so
 * code that imports the excerpt extractor from this module keeps working.
 * This is the same function reference, not a wrapper.
 */
export const extractExcerptFromHtml = extractTextFromHtml;
