import { useState } from 'react';
import { useDispatch } from 'react-redux';
import Tesseract from 'tesseract.js';
import { setSnackbar } from 'store/slice/page';

/**
 * React hook that runs OCR on an image with Tesseract.js and exposes the
 * recognised words as component state.
 *
 * @returns An object with:
 *  - `extractedWords`: unique, lower-cased alphanumeric words (3+ characters)
 *    from the most recent successful extraction, in order of first appearance;
 *    empty initially and after a failed extraction.
 *  - `isProcessing`: `true` while an extraction is in flight.
 *  - `extractTextFromImage(src)`: starts an extraction for the given image
 *    source. It is a no-op when `src` is empty. It never rejects: on failure an
 *    error snackbar is dispatched and `extractedWords` is reset to `[]`.
 */
export const useTextExtraction = () => {
  const dispatch = useDispatch();

  const [extractedWords, setExtractedWords] = useState<string[]>([]);
  const [isProcessing, setIsProcessing] = useState(false);

  const extractTextFromImage = async (src: string) => {
    if (!src) return;
    setIsProcessing(true);

    // Declared outside `try` so the `finally` block can release the worker
    // even when a step after creation throws.
    let worker: Tesseract.Worker | undefined;

    try {
      worker = await Tesseract.createWorker('eng');
      // NOTE(review): createWorker('eng') presumably already initialises the
      // English model, which would make this call redundant — confirm against
      // the installed tesseract.js version before removing it.
      await worker.reinitialize('eng');

      // Restrict recognition to ASCII letters, digits and the space character,
      // and tune the engine via string-valued Tesseract parameters.
      // NOTE(review): the `as any` cast is presumably needed because some of
      // these keys are not part of the library's typed parameter set — confirm.
      await worker.setParameters({
        tessedit_char_whitelist:
          'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 ',
        tessedit_pageseg_mode: '6',
        tessedit_ocr_engine_mode: '1',
        classify_bln_numeric_mode: '1',
        textord_heavy_nr: '1',
        preserve_interword_spaces: '1',
      } as any);

      const { data } = await worker.recognize(src);

      const words = data.text
        .split(/\s+/)
        // Tokens contain no whitespace after the split; strip any remaining
        // non-alphanumeric characters and normalise case.
        .map(word => word.replace(/[^a-zA-Z0-9]/g, '').toLowerCase())
        .filter(word => word.length > 2); // remove words with less than 3 characters

      // A Set keeps insertion order, so this preserves first-occurrence order
      // while de-duplicating in linear time.
      setExtractedWords(Array.from(new Set(words)));
    } catch {
      dispatch(
        setSnackbar({
          severity: 'error',
          message: 'Failed to extract text from image',
        }),
      );
      setExtractedWords([]);
    } finally {
      if (worker) {
        try {
          await worker.terminate();
        } catch {
          // Best-effort cleanup: a failure to shut the worker down must not
          // discard an already-computed result or surface a misleading error.
        }
      }
      setIsProcessing(false);
    }
  };

  return {
    extractedWords,
    isProcessing,
    extractTextFromImage,
  };
};
