// SPDX-FileCopyrightText: 2025 Open Mobile Platform LLC <community@omp.ru>
// SPDX-License-Identifier: BSD-3-Clause

#include "pdfdocumentphrasesearchtask.h"
#include <QRegularExpression>

/*!
 * \class PdfDocumentPhraseSearchTask
 * \brief The PdfDocumentPhraseSearchTask class is a task that searches for a phrase in a PDF document.
 * \inmodule AmberPDF
 * \ingroup tasks
 */

/*!
 * Constructs a search task that works with an existing page texts model.
 * \a phrase is the phrase to search for in the document.
 * \a pageTextsModel is the PdfPageTexts model that run() queries for cached
 * pages, fills with search results and reports through the future interface.
 * \a interface is the QFutureInterface used to report progress and results.
 * \a documentHolder is a QSharedPointer to the PdfDocumentHolder object with the document.
 */
PdfDocumentPhraseSearchTask::PdfDocumentPhraseSearchTask(
        const QString phrase,
        PdfPageTexts *pageTextsModel,
        const QFutureInterface<PdfPageTexts*> &interface,
        const QSharedPointer<PdfDocumentHolder> &documentHolder)
    : m_interface(interface)
    , m_documentHolder(documentHolder)
    , m_phrase(phrase)
    , m_pageTextsModel(pageTextsModel)
{
}

/*!
 * Constructs a search task without passing a page texts model.
 * \a phrase is the phrase to search for in the document.
 * \a interface is the QFutureInterface used to report progress and results.
 * \a documentHolder is a QSharedPointer to the PdfDocumentHolder object with the document.
 */
PdfDocumentPhraseSearchTask::PdfDocumentPhraseSearchTask(
        const QString phrase,
        const QFutureInterface<PdfPageTexts*> &interface,
        const QSharedPointer<PdfDocumentHolder> &documentHolder)
    : m_interface(interface)
    , m_documentHolder(documentHolder)
    , m_phrase(phrase)
{
    // NOTE(review): m_pageTextsModel is not set by this overload, while run()
    // dereferences it. Confirm that the class declaration gives the member a
    // default initializer before using this constructor.
}

/*!
 * Destructor.
 * Defaulted: no explicit cleanup is performed here beyond destroying the members.
 */
PdfDocumentPhraseSearchTask::~PdfDocumentPhraseSearchTask() = default;

/*!
 * Checks whether \a charToCheck is a line break character
 * (line feed or carriage return).
 * Returns \c true if it is, \c false otherwise.
 */
static bool checkEscapeSequence(QChar charToCheck)
{
    switch (charToCheck.unicode()) {
    case 0x000A: // '\n', line feed
    case 0x000D: // '\r', carriage return
        return true;
    default:
        return false;
    }
}

/*!
 * Starts the process of searching phrase in document.
 */
void PdfDocumentPhraseSearchTask::run()
{
    if (m_interface.isCanceled() || PdfTaskQueue::instance().blockedId().contains(m_documentHolder->id())) {
        m_interface.cancel();
        return;
    }
    m_phrasesForModel.clear();
    FPDF_WIDESTRING phraseWideString = m_phrase.utf16();
    const int pageCount = FPDF_GetPageCount(m_documentHolder->document().data());
    m_interface.setProgressRange(0, pageCount - 1);

    for (int i = 0; i < m_pageTextsModel->phrasesForModel().size(); i++) {
        m_oldResultByPage.insert(m_pageTextsModel->phrasesForModel().at(i)->index());
    }

    for (int i = 0; i < pageCount; ++i)
    {
        if (m_interface.isCanceled() || PdfTaskQueue::instance().blockedId().contains(m_documentHolder->id())) {
            m_interface.reportResult(m_pageTextsModel);
            return;
        }
        if (m_pageTextsModel->hasPage(i)) {
            findOnCachedPage(i, phraseWideString);
        } else {
            cacheAndFind(i, phraseWideString);
        }
        m_interface.setProgressValue(i);
    }
    m_pageTextsModel->addLastSearchResult(m_phrasesForModel);
    m_interface.reportFinished(&m_pageTextsModel);
}

/*!
 * Cancels task.
 * The body is empty, so calling this method performs no action. run() checks
 * m_interface.isCanceled() and the blocked ids of PdfTaskQueue on its own.
 */
void PdfDocumentPhraseSearchTask::cancel()
{

}

/*!
 * Returns the id of the document this task works with,
 * as reported by the document holder.
 */
int PdfDocumentPhraseSearchTask::id() const
{
    const auto documentId = m_documentHolder->id();
    return documentId;
}

void PdfDocumentPhraseSearchTask::findOnPage(const int& pageIndex, const FPDF_WIDESTRING& phrase) {
    auto loadedPage = QSharedPointer<fpdf_page_t__>(FPDF_LoadPage(m_documentHolder->document().data(), pageIndex), [](fpdf_page_t__ *){  });
    auto pageCharactersInfo = QSharedPointer<fpdf_textpage_t__>(FPDFText_LoadPage(loadedPage.data()), [](fpdf_textpage_t__ *){  });
    auto searchContext = QSharedPointer<fpdf_schhandle_t__>(FPDFText_FindStart(pageCharactersInfo.data(), phrase, 0x00000000, 0), [](fpdf_schhandle_t__*){  });
    int amountOfCharacters = FPDFText_CountChars(pageCharactersInfo.data());
    QVector<QSharedPointer<PdfWord>> pageMatchingPhrases;
    QVector<ushort> phraseBuffer(static_cast<qint32>(amountOfCharacters + 1));

    FPDFText_GetText(pageCharactersInfo.data(), 0, amountOfCharacters, phraseBuffer.data());

    if (!m_pageTextsModel->hasPage(pageIndex)) {
        QString pageText = "";
        for (int i = 0; i < amountOfCharacters; i++)
            pageText.append(phraseBuffer.at(i));
        m_pageTextsModel->addPage(pageIndex, pageText);
    }

    while (FPDFText_FindNext(searchContext.data())) {
        int numberOfMatchedCharacters = FPDFText_GetSchCount(searchContext.data());
        int startingCharacterIndex = FPDFText_GetSchResultIndex(searchContext.data());
        int beforeCharacterIndex = -1;
        int afterCharacterIndex = 0;

        QVector<ushort> phraseBuffer(static_cast<qint32>(numberOfMatchedCharacters + 1));

        // Needed to expand phrase (type in "know" -> get "knowledge" or "well-known" as result).
        QVector<ushort> phraseRightSide(static_cast<qint32>(2));
        QVector<ushort> phraseLeftSide(static_cast<qint32>(2));

        // Character box borders.
        double globalLeft, globalTop, globalRight, globalBottom;

        QList<QRectF> attachedRects;

        QString text;

        // Unicode text.
        FPDFText_GetText(pageCharactersInfo.data(), startingCharacterIndex, numberOfMatchedCharacters, phraseBuffer.data());
        FPDFText_GetCharBox(pageCharactersInfo.data(), startingCharacterIndex, &globalLeft, &globalRight, &globalBottom, &globalTop);
        text.append(QString::fromUtf16(phraseBuffer.data()));

        // Expanding phrase to the left (if there were symbols before the found word).
        while (startingCharacterIndex + beforeCharacterIndex >= 0) {
            FPDFText_GetText(pageCharactersInfo.data(), startingCharacterIndex + beforeCharacterIndex, 1, phraseLeftSide.data());
            QChar beforeChar = QChar(phraseLeftSide.first());
            if (!beforeChar.isLetter() && !FPDFText_IsHyphen(pageCharactersInfo.data(), startingCharacterIndex + beforeCharacterIndex) && beforeChar.category() != QChar::Punctuation_Dash) {
                beforeCharacterIndex++;
                break;
            }
            if (!beforeChar.isNonCharacter())
                text.prepend(beforeChar);
            beforeCharacterIndex--;
        }
        if (startingCharacterIndex + beforeCharacterIndex < 0)
            beforeCharacterIndex++;

        // Expanding phrase to the right (if there were symbols after the found word).
        while (startingCharacterIndex + numberOfMatchedCharacters + afterCharacterIndex < amountOfCharacters) {
            FPDFText_GetText(pageCharactersInfo.data(), startingCharacterIndex + numberOfMatchedCharacters + afterCharacterIndex, 1, phraseRightSide.data());
            QChar afterChar = QChar(phraseRightSide.first());
            if (!afterChar.isLetter() && !FPDFText_IsHyphen(pageCharactersInfo.data(), startingCharacterIndex + numberOfMatchedCharacters + afterCharacterIndex) && afterChar.category() != QChar::Punctuation_Dash)
                break;
            if (!afterChar.isNonCharacter())
                text.append(afterChar);
            afterCharacterIndex++;
        }
        double attachedRectLeft = FPDF_GetPageWidth(loadedPage.data());
        double attachedRectTop = 0.0;
        double attachedRectRight = 0.0;
        double attachedRectBottom = FPDF_GetPageHeightF(loadedPage.data());

        // Refreshing the whole phrase coordinates.
        for (int i = beforeCharacterIndex; i < numberOfMatchedCharacters + afterCharacterIndex; ++i) {
            double charLeft, charTop, charRight, charBottom;
            // Refreshing the character box borders for each character of the word or phrase.
            FPDFText_GetCharBox(pageCharactersInfo.data(), startingCharacterIndex + i, &charLeft, &charRight, &charBottom, &charTop);

            if (charTop <= attachedRectBottom && i != beforeCharacterIndex) {
                attachedRects.append(QRectF(attachedRectLeft, attachedRectBottom, attachedRectRight - attachedRectLeft, attachedRectTop - attachedRectBottom));
                attachedRectLeft = charLeft;
                attachedRectTop = charTop;
                attachedRectRight = charRight;
                attachedRectBottom = charBottom;
            }
            globalLeft = std::min(globalLeft, charLeft);
            globalBottom = std::min(globalBottom, charBottom);
            globalRight = std::max(globalRight, charRight);
            globalTop = std::max(globalTop, charTop);
            attachedRectLeft = std::min(attachedRectLeft, charLeft);
            attachedRectBottom = std::min(attachedRectBottom, charBottom);
            attachedRectRight = std::max(attachedRectRight, charRight);
            attachedRectTop = std::max(attachedRectTop, charTop);
        }
        attachedRects.append(QRectF(attachedRectLeft, attachedRectBottom, attachedRectRight - attachedRectLeft, attachedRectTop - attachedRectBottom));

        QString textWithColloredPhrase = text;
        QString colloredPhrase = QString("<span style='background-color: yellow; color: black;'>%1</span>").arg(QString::fromUtf16(phraseBuffer.data()));
        textWithColloredPhrase = textWithColloredPhrase.replace(m_phrase, colloredPhrase, Qt::CaseInsensitive);
        QString context = getPhraseContext(amountOfCharacters, pageCharactersInfo.data(), startingCharacterIndex + beforeCharacterIndex, textWithColloredPhrase, text.length());

        // Creating the box, bordering the phrase.
        QRectF rect(globalLeft, globalBottom, globalRight - globalLeft, globalTop - globalBottom);
        // If this is not the first phrase on the page - check for duplicate (value, page index, coordinates).
        if (pageMatchingPhrases.isEmpty() ||
                !(pageMatchingPhrases.last()->value() == text &&
                   pageMatchingPhrases.last()->index() == pageIndex &&
                   pageMatchingPhrases.last()->rect() == rect)) {
            PdfWord* word = new PdfWord(text, pageIndex, rect, nullptr, context);
            word->setAttachedRects(attachedRects);
            pageMatchingPhrases.append(QSharedPointer<PdfWord>(word));
        }
    }

    // Clearing data.
    m_oldResultByPage.remove(pageIndex);
    FPDFText_FindClose(searchContext.data());
    FPDFText_ClosePage(pageCharactersInfo.data());
    FPDF_ClosePage(loadedPage.data());

    m_phrasesForModel.append(pageMatchingPhrases);
}

/*!
 * Searches matching \a phrase for the given page by \a pageIndex.
 */
void PdfDocumentPhraseSearchTask::cacheAndFind(const int& pageIndex, const FPDF_WIDESTRING& phrase)
{
    findOnPage(pageIndex, phrase);
}

void PdfDocumentPhraseSearchTask::findOnCachedPage(const int& pageIndex, const FPDF_WIDESTRING& phrase) {

    QRegularExpressionMatchIterator i = m_pageTextsModel->findTextOnPage(pageIndex, m_phrase);
    if (i.hasNext())
        findOnPage(pageIndex, phrase);
}

/*!
 * Builds the context (previous line, current line, next line) for the given phrase.
 * \a amountOfCharacters is the amount of characters on the page.
 * \a page is the current text page.
 * \a startIndex is the index of the first character of the phrase.
 * \a textWithColloredPhrase is the text put into the context in place of the phrase.
 * \a wordLength is the number of context characters, starting at the phrase
 * position, that are replaced by \a textWithColloredPhrase.
 * Returns the context with line breaks turned into spaces, trimmed and
 * wrapped in "...".
 */
QString PdfDocumentPhraseSearchTask::getPhraseContext(const int& amountOfCharacters, const FPDF_TEXTPAGE page, const int& startIndex,
                                                      const QString textWithColloredPhrase, const int wordLength)
{
    // Walk backwards until two line breaks are passed or the page start is reached.
    int contextBeginningIndex = startIndex;
    for (int passedBreaks = 0; contextBeginningIndex > 0 && passedBreaks < 2; --contextBeginningIndex) {
        const QChar here(FPDFText_GetUnicode(page, contextBeginningIndex - 1));
        if (!checkEscapeSequence(here))
            continue;

        // A pair of different line break characters counts as one line break.
        const QChar before = (contextBeginningIndex > 1)
                ? QChar(FPDFText_GetUnicode(page, contextBeginningIndex - 2))
                : QChar(0);
        if (checkEscapeSequence(before) && before != here)
            --contextBeginningIndex;
        ++passedBreaks;
    }

    // Walk forwards until two line breaks are passed or the page end is reached.
    int contextEndIndex = startIndex;
    for (int passedBreaks = 0; contextEndIndex < amountOfCharacters && passedBreaks < 2; ++contextEndIndex) {
        const QChar here(FPDFText_GetUnicode(page, contextEndIndex));
        if (!checkEscapeSequence(here))
            continue;

        // A pair of different line break characters counts as one line break.
        const QChar after = (contextEndIndex + 1 < amountOfCharacters)
                ? QChar(FPDFText_GetUnicode(page, contextEndIndex + 1))
                : QChar(0);
        if (checkEscapeSequence(after) && after != here)
            ++contextEndIndex;
        ++passedBreaks;
    }

    // Read the raw context; one extra element is reserved for the terminator.
    const int contextLength = contextEndIndex - contextBeginningIndex;
    QVector<ushort> rawContext(contextLength + 1);
    FPDFText_GetText(page, contextBeginningIndex, contextLength, rawContext.data());

    QString contextText = QString::fromUtf16(rawContext.data());
    // Put the highlighted text in place of the phrase.
    contextText.replace(startIndex - contextBeginningIndex, wordLength, textWithColloredPhrase);
    // Flatten the context into a single line.
    contextText.replace("\n", " ").replace("\r", " ");

    return "..." + contextText.trimmed() + "...";
}
