/*
 * Decompiled with CFR 0.152.
 */
package com.nexvor.rag;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Splits text into chunks of bounded size.
 *
 * <p>Two strategies are offered: {@link #chunk(String)} cuts a whitespace-normalized
 * text into overlapping windows, preferring sentence or word boundaries, and
 * {@link #chunkSemantic(String)} groups whole paragraphs and only falls back to
 * {@link #chunk(String)} for a paragraph that is too long on its own.
 *
 * <p>Instances hold only two final {@code int} settings and are therefore immutable.
 */
public class DocumentChunker {
    /** One or more consecutive whitespace characters. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /** Two or more consecutive line terminators ({@code \n} or {@code \r\n}). */
    private static final Pattern PARAGRAPH_BREAK = Pattern.compile("(?:\\r?\\n){2,}");

    /** Separator placed between paragraphs that share a chunk. */
    private static final String PARAGRAPH_SEPARATOR = "\n\n";

    private final int chunkSize;
    private final int overlapSize;

    /**
     * Creates a chunker.
     *
     * @param chunkSize   maximum number of characters per chunk; must be positive
     * @param overlapSize number of characters at the end of one chunk that are repeated
     *                    at the start of the next one by {@link #chunk(String)}; when the
     *                    overlap would prevent the window from advancing it is skipped
     *                    for that step
     * @throws IllegalArgumentException if {@code chunkSize} is not positive
     */
    public DocumentChunker(int chunkSize, int overlapSize) {
        if (chunkSize <= 0) {
            throw new IllegalArgumentException("chunkSize must be positive, got " + chunkSize);
        }
        this.chunkSize = chunkSize;
        this.overlapSize = overlapSize;
    }

    /**
     * Splits {@code content} into windows of at most {@code chunkSize} characters.
     *
     * <p>Every run of whitespace is first collapsed to a single space and the text is
     * trimmed. Each window is then shortened to end after a sentence terminator or,
     * failing that, after a whitespace character, provided such a position exists in
     * the second half of the window. Consecutive windows overlap by {@code overlapSize}
     * characters.
     *
     * @param content text to split; may be {@code null}
     * @return the chunks in document order; empty when {@code content} is {@code null},
     *         empty, or consists only of whitespace
     */
    public List<String> chunk(String content) {
        List<String> chunks = new ArrayList<>();
        if (content == null || content.isEmpty()) {
            return chunks;
        }
        String text = WHITESPACE_RUN.matcher(content).replaceAll(" ").trim();
        if (text.isEmpty()) {
            // Whitespace-only input carries nothing worth emitting.
            return chunks;
        }
        int length = text.length();
        if (length <= chunkSize) {
            chunks.add(text);
            return chunks;
        }

        int start = 0;
        while (start < length) {
            // Written as a subtraction so that start + chunkSize cannot overflow.
            int end = (length - start <= chunkSize) ? length : start + chunkSize;
            if (end < length) {
                int sentenceBreak = findSentenceBreak(text, start, end);
                if (sentenceBreak > start) {
                    end = sentenceBreak;
                }
            }
            String piece = text.substring(start, end).trim();
            if (!piece.isEmpty()) {
                chunks.add(piece);
            }
            if (end >= length) {
                // The tail has been emitted; stepping back by the overlap would only
                // re-emit it forever.
                break;
            }
            int next = end - overlapSize;
            if (next <= start) {
                // The overlap is at least as large as this window: drop it for this
                // step so the window always moves forward and no text is lost.
                next = end;
            }
            start = next;
        }
        return chunks;
    }

    /**
     * Looks for a cut position inside the second half of the window {@code [start, end)}.
     *
     * <p>Scanning backwards from {@code end - 1}, the first pass accepts a {@code '.'},
     * {@code '!'} or {@code '?'} that is followed by whitespace or by the end of the
     * text; the second pass accepts any whitespace character.
     *
     * @return the index just after the accepted character, or {@code -1} if none is found
     */
    private int findSentenceBreak(String content, int start, int end) {
        int lowerBound = start + chunkSize / 2;
        for (int i = end - 1; i > lowerBound; i--) {
            char c = content.charAt(i);
            boolean terminator = c == '.' || c == '!' || c == '?';
            boolean endsSentence = i + 1 >= content.length()
                    || Character.isWhitespace(content.charAt(i + 1));
            if (terminator && endsSentence) {
                return i + 1;
            }
        }
        for (int i = end - 1; i > lowerBound; i--) {
            if (Character.isWhitespace(content.charAt(i))) {
                return i + 1;
            }
        }
        return -1;
    }

    /**
     * Splits {@code content} on blank lines and packs whole paragraphs into chunks.
     *
     * <p>Paragraphs are trimmed, empty ones are skipped, and neighbours are joined with
     * a blank line as long as the result, separator included, stays within
     * {@code chunkSize}. A single paragraph longer than {@code chunkSize} is delegated
     * to {@link #chunk(String)}.
     *
     * @param content text to split; may be {@code null}
     * @return the chunks in document order; empty when {@code content} is {@code null}
     *         or empty
     */
    public List<String> chunkSemantic(String content) {
        List<String> chunks = new ArrayList<>();
        if (content == null || content.isEmpty()) {
            return chunks;
        }
        StringBuilder current = new StringBuilder();
        for (String rawParagraph : PARAGRAPH_BREAK.split(content)) {
            String paragraph = rawParagraph.trim();
            if (paragraph.isEmpty()) {
                continue;
            }
            // Count the separator too, otherwise a pair that "just fits" overflows.
            if (current.length() > 0
                    && current.length() + PARAGRAPH_SEPARATOR.length() + paragraph.length() > chunkSize) {
                chunks.add(current.toString());
                current.setLength(0);
            }
            if (paragraph.length() > chunkSize) {
                // The buffer is empty here: anything pending was flushed just above.
                chunks.addAll(chunk(paragraph));
                continue;
            }
            if (current.length() > 0) {
                current.append(PARAGRAPH_SEPARATOR);
            }
            current.append(paragraph);
        }
        if (current.length() > 0) {
            chunks.add(current.toString());
        }
        return chunks;
    }

    /** Returns the maximum number of characters per chunk. */
    public int getChunkSize() {
        return chunkSize;
    }

    /** Returns the configured overlap, in characters, between consecutive chunks. */
    public int getOverlapSize() {
        return overlapSize;
    }
}

