/*
 * Decompiled with CFR 0.152.
 */
package com.nexvor.rag;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * Reads plain-text and Markdown documents from disk and returns their text content.
 *
 * <p>Files ending in {@code .txt} are returned verbatim. Files ending in {@code .md} or
 * {@code .markdown} are passed through a regex-based, best-effort Markdown stripper that
 * removes the most common formatting markers; it is not a full Markdown parser.
 *
 * <p>Extension matching is case-insensitive. The class holds no instance state.
 */
public class DocumentParser {
    /** Supported file extensions, lower-case and without the leading dot. */
    private static final List<String> SUPPORTED_EXTENSIONS = List.of("txt", "md", "markdown");

    // Patterns are compiled once; String.replaceAll would recompile them on every call.
    private static final Pattern FENCED_CODE = Pattern.compile("```[\\s\\S]*?```");
    private static final Pattern INLINE_CODE = Pattern.compile("`([^`]+)`");
    private static final Pattern IMAGE = Pattern.compile("!\\[([^\\]]*)\\]\\([^)]+\\)");
    private static final Pattern LINK = Pattern.compile("\\[([^\\]]+)\\]\\([^)]+\\)");

    // Line-anchored patterns need MULTILINE so that ^ and $ match at every line, not only at
    // the start/end of the whole document. They use [ \t] instead of \s so that a match can
    // never swallow a line break and glue two lines together.
    private static final Pattern HORIZONTAL_RULE =
            Pattern.compile("^[\\-*_]{3,}[ \\t]*$", Pattern.MULTILINE);
    private static final Pattern HEADING = Pattern.compile("^#+[ \\t]+", Pattern.MULTILINE);
    private static final Pattern BLOCKQUOTE = Pattern.compile("^>[ \\t]+", Pattern.MULTILINE);
    private static final Pattern UNORDERED_ITEM =
            Pattern.compile("^[\\-*+][ \\t]+", Pattern.MULTILINE);
    private static final Pattern ORDERED_ITEM =
            Pattern.compile("^\\d+\\.[ \\t]+", Pattern.MULTILINE);

    private static final Pattern BOLD_ASTERISK = Pattern.compile("\\*\\*([^*]+)\\*\\*");
    private static final Pattern ITALIC_ASTERISK = Pattern.compile("\\*([^*]+)\\*");
    // Underscore emphasis must not be glued to word characters, otherwise identifiers such as
    // snake_case_name would lose their underscores.
    private static final Pattern BOLD_UNDERSCORE = Pattern.compile("(?<!\\w)__([^_]+)__(?!\\w)");
    private static final Pattern ITALIC_UNDERSCORE = Pattern.compile("(?<!\\w)_([^_]+)_(?!\\w)");

    private static final Pattern EXCESS_NEWLINES = Pattern.compile("\\n{3,}");

    /**
     * Reads the given file and returns its text, stripping Markdown markup for Markdown files.
     *
     * @param filePath path of the file to read; its file name decides how it is parsed
     * @return the file content ({@code .txt}) or the stripped content ({@code .md},
     *         {@code .markdown})
     * @throws IOException if the file cannot be read
     * @throws UnsupportedOperationException if the path has no file name or its extension is
     *         not supported
     */
    public String parse(Path filePath) throws IOException {
        // getFileName() returns null for paths with no name elements (e.g. a root directory).
        Path namePart = filePath.getFileName();
        if (namePart == null) {
            throw new UnsupportedOperationException("Unsupported file type: " + filePath);
        }
        // Locale.ROOT keeps the case mapping independent of the JVM's default locale.
        String fileName = namePart.toString().toLowerCase(Locale.ROOT);
        if (fileName.endsWith(".txt")) {
            return this.parseTxt(filePath);
        }
        if (fileName.endsWith(".md") || fileName.endsWith(".markdown")) {
            return this.parseMarkdown(filePath);
        }
        throw new UnsupportedOperationException("Unsupported file type: " + fileName);
    }

    /** Returns the content of a plain-text file unchanged. */
    private String parseTxt(Path filePath) throws IOException {
        return Files.readString(filePath);
    }

    /** Reads a Markdown file and returns its content with markup stripped. */
    private String parseMarkdown(Path filePath) throws IOException {
        String content = Files.readString(filePath);
        return this.stripMarkdown(content);
    }

    /**
     * Removes common Markdown markup from {@code markdown} and returns the remaining text.
     *
     * <p>Fenced code blocks and images are dropped entirely; inline code, links and emphasis
     * keep their inner text; heading, blockquote and list markers at the start of a line are
     * removed, as are horizontal-rule lines. Runs of three or more newlines are collapsed to
     * two and the result is trimmed.
     */
    private String stripMarkdown(String markdown) {
        String text = markdown;

        // Code first, so that markup-like characters inside code fences are not interpreted.
        text = FENCED_CODE.matcher(text).replaceAll("");
        text = INLINE_CODE.matcher(text).replaceAll("$1");

        // Images before links: the link pattern would otherwise match the "[alt](url)" part
        // of "![alt](url)" and leave "!alt" behind.
        text = IMAGE.matcher(text).replaceAll("");
        text = LINK.matcher(text).replaceAll("$1");

        // Block-level markers before inline emphasis: otherwise two consecutive "* item"
        // bullets would be read as one "*...*" italic span.
        text = HORIZONTAL_RULE.matcher(text).replaceAll("");
        text = HEADING.matcher(text).replaceAll("");
        text = BLOCKQUOTE.matcher(text).replaceAll("");
        text = UNORDERED_ITEM.matcher(text).replaceAll("");
        text = ORDERED_ITEM.matcher(text).replaceAll("");

        // Bold before italic so that "**x**" is not consumed as nested "*...*".
        text = BOLD_ASTERISK.matcher(text).replaceAll("$1");
        text = BOLD_UNDERSCORE.matcher(text).replaceAll("$1");
        text = ITALIC_ASTERISK.matcher(text).replaceAll("$1");
        text = ITALIC_UNDERSCORE.matcher(text).replaceAll("$1");

        text = EXCESS_NEWLINES.matcher(text).replaceAll("\n\n");
        return text.trim();
    }

    /**
     * Tells whether a file name has one of the supported extensions (case-insensitive).
     *
     * @param fileName file name to check; {@code null} is treated as unsupported
     * @return {@code true} if the name ends with {@code .txt}, {@code .md} or {@code .markdown}
     */
    public boolean isSupported(String fileName) {
        if (fileName == null) {
            return false;
        }
        String lowerName = fileName.toLowerCase(Locale.ROOT);
        for (String extension : SUPPORTED_EXTENSIONS) {
            if (lowerName.endsWith("." + extension)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the supported extensions without the leading dot.
     *
     * @return a new mutable list on every call, so callers may modify it freely
     */
    public List<String> getSupportedExtensions() {
        return new ArrayList<>(SUPPORTED_EXTENSIONS);
    }

    /**
     * Checks that a file is no larger than the given limit.
     *
     * @param filePath path of the file to measure
     * @param maxSizeBytes largest accepted size in bytes (inclusive)
     * @return {@code true} if the file size is less than or equal to {@code maxSizeBytes}
     * @throws IOException if the file size cannot be determined
     */
    public boolean validateFileSize(Path filePath, long maxSizeBytes) throws IOException {
        return Files.size(filePath) <= maxSizeBytes;
    }
}

