package ingestion import ( "fmt" "os" "path/filepath" "strings" ) type Result struct { Title string Body string Format string } var parsers = map[string]func(string) (string, error){ ".md": ParseMarkdown, ".markdown": ParseMarkdown, ".pdf": ParsePDF, ".docx": ParseDOCX, } func ParseFile(path string) (Result, error) { ext := strings.ToLower(filepath.Ext(path)) parse, ok := parsers[ext] if !ok { return Result{}, fmt.Errorf("unsupported file format: %s", ext) } body, err := parse(path) if err != nil { return Result{}, fmt.Errorf("parse %s: %w", ext, err) } title := strings.TrimSuffix(filepath.Base(path), ext) return Result{ Title: title, Body: strings.TrimSpace(body), Format: ext[1:], }, nil } func IsSupported(path string) bool { ext := strings.ToLower(filepath.Ext(path)) _, ok := parsers[ext] return ok } func SupportedExtensions() []string { exts := make([]string, 0, len(parsers)) for ext := range parsers { exts = append(exts, ext) } return exts } func ParseFromBytes(filename string, data []byte) (Result, error) { safe := filepath.Base(filename) if safe == "." || safe == string(filepath.Separator) { return Result{}, fmt.Errorf("invalid filename: %q", filename) } tmpDir, err := os.MkdirTemp("", "ingestion-*") if err != nil { return Result{}, fmt.Errorf("create temp dir: %w", err) } defer os.RemoveAll(tmpDir) tmpPath := filepath.Join(tmpDir, safe) if err := os.WriteFile(tmpPath, data, 0644); err != nil { return Result{}, fmt.Errorf("write temp file: %w", err) } return ParseFile(tmpPath) }