76 lines
1.6 KiB
Go
76 lines
1.6 KiB
Go
|
|
package ingestion
|
||
|
|
|
||
|
|
import (
|
||
|
|
"fmt"
|
||
|
|
"os"
|
||
|
|
"path/filepath"
|
||
|
|
"strings"
|
||
|
|
)
|
||
|
|
|
||
|
|
// Result is the outcome of parsing a single document.
type Result struct {
	// Title is the file's base name with its extension removed.
	Title string
	// Body is the text returned by the format-specific parser, with leading
	// and trailing whitespace trimmed.
	Body string
	// Format is the lowercase file extension without the leading dot,
	// for example "md" or "pdf".
	Format string
}
|
||
|
|
|
||
|
|
var parsers = map[string]func(string) (string, error){
|
||
|
|
".md": ParseMarkdown,
|
||
|
|
".markdown": ParseMarkdown,
|
||
|
|
".pdf": ParsePDF,
|
||
|
|
".docx": ParseDOCX,
|
||
|
|
}
|
||
|
|
|
||
|
|
func ParseFile(path string) (Result, error) {
|
||
|
|
ext := strings.ToLower(filepath.Ext(path))
|
||
|
|
parse, ok := parsers[ext]
|
||
|
|
if !ok {
|
||
|
|
return Result{}, fmt.Errorf("unsupported file format: %s", ext)
|
||
|
|
}
|
||
|
|
|
||
|
|
body, err := parse(path)
|
||
|
|
if err != nil {
|
||
|
|
return Result{}, fmt.Errorf("parse %s: %w", ext, err)
|
||
|
|
}
|
||
|
|
|
||
|
|
title := strings.TrimSuffix(filepath.Base(path), ext)
|
||
|
|
return Result{
|
||
|
|
Title: title,
|
||
|
|
Body: strings.TrimSpace(body),
|
||
|
|
Format: ext[1:],
|
||
|
|
}, nil
|
||
|
|
}
|
||
|
|
|
||
|
|
func IsSupported(path string) bool {
|
||
|
|
ext := strings.ToLower(filepath.Ext(path))
|
||
|
|
_, ok := parsers[ext]
|
||
|
|
return ok
|
||
|
|
}
|
||
|
|
|
||
|
|
func SupportedExtensions() []string {
|
||
|
|
exts := make([]string, 0, len(parsers))
|
||
|
|
for ext := range parsers {
|
||
|
|
exts = append(exts, ext)
|
||
|
|
}
|
||
|
|
return exts
|
||
|
|
}
|
||
|
|
|
||
|
|
func ParseFromBytes(filename string, data []byte) (Result, error) {
|
||
|
|
safe := filepath.Base(filename)
|
||
|
|
if safe == "." || safe == string(filepath.Separator) {
|
||
|
|
return Result{}, fmt.Errorf("invalid filename: %q", filename)
|
||
|
|
}
|
||
|
|
|
||
|
|
tmpDir, err := os.MkdirTemp("", "ingestion-*")
|
||
|
|
if err != nil {
|
||
|
|
return Result{}, fmt.Errorf("create temp dir: %w", err)
|
||
|
|
}
|
||
|
|
defer os.RemoveAll(tmpDir)
|
||
|
|
|
||
|
|
tmpPath := filepath.Join(tmpDir, safe)
|
||
|
|
if err := os.WriteFile(tmpPath, data, 0644); err != nil {
|
||
|
|
return Result{}, fmt.Errorf("write temp file: %w", err)
|
||
|
|
}
|
||
|
|
|
||
|
|
return ParseFile(tmpPath)
|
||
|
|
}
|