feat: add file upload for materials (PDF/DOCX) with ingestion pipeline
This commit is contained in:
75
internal/ingestion/ingestion.go
Normal file
75
internal/ingestion/ingestion.go
Normal file
@@ -0,0 +1,75 @@
|
||||
package ingestion
|
||||
|
||||
import (
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
)
|
||||
|
||||
// Result is the outcome of parsing one file: a title taken from the file
// name, the extracted text, and the format the file was parsed as.
type Result struct {
	// Title is derived from the file's base name (ParseFile strips the
	// extension from it).
	Title string

	// Body is the text returned by the format-specific parser, with leading
	// and trailing whitespace removed by ParseFile.
	Body string

	// Format is the lower-cased file extension without its leading dot,
	// for example "pdf".
	Format string
}
|
||||
|
||||
var parsers = map[string]func(string) (string, error){
|
||||
".md": ParseMarkdown,
|
||||
".markdown": ParseMarkdown,
|
||||
".pdf": ParsePDF,
|
||||
".docx": ParseDOCX,
|
||||
}
|
||||
|
||||
func ParseFile(path string) (Result, error) {
|
||||
ext := strings.ToLower(filepath.Ext(path))
|
||||
parse, ok := parsers[ext]
|
||||
if !ok {
|
||||
return Result{}, fmt.Errorf("unsupported file format: %s", ext)
|
||||
}
|
||||
|
||||
body, err := parse(path)
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("parse %s: %w", ext, err)
|
||||
}
|
||||
|
||||
title := strings.TrimSuffix(filepath.Base(path), ext)
|
||||
return Result{
|
||||
Title: title,
|
||||
Body: strings.TrimSpace(body),
|
||||
Format: ext[1:],
|
||||
}, nil
|
||||
}
|
||||
|
||||
func IsSupported(path string) bool {
|
||||
ext := strings.ToLower(filepath.Ext(path))
|
||||
_, ok := parsers[ext]
|
||||
return ok
|
||||
}
|
||||
|
||||
func SupportedExtensions() []string {
|
||||
exts := make([]string, 0, len(parsers))
|
||||
for ext := range parsers {
|
||||
exts = append(exts, ext)
|
||||
}
|
||||
return exts
|
||||
}
|
||||
|
||||
func ParseFromBytes(filename string, data []byte) (Result, error) {
|
||||
safe := filepath.Base(filename)
|
||||
if safe == "." || safe == string(filepath.Separator) {
|
||||
return Result{}, fmt.Errorf("invalid filename: %q", filename)
|
||||
}
|
||||
|
||||
tmpDir, err := os.MkdirTemp("", "ingestion-*")
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("create temp dir: %w", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
tmpPath := filepath.Join(tmpDir, safe)
|
||||
if err := os.WriteFile(tmpPath, data, 0644); err != nil {
|
||||
return Result{}, fmt.Errorf("write temp file: %w", err)
|
||||
}
|
||||
|
||||
return ParseFile(tmpPath)
|
||||
}
|
||||
Reference in New Issue
Block a user