feat: add file upload for materials (PDF/DOCX) with ingestion pipeline

This commit is contained in:
root
2026-04-29 15:52:35 +09:00
parent 518370b93e
commit 7f503326f9
51 changed files with 4712 additions and 27 deletions

View File

@@ -0,0 +1,75 @@
package ingestion
import (
"fmt"
"os"
"path/filepath"
"strings"
)
// Result is the outcome of parsing a single document with ParseFile or
// ParseFromBytes.
type Result struct {
// Title is derived by ParseFile from the file's base name with the
// extension stripped.
Title string
// Body is the text returned by the format-specific parser, with leading and
// trailing whitespace trimmed.
Body string
// Format is the lowercased file extension without the leading dot
// (for example "pdf" or "md").
Format string
}
// parsers is the dispatch table used to pick a parser for a file. Keys are
// lowercased file extensions including the leading dot; each value is a
// function that takes a file path and returns the document content as a string.
var parsers = map[string]func(string) (string, error){
".md": ParseMarkdown,
".markdown": ParseMarkdown,
".pdf": ParsePDF,
".docx": ParseDOCX,
}
// ParseFile parses the document at path with the parser registered for its
// file extension. Extension matching is case-insensitive.
//
// On success the returned Result carries the file's base name without its
// extension as Title, the whitespace-trimmed parser output as Body, and the
// lowercased extension without the leading dot as Format.
//
// It returns an error if no parser is registered for the extension, or if the
// parser itself fails (the parser's error is wrapped).
func ParseFile(path string) (Result, error) {
	// Keep the extension exactly as written in the path: the lowercased form
	// is only for the lookup, and would fail to strip e.g. ".PDF" from the
	// title below.
	rawExt := filepath.Ext(path)
	ext := strings.ToLower(rawExt)

	parse, ok := parsers[ext]
	if !ok {
		return Result{}, fmt.Errorf("unsupported file format: %s", ext)
	}

	body, err := parse(path)
	if err != nil {
		return Result{}, fmt.Errorf("parse %s: %w", ext, err)
	}

	return Result{
		Title:  strings.TrimSuffix(filepath.Base(path), rawExt),
		Body:   strings.TrimSpace(body),
		Format: strings.TrimPrefix(ext, "."),
	}, nil
}
// IsSupported reports whether a parser is registered for the extension of
// path. The extension is compared case-insensitively.
func IsSupported(path string) bool {
	_, found := parsers[strings.ToLower(filepath.Ext(path))]
	return found
}
// SupportedExtensions returns every extension that has a registered parser,
// each including its leading dot. The slice is freshly allocated on each call.
// Its order follows map iteration and is therefore unspecified; callers that
// need a stable order must sort the result.
func SupportedExtensions() []string {
	list := make([]string, len(parsers))
	i := 0
	for key := range parsers {
		list[i] = key
		i++
	}
	return list
}
// ParseFromBytes parses an in-memory document. filename is used only to select
// the parser (by extension) and to derive the title; only its base name is
// considered, so directory components supplied by the caller are discarded.
//
// Because parsing goes through ParseFile, which works on a path, data is
// written to a file inside a freshly created temporary directory. That
// directory is removed before the function returns.
//
// It returns an error if filename has no usable base name, if its extension
// is not supported, if the temporary directory or file cannot be created, or
// if parsing fails.
func ParseFromBytes(filename string, data []byte) (Result, error) {
	safe := filepath.Base(filename)
	// filepath.Base yields "." for an empty name and a lone separator for a
	// path made only of separators. ".." would make the joined path below
	// resolve to the temp directory's parent, so reject it up front as well.
	if safe == "." || safe == ".." || safe == string(filepath.Separator) {
		return Result{}, fmt.Errorf("invalid filename: %q", filename)
	}

	// Reject unsupported formats before touching the disk. The message matches
	// the one ParseFile produces for the same condition.
	if !IsSupported(safe) {
		return Result{}, fmt.Errorf("unsupported file format: %s", strings.ToLower(filepath.Ext(safe)))
	}

	tmpDir, err := os.MkdirTemp("", "ingestion-*")
	if err != nil {
		return Result{}, fmt.Errorf("create temp dir: %w", err)
	}
	// Best-effort cleanup: a failure to remove the directory must not mask the
	// parse result.
	defer os.RemoveAll(tmpDir)

	tmpPath := filepath.Join(tmpDir, safe)
	// The upload is only read back by this process, so keep it owner-only.
	if err := os.WriteFile(tmpPath, data, 0600); err != nil {
		return Result{}, fmt.Errorf("write temp file: %w", err)
	}

	return ParseFile(tmpPath)
}