feat: add file upload for materials (PDF/DOCX) with ingestion pipeline

This commit is contained in:
root
2026-04-29 15:52:35 +09:00
parent 518370b93e
commit 7f503326f9
51 changed files with 4712 additions and 27 deletions

View File

@@ -0,0 +1,57 @@
package ingestion
import (
"os"
"strings"
)
// ParseMarkdown reads the file at path and returns its contents as a string
// after passing them through stripFrontmatter and trimming leading and
// trailing whitespace.
//
// If the file cannot be read, the error from os.ReadFile is returned as-is
// together with an empty string.
func ParseMarkdown(path string) (string, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return "", err
	}
	body := stripFrontmatter(string(raw))
	return strings.TrimSpace(body), nil
}
// stripFrontmatter removes a leading front matter block from content and
// returns what follows it.
//
// Leading newlines, carriage returns, tabs and spaces are always trimmed
// first. A front matter block is recognised only when the first remaining
// line is exactly "---" (trailing spaces, tabs and "\r" are tolerated) and a
// later line is again "---" under the same tolerance. When either delimiter
// is missing, the (left-trimmed) content is returned unchanged, so documents
// that merely begin with a thematic break such as "----" or with text like
// "---note" keep their full body.
//
// The newline characters immediately following the closing delimiter are
// dropped from the result.
func stripFrontmatter(content string) string {
	content = strings.TrimLeft(content, "\n\r\t ")
	if !strings.HasPrefix(content, "---") {
		return content
	}
	rest := content[len("---"):]

	// The opening delimiter must stand alone on its line; otherwise the
	// prefix is ordinary document text (e.g. "----" or "---text") and
	// stripping up to the next "---" would silently delete content.
	openerTail := rest
	if nl := strings.IndexByte(rest, '\n'); nl >= 0 {
		openerTail = rest[:nl]
	}
	if strings.TrimRight(openerTail, " \t\r") != "" {
		return content
	}

	closing := findFMClosing(rest)
	if closing < 0 {
		return content
	}
	return strings.TrimLeft(rest[closing:], "\n\r")
}

// findFMClosing locates the closing front matter delimiter in s, where s is
// the text that follows the opening "---".
//
// The first line of s (the remainder of the opener's line) is skipped. Each
// subsequent line is compared against "---" after removing trailing spaces,
// tabs and "\r". On a match, the returned offset points just past that line
// in s, i.e. at its terminating '\n' or at len(s) when the delimiter is the
// last line. It returns -1 when no closing delimiter exists.
func findFMClosing(s string) int {
	nl := strings.IndexByte(s, '\n')
	if nl < 0 {
		return -1
	}
	for pos := nl + 1; pos < len(s); {
		end := len(s)
		if rel := strings.IndexByte(s[pos:], '\n'); rel >= 0 {
			end = pos + rel
		}
		if strings.TrimRight(s[pos:end], " \t\r") == "---" {
			return end
		}
		// Step over the line terminator to the start of the next line.
		pos = end + 1
	}
	return -1
}