feat: add file upload for materials (PDF/DOCX) with ingestion pipeline
This commit is contained in:
@@ -50,6 +50,7 @@ func (h Handler) Routes() http.Handler {
|
||||
mux.HandleFunc("GET /api/v1/learners/{userID}/readiness-map", h.getReadinessMap)
|
||||
mux.HandleFunc("GET /api/v1/learners/{userID}/next-challenge", h.getNextChallenge)
|
||||
mux.HandleFunc("POST /api/v1/materials", h.ingestMaterial)
|
||||
mux.HandleFunc("POST /api/v1/materials/upload", h.uploadMaterial)
|
||||
mux.HandleFunc("GET /api/v1/ontology", h.getOntology)
|
||||
mux.HandleFunc("POST /api/v1/teaching-assets/prompts", h.generateTeachingAssetPrompt)
|
||||
mux.HandleFunc("GET /api/v1/teaching-assets", h.getTeachingAssets)
|
||||
|
||||
67
internal/httpapi/material_upload.go
Normal file
67
internal/httpapi/material_upload.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package httpapi
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"tutor/internal/ingestion"
|
||||
"tutor/internal/ontology"
|
||||
)
|
||||
|
||||
func (h Handler) uploadMaterial(w http.ResponseWriter, r *http.Request) {
|
||||
if h.ontology == nil {
|
||||
writeError(w, http.StatusNotFound, "ontology not configured")
|
||||
return
|
||||
}
|
||||
|
||||
if err := r.ParseMultipartForm(32 << 20); err != nil {
|
||||
writeError(w, http.StatusBadRequest, "invalid multipart form")
|
||||
return
|
||||
}
|
||||
|
||||
file, header, err := r.FormFile("file")
|
||||
if err != nil {
|
||||
writeError(w, http.StatusBadRequest, "file field required")
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
if !ingestion.IsSupported(header.Filename) {
|
||||
writeError(w, http.StatusBadRequest, "unsupported file format; supported: .md, .markdown, .pdf, .docx")
|
||||
return
|
||||
}
|
||||
|
||||
data, err := io.ReadAll(file)
|
||||
if err != nil {
|
||||
writeError(w, http.StatusInternalServerError, "failed to read file")
|
||||
return
|
||||
}
|
||||
|
||||
result, err := ingestion.ParseFromBytes(header.Filename, data)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "unsupported") {
|
||||
writeError(w, http.StatusBadRequest, "parse error: "+err.Error())
|
||||
return
|
||||
}
|
||||
writeError(w, http.StatusInternalServerError, "parse error: "+err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
title := r.FormValue("title")
|
||||
if title == "" {
|
||||
title = result.Title
|
||||
}
|
||||
|
||||
ingestResult, err := h.ontology.Ingest(ontology.IngestInput{
|
||||
Title: title,
|
||||
SourceType: result.Format,
|
||||
Body: result.Body,
|
||||
})
|
||||
if err != nil {
|
||||
writeError(w, http.StatusBadRequest, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
writeJSON(w, http.StatusCreated, ingestResult)
|
||||
}
|
||||
221
internal/httpapi/material_upload_test.go
Normal file
221
internal/httpapi/material_upload_test.go
Normal file
@@ -0,0 +1,221 @@
|
||||
package httpapi
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"tutor/internal/config"
|
||||
"tutor/internal/interview"
|
||||
"tutor/internal/learnermemory"
|
||||
"tutor/internal/ontology"
|
||||
"tutor/internal/progression"
|
||||
"tutor/internal/teachingassets"
|
||||
"tutor/internal/workflows"
|
||||
)
|
||||
|
||||
func TestUploadMaterialMarkdown(t *testing.T) {
|
||||
memory := learnermemory.NewService(learnermemory.NewMemoryStore())
|
||||
service := interview.NewService(interview.NewMemoryStore(), workflows.NewStubRunner(), memory)
|
||||
progress := progression.NewService(memory)
|
||||
onto := ontology.NewService(ontology.NewMemoryStore())
|
||||
assets := teachingassets.NewService(teachingassets.NewMemoryStore(), onto, "gpt-image-v2")
|
||||
handler := NewHandler(config.Config{Environment: "test"}, service, memory, progress, onto, assets)
|
||||
routes := handler.Routes()
|
||||
|
||||
var buf bytes.Buffer
|
||||
w := multipart.NewWriter(&buf)
|
||||
part, _ := w.CreateFormFile("file", "notes.md")
|
||||
io.Copy(part, strings.NewReader("# Backend notes\nIdempotent API retries need transactions."))
|
||||
w.Close()
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/materials/upload", &buf)
|
||||
req.Header.Set("Content-Type", w.FormDataContentType())
|
||||
rec := httptest.NewRecorder()
|
||||
routes.ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusCreated {
|
||||
t.Fatalf("status = %d, body = %s", rec.Code, rec.Body.String())
|
||||
}
|
||||
|
||||
var result ontology.IngestResult
|
||||
decodeJSON(t, rec.Body, &result)
|
||||
if len(result.Snapshot.Concepts) == 0 {
|
||||
t.Fatal("expected concept candidates after md upload")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadMaterialPDF(t *testing.T) {
|
||||
memory := learnermemory.NewService(learnermemory.NewMemoryStore())
|
||||
service := interview.NewService(interview.NewMemoryStore(), workflows.NewStubRunner(), memory)
|
||||
progress := progression.NewService(memory)
|
||||
onto := ontology.NewService(ontology.NewMemoryStore())
|
||||
assets := teachingassets.NewService(teachingassets.NewMemoryStore(), onto, "gpt-image-v2")
|
||||
handler := NewHandler(config.Config{Environment: "test"}, service, memory, progress, onto, assets)
|
||||
routes := handler.Routes()
|
||||
|
||||
var buf bytes.Buffer
|
||||
w := multipart.NewWriter(&buf)
|
||||
part, _ := w.CreateFormFile("file", "notes.pdf")
|
||||
io.Copy(part, strings.NewReader("not a real pdf"))
|
||||
w.Close()
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/materials/upload", &buf)
|
||||
req.Header.Set("Content-Type", w.FormDataContentType())
|
||||
rec := httptest.NewRecorder()
|
||||
routes.ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusInternalServerError {
|
||||
t.Fatalf("expected 500 for invalid PDF, got %d: %s", rec.Code, rec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadMaterialUnsupportedFormat(t *testing.T) {
|
||||
memory := learnermemory.NewService(learnermemory.NewMemoryStore())
|
||||
service := interview.NewService(interview.NewMemoryStore(), workflows.NewStubRunner(), memory)
|
||||
progress := progression.NewService(memory)
|
||||
onto := ontology.NewService(ontology.NewMemoryStore())
|
||||
assets := teachingassets.NewService(teachingassets.NewMemoryStore(), onto, "gpt-image-v2")
|
||||
handler := NewHandler(config.Config{Environment: "test"}, service, memory, progress, onto, assets)
|
||||
routes := handler.Routes()
|
||||
|
||||
var buf bytes.Buffer
|
||||
w := multipart.NewWriter(&buf)
|
||||
part, _ := w.CreateFormFile("file", "notes.txt")
|
||||
io.Copy(part, strings.NewReader("plain text"))
|
||||
w.Close()
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/materials/upload", &buf)
|
||||
req.Header.Set("Content-Type", w.FormDataContentType())
|
||||
rec := httptest.NewRecorder()
|
||||
routes.ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected 400 for unsupported format, got %d: %s", rec.Code, rec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadMaterialMissingFile(t *testing.T) {
|
||||
memory := learnermemory.NewService(learnermemory.NewMemoryStore())
|
||||
service := interview.NewService(interview.NewMemoryStore(), workflows.NewStubRunner(), memory)
|
||||
progress := progression.NewService(memory)
|
||||
onto := ontology.NewService(ontology.NewMemoryStore())
|
||||
assets := teachingassets.NewService(teachingassets.NewMemoryStore(), onto, "gpt-image-v2")
|
||||
handler := NewHandler(config.Config{Environment: "test"}, service, memory, progress, onto, assets)
|
||||
routes := handler.Routes()
|
||||
|
||||
var buf bytes.Buffer
|
||||
w := multipart.NewWriter(&buf)
|
||||
w.Close()
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/materials/upload", &buf)
|
||||
req.Header.Set("Content-Type", w.FormDataContentType())
|
||||
rec := httptest.NewRecorder()
|
||||
routes.ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected 400 for missing file, got %d: %s", rec.Code, rec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadMaterialWithCustomTitle(t *testing.T) {
|
||||
memory := learnermemory.NewService(learnermemory.NewMemoryStore())
|
||||
service := interview.NewService(interview.NewMemoryStore(), workflows.NewStubRunner(), memory)
|
||||
progress := progression.NewService(memory)
|
||||
onto := ontology.NewService(ontology.NewMemoryStore())
|
||||
assets := teachingassets.NewService(teachingassets.NewMemoryStore(), onto, "gpt-image-v2")
|
||||
handler := NewHandler(config.Config{Environment: "test"}, service, memory, progress, onto, assets)
|
||||
routes := handler.Routes()
|
||||
|
||||
var buf bytes.Buffer
|
||||
w := multipart.NewWriter(&buf)
|
||||
w.WriteField("title", "Custom Title")
|
||||
part, _ := w.CreateFormFile("file", "notes.md")
|
||||
io.Copy(part, strings.NewReader("Cache invalidation with TTL."))
|
||||
w.Close()
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/materials/upload", &buf)
|
||||
req.Header.Set("Content-Type", w.FormDataContentType())
|
||||
rec := httptest.NewRecorder()
|
||||
routes.ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusCreated {
|
||||
t.Fatalf("status = %d, body = %s", rec.Code, rec.Body.String())
|
||||
}
|
||||
|
||||
var result ontology.IngestResult
|
||||
decodeJSON(t, rec.Body, &result)
|
||||
if result.Material.Title != "Custom Title" {
|
||||
t.Fatalf("title = %q, want %q", result.Material.Title, "Custom Title")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUploadMaterialOntologyNotConfigured(t *testing.T) {
|
||||
handler := NewHandler(config.Config{Environment: "test"}, nil, nil, nil, nil, nil)
|
||||
routes := handler.Routes()
|
||||
|
||||
var buf bytes.Buffer
|
||||
w := multipart.NewWriter(&buf)
|
||||
part, _ := w.CreateFormFile("file", "notes.md")
|
||||
io.Copy(part, strings.NewReader("# test"))
|
||||
w.Close()
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/materials/upload", &buf)
|
||||
req.Header.Set("Content-Type", w.FormDataContentType())
|
||||
rec := httptest.NewRecorder()
|
||||
routes.ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusNotFound {
|
||||
t.Fatalf("expected 404, got %d: %s", rec.Code, rec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// decodeJSON reads one JSON value from r into v. If decoding fails, the
// calling test is failed immediately; t.Helper attributes the failure to the
// caller's line.
func decodeJSON(t *testing.T, r io.Reader, v any) {
	t.Helper()

	dec := json.NewDecoder(r)
	err := dec.Decode(v)
	if err == nil {
		return
	}
	t.Fatalf("decode error: %v", err)
}
|
||||
|
||||
func TestUploadMaterialMarkdownFrontmatter(t *testing.T) {
|
||||
memory := learnermemory.NewService(learnermemory.NewMemoryStore())
|
||||
service := interview.NewService(interview.NewMemoryStore(), workflows.NewStubRunner(), memory)
|
||||
progress := progression.NewService(memory)
|
||||
onto := ontology.NewService(ontology.NewMemoryStore())
|
||||
assets := teachingassets.NewService(teachingassets.NewMemoryStore(), onto, "gpt-image-v2")
|
||||
handler := NewHandler(config.Config{Environment: "test"}, service, memory, progress, onto, assets)
|
||||
routes := handler.Routes()
|
||||
|
||||
var buf bytes.Buffer
|
||||
w := multipart.NewWriter(&buf)
|
||||
part, _ := w.CreateFormFile("file", "study-notes.md")
|
||||
io.Copy(part, strings.NewReader(fmt.Sprintf("---\ntitle: Study Notes\ntags:\n - backend\n - go\n---\n\n# HTTP Idempotency\n\nIdempotent API retries need transactions for correctness.")))
|
||||
w.Close()
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/materials/upload", &buf)
|
||||
req.Header.Set("Content-Type", w.FormDataContentType())
|
||||
rec := httptest.NewRecorder()
|
||||
routes.ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusCreated {
|
||||
t.Fatalf("status = %d, body = %s", rec.Code, rec.Body.String())
|
||||
}
|
||||
|
||||
var result ontology.IngestResult
|
||||
decodeJSON(t, rec.Body, &result)
|
||||
if len(result.Snapshot.Concepts) == 0 {
|
||||
t.Fatal("expected concepts from markdown with frontmatter")
|
||||
}
|
||||
for _, c := range result.Snapshot.Concepts {
|
||||
if c.Concept.ID == "http-idempotency" {
|
||||
return
|
||||
}
|
||||
}
|
||||
t.Fatalf("expected http-idempotency concept, got concepts: %v", result.Snapshot.Concepts)
|
||||
}
|
||||
Reference in New Issue
Block a user