feat: add ontology material ingestion

This commit is contained in:
user
2026-04-26 17:49:35 +09:00
parent a413f1ef15
commit 4936cdf4c9
19 changed files with 766 additions and 13 deletions

View File

@@ -7,6 +7,7 @@ import (
"tutor/internal/httpapi"
"tutor/internal/interview"
"tutor/internal/learnermemory"
"tutor/internal/ontology"
"tutor/internal/progression"
"tutor/internal/workflows"
)
@@ -16,8 +17,9 @@ func NewServer(cfg config.Config) *http.Server {
store := interview.NewMemoryStore()
memory := learnermemory.NewService(learnermemory.NewMemoryStore())
progress := progression.NewService(memory)
onto := ontology.NewService(ontology.NewMemoryStore())
service := interview.NewService(store, runner, memory)
handler := httpapi.NewHandler(cfg, service, memory, progress)
handler := httpapi.NewHandler(cfg, service, memory, progress, onto)
return &http.Server{
Addr: cfg.HTTPAddr,

View File

@@ -10,6 +10,7 @@ import (
"tutor/internal/config"
"tutor/internal/interview"
"tutor/internal/learnermemory"
"tutor/internal/ontology"
"tutor/internal/progression"
"tutor/internal/workflows"
)
@@ -18,7 +19,8 @@ func TestDiagnosticHTTPFlow(t *testing.T) {
memory := learnermemory.NewService(learnermemory.NewMemoryStore())
service := interview.NewService(interview.NewMemoryStore(), workflows.NewStubRunner(), memory)
progress := progression.NewService(memory)
handler := NewHandler(config.Config{Environment: "test", ModelKey: "deepseek-v4-flash"}, service, memory, progress)
onto := ontology.NewService(ontology.NewMemoryStore())
handler := NewHandler(config.Config{Environment: "test", ModelKey: "deepseek-v4-flash"}, service, memory, progress, onto)
routes := handler.Routes()
createBody := bytes.NewBufferString(`{

View File

@@ -7,6 +7,7 @@ import (
"tutor/internal/config"
"tutor/internal/interview"
"tutor/internal/learnermemory"
"tutor/internal/ontology"
"tutor/internal/progression"
)
@@ -15,6 +16,7 @@ type Handler struct {
diagnostic *interview.Service
memory *learnermemory.Service
progress *progression.Service
ontology *ontology.Service
}
func NewHandler(
@@ -22,12 +24,14 @@ func NewHandler(
diagnostic *interview.Service,
memory *learnermemory.Service,
progress *progression.Service,
ontology *ontology.Service,
) Handler {
return Handler{
cfg: cfg,
diagnostic: diagnostic,
memory: memory,
progress: progress,
ontology: ontology,
}
}
@@ -40,6 +44,8 @@ func (h Handler) Routes() http.Handler {
mux.HandleFunc("GET /api/v1/learners/{userID}/memory", h.getLearnerMemory)
mux.HandleFunc("GET /api/v1/learners/{userID}/readiness-map", h.getReadinessMap)
mux.HandleFunc("GET /api/v1/learners/{userID}/next-challenge", h.getNextChallenge)
mux.HandleFunc("POST /api/v1/materials", h.ingestMaterial)
mux.HandleFunc("GET /api/v1/ontology", h.getOntology)
return mux
}

View File

@@ -9,6 +9,7 @@ import (
"tutor/internal/config"
"tutor/internal/interview"
"tutor/internal/learnermemory"
"tutor/internal/ontology"
"tutor/internal/progression"
"tutor/internal/workflows"
)
@@ -21,7 +22,8 @@ func TestHealth(t *testing.T) {
memory := learnermemory.NewService(learnermemory.NewMemoryStore())
service := interview.NewService(interview.NewMemoryStore(), workflows.NewStubRunner(), memory)
progress := progression.NewService(memory)
handler := NewHandler(cfg, service, memory, progress)
onto := ontology.NewService(ontology.NewMemoryStore())
handler := NewHandler(cfg, service, memory, progress, onto)
req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
rec := httptest.NewRecorder()

View File

@@ -0,0 +1,48 @@
package httpapi
import (
"encoding/json"
"net/http"
"tutor/internal/ontology"
)
// ingestMaterialRequest is the JSON request body decoded by ingestMaterial.
// Its fields are copied one-to-one into ontology.IngestInput.
type ingestMaterialRequest struct {
// Title is the title of the submitted material.
Title string `json:"title"`
// SourceType is the caller-supplied source type of the material.
SourceType string `json:"source_type"`
// Body is the text content of the material.
Body string `json:"body"`
}
// ingestMaterial decodes a JSON ingestMaterialRequest from the request body,
// hands it to the ontology service, and writes the ingest result as JSON.
//
// Responses:
//   - 404 when the handler has no ontology service configured.
//   - 400 when the body is not valid JSON, or when Ingest returns an error
//     (the error text is returned to the client).
//   - 201 with the ingest result on success.
func (h Handler) ingestMaterial(w http.ResponseWriter, r *http.Request) {
	if h.ontology == nil {
		writeError(w, http.StatusNotFound, "ontology not configured")
		return
	}

	payload := ingestMaterialRequest{}
	decodeErr := json.NewDecoder(r.Body).Decode(&payload)
	if decodeErr != nil {
		writeError(w, http.StatusBadRequest, "invalid JSON body")
		return
	}

	input := ontology.IngestInput{
		Title:      payload.Title,
		SourceType: payload.SourceType,
		Body:       payload.Body,
	}
	ingested, ingestErr := h.ontology.Ingest(input)
	if ingestErr != nil {
		writeError(w, http.StatusBadRequest, ingestErr.Error())
		return
	}

	writeJSON(w, http.StatusCreated, ingested)
}
// getOntology writes the ontology service's current snapshot as JSON with
// status 200, or a 404 error when no ontology service is configured. The
// request itself is not inspected.
func (h Handler) getOntology(w http.ResponseWriter, _ *http.Request) {
	if h.ontology != nil {
		writeJSON(w, http.StatusOK, h.ontology.Snapshot())
		return
	}
	writeError(w, http.StatusNotFound, "ontology not configured")
}

View File

@@ -0,0 +1,54 @@
package httpapi
import (
"bytes"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"tutor/internal/config"
"tutor/internal/interview"
"tutor/internal/learnermemory"
"tutor/internal/ontology"
"tutor/internal/progression"
"tutor/internal/workflows"
)
// TestOntologyHTTPFlow posts a material through the HTTP routes, checks that
// the ingest response carries at least one concept, and then checks that the
// ontology endpoint answers with 200.
func TestOntologyHTTPFlow(t *testing.T) {
	learnerMemory := learnermemory.NewService(learnermemory.NewMemoryStore())
	diagnostic := interview.NewService(interview.NewMemoryStore(), workflows.NewStubRunner(), learnerMemory)
	routes := NewHandler(
		config.Config{Environment: "test"},
		diagnostic,
		learnerMemory,
		progression.NewService(learnerMemory),
		ontology.NewService(ontology.NewMemoryStore()),
	).Routes()

	// Step 1: ingest a material.
	payload := bytes.NewBufferString(`{
"title":"Backend interview notes",
"source_type":"markdown",
"body":"Idempotent API retries need transactions. Cache invalidation uses TTL tradeoffs."
}`)
	ingestRec := httptest.NewRecorder()
	routes.ServeHTTP(ingestRec, httptest.NewRequest(http.MethodPost, "/api/v1/materials", payload))
	if ingestRec.Code != http.StatusCreated {
		t.Fatalf("ingest status = %d, body = %s", ingestRec.Code, ingestRec.Body.String())
	}

	var ingested ontology.IngestResult
	decodeErr := json.NewDecoder(ingestRec.Body).Decode(&ingested)
	if decodeErr != nil {
		t.Fatalf("decode ingest response: %v", decodeErr)
	}
	if len(ingested.Snapshot.Concepts) == 0 {
		t.Fatal("expected ontology concepts")
	}

	// Step 2: read the ontology back.
	snapshotRec := httptest.NewRecorder()
	routes.ServeHTTP(snapshotRec, httptest.NewRequest(http.MethodGet, "/api/v1/ontology", nil))
	if snapshotRec.Code != http.StatusOK {
		t.Fatalf("ontology status = %d, body = %s", snapshotRec.Code, snapshotRec.Body.String())
	}
}

View File

@@ -0,0 +1,38 @@
package ontology
import "tutor/internal/workflows"
// knownConcepts is the built-in catalog of concepts that extraction looks for
// in ingested material. A concept counts as mentioned when any one of its
// Keywords occurs as a substring of the lowercased material body.
var knownConcepts = []knownConcept{
{
Ref: workflows.ConceptRef{ID: "http-idempotency", Label: "HTTP idempotency", Track: "backend-developer"},
Keywords: []string{"idempotent", "idempotency", "retry", "retries"},
},
{
Ref: workflows.ConceptRef{ID: "database-indexes", Label: "Database indexes", Track: "backend-developer"},
Keywords: []string{"index", "indexes", "database index", "query plan"},
},
{
Ref: workflows.ConceptRef{ID: "cache-invalidation", Label: "Cache invalidation", Track: "backend-developer"},
Keywords: []string{"cache", "invalidation", "ttl"},
},
{
Ref: workflows.ConceptRef{ID: "transactions", Label: "Transactions", Track: "backend-developer"},
Keywords: []string{"transaction", "transactions", "atomic", "rollback"},
},
}
// prerequisiteRules lists the built-in prerequisite relationships between
// catalog concepts, by concept ID: FromID is the prerequisite of ToID.
// The rules drive both edge extraction and missing-prerequisite gap detection.
var prerequisiteRules = []prerequisiteRule{
{FromID: "http-idempotency", ToID: "transactions"},
{FromID: "transactions", ToID: "cache-invalidation"},
{FromID: "database-indexes", ToID: "cache-invalidation"},
}
// knownConcept pairs a concept reference with the keywords used to detect it
// in material text.
type knownConcept struct {
// Ref identifies the concept that is recorded when a keyword matches.
Ref workflows.ConceptRef
// Keywords are the search terms; the first one found supplies the evidence quote.
Keywords []string
}
// prerequisiteRule declares that the concept with ID FromID is a prerequisite
// of the concept with ID ToID.
type prerequisiteRule struct {
// FromID is the ID of the prerequisite concept.
FromID string
// ToID is the ID of the concept that depends on FromID.
ToID string
}

View File

@@ -0,0 +1,192 @@
package ontology
import (
	"errors"
	"fmt"
	"sort"
	"strings"
	"sync/atomic"
	"time"
	"unicode/utf8"

	"tutor/internal/workflows"
)
// Service ingests source material, derives concept, edge and gap candidates
// from it, and persists them through a Store.
type Service struct {
// store receives every ingested material with its derived candidates.
store Store
// ids is the shared counter behind nextID; all ID prefixes draw from it.
ids atomic.Uint64
}
// NewService returns a Service that persists to store. The ID counter starts
// at its zero value.
func NewService(store Store) *Service {
	service := new(Service)
	service.store = store
	return service
}
// Ingest validates input, records it as a new Material, derives concept, edge
// and gap candidates from its body, and saves everything to the store.
//
// It returns an error when the title or body is blank (after trimming
// whitespace) or when the store fails to save. On success the result holds the
// new material and a snapshot of the whole store taken after the save.
func (s *Service) Ingest(input IngestInput) (IngestResult, error) {
	switch {
	case strings.TrimSpace(input.Title) == "":
		return IngestResult{}, errors.New("title is required")
	case strings.TrimSpace(input.Body) == "":
		return IngestResult{}, errors.New("body is required")
	}

	// One timestamp is shared by the material and everything derived from it.
	createdAt := time.Now().UTC()
	material := Material{
		ID:         s.nextID("material"),
		Title:      input.Title,
		SourceType: sourceTypeOrDefault(input.SourceType),
		Body:       input.Body,
		CreatedAt:  createdAt,
	}

	concepts := s.extractConcepts(material, createdAt)
	edges := s.extractEdges(concepts, createdAt)
	gaps := s.detectGaps(concepts, edges, createdAt)

	saveErr := s.store.Save(material, concepts, edges, gaps)
	if saveErr != nil {
		return IngestResult{}, saveErr
	}

	result := IngestResult{
		Material: material,
		Snapshot: s.store.Snapshot(),
	}
	return result, nil
}
// Snapshot returns the store's current snapshot.
func (s *Service) Snapshot() Snapshot {
	current := s.store.Snapshot()
	return current
}
// extractConcepts scans the material body for every catalog concept and
// returns one candidate per concept whose keywords occur in it. Each candidate
// carries a single evidence entry quoting the text around the first matching
// keyword. The result is ordered by concept ID and is never nil.
func (s *Service) extractConcepts(material Material, now time.Time) []ConceptCandidate {
	lowered := strings.ToLower(material.Body)
	found := make([]ConceptCandidate, 0, len(knownConcepts))

	for i := range knownConcepts {
		entry := knownConcepts[i]
		quote, matched := firstKeywordQuote(lowered, material.Body, entry.Keywords)
		if matched {
			found = append(found, ConceptCandidate{
				ID:      s.nextID("concept"),
				Concept: entry.Ref,
				Summary: "Source material mentions " + entry.Ref.Label + ".",
				Evidence: []workflows.EvidenceRef{{
					Kind:       workflows.EvidenceSource,
					ID:         material.ID,
					Quote:      quote,
					Confidence: 0.72,
				}},
				ReviewState: ReviewCandidate,
				CreatedAt:   now,
			})
		}
	}

	sort.Slice(found, func(a, b int) bool {
		return found[a].Concept.ID < found[b].Concept.ID
	})
	return found
}
// extractEdges returns a prerequisite edge candidate for every rule whose two
// endpoint concepts are both present in concepts. Edges follow the order of
// prerequisiteRules and reuse a copy of the prerequisite ("from") concept's
// evidence. The result is never nil.
func (s *Service) extractEdges(concepts []ConceptCandidate, now time.Time) []EdgeCandidate {
	present := make(map[string]ConceptCandidate, len(concepts))
	for i := range concepts {
		present[concepts[i].Concept.ID] = concepts[i]
	}

	result := []EdgeCandidate{}
	for _, rule := range prerequisiteRules {
		source, hasSource := present[rule.FromID]
		target, hasTarget := present[rule.ToID]
		if hasSource && hasTarget {
			result = append(result, EdgeCandidate{
				ID:          s.nextID("edge"),
				From:        source.Concept,
				To:          target.Concept,
				Kind:        EdgePrerequisite,
				Evidence:    append([]workflows.EvidenceRef(nil), source.Evidence...),
				ReviewState: ReviewCandidate,
				CreatedAt:   now,
			})
		}
	}
	return result
}
// detectGaps derives gap candidates from one ingest's concepts and edges, in
// three passes:
//
//  1. weak evidence: a concept with exactly one evidence entry whose quote has
//     fewer than six whitespace-separated words;
//  2. missing prerequisite: for each rule whose target concept is present but
//     whose prerequisite is not;
//  3. a single human-review gap on the first concept when several concepts
//     were found but no edges at all.
//
// Gaps are returned in that order; the result is never nil.
func (s *Service) detectGaps(
	concepts []ConceptCandidate,
	edges []EdgeCandidate,
	now time.Time,
) []Gap {
	gaps := []Gap{}

	// record appends one gap about subject, copying its evidence.
	record := func(subject ConceptCandidate, kind GapType, reason string, action ProposedAction) {
		gaps = append(gaps, Gap{
			ID:                 s.nextID("gap"),
			Concept:            subject.Concept,
			GapType:            kind,
			Reason:             reason,
			SupportingEvidence: append([]workflows.EvidenceRef(nil), subject.Evidence...),
			ProposedAction:     action,
			ReviewState:        ReviewCandidate,
			CreatedAt:          now,
		})
	}

	present := make(map[string]ConceptCandidate, len(concepts))
	for i := range concepts {
		candidate := concepts[i]
		present[candidate.Concept.ID] = candidate
		if len(candidate.Evidence) != 1 {
			continue
		}
		if len(strings.Fields(candidate.Evidence[0].Quote)) < 6 {
			record(candidate, GapWeakEvidence, "Concept is mentioned, but source support is thin.", ActionRequestSource)
		}
	}

	for _, rule := range prerequisiteRules {
		target, hasTarget := present[rule.ToID]
		_, hasSource := present[rule.FromID]
		if !hasTarget || hasSource {
			continue
		}
		record(
			target,
			GapMissingPrerequisite,
			"Prerequisite concept "+rule.FromID+" is missing from the material.",
			ActionGenerateCandidate,
		)
	}

	if len(concepts) > 1 && len(edges) == 0 {
		record(
			concepts[0],
			GapMissingPrerequisite,
			"Concept relationship is inferred as incomplete and needs review.",
			ActionHumanReview,
		)
	}
	return gaps
}
// firstKeywordQuote returns an excerpt of originalBody around the first
// keyword (in keywords order) that occurs in lowerBody, and reports whether
// any keyword matched. The excerpt spans roughly 40 bytes before and 80 bytes
// after the match, with surrounding whitespace trimmed.
//
// lowerBody is expected to be strings.ToLower(originalBody). Lowercasing can
// change a rune's encoded length (for example "Ⱥ" is 2 bytes while "ⱥ" is 3),
// so a byte offset found in lowerBody cannot be used directly on originalBody:
// it may point at the wrong text, split a multi-byte rune, or lie past the end
// and panic. The match offsets are therefore translated by rune position, and
// the excerpt window is widened to the nearest rune boundaries so the quote is
// never cut in the middle of a character.
func firstKeywordQuote(lowerBody string, originalBody string, keywords []string) (string, bool) {
	for _, keyword := range keywords {
		needle := strings.ToLower(keyword)
		index := strings.Index(lowerBody, needle)
		if index < 0 {
			continue
		}

		matchStart := offsetInOriginal(lowerBody, originalBody, index)
		matchEnd := offsetInOriginal(lowerBody, originalBody, index+len(needle))

		start := max(0, matchStart-40)
		for start > 0 && !utf8.RuneStart(originalBody[start]) {
			start--
		}
		end := min(len(originalBody), matchEnd+80)
		for end < len(originalBody) && !utf8.RuneStart(originalBody[end]) {
			end++
		}
		return strings.TrimSpace(originalBody[start:end]), true
	}
	return "", false
}

// offsetInOriginal maps a byte offset in lowered to the byte offset of the
// same rune position in original. It relies on lowercasing being rune-for-rune,
// and clamps to len(original) if original has fewer runes than expected.
func offsetInOriginal(lowered string, original string, loweredOffset int) int {
	remaining := utf8.RuneCountInString(lowered[:loweredOffset])
	for offset := range original {
		if remaining == 0 {
			return offset
		}
		remaining--
	}
	return len(original)
}
// sourceTypeOrDefault returns sourceType unchanged unless it is empty or only
// whitespace, in which case it returns "text".
func sourceTypeOrDefault(sourceType string) string {
	if strings.TrimSpace(sourceType) != "" {
		return sourceType
	}
	return "text"
}
// nextID increments the service's counter and returns "<prefix>-<n>" for the
// new value. The counter is shared across all prefixes.
func (s *Service) nextID(prefix string) string {
	sequence := s.ids.Add(1)
	return fmt.Sprintf("%s-%d", prefix, sequence)
}

View File

@@ -0,0 +1,53 @@
package ontology
import "testing"
func TestIngestCreatesSourceBackedCandidates(t *testing.T) {
service := NewService(NewMemoryStore())
result, err := service.Ingest(IngestInput{
Title: "Backend interview notes",
SourceType: "markdown",
Body: "Idempotent API retries need transactions. Cache invalidation uses TTL tradeoffs.",
})
if err != nil {
t.Fatalf("Ingest error: %v", err)
}
if result.Material.ID == "" {
t.Fatal("expected material id")
}
if len(result.Snapshot.Concepts) == 0 {
t.Fatal("expected concept candidates")
}
for _, concept := range result.Snapshot.Concepts {
if concept.ReviewState != ReviewCandidate {
t.Fatalf("review state = %q", concept.ReviewState)
}
if len(concept.Evidence) == 0 {
t.Fatal("expected concept evidence")
}
}
if len(result.Snapshot.Edges) == 0 {
t.Fatal("expected prerequisite edge candidates")
}
}
func TestIngestMarksGapsAsCandidates(t *testing.T) {
service := NewService(NewMemoryStore())
result, err := service.Ingest(IngestInput{
Title: "Cache note",
Body: "Cache invalidation is hard.",
})
if err != nil {
t.Fatalf("Ingest error: %v", err)
}
if len(result.Snapshot.Gaps) == 0 {
t.Fatal("expected gaps")
}
for _, gap := range result.Snapshot.Gaps {
if gap.ReviewState != ReviewCandidate {
t.Fatalf("gap review state = %q", gap.ReviewState)
}
}
}

View File

@@ -0,0 +1,87 @@
package ontology
import "sync"
import "tutor/internal/workflows"
// Store is the persistence seam used by Service.
type Store interface {
// Save records one material together with the concept, edge and gap
// candidates derived from it.
Save(Material, []ConceptCandidate, []EdgeCandidate, []Gap) error
// Snapshot returns the stored materials, concepts, edges and gaps.
Snapshot() Snapshot
}
// MemoryStore is an in-memory Store. Save appends to its slices under the
// write lock and Snapshot copies them out under the read lock.
type MemoryStore struct {
// mu guards all slices below.
mu sync.RWMutex
materials []Material
concepts []ConceptCandidate
edges []EdgeCandidate
gaps []Gap
}
// NewMemoryStore returns an empty MemoryStore.
func NewMemoryStore() *MemoryStore {
	return new(MemoryStore)
}
// Save appends copies of the material and its derived candidates to the
// store while holding the write lock. It always returns nil.
func (s *MemoryStore) Save(
	material Material,
	concepts []ConceptCandidate,
	edges []EdgeCandidate,
	gaps []Gap,
) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Store copies so later changes to the caller's slices do not leak in.
	storedMaterial := cloneMaterial(material)
	storedConcepts := cloneConcepts(concepts)
	storedEdges := cloneEdges(edges)
	storedGaps := cloneGaps(gaps)

	s.materials = append(s.materials, storedMaterial)
	s.concepts = append(s.concepts, storedConcepts...)
	s.edges = append(s.edges, storedEdges...)
	s.gaps = append(s.gaps, storedGaps...)
	return nil
}
// Snapshot returns copies of everything stored so far, taken under the read
// lock, so the caller can modify the result without affecting the store.
func (s *MemoryStore) Snapshot() Snapshot {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var snapshot Snapshot
	snapshot.Materials = cloneMaterials(s.materials)
	snapshot.Concepts = cloneConcepts(s.concepts)
	snapshot.Edges = cloneEdges(s.edges)
	snapshot.Gaps = cloneGaps(s.gaps)
	return snapshot
}
// cloneMaterial returns its argument; Material is passed by value, so the
// result is already an independent copy.
func cloneMaterial(material Material) Material {
	copied := material
	return copied
}
// cloneMaterials returns a new, non-nil slice holding the same materials as
// items.
func cloneMaterials(items []Material) []Material {
	return append(make([]Material, 0, len(items)), items...)
}
// cloneConcepts returns a new, non-nil slice of concept candidates in which
// each element has its own copy of the Evidence slice.
func cloneConcepts(items []ConceptCandidate) []ConceptCandidate {
	out := make([]ConceptCandidate, 0, len(items))
	for i := range items {
		copied := items[i]
		copied.Evidence = append([]workflows.EvidenceRef(nil), items[i].Evidence...)
		out = append(out, copied)
	}
	return out
}
// cloneEdges returns a new, non-nil slice of edge candidates in which each
// element has its own copy of the Evidence slice.
func cloneEdges(items []EdgeCandidate) []EdgeCandidate {
	out := make([]EdgeCandidate, 0, len(items))
	for i := range items {
		copied := items[i]
		copied.Evidence = append([]workflows.EvidenceRef(nil), items[i].Evidence...)
		out = append(out, copied)
	}
	return out
}
// cloneGaps returns a new, non-nil slice of gaps in which each element has its
// own copy of the SupportingEvidence slice.
func cloneGaps(items []Gap) []Gap {
	out := make([]Gap, 0, len(items))
	for i := range items {
		copied := items[i]
		copied.SupportingEvidence = append([]workflows.EvidenceRef(nil), items[i].SupportingEvidence...)
		out = append(out, copied)
	}
	return out
}

View File

@@ -0,0 +1,91 @@
package ontology
import (
"time"
"tutor/internal/workflows"
)
// ReviewState is the review status attached to concept, edge and gap
// candidates.
type ReviewState string
const (
// ReviewCandidate is the state assigned to everything produced by ingestion.
ReviewCandidate ReviewState = "candidate"
// ReviewReviewed is serialized as "reviewed".
ReviewReviewed ReviewState = "reviewed"
)
// Material is one ingested piece of source material.
type Material struct {
ID string `json:"id"`
Title string `json:"title"`
SourceType string `json:"source_type"`
// Body is left out of the JSON encoding when empty.
Body string `json:"body,omitempty"`
CreatedAt time.Time `json:"created_at"`
}
// ConceptCandidate is a concept detected in a material, together with the
// evidence that supports it and its review state.
type ConceptCandidate struct {
ID string `json:"id"`
Concept workflows.ConceptRef `json:"concept"`
Summary string `json:"summary"`
Evidence []workflows.EvidenceRef `json:"evidence"`
ReviewState ReviewState `json:"review_state"`
CreatedAt time.Time `json:"created_at"`
}
// EdgeCandidate is a proposed relationship of the given Kind from one concept
// to another, with supporting evidence and a review state.
type EdgeCandidate struct {
ID string `json:"id"`
From workflows.ConceptRef `json:"from"`
To workflows.ConceptRef `json:"to"`
Kind EdgeKind `json:"kind"`
Evidence []workflows.EvidenceRef `json:"evidence"`
ReviewState ReviewState `json:"review_state"`
CreatedAt time.Time `json:"created_at"`
}
// EdgeKind names the type of relationship an EdgeCandidate describes.
type EdgeKind string
const (
// EdgePrerequisite marks From as a prerequisite of To.
EdgePrerequisite EdgeKind = "prerequisite"
)
// Gap is a detected weakness in the ontology concerning one concept, with a
// human-readable reason, supporting evidence and a proposed follow-up action.
type Gap struct {
ID string `json:"id"`
Concept workflows.ConceptRef `json:"concept"`
GapType GapType `json:"gap_type"`
Reason string `json:"reason"`
SupportingEvidence []workflows.EvidenceRef `json:"supporting_evidence"`
ProposedAction ProposedAction `json:"proposed_action"`
ReviewState ReviewState `json:"review_state"`
CreatedAt time.Time `json:"created_at"`
}
// GapType classifies a Gap.
type GapType string
const (
// GapMissingPrerequisite is used when a prerequisite relationship is absent
// or incomplete for a concept.
GapMissingPrerequisite GapType = "missing_prerequisite"
// GapWeakEvidence is used when a concept is mentioned but its supporting
// quote is very short.
GapWeakEvidence GapType = "weak_evidence"
)
// ProposedAction is the follow-up suggested for a Gap.
type ProposedAction string
const (
// ActionGenerateCandidate is proposed when a rule's prerequisite concept is
// missing from the material.
ActionGenerateCandidate ProposedAction = "generate_candidate"
// ActionRequestSource is proposed for weak-evidence gaps.
ActionRequestSource ProposedAction = "request_source"
// ActionHumanReview is proposed when several concepts were found but no
// edges between them.
ActionHumanReview ProposedAction = "human_review"
)
// IngestInput is the argument to Service.Ingest.
type IngestInput struct {
// Title is required; a blank title is rejected.
Title string
// SourceType is optional; a blank value is stored as "text".
SourceType string
// Body is required; a blank body is rejected.
Body string
}
// IngestResult is returned by Service.Ingest.
type IngestResult struct {
// Material is the newly recorded material.
Material Material `json:"material"`
// Snapshot is the store's snapshot taken after the material was saved.
Snapshot Snapshot `json:"snapshot"`
}
// Snapshot is a copy of everything held by a Store: materials and the
// concept, edge and gap candidates derived from them.
type Snapshot struct {
Materials []Material `json:"materials"`
Concepts []ConceptCandidate `json:"concepts"`
Edges []EdgeCandidate `json:"edges"`
Gaps []Gap `json:"gaps"`
}