feat: add ontology material ingestion

This commit is contained in:
user
2026-04-26 17:49:35 +09:00
parent a413f1ef15
commit 4936cdf4c9
19 changed files with 766 additions and 13 deletions

View File

@@ -0,0 +1,38 @@
package ontology
import "tutor/internal/workflows"
// knownConcepts is the fixed catalogue of concepts that can be recognised in
// ingested material. Each entry pairs a concept reference with the keywords
// whose presence in the material body counts as a mention of that concept.
var knownConcepts = []knownConcept{
{
Ref: workflows.ConceptRef{ID: "http-idempotency", Label: "HTTP idempotency", Track: "backend-developer"},
Keywords: []string{"idempotent", "idempotency", "retry", "retries"},
},
{
Ref: workflows.ConceptRef{ID: "database-indexes", Label: "Database indexes", Track: "backend-developer"},
Keywords: []string{"index", "indexes", "database index", "query plan"},
},
{
Ref: workflows.ConceptRef{ID: "cache-invalidation", Label: "Cache invalidation", Track: "backend-developer"},
Keywords: []string{"cache", "invalidation", "ttl"},
},
{
Ref: workflows.ConceptRef{ID: "transactions", Label: "Transactions", Track: "backend-developer"},
Keywords: []string{"transaction", "transactions", "atomic", "rollback"},
},
}
// prerequisiteRules lists the directed prerequisite relationships between
// known concept IDs: the FromID concept is treated as a prerequisite of the
// ToID concept. The IDs refer to entries in knownConcepts.
var prerequisiteRules = []prerequisiteRule{
{FromID: "http-idempotency", ToID: "transactions"},
{FromID: "transactions", ToID: "cache-invalidation"},
{FromID: "database-indexes", ToID: "cache-invalidation"},
}
// knownConcept describes one recognisable concept and how to spot it in text.
type knownConcept struct {
// Ref identifies the concept (ID, label and track).
Ref workflows.ConceptRef
// Keywords are the terms searched for in the material body; the first one
// found marks the concept as mentioned.
Keywords []string
}
// prerequisiteRule is a single directed prerequisite relationship between two
// concepts, expressed by concept ID.
type prerequisiteRule struct {
// FromID is the ID of the prerequisite concept.
FromID string
// ToID is the ID of the concept that depends on FromID.
ToID string
}

View File

@@ -0,0 +1,192 @@
package ontology
import (
	"errors"
	"fmt"
	"sort"
	"strings"
	"sync/atomic"
	"time"
	"unicode"
	"unicode/utf8"

	"tutor/internal/workflows"
)
// Service derives candidate concepts, edges and gaps from ingested material
// and persists them through a Store.
type Service struct {
// store receives every successfully ingested material and serves snapshots.
store Store
// ids is a shared counter used to number generated IDs; it is shared by all
// ID prefixes, so numbers are unique per Service rather than per kind.
ids atomic.Uint64
}
// NewService builds a Service that persists and reads its data through store.
func NewService(store Store) *Service {
	svc := new(Service)
	svc.store = store
	return svc
}
// Ingest records a piece of source material and the candidates derived from it.
//
// The title and body must contain something other than whitespace; otherwise an
// error is returned and nothing is stored. A blank source type is replaced by
// the default. The material, its concept candidates, prerequisite edges and
// detected gaps all share one UTC creation timestamp and are saved together.
// On success the result carries the stored material and a fresh snapshot of the
// store; a save failure is returned unchanged with an empty result.
func (s *Service) Ingest(input IngestInput) (IngestResult, error) {
	switch {
	case strings.TrimSpace(input.Title) == "":
		return IngestResult{}, errors.New("title is required")
	case strings.TrimSpace(input.Body) == "":
		return IngestResult{}, errors.New("body is required")
	}

	createdAt := time.Now().UTC()
	material := Material{
		ID:         s.nextID("material"),
		Title:      input.Title,
		SourceType: sourceTypeOrDefault(input.SourceType),
		Body:       input.Body,
		CreatedAt:  createdAt,
	}

	// Each stage feeds the next: edges need the concepts, gaps need both.
	candidates := s.extractConcepts(material, createdAt)
	links := s.extractEdges(candidates, createdAt)
	gaps := s.detectGaps(candidates, links, createdAt)

	err := s.store.Save(material, candidates, links, gaps)
	if err != nil {
		return IngestResult{}, err
	}

	var result IngestResult
	result.Material = material
	result.Snapshot = s.store.Snapshot()
	return result, nil
}
// Snapshot returns the store's current view of all materials, concepts, edges
// and gaps.
func (s *Service) Snapshot() Snapshot {
	current := s.store.Snapshot()
	return current
}
// extractConcepts scans the material body for every known concept and returns
// one candidate per concept whose keywords appear in it.
//
// Matching is done against a lowercased copy of the body. Each candidate gets a
// single piece of source evidence that quotes the text around the first keyword
// hit, with a fixed confidence of 0.72. The result is never nil and is ordered
// by concept ID.
func (s *Service) extractConcepts(material Material, now time.Time) []ConceptCandidate {
	lowered := strings.ToLower(material.Body)
	found := []ConceptCandidate{}

	for i := range knownConcepts {
		entry := knownConcepts[i]
		quote, matched := firstKeywordQuote(lowered, material.Body, entry.Keywords)
		if matched {
			evidence := workflows.EvidenceRef{
				Kind:       workflows.EvidenceSource,
				ID:         material.ID,
				Quote:      quote,
				Confidence: 0.72,
			}
			found = append(found, ConceptCandidate{
				ID:          s.nextID("concept"),
				Concept:     entry.Ref,
				Summary:     "Source material mentions " + entry.Ref.Label + ".",
				Evidence:    []workflows.EvidenceRef{evidence},
				ReviewState: ReviewCandidate,
				CreatedAt:   now,
			})
		}
	}

	// IDs are handed out in catalogue order; the returned order is by concept ID.
	sort.Slice(found, func(a, b int) bool {
		return found[a].Concept.ID < found[b].Concept.ID
	})
	return found
}
// extractEdges returns a prerequisite edge candidate for every prerequisite
// rule whose two endpoints are both present in concepts.
//
// Edges follow the order of prerequisiteRules. Each edge carries its own copy
// of the evidence of the prerequisite ("from") concept. The result is never nil.
func (s *Service) extractEdges(concepts []ConceptCandidate, now time.Time) []EdgeCandidate {
	index := make(map[string]ConceptCandidate, len(concepts))
	for i := range concepts {
		index[concepts[i].Concept.ID] = concepts[i]
	}

	result := []EdgeCandidate{}
	for _, rule := range prerequisiteRules {
		source, haveSource := index[rule.FromID]
		target, haveTarget := index[rule.ToID]
		if haveSource && haveTarget {
			result = append(result, EdgeCandidate{
				ID:          s.nextID("edge"),
				From:        source.Concept,
				To:          target.Concept,
				Kind:        EdgePrerequisite,
				Evidence:    append([]workflows.EvidenceRef(nil), source.Evidence...),
				ReviewState: ReviewCandidate,
				CreatedAt:   now,
			})
		}
	}
	return result
}
// detectGaps reports weaknesses in what was extracted from one material.
//
// Three kinds of gap are produced, in this order:
//  1. weak evidence: a concept backed by exactly one quote of fewer than six
//     whitespace-separated words;
//  2. missing prerequisite: a rule's dependent concept is present but its
//     prerequisite concept is not (one gap per such rule);
//  3. a single human-review gap on the first concept when several concepts were
//     found but no edge connects any of them.
//
// Every gap copies the evidence of the concept it is attached to. The result is
// never nil.
func (s *Service) detectGaps(
	concepts []ConceptCandidate,
	edges []EdgeCandidate,
	now time.Time,
) []Gap {
	gaps := []Gap{}

	// record appends one gap about subject, drawing the next gap ID.
	record := func(subject ConceptCandidate, kind GapType, reason string, action ProposedAction) {
		gaps = append(gaps, Gap{
			ID:                 s.nextID("gap"),
			Concept:            subject.Concept,
			GapType:            kind,
			Reason:             reason,
			SupportingEvidence: append([]workflows.EvidenceRef(nil), subject.Evidence...),
			ProposedAction:     action,
			ReviewState:        ReviewCandidate,
			CreatedAt:          now,
		})
	}

	present := make(map[string]ConceptCandidate, len(concepts))
	for i := range concepts {
		candidate := concepts[i]
		present[candidate.Concept.ID] = candidate

		if len(candidate.Evidence) != 1 {
			continue
		}
		if words := strings.Fields(candidate.Evidence[0].Quote); len(words) < 6 {
			record(
				candidate,
				GapWeakEvidence,
				"Concept is mentioned, but source support is thin.",
				ActionRequestSource,
			)
		}
	}

	for _, rule := range prerequisiteRules {
		_, havePrerequisite := present[rule.FromID]
		dependent, haveDependent := present[rule.ToID]
		if haveDependent && !havePrerequisite {
			record(
				dependent,
				GapMissingPrerequisite,
				"Prerequisite concept "+rule.FromID+" is missing from the material.",
				ActionGenerateCandidate,
			)
		}
	}

	if len(concepts) > 1 && len(edges) == 0 {
		record(
			concepts[0],
			GapMissingPrerequisite,
			"Concept relationship is inferred as incomplete and needs review.",
			ActionHumanReview,
		)
	}
	return gaps
}
// firstKeywordQuote returns a short excerpt of originalBody around the first
// keyword (in slice order) that occurs in lowerBody, and reports whether any
// keyword was found.
//
// lowerBody is expected to be strings.ToLower(originalBody); keywords are
// lowercased before searching, so matching is case-insensitive. The excerpt
// spans roughly 40 bytes before the match and 80 bytes after it, clipped to the
// body, widened to whole UTF-8 characters, and trimmed of surrounding
// whitespace.
//
// Lowercasing can change the byte length of non-ASCII text, so an offset found
// in lowerBody is translated back to originalBody before slicing. Using it
// directly could select the wrong text or panic with an out-of-range slice.
func firstKeywordQuote(lowerBody string, originalBody string, keywords []string) (string, bool) {
	for _, keyword := range keywords {
		lowerKeyword := strings.ToLower(keyword)
		index := strings.Index(lowerBody, lowerKeyword)
		if index < 0 {
			continue
		}

		matchStart := originalOffset(originalBody, index)
		matchEnd := matchStart + originalOffset(originalBody[matchStart:], len(lowerKeyword))

		start := max(0, matchStart-40)
		end := min(len(originalBody), matchEnd+80)

		// Widen to rune boundaries so the quote never starts or ends in the
		// middle of a multi-byte character.
		for start > 0 && start < len(originalBody) && !utf8.RuneStart(originalBody[start]) {
			start--
		}
		for end < len(originalBody) && !utf8.RuneStart(originalBody[end]) {
			end++
		}
		return strings.TrimSpace(originalBody[start:end]), true
	}
	return "", false
}

// originalOffset converts a byte offset into strings.ToLower(originalBody) to
// the byte offset of the corresponding character in originalBody. Offsets past
// the end map to len(originalBody).
func originalOffset(originalBody string, lowerOffset int) int {
	lowered := 0
	for i, r := range originalBody {
		if lowered >= lowerOffset {
			return i
		}
		// Mirrors strings.ToLower: each rune is mapped on its own, and an
		// invalid byte is emitted as the three-byte replacement character.
		lowered += utf8.RuneLen(unicode.ToLower(r))
	}
	return len(originalBody)
}
// sourceTypeOrDefault returns sourceType unchanged unless it is empty or only
// whitespace, in which case it returns the default source type "text".
func sourceTypeOrDefault(sourceType string) string {
	if trimmed := strings.TrimSpace(sourceType); trimmed != "" {
		return sourceType
	}
	return "text"
}
// nextID returns a new identifier of the form "<prefix>-<n>", where n is the
// next value of the service-wide counter.
func (s *Service) nextID(prefix string) string {
	sequence := s.ids.Add(1)
	return fmt.Sprintf("%s-%d", prefix, sequence)
}

View File

@@ -0,0 +1,53 @@
package ontology
import "testing"
func TestIngestCreatesSourceBackedCandidates(t *testing.T) {
service := NewService(NewMemoryStore())
result, err := service.Ingest(IngestInput{
Title: "Backend interview notes",
SourceType: "markdown",
Body: "Idempotent API retries need transactions. Cache invalidation uses TTL tradeoffs.",
})
if err != nil {
t.Fatalf("Ingest error: %v", err)
}
if result.Material.ID == "" {
t.Fatal("expected material id")
}
if len(result.Snapshot.Concepts) == 0 {
t.Fatal("expected concept candidates")
}
for _, concept := range result.Snapshot.Concepts {
if concept.ReviewState != ReviewCandidate {
t.Fatalf("review state = %q", concept.ReviewState)
}
if len(concept.Evidence) == 0 {
t.Fatal("expected concept evidence")
}
}
if len(result.Snapshot.Edges) == 0 {
t.Fatal("expected prerequisite edge candidates")
}
}
func TestIngestMarksGapsAsCandidates(t *testing.T) {
service := NewService(NewMemoryStore())
result, err := service.Ingest(IngestInput{
Title: "Cache note",
Body: "Cache invalidation is hard.",
})
if err != nil {
t.Fatalf("Ingest error: %v", err)
}
if len(result.Snapshot.Gaps) == 0 {
t.Fatal("expected gaps")
}
for _, gap := range result.Snapshot.Gaps {
if gap.ReviewState != ReviewCandidate {
t.Fatalf("gap review state = %q", gap.ReviewState)
}
}
}

View File

@@ -0,0 +1,87 @@
package ontology
import "sync"
import "tutor/internal/workflows"
// Store is the persistence boundary used by Service.
type Store interface {
// Save records one material together with the concept candidates, edge
// candidates and gaps derived from it.
Save(Material, []ConceptCandidate, []EdgeCandidate, []Gap) error
// Snapshot returns everything currently held by the store.
Snapshot() Snapshot
}
// MemoryStore is an in-memory Store that accumulates everything saved to it.
type MemoryStore struct {
// mu is held for writing by Save and for reading by Snapshot.
mu sync.RWMutex
// The slices below grow by appending on every Save, in save order.
materials []Material
concepts []ConceptCandidate
edges []EdgeCandidate
gaps []Gap
}
// NewMemoryStore returns an empty, ready-to-use MemoryStore.
func NewMemoryStore() *MemoryStore {
	return new(MemoryStore)
}
// Save appends the material and its derived concepts, edges and gaps to the
// store under the write lock. The store keeps its own copies, so later changes
// to the caller's slices do not affect what was saved. It always returns nil.
func (s *MemoryStore) Save(
	material Material,
	concepts []ConceptCandidate,
	edges []EdgeCandidate,
	gaps []Gap,
) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	storedMaterial := cloneMaterial(material)
	s.materials = append(s.materials, storedMaterial)

	storedConcepts := cloneConcepts(concepts)
	s.concepts = append(s.concepts, storedConcepts...)

	storedEdges := cloneEdges(edges)
	s.edges = append(s.edges, storedEdges...)

	storedGaps := cloneGaps(gaps)
	s.gaps = append(s.gaps, storedGaps...)

	return nil
}
// Snapshot returns copies of all stored materials, concepts, edges and gaps,
// taken under the read lock, so callers may modify the result freely.
func (s *MemoryStore) Snapshot() Snapshot {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var view Snapshot
	view.Materials = cloneMaterials(s.materials)
	view.Concepts = cloneConcepts(s.concepts)
	view.Edges = cloneEdges(s.edges)
	view.Gaps = cloneGaps(s.gaps)
	return view
}
// cloneMaterial returns a copy of material. The argument is already passed by
// value, so the copy is simply handed back.
func cloneMaterial(material Material) Material {
	copied := material
	return copied
}
// cloneMaterials returns a new, non-nil slice holding the same materials as
// items, backed by its own array.
func cloneMaterials(items []Material) []Material {
	cloned := make([]Material, 0, len(items))
	return append(cloned, items...)
}
// cloneConcepts returns a new, non-nil slice of concept candidates in which
// every element has its own copy of the Evidence slice.
func cloneConcepts(items []ConceptCandidate) []ConceptCandidate {
	cloned := make([]ConceptCandidate, 0, len(items))
	for i := range items {
		entry := items[i]
		entry.Evidence = append([]workflows.EvidenceRef(nil), items[i].Evidence...)
		cloned = append(cloned, entry)
	}
	return cloned
}
// cloneEdges returns a new, non-nil slice of edge candidates in which every
// element has its own copy of the Evidence slice.
func cloneEdges(items []EdgeCandidate) []EdgeCandidate {
	cloned := make([]EdgeCandidate, 0, len(items))
	for i := range items {
		entry := items[i]
		entry.Evidence = append([]workflows.EvidenceRef(nil), items[i].Evidence...)
		cloned = append(cloned, entry)
	}
	return cloned
}
// cloneGaps returns a new, non-nil slice of gaps in which every element has its
// own copy of the SupportingEvidence slice.
func cloneGaps(items []Gap) []Gap {
	cloned := make([]Gap, 0, len(items))
	for i := range items {
		entry := items[i]
		entry.SupportingEvidence = append([]workflows.EvidenceRef(nil), items[i].SupportingEvidence...)
		cloned = append(cloned, entry)
	}
	return cloned
}

View File

@@ -0,0 +1,91 @@
package ontology
import (
"time"
"tutor/internal/workflows"
)
// ReviewState is the review status attached to generated concepts, edges and
// gaps.
type ReviewState string
const (
// ReviewCandidate is the state given to everything produced by ingestion.
ReviewCandidate ReviewState = "candidate"
// ReviewReviewed is the alternative state; presumably set once an item has
// been reviewed — nothing in this package assigns it.
ReviewReviewed ReviewState = "reviewed"
)
// Material is one piece of ingested source text.
type Material struct {
// ID is the generated identifier of the material.
ID string `json:"id"`
// Title is the caller-supplied title.
Title string `json:"title"`
// SourceType is the caller-supplied source type, or "text" when none was
// given.
SourceType string `json:"source_type"`
// Body is the full source text; it is left out of JSON output when empty.
Body string `json:"body,omitempty"`
// CreatedAt is the UTC time at which the material was ingested.
CreatedAt time.Time `json:"created_at"`
}
// ConceptCandidate is a concept detected in a material, pending review.
type ConceptCandidate struct {
// ID is the generated identifier of this candidate.
ID string `json:"id"`
// Concept identifies which known concept was detected.
Concept workflows.ConceptRef `json:"concept"`
// Summary is a short human-readable description of the detection.
Summary string `json:"summary"`
// Evidence lists the source excerpts that support the detection.
Evidence []workflows.EvidenceRef `json:"evidence"`
// ReviewState is the review status of the candidate.
ReviewState ReviewState `json:"review_state"`
// CreatedAt is the time the candidate was generated.
CreatedAt time.Time `json:"created_at"`
}
// EdgeCandidate is a proposed relationship between two detected concepts,
// pending review.
type EdgeCandidate struct {
// ID is the generated identifier of this candidate.
ID string `json:"id"`
// From is the concept the edge starts at; for prerequisite edges this is the
// prerequisite.
From workflows.ConceptRef `json:"from"`
// To is the concept the edge points to; for prerequisite edges this is the
// dependent concept.
To workflows.ConceptRef `json:"to"`
// Kind is the type of relationship.
Kind EdgeKind `json:"kind"`
// Evidence lists the source excerpts attached to the edge.
Evidence []workflows.EvidenceRef `json:"evidence"`
// ReviewState is the review status of the candidate.
ReviewState ReviewState `json:"review_state"`
// CreatedAt is the time the candidate was generated.
CreatedAt time.Time `json:"created_at"`
}
// EdgeKind names the type of relationship an EdgeCandidate represents.
type EdgeKind string
const (
// EdgePrerequisite marks the From concept as a prerequisite of the To concept.
EdgePrerequisite EdgeKind = "prerequisite"
)
// Gap records a detected weakness in the extracted knowledge about a concept,
// pending review.
type Gap struct {
// ID is the generated identifier of this gap.
ID string `json:"id"`
// Concept is the concept the gap is attached to.
Concept workflows.ConceptRef `json:"concept"`
// GapType classifies the weakness.
GapType GapType `json:"gap_type"`
// Reason is a human-readable explanation of why the gap was raised.
Reason string `json:"reason"`
// SupportingEvidence lists the source excerpts of the affected concept.
SupportingEvidence []workflows.EvidenceRef `json:"supporting_evidence"`
// ProposedAction is the suggested way to resolve the gap.
ProposedAction ProposedAction `json:"proposed_action"`
// ReviewState is the review status of the gap.
ReviewState ReviewState `json:"review_state"`
// CreatedAt is the time the gap was generated.
CreatedAt time.Time `json:"created_at"`
}
// GapType classifies a Gap.
type GapType string
const (
// GapMissingPrerequisite marks a concept whose prerequisite relationships
// are absent or incomplete in the material.
GapMissingPrerequisite GapType = "missing_prerequisite"
// GapWeakEvidence marks a concept that is mentioned with only thin source
// support.
GapWeakEvidence GapType = "weak_evidence"
)
// ProposedAction is the follow-up suggested for resolving a Gap.
type ProposedAction string
const (
// ActionGenerateCandidate is proposed when a prerequisite concept is missing
// from the material.
ActionGenerateCandidate ProposedAction = "generate_candidate"
// ActionRequestSource is proposed when a concept has weak evidence.
ActionRequestSource ProposedAction = "request_source"
// ActionHumanReview is proposed when several concepts were found but no
// relationship between them was detected.
ActionHumanReview ProposedAction = "human_review"
)
// IngestInput is the caller-supplied description of a material to ingest.
type IngestInput struct {
// Title is required and must not be blank.
Title string
// SourceType is optional; a blank value is stored as "text".
SourceType string
// Body is required and must not be blank; it is the text that is scanned.
Body string
}
// IngestResult is what a successful ingestion returns.
type IngestResult struct {
// Material is the material as it was stored.
Material Material `json:"material"`
// Snapshot is the store content read right after the material was saved.
Snapshot Snapshot `json:"snapshot"`
}
// Snapshot is a point-in-time view of everything held by a Store.
type Snapshot struct {
// Materials holds every ingested material.
Materials []Material `json:"materials"`
// Concepts holds every concept candidate.
Concepts []ConceptCandidate `json:"concepts"`
// Edges holds every edge candidate.
Edges []EdgeCandidate `json:"edges"`
// Gaps holds every detected gap.
Gaps []Gap `json:"gaps"`
}