feat: add ontology material ingestion
This commit is contained in:
192
internal/ontology/service.go
Normal file
192
internal/ontology/service.go
Normal file
@@ -0,0 +1,192 @@
|
||||
package ontology
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"tutor/internal/workflows"
|
||||
)
|
||||
|
||||
type Service struct {
|
||||
store Store
|
||||
ids atomic.Uint64
|
||||
}
|
||||
|
||||
func NewService(store Store) *Service {
|
||||
return &Service{store: store}
|
||||
}
|
||||
|
||||
func (s *Service) Ingest(input IngestInput) (IngestResult, error) {
|
||||
if strings.TrimSpace(input.Title) == "" {
|
||||
return IngestResult{}, errors.New("title is required")
|
||||
}
|
||||
if strings.TrimSpace(input.Body) == "" {
|
||||
return IngestResult{}, errors.New("body is required")
|
||||
}
|
||||
|
||||
now := time.Now().UTC()
|
||||
material := Material{
|
||||
ID: s.nextID("material"),
|
||||
Title: input.Title,
|
||||
SourceType: sourceTypeOrDefault(input.SourceType),
|
||||
Body: input.Body,
|
||||
CreatedAt: now,
|
||||
}
|
||||
|
||||
concepts := s.extractConcepts(material, now)
|
||||
edges := s.extractEdges(concepts, now)
|
||||
gaps := s.detectGaps(concepts, edges, now)
|
||||
if err := s.store.Save(material, concepts, edges, gaps); err != nil {
|
||||
return IngestResult{}, err
|
||||
}
|
||||
|
||||
return IngestResult{
|
||||
Material: material,
|
||||
Snapshot: s.store.Snapshot(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *Service) Snapshot() Snapshot {
|
||||
return s.store.Snapshot()
|
||||
}
|
||||
|
||||
func (s *Service) extractConcepts(material Material, now time.Time) []ConceptCandidate {
|
||||
body := strings.ToLower(material.Body)
|
||||
concepts := []ConceptCandidate{}
|
||||
for _, known := range knownConcepts {
|
||||
quote, ok := firstKeywordQuote(body, material.Body, known.Keywords)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
concepts = append(concepts, ConceptCandidate{
|
||||
ID: s.nextID("concept"),
|
||||
Concept: known.Ref,
|
||||
Summary: "Source material mentions " + known.Ref.Label + ".",
|
||||
Evidence: []workflows.EvidenceRef{{
|
||||
Kind: workflows.EvidenceSource,
|
||||
ID: material.ID,
|
||||
Quote: quote,
|
||||
Confidence: 0.72,
|
||||
}},
|
||||
ReviewState: ReviewCandidate,
|
||||
CreatedAt: now,
|
||||
})
|
||||
}
|
||||
sort.Slice(concepts, func(i, j int) bool {
|
||||
return concepts[i].Concept.ID < concepts[j].Concept.ID
|
||||
})
|
||||
return concepts
|
||||
}
|
||||
|
||||
func (s *Service) extractEdges(concepts []ConceptCandidate, now time.Time) []EdgeCandidate {
|
||||
byID := make(map[string]ConceptCandidate, len(concepts))
|
||||
for _, concept := range concepts {
|
||||
byID[concept.Concept.ID] = concept
|
||||
}
|
||||
|
||||
edges := []EdgeCandidate{}
|
||||
for _, rule := range prerequisiteRules {
|
||||
from, fromOK := byID[rule.FromID]
|
||||
to, toOK := byID[rule.ToID]
|
||||
if !fromOK || !toOK {
|
||||
continue
|
||||
}
|
||||
edges = append(edges, EdgeCandidate{
|
||||
ID: s.nextID("edge"),
|
||||
From: from.Concept,
|
||||
To: to.Concept,
|
||||
Kind: EdgePrerequisite,
|
||||
Evidence: append([]workflows.EvidenceRef(nil), from.Evidence...),
|
||||
ReviewState: ReviewCandidate,
|
||||
CreatedAt: now,
|
||||
})
|
||||
}
|
||||
return edges
|
||||
}
|
||||
|
||||
func (s *Service) detectGaps(
|
||||
concepts []ConceptCandidate,
|
||||
edges []EdgeCandidate,
|
||||
now time.Time,
|
||||
) []Gap {
|
||||
gaps := []Gap{}
|
||||
byID := make(map[string]ConceptCandidate, len(concepts))
|
||||
for _, concept := range concepts {
|
||||
byID[concept.Concept.ID] = concept
|
||||
if len(concept.Evidence) == 1 && len(strings.Fields(concept.Evidence[0].Quote)) < 6 {
|
||||
gaps = append(gaps, Gap{
|
||||
ID: s.nextID("gap"),
|
||||
Concept: concept.Concept,
|
||||
GapType: GapWeakEvidence,
|
||||
Reason: "Concept is mentioned, but source support is thin.",
|
||||
SupportingEvidence: append([]workflows.EvidenceRef(nil), concept.Evidence...),
|
||||
ProposedAction: ActionRequestSource,
|
||||
ReviewState: ReviewCandidate,
|
||||
CreatedAt: now,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
for _, rule := range prerequisiteRules {
|
||||
to, toOK := byID[rule.ToID]
|
||||
if !toOK {
|
||||
continue
|
||||
}
|
||||
if _, fromOK := byID[rule.FromID]; fromOK {
|
||||
continue
|
||||
}
|
||||
gaps = append(gaps, Gap{
|
||||
ID: s.nextID("gap"),
|
||||
Concept: to.Concept,
|
||||
GapType: GapMissingPrerequisite,
|
||||
Reason: "Prerequisite concept " + rule.FromID + " is missing from the material.",
|
||||
SupportingEvidence: append([]workflows.EvidenceRef(nil), to.Evidence...),
|
||||
ProposedAction: ActionGenerateCandidate,
|
||||
ReviewState: ReviewCandidate,
|
||||
CreatedAt: now,
|
||||
})
|
||||
}
|
||||
|
||||
if len(edges) == 0 && len(concepts) > 1 {
|
||||
first := concepts[0]
|
||||
gaps = append(gaps, Gap{
|
||||
ID: s.nextID("gap"),
|
||||
Concept: first.Concept,
|
||||
GapType: GapMissingPrerequisite,
|
||||
Reason: "Concept relationship is inferred as incomplete and needs review.",
|
||||
SupportingEvidence: append([]workflows.EvidenceRef(nil), first.Evidence...),
|
||||
ProposedAction: ActionHumanReview,
|
||||
ReviewState: ReviewCandidate,
|
||||
CreatedAt: now,
|
||||
})
|
||||
}
|
||||
return gaps
|
||||
}
|
||||
|
||||
// firstKeywordQuote returns an excerpt of originalBody around the first keyword
// (in slice order) that occurs in lowerBody, and reports whether any keyword
// matched.
//
// lowerBody is expected to be strings.ToLower(originalBody). Keywords are
// lowercased before searching, so matching is case-insensitive. The excerpt
// covers up to 40 bytes before and 80 bytes after the match, widened to whole
// UTF-8 characters, with surrounding whitespace trimmed. Empty keywords are
// skipped because they would match every body.
func firstKeywordQuote(lowerBody string, originalBody string, keywords []string) (string, bool) {
	for _, keyword := range keywords {
		needle := strings.ToLower(keyword)
		if needle == "" {
			continue
		}
		index := strings.Index(lowerBody, needle)
		if index < 0 {
			continue
		}

		// Lowercasing can change the byte length of non-ASCII text, so offsets
		// found in lowerBody are translated back before slicing originalBody.
		matchStart := originalByteOffset(originalBody, index)
		matchEnd := originalByteOffset(originalBody, index+len(needle))

		// Widen both edges to character boundaries so the excerpt never starts
		// or ends in the middle of a multi-byte character.
		start := max(0, matchStart-40)
		for start > 0 && isContinuationByte(originalBody[start]) {
			start--
		}
		end := min(len(originalBody), matchEnd+80)
		for end < len(originalBody) && isContinuationByte(originalBody[end]) {
			end++
		}
		return strings.TrimSpace(originalBody[start:end]), true
	}
	return "", false
}

// originalByteOffset maps a byte offset in strings.ToLower(original) to the
// corresponding byte offset in original. Offsets past the end map to
// len(original).
func originalByteOffset(original string, lowerOffset int) int {
	lowered := 0
	for i, r := range original {
		if lowered >= lowerOffset {
			return i
		}
		if r < 0x80 {
			// ASCII characters stay one byte wide when lowercased.
			lowered++
			continue
		}
		// Invalid bytes decode to U+FFFD here, which is also what ToLower
		// writes for them, so the widths stay in step.
		lowered += len(strings.ToLower(string(r)))
	}
	return len(original)
}

// isContinuationByte reports whether b is a UTF-8 continuation byte (10xxxxxx),
// i.e. not the first byte of an encoded character.
func isContinuationByte(b byte) bool {
	return b&0xC0 == 0x80
}
|
||||
|
||||
// sourceTypeOrDefault returns sourceType unchanged unless it is empty or
// whitespace-only, in which case it returns "text".
func sourceTypeOrDefault(sourceType string) string {
	if strings.TrimSpace(sourceType) != "" {
		// Non-blank values are passed through as given, without trimming.
		return sourceType
	}
	return "text"
}
|
||||
|
||||
func (s *Service) nextID(prefix string) string {
|
||||
return fmt.Sprintf("%s-%d", prefix, s.ids.Add(1))
|
||||
}
|
||||
Reference in New Issue
Block a user