193 lines
5.1 KiB
Go
193 lines
5.1 KiB
Go
package ontology
|
|
|
|
import (
	"errors"
	"fmt"
	"sort"
	"strings"
	"sync/atomic"
	"time"
	"unicode"
	"unicode/utf8"

	"tutor/internal/workflows"
)
|
|
|
|
type Service struct {
|
|
store Store
|
|
ids atomic.Uint64
|
|
}
|
|
|
|
func NewService(store Store) *Service {
|
|
return &Service{store: store}
|
|
}
|
|
|
|
func (s *Service) Ingest(input IngestInput) (IngestResult, error) {
|
|
if strings.TrimSpace(input.Title) == "" {
|
|
return IngestResult{}, errors.New("title is required")
|
|
}
|
|
if strings.TrimSpace(input.Body) == "" {
|
|
return IngestResult{}, errors.New("body is required")
|
|
}
|
|
|
|
now := time.Now().UTC()
|
|
material := Material{
|
|
ID: s.nextID("material"),
|
|
Title: input.Title,
|
|
SourceType: sourceTypeOrDefault(input.SourceType),
|
|
Body: input.Body,
|
|
CreatedAt: now,
|
|
}
|
|
|
|
concepts := s.extractConcepts(material, now)
|
|
edges := s.extractEdges(concepts, now)
|
|
gaps := s.detectGaps(concepts, edges, now)
|
|
if err := s.store.Save(material, concepts, edges, gaps); err != nil {
|
|
return IngestResult{}, err
|
|
}
|
|
|
|
return IngestResult{
|
|
Material: material,
|
|
Snapshot: s.store.Snapshot(),
|
|
}, nil
|
|
}
|
|
|
|
func (s *Service) Snapshot() Snapshot {
|
|
return s.store.Snapshot()
|
|
}
|
|
|
|
func (s *Service) extractConcepts(material Material, now time.Time) []ConceptCandidate {
|
|
body := strings.ToLower(material.Body)
|
|
concepts := []ConceptCandidate{}
|
|
for _, known := range knownConcepts {
|
|
quote, ok := firstKeywordQuote(body, material.Body, known.Keywords)
|
|
if !ok {
|
|
continue
|
|
}
|
|
concepts = append(concepts, ConceptCandidate{
|
|
ID: s.nextID("concept"),
|
|
Concept: known.Ref,
|
|
Summary: "Source material mentions " + known.Ref.Label + ".",
|
|
Evidence: []workflows.EvidenceRef{{
|
|
Kind: workflows.EvidenceSource,
|
|
ID: material.ID,
|
|
Quote: quote,
|
|
Confidence: 0.72,
|
|
}},
|
|
ReviewState: ReviewCandidate,
|
|
CreatedAt: now,
|
|
})
|
|
}
|
|
sort.Slice(concepts, func(i, j int) bool {
|
|
return concepts[i].Concept.ID < concepts[j].Concept.ID
|
|
})
|
|
return concepts
|
|
}
|
|
|
|
func (s *Service) extractEdges(concepts []ConceptCandidate, now time.Time) []EdgeCandidate {
|
|
byID := make(map[string]ConceptCandidate, len(concepts))
|
|
for _, concept := range concepts {
|
|
byID[concept.Concept.ID] = concept
|
|
}
|
|
|
|
edges := []EdgeCandidate{}
|
|
for _, rule := range prerequisiteRules {
|
|
from, fromOK := byID[rule.FromID]
|
|
to, toOK := byID[rule.ToID]
|
|
if !fromOK || !toOK {
|
|
continue
|
|
}
|
|
edges = append(edges, EdgeCandidate{
|
|
ID: s.nextID("edge"),
|
|
From: from.Concept,
|
|
To: to.Concept,
|
|
Kind: EdgePrerequisite,
|
|
Evidence: append([]workflows.EvidenceRef(nil), from.Evidence...),
|
|
ReviewState: ReviewCandidate,
|
|
CreatedAt: now,
|
|
})
|
|
}
|
|
return edges
|
|
}
|
|
|
|
func (s *Service) detectGaps(
|
|
concepts []ConceptCandidate,
|
|
edges []EdgeCandidate,
|
|
now time.Time,
|
|
) []Gap {
|
|
gaps := []Gap{}
|
|
byID := make(map[string]ConceptCandidate, len(concepts))
|
|
for _, concept := range concepts {
|
|
byID[concept.Concept.ID] = concept
|
|
if len(concept.Evidence) == 1 && len(strings.Fields(concept.Evidence[0].Quote)) < 6 {
|
|
gaps = append(gaps, Gap{
|
|
ID: s.nextID("gap"),
|
|
Concept: concept.Concept,
|
|
GapType: GapWeakEvidence,
|
|
Reason: "Concept is mentioned, but source support is thin.",
|
|
SupportingEvidence: append([]workflows.EvidenceRef(nil), concept.Evidence...),
|
|
ProposedAction: ActionRequestSource,
|
|
ReviewState: ReviewCandidate,
|
|
CreatedAt: now,
|
|
})
|
|
}
|
|
}
|
|
|
|
for _, rule := range prerequisiteRules {
|
|
to, toOK := byID[rule.ToID]
|
|
if !toOK {
|
|
continue
|
|
}
|
|
if _, fromOK := byID[rule.FromID]; fromOK {
|
|
continue
|
|
}
|
|
gaps = append(gaps, Gap{
|
|
ID: s.nextID("gap"),
|
|
Concept: to.Concept,
|
|
GapType: GapMissingPrerequisite,
|
|
Reason: "Prerequisite concept " + rule.FromID + " is missing from the material.",
|
|
SupportingEvidence: append([]workflows.EvidenceRef(nil), to.Evidence...),
|
|
ProposedAction: ActionGenerateCandidate,
|
|
ReviewState: ReviewCandidate,
|
|
CreatedAt: now,
|
|
})
|
|
}
|
|
|
|
if len(edges) == 0 && len(concepts) > 1 {
|
|
first := concepts[0]
|
|
gaps = append(gaps, Gap{
|
|
ID: s.nextID("gap"),
|
|
Concept: first.Concept,
|
|
GapType: GapMissingPrerequisite,
|
|
Reason: "Concept relationship is inferred as incomplete and needs review.",
|
|
SupportingEvidence: append([]workflows.EvidenceRef(nil), first.Evidence...),
|
|
ProposedAction: ActionHumanReview,
|
|
ReviewState: ReviewCandidate,
|
|
CreatedAt: now,
|
|
})
|
|
}
|
|
return gaps
|
|
}
|
|
|
|
// firstKeywordQuote returns a snippet of originalBody surrounding the first
// keyword (in slice order) that occurs in lowerBody, and true. If no keyword
// occurs it returns "", false.
//
// lowerBody is expected to be strings.ToLower(originalBody). Keywords are
// lower-cased before searching, so matching is case-insensitive. The snippet
// spans up to 40 bytes before the match and up to 80 bytes after it, widened
// to the nearest rune boundaries, with surrounding whitespace trimmed.
//
// Lower-casing can change the byte length of a string (for example U+023A is
// two bytes but its lower-case form U+2C65 is three), so offsets found in
// lowerBody are translated back to originalBody before slicing. Using them
// directly could select the wrong text or slice out of range.
func firstKeywordQuote(lowerBody string, originalBody string, keywords []string) (string, bool) {
	for _, keyword := range keywords {
		needle := strings.ToLower(keyword)
		lowerIndex := strings.Index(lowerBody, needle)
		if lowerIndex < 0 {
			continue
		}
		matchStart := lowerToOriginalOffset(originalBody, lowerIndex)
		matchEnd := lowerToOriginalOffset(originalBody, lowerIndex+len(needle))

		// Snap outward so the quote never begins or ends inside a multi-byte rune.
		start := runeBoundaryBefore(originalBody, max(0, matchStart-40))
		end := runeBoundaryAfter(originalBody, min(len(originalBody), matchEnd+80))
		return strings.TrimSpace(originalBody[start:end]), true
	}
	return "", false
}

// lowerToOriginalOffset converts a byte offset into strings.ToLower(original)
// to the byte offset of the corresponding rune start in original. Offsets past
// the end map to len(original).
func lowerToOriginalOffset(original string, lowerOffset int) int {
	lowered := 0
	for i, r := range original {
		if lowered >= lowerOffset {
			return i
		}
		// Ranging yields utf8.RuneError for invalid bytes, which ToLower
		// emits as the three-byte U+FFFD; RuneLen accounts for that.
		lowered += utf8.RuneLen(unicode.ToLower(r))
	}
	return len(original)
}

// runeBoundaryBefore moves i backwards to the closest rune start in s.
func runeBoundaryBefore(s string, i int) int {
	for i > 0 && i < len(s) && !utf8.RuneStart(s[i]) {
		i--
	}
	return i
}

// runeBoundaryAfter moves i forwards to the closest rune start in s, or len(s).
func runeBoundaryAfter(s string, i int) int {
	for i < len(s) && !utf8.RuneStart(s[i]) {
		i++
	}
	return i
}
|
|
|
|
// sourceTypeOrDefault returns sourceType unchanged unless it is empty or
// consists only of whitespace, in which case it returns "text".
func sourceTypeOrDefault(sourceType string) string {
	const fallback = "text"
	if len(strings.TrimSpace(sourceType)) > 0 {
		// The caller's value is returned as given, not the trimmed form.
		return sourceType
	}
	return fallback
}
|
|
|
|
func (s *Service) nextID(prefix string) string {
|
|
return fmt.Sprintf("%s-%d", prefix, s.ids.Add(1))
|
|
}
|