From 4936cdf4c9ebea70e03da1e4ab0af44682cb7e12 Mon Sep 17 00:00:00 2001 From: user Date: Sun, 26 Apr 2026 17:49:35 +0900 Subject: [PATCH] feat: add ontology material ingestion --- .planning/REQUIREMENTS.md | 12 +- .planning/STATE.md | 13 +- .../005-ontology-materials/005-CONTEXT.md | 37 ++++ .../phases/005-ontology-materials/005-PLAN.md | 42 ++++ .../005-ontology-materials/005-RESEARCH.md | 28 +++ .../005-ontology-materials/005-SUMMARY.md | 36 ++++ .../005-VERIFICATION.md | 29 +++ internal/app/server.go | 4 +- internal/httpapi/diagnostic_test.go | 4 +- internal/httpapi/handler.go | 6 + internal/httpapi/handler_test.go | 4 +- internal/httpapi/ontology.go | 48 +++++ internal/httpapi/ontology_test.go | 54 +++++ internal/ontology/catalog.go | 38 ++++ internal/ontology/service.go | 192 ++++++++++++++++++ internal/ontology/service_test.go | 53 +++++ internal/ontology/store.go | 87 ++++++++ internal/ontology/types.go | 91 +++++++++ .../bootstrap-job-tutor-platform/tasks.md | 1 + 19 files changed, 766 insertions(+), 13 deletions(-) create mode 100644 .planning/phases/005-ontology-materials/005-CONTEXT.md create mode 100644 .planning/phases/005-ontology-materials/005-PLAN.md create mode 100644 .planning/phases/005-ontology-materials/005-RESEARCH.md create mode 100644 .planning/phases/005-ontology-materials/005-SUMMARY.md create mode 100644 .planning/phases/005-ontology-materials/005-VERIFICATION.md create mode 100644 internal/httpapi/ontology.go create mode 100644 internal/httpapi/ontology_test.go create mode 100644 internal/ontology/catalog.go create mode 100644 internal/ontology/service.go create mode 100644 internal/ontology/service_test.go create mode 100644 internal/ontology/store.go create mode 100644 internal/ontology/types.go diff --git a/.planning/REQUIREMENTS.md b/.planning/REQUIREMENTS.md index 85fbed8..d5e0a04 100644 --- a/.planning/REQUIREMENTS.md +++ b/.planning/REQUIREMENTS.md @@ -49,12 +49,12 @@ interview-ready after each short practice loop. ### Ontology and Learning Materials -- [ ] **ONTO-01**: User or operator can upload learning materials. -- [ ] **ONTO-02**: System creates source-backed ontology candidate nodes and +- [x] **ONTO-01**: User or operator can upload learning materials. +- [x] **ONTO-02**: System creates source-backed ontology candidate nodes and edges. -- [ ] **ONTO-03**: System detects missing prerequisites and weakly supported +- [x] **ONTO-03**: System detects missing prerequisites and weakly supported concepts. -- [ ] **ONTO-04**: Generated or inferred content is marked as candidate until +- [x] **ONTO-04**: Generated or inferred content is marked as candidate until reviewed. ### Teaching Assets @@ -98,7 +98,7 @@ interview-ready after each short practice loop. | INT-01..INT-06 | Phase 2 | Complete | | MEM-01..MEM-05 | Phase 3 | Complete | | PROG-01..PROG-05 | Phase 4 | Complete | -| ONTO-01..ONTO-04 | Phase 5 | Pending | +| ONTO-01..ONTO-04 | Phase 5 | Complete | | ASSET-01..ASSET-03 | Phase 6 | Pending | **Coverage:** @@ -108,4 +108,4 @@ interview-ready after each short practice loop. --- *Requirements defined: 2026-04-26* -*Last updated: 2026-04-26 after Phase 4 execution.* +*Last updated: 2026-04-26 after Phase 5 execution.* diff --git a/.planning/STATE.md b/.planning/STATE.md index 23cffc8..7046abe 100644 --- a/.planning/STATE.md +++ b/.planning/STATE.md @@ -7,7 +7,7 @@ See: `.planning/PROJECT.md` (updated 2026-04-26) **Core value:** The user should feel and prove that they are becoming more interview-ready after each short practice loop. -**Current focus:** Phase 5 planning: Ontology and Learning Materials. +**Current focus:** Phase 6 planning: Teaching Assets. ## Current Decisions @@ -29,14 +29,16 @@ interview-ready after each short practice loop. schedules. - Phase 4 progression is implemented and verified with readiness map and next challenge APIs derived from learner memory evidence. +- Phase 5 ontology material ingestion is implemented and verified with + source-backed candidate concepts, prerequisite edges, and candidate gaps. ## Next Actions -1. Plan Phase 5 ontology and learning material ingestion with GSD. +1. Plan Phase 6 teaching asset prompt generation with GSD. 2. Keep `docs/planning/WORKFLOW_CONTRACTS.md` aligned with Go structs during future workflow implementation. -3. Decide the MVP ontology storage boundary before accepting uploaded source - materials. +3. Verify the production OpenAI image model identifier before real asset + generation calls. ## Validation Log @@ -56,6 +58,9 @@ interview-ready after each short practice loop. - 2026-04-26: Phase 4 implementation verified with `go test ./...`, `openspec validate bootstrap-job-tutor-platform --strict`, live readiness and next-challenge smoke, and Go source line-count check. +- 2026-04-26: Phase 5 implementation verified with `go test ./...`, + `openspec validate bootstrap-job-tutor-platform --strict`, live material + ingestion and ontology snapshot smoke, and Go source line-count check. --- *State initialized: 2026-04-26.* diff --git a/.planning/phases/005-ontology-materials/005-CONTEXT.md b/.planning/phases/005-ontology-materials/005-CONTEXT.md new file mode 100644 index 0000000..531806e --- /dev/null +++ b/.planning/phases/005-ontology-materials/005-CONTEXT.md @@ -0,0 +1,37 @@ +# Phase 5 Context: Ontology and Learning Materials + +**Status:** Ready for execution +**Started:** 2026-04-26 + +## Goal + +Accept learning material input and produce source-backed ontology candidates. + +## Inputs + +- OpenSpec `learning-ontology` requirements. +- Existing workflow contracts for `OntologyGap`. +- Backend Developer Interview seed concepts. + +## Decisions + +- Use an in-memory ontology store for MVP proof. +- Accept JSON material ingestion before multipart file upload. +- Mark all generated nodes, edges, and gaps as `candidate`. +- Preserve source evidence for every supported ontology candidate. + +## Boundaries + +In scope: + +- Material ingestion API. +- Source-backed ontology candidate nodes and edges. +- Gap detection for missing prerequisites and weak evidence. +- Ontology snapshot API. + +Out of scope: + +- File storage. +- PDF/PPT parsing. +- Human review UI. +- Canonical promotion workflow. diff --git a/.planning/phases/005-ontology-materials/005-PLAN.md b/.planning/phases/005-ontology-materials/005-PLAN.md new file mode 100644 index 0000000..bf2e58a --- /dev/null +++ b/.planning/phases/005-ontology-materials/005-PLAN.md @@ -0,0 +1,42 @@ +# Phase 5 Plan: Ontology and Learning Materials + +**Status:** Ready for execution +**Phase Goal:** Ingest learning materials into source-backed ontology candidates. + +## Requirements Covered + +- ONTO-01: User or operator can upload learning materials. +- ONTO-02: System creates source-backed ontology candidate nodes and edges. +- ONTO-03: System detects missing prerequisites and weakly supported concepts. +- ONTO-04: Generated or inferred content is marked as candidate until reviewed. + +## Tasks + +### 1. Add ontology package + +- Define material, concept candidate, edge candidate, gap, and snapshot types. +- Add in-memory store and service. + +### 2. Implement deterministic MVP analyzer + +- Extract known backend interview concept candidates from material text. +- Create prerequisite edges for supported concept pairs. +- Create gap candidates for missing prerequisites and weak evidence. + +### 3. Add HTTP endpoints + +- `POST /api/v1/materials` +- `GET /api/v1/ontology` + +### 4. Add tests and verification + +- Test material ingestion creates source-backed candidates. +- Test gaps are candidate-only. +- Test HTTP ingestion and ontology snapshot flow. +- Run Go tests, OpenSpec validation, line-count check, and smoke. + +## Out of Scope + +- Multipart upload. +- Real document parsers. +- Human review promotion. diff --git a/.planning/phases/005-ontology-materials/005-RESEARCH.md b/.planning/phases/005-ontology-materials/005-RESEARCH.md new file mode 100644 index 0000000..c766ced --- /dev/null +++ b/.planning/phases/005-ontology-materials/005-RESEARCH.md @@ -0,0 +1,28 @@ +# Phase 5 Research: Ontology and Learning Materials + +## Findings + +The first useful ontology proof does not need heavy parsing. It needs a clean +boundary that proves uploaded material can become inspectable candidate +knowledge with provenance. + +The MVP should: + +- store material metadata and source text +- extract concept candidates from known backend interview concepts +- create prerequisite edges from a small deterministic rule set +- identify weak concepts when source support is thin +- never mark generated or inferred content as canonical + +## Recommended Shape + +- `internal/ontology` owns material ingestion, candidate storage, and snapshot. +- HTTP exposes JSON ingestion first. +- Evidence references use the existing workflow shared type. +- Gap records distinguish source-backed weakness from generated inference. + +## Risks + +- Overbuilding parsers too early would violate YAGNI. +- Treating keyword extraction as canonical knowledge would violate OpenSpec. +- A future parser can replace the analyzer behind the same service boundary. diff --git a/.planning/phases/005-ontology-materials/005-SUMMARY.md b/.planning/phases/005-ontology-materials/005-SUMMARY.md new file mode 100644 index 0000000..c88dd7b --- /dev/null +++ b/.planning/phases/005-ontology-materials/005-SUMMARY.md @@ -0,0 +1,36 @@ +# Phase 5 Summary + +**Status:** Complete +**Completed:** 2026-04-26 + +## Delivered + +- Added `internal/ontology` for materials, concept candidates, edge candidates, + gaps, and snapshots. +- Added deterministic MVP analyzer for known backend interview concepts. +- Added source evidence to every supported concept and edge candidate. +- Added candidate-only gap records for missing prerequisites and weak evidence. +- Added HTTP endpoints: + - `POST /api/v1/materials` + - `GET /api/v1/ontology` +- Added ontology unit tests and HTTP flow tests. + +## Verification + +```powershell +gofmt -w cmd internal +go test ./... +openspec validate bootstrap-job-tutor-platform --strict +``` + +Additional smoke check: + +- Material ingestion followed by ontology snapshot returned candidate concepts, + edges, and gaps. + +## Deferred + +- Multipart uploads. +- PPT/PDF/document parsing. +- Human review and canonical promotion. +- Graph database persistence. diff --git a/.planning/phases/005-ontology-materials/005-VERIFICATION.md b/.planning/phases/005-ontology-materials/005-VERIFICATION.md new file mode 100644 index 0000000..615007c --- /dev/null +++ b/.planning/phases/005-ontology-materials/005-VERIFICATION.md @@ -0,0 +1,29 @@ +# Phase 5 Verification + +## Verdict + +PASS + +## Requirement Coverage + +- ONTO-01: PASS. JSON material ingestion API accepts operator-provided learning + material. +- ONTO-02: PASS. Ingestion creates source-backed candidate concepts and + prerequisite edges. +- ONTO-03: PASS. The analyzer creates candidate gaps for missing prerequisites + and weak source evidence. +- ONTO-04: PASS. All generated ontology candidates and gaps use `candidate` + review state. + +## Evidence + +- `go test ./...` passed. +- `openspec validate bootstrap-job-tutor-platform --strict` passed. +- Live material ingestion and ontology snapshot smoke passed. +- Go source line-count check passed. + +## Residual Risk + +The analyzer is deterministic and intentionally shallow. It proves the product +boundary but should later be replaced or supplemented with parser-backed and +LLM-assisted extraction. diff --git a/internal/app/server.go b/internal/app/server.go index 12823ee..9831187 100644 --- a/internal/app/server.go +++ b/internal/app/server.go @@ -7,6 +7,7 @@ import ( "tutor/internal/httpapi" "tutor/internal/interview" "tutor/internal/learnermemory" + "tutor/internal/ontology" "tutor/internal/progression" "tutor/internal/workflows" ) @@ -16,8 +17,9 @@ func NewServer(cfg config.Config) *http.Server { store := interview.NewMemoryStore() memory := learnermemory.NewService(learnermemory.NewMemoryStore()) progress := progression.NewService(memory) + onto := ontology.NewService(ontology.NewMemoryStore()) service := interview.NewService(store, runner, memory) - handler := httpapi.NewHandler(cfg, service, memory, progress) + handler := httpapi.NewHandler(cfg, service, memory, progress, onto) return &http.Server{ Addr: cfg.HTTPAddr, diff --git a/internal/httpapi/diagnostic_test.go b/internal/httpapi/diagnostic_test.go index 3ef1fb5..adc71f7 100644 --- a/internal/httpapi/diagnostic_test.go +++ b/internal/httpapi/diagnostic_test.go @@ -10,6 +10,7 @@ import ( "tutor/internal/config" "tutor/internal/interview" "tutor/internal/learnermemory" + "tutor/internal/ontology" "tutor/internal/progression" "tutor/internal/workflows" ) @@ -18,7 +19,8 @@ func TestDiagnosticHTTPFlow(t *testing.T) { memory := learnermemory.NewService(learnermemory.NewMemoryStore()) service := interview.NewService(interview.NewMemoryStore(), workflows.NewStubRunner(), memory) progress := progression.NewService(memory) - handler := NewHandler(config.Config{Environment: "test", ModelKey: "deepseek-v4-flash"}, service, memory, progress) + onto := ontology.NewService(ontology.NewMemoryStore()) + handler := NewHandler(config.Config{Environment: "test", ModelKey: "deepseek-v4-flash"}, service, memory, progress, onto) routes := handler.Routes() createBody := bytes.NewBufferString(`{ diff --git a/internal/httpapi/handler.go b/internal/httpapi/handler.go index 6a8aaae..30f1600 100644 --- a/internal/httpapi/handler.go +++ b/internal/httpapi/handler.go @@ -7,6 +7,7 @@ import ( "tutor/internal/config" "tutor/internal/interview" "tutor/internal/learnermemory" + "tutor/internal/ontology" "tutor/internal/progression" ) @@ -15,6 +16,7 @@ type Handler struct { diagnostic *interview.Service memory *learnermemory.Service progress *progression.Service + ontology *ontology.Service } func NewHandler( @@ -22,12 +24,14 @@ func NewHandler( diagnostic *interview.Service, memory *learnermemory.Service, progress *progression.Service, + ontology *ontology.Service, ) Handler { return Handler{ cfg: cfg, diagnostic: diagnostic, memory: memory, progress: progress, + ontology: ontology, } } @@ -40,6 +44,8 @@ func (h Handler) Routes() http.Handler { mux.HandleFunc("GET /api/v1/learners/{userID}/memory", h.getLearnerMemory) mux.HandleFunc("GET /api/v1/learners/{userID}/readiness-map", h.getReadinessMap) mux.HandleFunc("GET /api/v1/learners/{userID}/next-challenge", h.getNextChallenge) + mux.HandleFunc("POST /api/v1/materials", h.ingestMaterial) + mux.HandleFunc("GET /api/v1/ontology", h.getOntology) return mux } diff --git a/internal/httpapi/handler_test.go b/internal/httpapi/handler_test.go index 48f143e..6043442 100644 --- a/internal/httpapi/handler_test.go +++ b/internal/httpapi/handler_test.go @@ -9,6 +9,7 @@ import ( "tutor/internal/config" "tutor/internal/interview" "tutor/internal/learnermemory" + "tutor/internal/ontology" "tutor/internal/progression" "tutor/internal/workflows" ) @@ -21,7 +22,8 @@ func TestHealth(t *testing.T) { memory := learnermemory.NewService(learnermemory.NewMemoryStore()) service := interview.NewService(interview.NewMemoryStore(), workflows.NewStubRunner(), memory) progress := progression.NewService(memory) - handler := NewHandler(cfg, service, memory, progress) + onto := ontology.NewService(ontology.NewMemoryStore()) + handler := NewHandler(cfg, service, memory, progress, onto) req := httptest.NewRequest(http.MethodGet, "/healthz", nil) rec := httptest.NewRecorder() diff --git a/internal/httpapi/ontology.go b/internal/httpapi/ontology.go new file mode 100644 index 0000000..809d526 --- /dev/null +++ b/internal/httpapi/ontology.go @@ -0,0 +1,48 @@ +package httpapi + +import ( + "encoding/json" + "net/http" + + "tutor/internal/ontology" +) + +type ingestMaterialRequest struct { + Title string `json:"title"` + SourceType string `json:"source_type"` + Body string `json:"body"` +} + +func (h Handler) ingestMaterial(w http.ResponseWriter, r *http.Request) { + if h.ontology == nil { + writeError(w, http.StatusNotFound, "ontology not configured") + return + } + + var req ingestMaterialRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeError(w, http.StatusBadRequest, "invalid JSON body") + return + } + + result, err := h.ontology.Ingest(ontology.IngestInput{ + Title: req.Title, + SourceType: req.SourceType, + Body: req.Body, + }) + if err != nil { + writeError(w, http.StatusBadRequest, err.Error()) + return + } + + writeJSON(w, http.StatusCreated, result) +} + +func (h Handler) getOntology(w http.ResponseWriter, _ *http.Request) { + if h.ontology == nil { + writeError(w, http.StatusNotFound, "ontology not configured") + return + } + + writeJSON(w, http.StatusOK, h.ontology.Snapshot()) +} diff --git a/internal/httpapi/ontology_test.go b/internal/httpapi/ontology_test.go new file mode 100644 index 0000000..193354e --- /dev/null +++ b/internal/httpapi/ontology_test.go @@ -0,0 +1,54 @@ +package httpapi + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "tutor/internal/config" + "tutor/internal/interview" + "tutor/internal/learnermemory" + "tutor/internal/ontology" + "tutor/internal/progression" + "tutor/internal/workflows" +) + +func TestOntologyHTTPFlow(t *testing.T) { + memory := learnermemory.NewService(learnermemory.NewMemoryStore()) + service := interview.NewService(interview.NewMemoryStore(), workflows.NewStubRunner(), memory) + progress := progression.NewService(memory) + onto := ontology.NewService(ontology.NewMemoryStore()) + handler := NewHandler(config.Config{Environment: "test"}, service, memory, progress, onto) + routes := handler.Routes() + + body := bytes.NewBufferString(`{ + "title":"Backend interview notes", + "source_type":"markdown", + "body":"Idempotent API retries need transactions. Cache invalidation uses TTL tradeoffs." + }`) + req := httptest.NewRequest(http.MethodPost, "/api/v1/materials", body) + rec := httptest.NewRecorder() + routes.ServeHTTP(rec, req) + + if rec.Code != http.StatusCreated { + t.Fatalf("ingest status = %d, body = %s", rec.Code, rec.Body.String()) + } + + var result ontology.IngestResult + if err := json.NewDecoder(rec.Body).Decode(&result); err != nil { + t.Fatalf("decode ingest response: %v", err) + } + if len(result.Snapshot.Concepts) == 0 { + t.Fatal("expected ontology concepts") + } + + getReq := httptest.NewRequest(http.MethodGet, "/api/v1/ontology", nil) + getRec := httptest.NewRecorder() + routes.ServeHTTP(getRec, getReq) + + if getRec.Code != http.StatusOK { + t.Fatalf("ontology status = %d, body = %s", getRec.Code, getRec.Body.String()) + } +} diff --git a/internal/ontology/catalog.go b/internal/ontology/catalog.go new file mode 100644 index 0000000..4001b47 --- /dev/null +++ b/internal/ontology/catalog.go @@ -0,0 +1,38 @@ +package ontology + +import "tutor/internal/workflows" + +var knownConcepts = []knownConcept{ + { + Ref: workflows.ConceptRef{ID: "http-idempotency", Label: "HTTP idempotency", Track: "backend-developer"}, + Keywords: []string{"idempotent", "idempotency", "retry", "retries"}, + }, + { + Ref: workflows.ConceptRef{ID: "database-indexes", Label: "Database indexes", Track: "backend-developer"}, + Keywords: []string{"index", "indexes", "database index", "query plan"}, + }, + { + Ref: workflows.ConceptRef{ID: "cache-invalidation", Label: "Cache invalidation", Track: "backend-developer"}, + Keywords: []string{"cache", "invalidation", "ttl"}, + }, + { + Ref: workflows.ConceptRef{ID: "transactions", Label: "Transactions", Track: "backend-developer"}, + Keywords: []string{"transaction", "transactions", "atomic", "rollback"}, + }, +} + +var prerequisiteRules = []prerequisiteRule{ + {FromID: "http-idempotency", ToID: "transactions"}, + {FromID: "transactions", ToID: "cache-invalidation"}, + {FromID: "database-indexes", ToID: "cache-invalidation"}, +} + +type knownConcept struct { + Ref workflows.ConceptRef + Keywords []string +} + +type prerequisiteRule struct { + FromID string + ToID string +} diff --git a/internal/ontology/service.go b/internal/ontology/service.go new file mode 100644 index 0000000..67c6557 --- /dev/null +++ b/internal/ontology/service.go @@ -0,0 +1,192 @@ +package ontology + +import ( + "errors" + "fmt" + "sort" + "strings" + "sync/atomic" + "time" + + "tutor/internal/workflows" +) + +type Service struct { + store Store + ids atomic.Uint64 +} + +func NewService(store Store) *Service { + return &Service{store: store} +} + +func (s *Service) Ingest(input IngestInput) (IngestResult, error) { + if strings.TrimSpace(input.Title) == "" { + return IngestResult{}, errors.New("title is required") + } + if strings.TrimSpace(input.Body) == "" { + return IngestResult{}, errors.New("body is required") + } + + now := time.Now().UTC() + material := Material{ + ID: s.nextID("material"), + Title: input.Title, + SourceType: sourceTypeOrDefault(input.SourceType), + Body: input.Body, + CreatedAt: now, + } + + concepts := s.extractConcepts(material, now) + edges := s.extractEdges(concepts, now) + gaps := s.detectGaps(concepts, edges, now) + if err := s.store.Save(material, concepts, edges, gaps); err != nil { + return IngestResult{}, err + } + + return IngestResult{ + Material: material, + Snapshot: s.store.Snapshot(), + }, nil +} + +func (s *Service) Snapshot() Snapshot { + return s.store.Snapshot() +} + +func (s *Service) extractConcepts(material Material, now time.Time) []ConceptCandidate { + body := strings.ToLower(material.Body) + concepts := []ConceptCandidate{} + for _, known := range knownConcepts { + quote, ok := firstKeywordQuote(body, material.Body, known.Keywords) + if !ok { + continue + } + concepts = append(concepts, ConceptCandidate{ + ID: s.nextID("concept"), + Concept: known.Ref, + Summary: "Source material mentions " + known.Ref.Label + ".", + Evidence: []workflows.EvidenceRef{{ + Kind: workflows.EvidenceSource, + ID: material.ID, + Quote: quote, + Confidence: 0.72, + }}, + ReviewState: ReviewCandidate, + CreatedAt: now, + }) + } + sort.Slice(concepts, func(i, j int) bool { + return concepts[i].Concept.ID < concepts[j].Concept.ID + }) + return concepts +} + +func (s *Service) extractEdges(concepts []ConceptCandidate, now time.Time) []EdgeCandidate { + byID := make(map[string]ConceptCandidate, len(concepts)) + for _, concept := range concepts { + byID[concept.Concept.ID] = concept + } + + edges := []EdgeCandidate{} + for _, rule := range prerequisiteRules { + from, fromOK := byID[rule.FromID] + to, toOK := byID[rule.ToID] + if !fromOK || !toOK { + continue + } + edges = append(edges, EdgeCandidate{ + ID: s.nextID("edge"), + From: from.Concept, + To: to.Concept, + Kind: EdgePrerequisite, + Evidence: append([]workflows.EvidenceRef(nil), from.Evidence...), + ReviewState: ReviewCandidate, + CreatedAt: now, + }) + } + return edges +} + +func (s *Service) detectGaps( + concepts []ConceptCandidate, + edges []EdgeCandidate, + now time.Time, +) []Gap { + gaps := []Gap{} + byID := make(map[string]ConceptCandidate, len(concepts)) + for _, concept := range concepts { + byID[concept.Concept.ID] = concept + if len(concept.Evidence) == 1 && len(strings.Fields(concept.Evidence[0].Quote)) < 6 { + gaps = append(gaps, Gap{ + ID: s.nextID("gap"), + Concept: concept.Concept, + GapType: GapWeakEvidence, + Reason: "Concept is mentioned, but source support is thin.", + SupportingEvidence: append([]workflows.EvidenceRef(nil), concept.Evidence...), + ProposedAction: ActionRequestSource, + ReviewState: ReviewCandidate, + CreatedAt: now, + }) + } + } + + for _, rule := range prerequisiteRules { + to, toOK := byID[rule.ToID] + if !toOK { + continue + } + if _, fromOK := byID[rule.FromID]; fromOK { + continue + } + gaps = append(gaps, Gap{ + ID: s.nextID("gap"), + Concept: to.Concept, + GapType: GapMissingPrerequisite, + Reason: "Prerequisite concept " + rule.FromID + " is missing from the material.", + SupportingEvidence: append([]workflows.EvidenceRef(nil), to.Evidence...), + ProposedAction: ActionGenerateCandidate, + ReviewState: ReviewCandidate, + CreatedAt: now, + }) + } + + if len(edges) == 0 && len(concepts) > 1 { + first := concepts[0] + gaps = append(gaps, Gap{ + ID: s.nextID("gap"), + Concept: first.Concept, + GapType: GapMissingPrerequisite, + Reason: "Concept relationship is inferred as incomplete and needs review.", + SupportingEvidence: append([]workflows.EvidenceRef(nil), first.Evidence...), + ProposedAction: ActionHumanReview, + ReviewState: ReviewCandidate, + CreatedAt: now, + }) + } + return gaps +} + +func firstKeywordQuote(lowerBody string, originalBody string, keywords []string) (string, bool) { + for _, keyword := range keywords { + index := strings.Index(lowerBody, strings.ToLower(keyword)) + if index < 0 { + continue + } + start := max(0, index-40) + end := min(len(originalBody), index+len(keyword)+80) + return strings.TrimSpace(originalBody[start:end]), true + } + return "", false +} + +func sourceTypeOrDefault(sourceType string) string { + if strings.TrimSpace(sourceType) == "" { + return "text" + } + return sourceType +} + +func (s *Service) nextID(prefix string) string { + return fmt.Sprintf("%s-%d", prefix, s.ids.Add(1)) +} diff --git a/internal/ontology/service_test.go b/internal/ontology/service_test.go new file mode 100644 index 0000000..037e5d5 --- /dev/null +++ b/internal/ontology/service_test.go @@ -0,0 +1,53 @@ +package ontology + +import "testing" + +func TestIngestCreatesSourceBackedCandidates(t *testing.T) { + service := NewService(NewMemoryStore()) + + result, err := service.Ingest(IngestInput{ + Title: "Backend interview notes", + SourceType: "markdown", + Body: "Idempotent API retries need transactions. Cache invalidation uses TTL tradeoffs.", + }) + if err != nil { + t.Fatalf("Ingest error: %v", err) + } + if result.Material.ID == "" { + t.Fatal("expected material id") + } + if len(result.Snapshot.Concepts) == 0 { + t.Fatal("expected concept candidates") + } + for _, concept := range result.Snapshot.Concepts { + if concept.ReviewState != ReviewCandidate { + t.Fatalf("review state = %q", concept.ReviewState) + } + if len(concept.Evidence) == 0 { + t.Fatal("expected concept evidence") + } + } + if len(result.Snapshot.Edges) == 0 { + t.Fatal("expected prerequisite edge candidates") + } +} + +func TestIngestMarksGapsAsCandidates(t *testing.T) { + service := NewService(NewMemoryStore()) + + result, err := service.Ingest(IngestInput{ + Title: "Cache note", + Body: "Cache invalidation is hard.", + }) + if err != nil { + t.Fatalf("Ingest error: %v", err) + } + if len(result.Snapshot.Gaps) == 0 { + t.Fatal("expected gaps") + } + for _, gap := range result.Snapshot.Gaps { + if gap.ReviewState != ReviewCandidate { + t.Fatalf("gap review state = %q", gap.ReviewState) + } + } +} diff --git a/internal/ontology/store.go b/internal/ontology/store.go new file mode 100644 index 0000000..a65ee28 --- /dev/null +++ b/internal/ontology/store.go @@ -0,0 +1,87 @@ +package ontology + +import "sync" + +import "tutor/internal/workflows" + +type Store interface { + Save(Material, []ConceptCandidate, []EdgeCandidate, []Gap) error + Snapshot() Snapshot +} + +type MemoryStore struct { + mu sync.RWMutex + materials []Material + concepts []ConceptCandidate + edges []EdgeCandidate + gaps []Gap +} + +func NewMemoryStore() *MemoryStore { + return &MemoryStore{} +} + +func (s *MemoryStore) Save( + material Material, + concepts []ConceptCandidate, + edges []EdgeCandidate, + gaps []Gap, +) error { + s.mu.Lock() + defer s.mu.Unlock() + + s.materials = append(s.materials, cloneMaterial(material)) + s.concepts = append(s.concepts, cloneConcepts(concepts)...) + s.edges = append(s.edges, cloneEdges(edges)...) + s.gaps = append(s.gaps, cloneGaps(gaps)...) + return nil +} + +func (s *MemoryStore) Snapshot() Snapshot { + s.mu.RLock() + defer s.mu.RUnlock() + + return Snapshot{ + Materials: cloneMaterials(s.materials), + Concepts: cloneConcepts(s.concepts), + Edges: cloneEdges(s.edges), + Gaps: cloneGaps(s.gaps), + } +} + +func cloneMaterial(material Material) Material { + return material +} + +func cloneMaterials(items []Material) []Material { + cloned := make([]Material, len(items)) + copy(cloned, items) + return cloned +} + +func cloneConcepts(items []ConceptCandidate) []ConceptCandidate { + cloned := make([]ConceptCandidate, len(items)) + for i, item := range items { + cloned[i] = item + cloned[i].Evidence = append([]workflows.EvidenceRef(nil), item.Evidence...) + } + return cloned +} + +func cloneEdges(items []EdgeCandidate) []EdgeCandidate { + cloned := make([]EdgeCandidate, len(items)) + for i, item := range items { + cloned[i] = item + cloned[i].Evidence = append([]workflows.EvidenceRef(nil), item.Evidence...) + } + return cloned +} + +func cloneGaps(items []Gap) []Gap { + cloned := make([]Gap, len(items)) + for i, item := range items { + cloned[i] = item + cloned[i].SupportingEvidence = append([]workflows.EvidenceRef(nil), item.SupportingEvidence...) + } + return cloned +} diff --git a/internal/ontology/types.go b/internal/ontology/types.go new file mode 100644 index 0000000..89dabce --- /dev/null +++ b/internal/ontology/types.go @@ -0,0 +1,91 @@ +package ontology + +import ( + "time" + + "tutor/internal/workflows" +) + +type ReviewState string + +const ( + ReviewCandidate ReviewState = "candidate" + ReviewReviewed ReviewState = "reviewed" +) + +type Material struct { + ID string `json:"id"` + Title string `json:"title"` + SourceType string `json:"source_type"` + Body string `json:"body,omitempty"` + CreatedAt time.Time `json:"created_at"` +} + +type ConceptCandidate struct { + ID string `json:"id"` + Concept workflows.ConceptRef `json:"concept"` + Summary string `json:"summary"` + Evidence []workflows.EvidenceRef `json:"evidence"` + ReviewState ReviewState `json:"review_state"` + CreatedAt time.Time `json:"created_at"` +} + +type EdgeCandidate struct { + ID string `json:"id"` + From workflows.ConceptRef `json:"from"` + To workflows.ConceptRef `json:"to"` + Kind EdgeKind `json:"kind"` + Evidence []workflows.EvidenceRef `json:"evidence"` + ReviewState ReviewState `json:"review_state"` + CreatedAt time.Time `json:"created_at"` +} + +type EdgeKind string + +const ( + EdgePrerequisite EdgeKind = "prerequisite" +) + +type Gap struct { + ID string `json:"id"` + Concept workflows.ConceptRef `json:"concept"` + GapType GapType `json:"gap_type"` + Reason string `json:"reason"` + SupportingEvidence []workflows.EvidenceRef `json:"supporting_evidence"` + ProposedAction ProposedAction `json:"proposed_action"` + ReviewState ReviewState `json:"review_state"` + CreatedAt time.Time `json:"created_at"` +} + +type GapType string + +const ( + GapMissingPrerequisite GapType = "missing_prerequisite" + GapWeakEvidence GapType = "weak_evidence" +) + +type ProposedAction string + +const ( + ActionGenerateCandidate ProposedAction = "generate_candidate" + ActionRequestSource ProposedAction = "request_source" + ActionHumanReview ProposedAction = "human_review" +) + +type IngestInput struct { + Title string + SourceType string + Body string +} + +type IngestResult struct { + Material Material `json:"material"` + Snapshot Snapshot `json:"snapshot"` +} + +type Snapshot struct { + Materials []Material `json:"materials"` + Concepts []ConceptCandidate `json:"concepts"` + Edges []EdgeCandidate `json:"edges"` + Gaps []Gap `json:"gaps"` +} diff --git a/openspec/changes/bootstrap-job-tutor-platform/tasks.md b/openspec/changes/bootstrap-job-tutor-platform/tasks.md index a5d7ee6..0ea6edc 100644 --- a/openspec/changes/bootstrap-job-tutor-platform/tasks.md +++ b/openspec/changes/bootstrap-job-tutor-platform/tasks.md @@ -15,3 +15,4 @@ - [x] 11. Validate the OpenSpec change. - [x] 12. Implement evidence-backed learner memory ingestion and readback. - [x] 13. Implement evidence-backed readiness map and next challenge APIs. +- [x] 14. Implement source-backed ontology material ingestion.