diff --git a/.env b/.env index a7bf8c7..a087fff 100644 --- a/.env +++ b/.env @@ -6,3 +6,8 @@ TUTOR_MODEL_KEY=deepseek-v4-flash TUTOR_IMAGE_MODEL_KEY=gpt-image-v2 THIRDONE_BIN=thirdone TUTOR_PUBLIC_URL=https://tutor.uljisoft.com +# third-one endpoint (no API key needed — auth handled by third-one): +TUTOR_LLM_ENDPOINT=http://localhost:11434/v1 +# For direct API access (e.g. OpenAI, DeepSeek), set endpoint + key: +# TUTOR_LLM_ENDPOINT=https://api.deepseek.com +# TUTOR_LLM_API_KEY=sk-your-key-here diff --git a/internal/app/server.go b/internal/app/server.go index 5320d72..2efe2c7 100644 --- a/internal/app/server.go +++ b/internal/app/server.go @@ -11,6 +11,7 @@ import ( "tutor/internal/httpapi" "tutor/internal/interview" "tutor/internal/learnermemory" + "tutor/internal/llm" "tutor/internal/ontology" "tutor/internal/progression" "tutor/internal/teachingassets" @@ -18,7 +19,15 @@ import ( ) func NewServer(cfg config.Config) *http.Server { - runner := workflows.NewStubRunner() + var runner workflows.Runner + if cfg.HasLLM() { + client := llm.NewClient(cfg.LLMEndpoint, cfg.LLMAPIKey, cfg.ModelKey) + runner = workflows.NewLLMRunner(client) + log.Printf("using llm runner: endpoint=%s model=%s", cfg.LLMEndpoint, cfg.ModelKey) + } else { + runner = workflows.NewStubRunner() + log.Println("using stub runner (TUTOR_LLM_ENDPOINT not set)") + } var interviewStore interview.Store var memoryStore learnermemory.Store diff --git a/internal/config/config.go b/internal/config/config.go index b9f8f2d..21f117f 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -19,6 +19,8 @@ type Config struct { ModelKey string ImageModelKey string ThirdOneBin string + LLMAPIKey string + LLMEndpoint string GoogleClientID string JWTSecret string } @@ -32,11 +34,17 @@ func LoadFromEnv() Config { ModelKey: envOrDefault("TUTOR_MODEL_KEY", defaultModelKey), ImageModelKey: envOrDefault("TUTOR_IMAGE_MODEL_KEY", defaultImageModelKey), ThirdOneBin: envOrDefault("THIRDONE_BIN", 
defaultThirdOneBin), + LLMAPIKey: envOrDefault("TUTOR_LLM_API_KEY", ""), + LLMEndpoint: envOrDefault("TUTOR_LLM_ENDPOINT", ""), GoogleClientID: envOrDefault("GOOGLE_CLIENT_ID", ""), JWTSecret: envOrDefault("JWT_SECRET", ""), } } +func (c Config) HasLLM() bool { + return c.LLMEndpoint != "" +} + func envOrDefault(key string, fallback string) string { value := os.Getenv(key) if value == "" { diff --git a/internal/interview/catalog.go b/internal/interview/catalog.go index 055df5f..fca648f 100644 --- a/internal/interview/catalog.go +++ b/internal/interview/catalog.go @@ -4,13 +4,13 @@ import "tutor/internal/workflows" var questionPrompts = map[string]map[string]string{ "ko": { - "backend-http-idempotency": "HTTP 메서드가 멱등성을 가지려면 어떤 조건이 필요하며, 재시도 시 왜 중요한가요?", - "backend-db-index-tradeoff": "데이터베이스 인덱스를 추가하면 API가 어떻게 개선되며, 어떤 트레이드오프가 발생할 수 있나요?", + "backend-http-idempotency": "HTTP 메서드가 멱등성을 가지려면 어떤 조건이 필요하며, 재시도 시 왜 중요한가요?", + "backend-db-index-tradeoff": "데이터베이스 인덱스를 추가하면 API가 어떻게 개선되며, 어떤 트레이드오프가 발생할 수 있나요?", "backend-cache-invalidation": "API 응답을 캐싱할지 어떻게 결정하며, 오래된 데이터는 어떻게 처리하나요?", }, "en": { - "backend-http-idempotency": "What makes an HTTP method idempotent, and why does that matter for retries?", - "backend-db-index-tradeoff": "When would adding a database index improve an API, and what tradeoffs can it introduce?", + "backend-http-idempotency": "What makes an HTTP method idempotent, and why does that matter for retries?", + "backend-db-index-tradeoff": "When would adding a database index improve an API, and what tradeoffs can it introduce?", "backend-cache-invalidation": "How would you decide whether to cache an API response, and how would you handle stale data?", }, } diff --git a/internal/llm/client.go b/internal/llm/client.go new file mode 100644 index 0000000..10b96ab --- /dev/null +++ b/internal/llm/client.go @@ -0,0 +1,119 @@ +package llm + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "time" +) + +type Client 
struct { + endpoint string + apiKey string + model string + httpClient *http.Client +} + +func NewClient(endpoint, apiKey, model string) *Client { + return &Client{ + endpoint: strings.TrimRight(endpoint, "/"), + apiKey: apiKey, + model: model, + httpClient: &http.Client{Timeout: 60 * time.Second}, + } +} + +type ChatMessage struct { + Role string `json:"role"` + Content string `json:"content"` +} + +type chatRequest struct { + Model string `json:"model"` + Messages []ChatMessage `json:"messages"` + ResponseFormat *responseFmt `json:"response_format,omitempty"` + Temperature float64 `json:"temperature,omitempty"` +} + +type responseFmt struct { + Type string `json:"type"` +} + +type chatResponse struct { + Choices []struct { + Message ChatMessage `json:"message"` + } `json:"choices"` + Error *struct { + Message string `json:"message"` + Type string `json:"type"` + } `json:"error,omitempty"` +} + +func (c *Client) Chat(ctx context.Context, systemPrompt, userPrompt string) (string, error) { + return c.ChatJSON(ctx, systemPrompt, userPrompt, false) +} + +func (c *Client) ChatJSON(ctx context.Context, systemPrompt, userPrompt string, jsonMode bool) (string, error) { + messages := []ChatMessage{ + {Role: "system", Content: systemPrompt}, + {Role: "user", Content: userPrompt}, + } + + req := chatRequest{ + Model: c.model, + Messages: messages, + } + + if jsonMode { + req.ResponseFormat = &responseFmt{Type: "json_object"} + } + + body, err := json.Marshal(req) + if err != nil { + return "", fmt.Errorf("marshal request: %w", err) + } + + url := c.endpoint + "/v1/chat/completions" + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) + if err != nil { + return "", fmt.Errorf("create request: %w", err) + } + httpReq.Header.Set("Content-Type", "application/json") + if c.apiKey != "" { + httpReq.Header.Set("Authorization", "Bearer "+c.apiKey) + } + + resp, err := c.httpClient.Do(httpReq) + if err != nil { + return "", fmt.Errorf("http 
do: %w", err) + } + defer resp.Body.Close() + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("read response: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("llm api error %d: %s", resp.StatusCode, string(respBody)) + } + + var chatResp chatResponse + if err := json.Unmarshal(respBody, &chatResp); err != nil { + return "", fmt.Errorf("unmarshal response: %w", err) + } + + if chatResp.Error != nil { + return "", fmt.Errorf("llm error: %s", chatResp.Error.Message) + } + + if len(chatResp.Choices) == 0 { + return "", fmt.Errorf("no choices in response") + } + + return chatResp.Choices[0].Message.Content, nil +} diff --git a/internal/workflows/llm_runner.go b/internal/workflows/llm_runner.go new file mode 100644 index 0000000..ba351d4 --- /dev/null +++ b/internal/workflows/llm_runner.go @@ -0,0 +1,133 @@ +package workflows + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + + "tutor/internal/llm" +) + +type LLMRunner struct { + client *llm.Client +} + +func NewLLMRunner(client *llm.Client) *LLMRunner { + return &LLMRunner{client: client} +} + +func (r *LLMRunner) DiagnoseJobSeeker(ctx context.Context, input DiagnosticInput) (DiagnosticResult, error) { + raw, err := r.client.ChatJSON(ctx, diagnoseSystemPrompt(), diagnoseUserPrompt(input), true) + if err != nil { + return DiagnosticResult{}, fmt.Errorf("diagnose_job_seeker: %w", err) + } + + var result DiagnosticResult + if err := extractJSON(raw, &result); err != nil { + return DiagnosticResult{}, fmt.Errorf("diagnose_job_seeker parse: %w", err) + } + return result, nil +} + +func (r *LLMRunner) GradeInterviewAnswer(ctx context.Context, input GradeAnswerInput) (GradedAnswer, error) { + raw, err := r.client.ChatJSON(ctx, gradeAnswerSystemPrompt(), gradeAnswerUserPrompt(input), true) + if err != nil { + return GradedAnswer{}, fmt.Errorf("grade_interview_answer: %w", err) + } + + var result GradedAnswer + if err := extractJSON(raw, 
&result); err != nil { + return GradedAnswer{}, fmt.Errorf("grade_interview_answer parse: %w", err) + } + + result.UserID = input.UserID + result.AnswerID = input.AnswerID + result.QuestionID = input.QuestionID + return result, nil +} + +func (r *LLMRunner) ExtractLearningMemory(ctx context.Context, grade GradedAnswer) (MemoryUpdateCandidate, error) { + raw, err := r.client.ChatJSON(ctx, extractMemorySystemPrompt(), extractMemoryUserPrompt(grade), true) + if err != nil { + return MemoryUpdateCandidate{}, fmt.Errorf("extract_learning_memory: %w", err) + } + + candidate := MemoryUpdateCandidate{ + UserID: grade.UserID, + SourceAnswerID: grade.AnswerID, + } + if err := extractJSON(raw, &candidate); err != nil { + return MemoryUpdateCandidate{}, fmt.Errorf("extract_learning_memory parse: %w", err) + } + return candidate, nil +} + +func (r *LLMRunner) SelectNextChallenge(ctx context.Context, input NextChallengeInput) (NextChallenge, error) { + raw, err := r.client.ChatJSON(ctx, nextChallengeSystemPrompt(), nextChallengeUserPrompt("", ""), true) + if err != nil { + return NextChallenge{}, fmt.Errorf("select_next_challenge: %w", err) + } + + var next NextChallenge + if err := extractJSON(raw, &next); err != nil { + return NextChallenge{}, fmt.Errorf("select_next_challenge parse: %w", err) + } + next.UserID = input.UserID + next.Track = input.Track + return next, nil +} + +func (r *LLMRunner) UpdateReadinessMap(ctx context.Context, input ReadinessUpdateInput) (ReadinessUpdate, error) { + raw, err := r.client.ChatJSON(ctx, readinessUpdateSystemPrompt(), readinessUpdateUserPrompt(input), true) + if err != nil { + return ReadinessUpdate{}, fmt.Errorf("update_readiness_map: %w", err) + } + + var update ReadinessUpdate + if err := extractJSON(raw, &update); err != nil { + return ReadinessUpdate{}, fmt.Errorf("update_readiness_map parse: %w", err) + } + update.UserID = input.UserID + update.Track = input.Track + return update, nil +} + +func extractJSON(raw string, target any) 
error { + clean := strings.TrimSpace(raw) + if strings.HasPrefix(clean, "```") { + clean = stripCodeFences(clean) + } + if err := json.Unmarshal([]byte(clean), target); err != nil { + return fmt.Errorf("%w: %s", err, firstBytes(clean, 200)) + } + return nil +} + +var errCodeFence = errors.New("code fence") + +func stripCodeFences(input string) string { + lines := strings.Split(input, "\n") + start := 0 + end := len(lines) + for i, line := range lines { + trimmed := strings.TrimSpace(line) + if strings.HasPrefix(trimmed, "```") { + if start == 0 { + start = i + 1 + continue + } + end = i + break + } + } + return strings.Join(lines[start:end], "\n") +} + +func firstBytes(input string, limit int) string { + if len(input) > limit { + return input[:limit] + "..." + } + return input +} diff --git a/internal/workflows/prompts.go b/internal/workflows/prompts.go new file mode 100644 index 0000000..598fd5b --- /dev/null +++ b/internal/workflows/prompts.go @@ -0,0 +1,180 @@ +package workflows + +import ( + "encoding/json" + "fmt" +) + +func gradeAnswerSystemPrompt() string { + return fmt.Sprintf(`You are an expert technical interviewer grading a candidate's answer. Output valid JSON matching this schema: + +{ + "user_id": "string", + "answer_id": "string", + "question_id": "string", + "concepts": [{"id": "string", "label": "string", "track": "string"}], + "scores": { + "correctness": 0, + "depth": 0, + "communication": 0, + "production_judgment": 0 + }, + "overall": "miss|partial|solid|strong", + "strengths": ["string"], + "gaps": ["string"], + "evidence": [{"kind": "answer|grading|source|session|asset", "id": "string", "quote": "string", "confidence": 0.0}], + "misconception_candidates": [{"label": "string", "description": "string", "evidence": [], "confidence": 0.0}], + "follow_up": {"needed": true, "question": "string", "purpose": "clarify|repair|stretch|pressure_test"} +} + +Scoring rules: +- scores: 1-4 integer scale (1=inadequate, 2=surface, 3=solid, 4=strong). 
+- correctness: factual accuracy +- depth: covers tradeoffs, edge cases, production context +- communication: clarity, structure, conciseness +- production_judgment: practical experience signals in the answer +- overall: "miss" if mostly wrong, "partial" if some correct parts, "solid" if mostly correct with depth, "strong" if comprehensive and production-ready. +- evidence: always include at least one EvidenceRef with kind "grading", quote from the answer, and confidence 0.5-1.0. +- follow_up.needed: true unless the answer is "strong" and complete. Set purpose to "clarify" for unclear answers, "repair" for misconceptions, "stretch" to test depth, "pressure_test" for strong answers. +- misconception_candidates: list any detected wrong mental models. + +Respond with ONLY the JSON object, no markdown fences.`) +} + +func gradeAnswerUserPrompt(input GradeAnswerInput) string { + payload, _ := json.Marshal(input) + return fmt.Sprintf("Grade this interview answer: %s", string(payload)) +} + +func extractMemorySystemPrompt() string { + return fmt.Sprintf(`You are a learner memory extraction agent. From a graded interview answer, produce memory updates. Output valid JSON matching this schema: + +{ + "updates": [ + { + "kind": "concept_mastery|misconception|intervention|review_schedule", + "concept": {"id": "string", "label": "string", "track": "string"}, + "proposed_state": "unknown|fragile|improving|interview_ready|strong_signal", + "summary": "string", + "evidence": [{"kind": "grading", "id": "string", "quote": "string", "confidence": 0.0}], + "confidence": 0.0, + "durability": "tentative|confirmed" + } + ] +} + +Rules: +- For every concept in the grading, create a concept_mastery update with proposed_state derived from overall grade: "miss"→fragile, "partial"→improving, "solid"→interview_ready, "strong"→strong_signal. 
+- If follow_up.needed is true and overall is "miss" or "partial", add a misconception update (kind="misconception") for each concept with proposed_state "fragile". +- If follow_up.needed is true, add an intervention update (kind="intervention") for each concept with the follow_up question as summary. +- If the answer shows gaps, add a review_schedule update (kind="review_schedule") for each concept with a review reason. +- Confidence: 0.5-0.7 for tentative, 0.8-1.0 for confirmed. Durability: "confirmed" only for "strong" overall. + +Respond with ONLY the JSON object, no markdown fences.`) +} + +func extractMemoryUserPrompt(grade GradedAnswer) string { + payload, _ := json.Marshal(grade) + return fmt.Sprintf("Extract memory updates from this graded answer: %s", string(payload)) +} + +func nextChallengeSystemPrompt() string { + return fmt.Sprintf(`You are a challenge selection agent. Given learner memory state, select the next challenge. Output valid JSON matching this schema: + +{ + "concept": {"id": "string", "label": "string", "track": "string"}, + "ladder_level": "define|tradeoffs|debug|design_constraints|interview_pressure", + "question": "string", + "rationale": "string", + "difficulty_action": "lower|hold|raise|recover", + "evidence": [{"kind": "grading", "id": "string", "quote": "string", "confidence": 0.0}] +} + +Rules: +- Pick the concept with the weakest readiness state. +- ladder_level: fragile→define, improving→tradeoffs, interview_ready→design_constraints, strong_signal→interview_pressure. +- difficulty_action: fragile→recover, improving→hold, interview_ready+→raise. +- Generate one concrete interview question for the selected concept at the appropriate ladder level. +- rationale: explain why this concept and level was chosen. +- evidence: reference the concept's existing evidence. 
+ +Respond with ONLY the JSON object, no markdown fences.`) +} + +func nextChallengeUserPrompt(masteryJSON, profileJSON string) string { + return fmt.Sprintf(`Learner mastery: %s + +Learner profile: %s + +Select the next challenge for this learner.`, masteryJSON, profileJSON) +} + +func diagnoseSystemPrompt() string { + return fmt.Sprintf(`You are a diagnostic interview agent. Given a job seeker's profile, produce an initial readiness assessment. Output valid JSON matching this schema: + +{ + "user_id": "string", + "track": "string", + "target_role": "string", + "stack": ["string"], + "initial_readiness": "unknown|fragile|improving|interview_ready|strong_signal", + "concept_findings": [ + { + "concept": {"id": "string", "label": "string", "track": "string"}, + "readiness": "unknown|fragile|improving|interview_ready|strong_signal", + "reason": "string", + "evidence": [] + } + ], + "recommended_next_concepts": [{"id": "string", "label": "string", "track": "string"}] +} + +Rules: +- initial_readiness: default to "unknown" unless you have strong signals from the profile. +- For each concept, estimate readiness based on the stack and target role. Default to "unknown" if no strong signal. +- recommended_next_concepts: pick up to 3 concepts to start with. +- evidence: always empty for initial diagnostic (no answers yet). + +Respond with ONLY the JSON object, no markdown fences.`) +} + +func diagnoseUserPrompt(input DiagnosticInput) string { + payload, _ := json.Marshal(input) + return fmt.Sprintf("Assess initial readiness for this job seeker: %s", string(payload)) +} + +func readinessUpdateSystemPrompt() string { + return fmt.Sprintf(`You are a readiness update agent. Given learner memory state, produce readiness deltas and unlocks. 
Output valid JSON matching this schema: + +{ + "concept_updates": [ + { + "concept": {"id": "string", "label": "string", "track": "string"}, + "previous": "unknown|fragile|improving|interview_ready|strong_signal", + "next": "unknown|fragile|improving|interview_ready|strong_signal", + "reason": "string", + "evidence": [{"kind": "grading", "id": "string", "quote": "string", "confidence": 0.0}] + } + ], + "unlocks": [ + { + "kind": "boss_question|review_card|portfolio_entry", + "label": "string", + "reason": "string" + } + ] +} + +Rules: +- For each concept, determine if the readiness state should change based on evidence quality and quantity. +- Unlock boss_question when 3+ concepts are at interview_ready or strong_signal. +- Unlock review_card when concepts have misconceptions that need revisiting. +- Unlock portfolio_entry when a concept reaches strong_signal. + +Respond with ONLY the JSON object, no markdown fences.`) +} + +func readinessUpdateUserPrompt(input ReadinessUpdateInput) string { + payload, _ := json.Marshal(input) + return fmt.Sprintf("Analyze readiness updates for: %s", string(payload)) +} diff --git a/openspec/changes/bootstrap-job-tutor-platform/specs/tutor-workflows/spec.md b/openspec/changes/bootstrap-job-tutor-platform/specs/tutor-workflows/spec.md index a7060a7..83883fc 100644 --- a/openspec/changes/bootstrap-job-tutor-platform/specs/tutor-workflows/spec.md +++ b/openspec/changes/bootstrap-job-tutor-platform/specs/tutor-workflows/spec.md @@ -56,3 +56,30 @@ boundary. - **WHEN** it invokes the workflow layer - **THEN** it calls a typed Go interface - **AND** does not mutate product state by parsing freeform shell output. + +### Requirement: LLM runner calls OpenAI-compatible API + +The system SHALL provide an LLM-based workflow runner that implements the +Runner interface by calling an OpenAI-compatible chat completions API when +TUTOR_LLM_ENDPOINT is configured (TUTOR_LLM_API_KEY is optional). 
+ +#### Scenario: grader uses LLM when configured + +- **GIVEN** TUTOR_LLM_ENDPOINT is set (TUTOR_LLM_API_KEY is optional) +- **WHEN** the server starts +- **THEN** an LLMRunner wraps the configured model +- **AND** GradeInterviewAnswer calls the LLM with a structured grading prompt +- **AND** the response is parsed into the typed GradedAnswer contract. + +#### Scenario: memory extraction uses LLM when configured + +- **GIVEN** an LLM runner is active +- **WHEN** ExtractLearningMemory is called with a graded answer +- **THEN** the LLM produces MemoryUpdateCandidate with concept mastery, misconception, intervention, and review schedule updates. + +#### Scenario: falls back to stub when unconfigured + +- **GIVEN** TUTOR_LLM_ENDPOINT is empty +- **WHEN** the server starts +- **THEN** a StubRunner is used +- **AND** grading and memory extraction produce deterministic stub output.