"""Unit tests for the @human_feedback decorator.

This module tests the @human_feedback decorator's validation logic,
async support, attribute preservation, feedback execution and history,
outcome collapsing, HITL learning, and final-output preservation.
"""

from __future__ import annotations

import asyncio
from datetime import datetime
from typing import Any
from unittest.mock import MagicMock, patch

import pytest

from crewai.flow import Flow, human_feedback, listen, start
from crewai.flow.human_feedback import (
    HumanFeedbackConfig,
    HumanFeedbackResult,
)


class TestHumanFeedbackValidation:
    """Checks the argument validation performed when @human_feedback is applied."""

    def test_emit_requires_llm(self):
        """Combining emit with an explicit llm=None must raise ValueError."""
        with pytest.raises(ValueError) as raised:

            @human_feedback(
                message="Review this:",
                emit=["approve", "reject"],
                llm=None,  # deliberately unset
            )
            def test_method(self):
                return "output"

        error_text = str(raised.value)
        assert "llm is required" in error_text

    def test_default_outcome_requires_emit(self):
        """Supplying default_outcome while omitting emit must raise ValueError."""
        with pytest.raises(ValueError) as raised:

            # No emit argument is passed here on purpose.
            @human_feedback(
                message="Review this:",
                default_outcome="approve",
            )
            def test_method(self):
                return "output"

        error_text = str(raised.value)
        assert "requires emit" in error_text

    def test_default_outcome_must_be_in_emit(self):
        """A default_outcome that is not listed in emit must raise ValueError."""
        with pytest.raises(ValueError) as raised:

            @human_feedback(
                message="Review this:",
                emit=["approve", "reject"],
                llm="gpt-4o-mini",
                default_outcome="invalid_outcome",
            )
            def test_method(self):
                return "output"

        error_text = str(raised.value)
        assert "must be one of" in error_text

    def test_valid_configuration_with_routing(self):
        """A consistent routing configuration decorates cleanly and marks a router."""
        decorate = human_feedback(
            message="Review this:",
            emit=["approve", "reject"],
            llm="gpt-4o-mini",
            default_outcome="reject",
        )

        @decorate
        def test_method(self):
            return "output"

        # Reaching this point means decoration did not raise.
        assert hasattr(test_method, "__human_feedback_config__")
        assert test_method.__is_router__ is True
        assert test_method.__router_paths__ == ["approve", "reject"]

    def test_valid_configuration_without_routing(self):
        """A message-only configuration decorates cleanly and is not a router."""
        decorate = human_feedback(message="Review this:")

        @decorate
        def test_method(self):
            return "output"

        # Reaching this point means decoration did not raise.
        assert hasattr(test_method, "__human_feedback_config__")
        # The router flag may be absent or falsy; both are acceptable.
        assert not getattr(test_method, "__is_router__", False)


class TestHumanFeedbackConfig:
    """Checks construction of the HumanFeedbackConfig dataclass."""

    def test_config_creation(self):
        """Every value passed to HumanFeedbackConfig is readable back unchanged."""
        expected_fields = {
            "message": "Test message",
            "emit": ["a", "b"],
            "llm": "gpt-4",
            "default_outcome": "a",
            "metadata": {"key": "value"},
        }

        config = HumanFeedbackConfig(**expected_fields)

        # Compare field by field so a failure names the offending attribute.
        for field_name, expected_value in expected_fields.items():
            assert getattr(config, field_name) == expected_value


class TestHumanFeedbackResult:
    """Checks construction of the HumanFeedbackResult dataclass."""

    def test_result_creation(self):
        """Explicit fields round-trip; timestamp and metadata get default values."""
        created = HumanFeedbackResult(
            output={"title": "Test"},
            feedback="Looks good",
            outcome="approved",
            method_name="test_method",
        )

        # Fields supplied by the caller.
        observed = (
            created.output,
            created.feedback,
            created.outcome,
            created.method_name,
        )
        assert observed == ({"title": "Test"}, "Looks good", "approved", "test_method")

        # Fields left to their defaults.
        assert isinstance(created.timestamp, datetime)
        assert created.metadata == {}

    def test_result_with_metadata(self):
        """Caller-supplied metadata is stored on the result as given."""
        custom_metadata = {"channel": "slack", "user": "test_user"}

        created = HumanFeedbackResult(
            output="test",
            feedback="feedback",
            metadata=custom_metadata,
        )

        assert created.metadata == {"channel": "slack", "user": "test_user"}


class TestDecoratorAttributePreservation:
    """Checks that @human_feedback coexists with the other Flow decorators."""

    def test_preserves_start_method_attributes(self):
        """A method carrying both @start and @human_feedback is still a start method."""

        class TestFlow(Flow):
            @start()
            @human_feedback(message="Review:")
            def my_start_method(self):
                return "output"

        instance = TestFlow()
        registered = instance._methods.get("my_start_method")
        assert registered is not None
        # Either marker is accepted: the attribute on the method itself or,
        # failing that, the flow-level start-method registry.
        if not hasattr(registered, "__is_start_method__"):
            assert "my_start_method" in instance._start_methods

    def test_preserves_listen_method_attributes(self):
        """A method carrying both @listen and @human_feedback is still a listener."""

        class TestFlow(Flow):
            @start()
            def begin(self):
                return "start"

            @listen("begin")
            @human_feedback(message="Review:")
            def review(self):
                return "review output"

        instance = TestFlow()
        listeners = instance._listeners
        # Accept "review" as a key, or otherwise mentioned in any listener entry.
        if "review" not in listeners:
            assert any("review" in str(entry) for entry in listeners.values())

    def test_sets_router_attributes_when_emit_specified(self):
        """Passing emit marks the decorated function as a router with those paths."""
        # The decorator is applied on its own here, without @start around it.
        decorate = human_feedback(
            message="Review:",
            emit=["approved", "rejected"],
            llm="gpt-4o-mini",
        )

        @decorate
        def review_method(self):
            return "output"

        assert review_method.__is_router__ is True
        assert review_method.__router_paths__ == ["approved", "rejected"]


class TestAsyncSupport:
    """Tests for async method support.

    Coroutine detection uses ``inspect.iscoroutinefunction`` because
    ``asyncio.iscoroutinefunction`` is deprecated as of Python 3.14 and would
    emit a DeprecationWarning (fatal under warnings-as-errors test runs).
    """

    def test_async_method_detection(self):
        """Test that decorating an async method yields a coroutine function."""
        import inspect

        @human_feedback(message="Review:")
        async def async_method(self):
            return "async output"

        assert inspect.iscoroutinefunction(async_method)

    def test_sync_method_remains_sync(self):
        """Test that decorating a sync method does not yield a coroutine function."""
        import inspect

        @human_feedback(message="Review:")
        def sync_method(self):
            return "sync output"

        assert not inspect.iscoroutinefunction(sync_method)


class TestHumanFeedbackExecution:
    """Tests for actual human feedback execution.

    Each test patches ``_request_human_feedback`` on the flow instance so the
    feedback text is fixed. ``builtins.input`` and ``builtins.print`` are
    patched as well, so no real console I/O can take place during a test.
    """

    @patch("builtins.input", return_value="This looks great!")
    @patch("builtins.print")
    def test_basic_feedback_collection(self, mock_print, mock_input):
        """Test basic feedback collection without routing."""

        class TestFlow(Flow):
            @start()
            @human_feedback(message="Please review:")
            def generate(self):
                return "Generated content"

        flow = TestFlow()

        # The kickoff return value is not under test here, so it is not bound.
        with patch.object(flow, "_request_human_feedback", return_value="Great job!"):
            flow.kickoff()

        assert flow.last_human_feedback is not None
        assert flow.last_human_feedback.output == "Generated content"
        assert flow.last_human_feedback.feedback == "Great job!"

    @patch("builtins.input", return_value="")
    @patch("builtins.print")
    def test_empty_feedback_with_default_outcome(self, mock_print, mock_input):
        """Test empty feedback uses default_outcome for routing, but flow returns method output."""

        class TestFlow(Flow):
            @start()
            @human_feedback(
                message="Review:",
                emit=["approved", "needs_work"],
                llm="gpt-4o-mini",
                default_outcome="needs_work",
            )
            def review(self):
                return "Content"

        flow = TestFlow()

        with patch.object(flow, "_request_human_feedback", return_value=""):
            result = flow.kickoff()

        # Flow result is the method's return value, NOT the collapsed outcome
        assert result == "Content"
        assert flow.last_human_feedback is not None
        # But the outcome is still correctly set for routing purposes
        assert flow.last_human_feedback.outcome == "needs_work"

    @patch("builtins.input", return_value="Approved!")
    @patch("builtins.print")
    def test_feedback_collapsing(self, mock_print, mock_input):
        """Test that feedback is collapsed to an outcome for routing, but flow returns method output."""

        class TestFlow(Flow):
            @start()
            @human_feedback(
                message="Review:",
                emit=["approved", "rejected"],
                llm="gpt-4o-mini",
            )
            def review(self):
                return "Content"

        flow = TestFlow()

        # _collapse_to_outcome is stubbed so the chosen outcome is fixed to
        # "approved" regardless of the feedback text.
        with (
            patch.object(flow, "_request_human_feedback", return_value="Looks great, approved!"),
            patch.object(flow, "_collapse_to_outcome", return_value="approved"),
        ):
            result = flow.kickoff()

        # Flow result is the method's return value, NOT the collapsed outcome
        assert result == "Content"
        assert flow.last_human_feedback is not None
        # But the outcome is still correctly set for routing purposes
        assert flow.last_human_feedback.outcome == "approved"


class TestHumanFeedbackHistory:
    """Tests for human feedback history tracking.

    ``_request_human_feedback`` is patched on each flow instance so the
    feedback text is fixed; ``builtins.input`` and ``builtins.print`` are
    patched so no real console I/O can take place.
    """

    @patch("builtins.input", return_value="feedback")
    @patch("builtins.print")
    def test_history_accumulates(self, mock_print, mock_input):
        """Test that multiple feedbacks are stored in history, in execution order."""

        class TestFlow(Flow):
            @start()
            @human_feedback(message="Review step 1:")
            def step1(self):
                return "Step 1 output"

            @listen(step1)
            @human_feedback(message="Review step 2:")
            def step2(self, prev):
                return "Step 2 output"

        flow = TestFlow()

        with patch.object(flow, "_request_human_feedback", return_value="feedback"):
            flow.kickoff()

        # Both feedbacks should be in history
        assert len(flow.human_feedback_history) == 2
        assert flow.human_feedback_history[0].method_name == "step1"
        assert flow.human_feedback_history[1].method_name == "step2"

    @patch("builtins.input", return_value="")
    @patch("builtins.print")
    def test_human_feedback_property_returns_last(self, mock_print, mock_input):
        """Test that last_human_feedback is the most recent entry in the history."""

        class TestFlow(Flow):
            @start()
            @human_feedback(message="Review:")
            def generate(self):
                return "output"

        flow = TestFlow()

        with patch.object(flow, "_request_human_feedback", return_value="last feedback"):
            flow.kickoff()

        assert flow.last_human_feedback is not None
        assert flow.last_human_feedback.feedback == "last feedback"
        # Compare against the history tail; comparing the attribute with
        # itself would be a tautology that can never fail.
        assert flow.human_feedback_history[-1] is flow.last_human_feedback


class TestCollapseToOutcome:
    """Exercises Flow._collapse_to_outcome with crewai.llm.LLM patched out."""

    def test_exact_match(self):
        """An LLM reply equal to one of the outcomes is returned as that outcome."""
        flow = Flow()

        with patch("crewai.llm.LLM") as llm_cls:
            fake_llm = MagicMock()
            llm_cls.return_value = fake_llm
            fake_llm.call.return_value = "approved"

            chosen = flow._collapse_to_outcome(
                feedback="I approve this",
                outcomes=["approved", "rejected"],
                llm="gpt-4o-mini",
            )

        assert chosen == "approved"

    def test_partial_match(self):
        """An outcome embedded in a longer LLM reply is still picked out."""
        flow = Flow()

        with patch("crewai.llm.LLM") as llm_cls:
            fake_llm = MagicMock()
            llm_cls.return_value = fake_llm
            fake_llm.call.return_value = "The outcome is approved based on the feedback"

            chosen = flow._collapse_to_outcome(
                feedback="Looks good",
                outcomes=["approved", "rejected"],
                llm="gpt-4o-mini",
            )

        assert chosen == "approved"

    def test_fallback_to_first(self):
        """A reply that names no outcome yields the first outcome in the list."""
        flow = Flow()

        with patch("crewai.llm.LLM") as llm_cls:
            fake_llm = MagicMock()
            llm_cls.return_value = fake_llm
            fake_llm.call.return_value = "something completely different"

            chosen = flow._collapse_to_outcome(
                feedback="Unclear feedback",
                outcomes=["approved", "rejected"],
                llm="gpt-4o-mini",
            )

        # "approved" is expected only because it is listed first.
        assert chosen == "approved"

    def test_both_llm_calls_fail_returns_first_outcome(self):
        """If every LLM call raises, the first outcome in the list is returned."""
        flow = Flow()

        with patch("crewai.llm.LLM") as llm_cls:
            fake_llm = MagicMock()
            llm_cls.return_value = fake_llm
            # Every invocation of call() raises, standing in for a wrong
            # provider or an authentication failure.
            fake_llm.call.side_effect = RuntimeError("Model not found")

            chosen = flow._collapse_to_outcome(
                feedback="looks great, approve it",
                outcomes=["needs_changes", "approved"],
                llm="gemini-3-flash-preview",
            )

        # The safe fallback is the first entry, even though the feedback
        # text itself reads like an approval.
        assert chosen == "needs_changes"

    def test_structured_fails_but_simple_succeeds(self):
        """If the first LLM call raises and the second answers, that answer is used."""
        flow = Flow()

        with patch("crewai.llm.LLM") as llm_cls:
            fake_llm = MagicMock()
            llm_cls.return_value = fake_llm
            # call() #1 raises (the structured attempt), call() #2 returns
            # plain text (the simple attempt).
            fake_llm.call.side_effect = [
                RuntimeError("Function calling not supported"),
                "approved",
            ]

            chosen = flow._collapse_to_outcome(
                feedback="looks great",
                outcomes=["needs_changes", "approved"],
                llm="gpt-4o-mini",
            )

        assert chosen == "approved"


# -- HITL Learning tests --


class TestHumanFeedbackLearn:
    """Covers the HITL learning behaviour enabled by ``learn=True``.

    The tests swap ``flow.memory`` for a MagicMock, patch
    ``_request_human_feedback`` to supply the human's reply, and patch
    ``crewai.llm.LLM`` so no real model is contacted.
    """

    def test_learn_false_does_not_interact_with_memory(self):
        """With learn=False the flow's memory is neither queried nor written."""

        class LearnOffFlow(Flow):
            @start()
            @human_feedback(message="Review:", learn=False)
            def produce(self):
                return "output"

        learn_flow = LearnOffFlow()
        learn_flow.memory = MagicMock()

        feedback_patch = patch.object(
            learn_flow, "_request_human_feedback", return_value="looks good"
        )
        with feedback_patch:
            learn_flow.produce()

        # Neither the read path nor the write path of memory may be used.
        learn_flow.memory.recall.assert_not_called()
        learn_flow.memory.remember_many.assert_not_called()

    def test_learn_true_stores_distilled_lessons(self):
        """With learn=True, substantive feedback is distilled and stored in memory."""
        from crewai.flow.human_feedback import DistilledLessons

        class LearnFlow(Flow):
            @start()
            @human_feedback(message="Review:", llm="gpt-4o-mini", learn=True)
            def produce(self):
                return "draft article"

        learn_flow = LearnFlow()
        learn_flow.memory = MagicMock()
        # An empty recall result means there are no earlier lessons.
        learn_flow.memory.recall.return_value = []

        with (
            patch.object(
                learn_flow, "_request_human_feedback", return_value="Always add citations"
            ),
            patch("crewai.llm.LLM") as llm_cls,
        ):
            fake_llm = MagicMock()
            llm_cls.return_value = fake_llm
            fake_llm.supports_function_calling.return_value = True
            # The distillation call answers with structured lessons.
            fake_llm.call.return_value = DistilledLessons(
                lessons=["Always include source citations when making factual claims"]
            )

            learn_flow.produce()

        # Exactly one write, carrying the single distilled lesson.
        remember_many = learn_flow.memory.remember_many
        remember_many.assert_called_once()
        stored_lessons = remember_many.call_args.args[0]
        assert len(stored_lessons) == 1
        assert "citations" in stored_lessons[0].lower()
        # The write is tagged with source="hitl".
        assert remember_many.call_args.kwargs.get("source") == "hitl"

    def test_learn_true_pre_reviews_with_past_lessons(self):
        """With learn=True and recalled lessons, the human is shown the pre-reviewed output."""
        from crewai.flow.human_feedback import DistilledLessons, PreReviewResult
        from crewai.memory.types import MemoryMatch, MemoryRecord

        class LearnFlow(Flow):
            @start()
            @human_feedback(message="Review:", llm="gpt-4o-mini", learn=True)
            def produce(self):
                return "draft without citations"

        learn_flow = LearnFlow()
        # Memory double that recalls one earlier lesson.
        learn_flow.memory = MagicMock()
        past_lesson = MemoryMatch(
            record=MemoryRecord(
                content="Always include source citations when making factual claims",
                embedding=[],
            ),
            score=0.9,
            match_reasons=["semantic"],
        )
        learn_flow.memory.recall.return_value = [past_lesson]

        seen = {}

        def capture_feedback(message, output, metadata=None, emit=None):
            # Record what would have been presented to the human reviewer.
            seen["shown_to_human"] = output
            return "approved"

        with (
            patch.object(learn_flow, "_request_human_feedback", side_effect=capture_feedback),
            patch("crewai.llm.LLM") as llm_cls,
        ):
            fake_llm = MagicMock()
            llm_cls.return_value = fake_llm
            fake_llm.supports_function_calling.return_value = True
            # First call: pre-review yields the improved output.
            # Second call: distillation finds nothing new in "approved".
            fake_llm.call.side_effect = [
                PreReviewResult(improved_output="draft with citations added"),
                DistilledLessons(lessons=[]),
            ]

            learn_flow.produce()

        # The reviewer saw the improved text rather than the raw draft.
        assert seen["shown_to_human"] == "draft with citations added"
        # Memory was consulted once for earlier lessons.
        learn_flow.memory.recall.assert_called_once()

    def test_learn_true_empty_feedback_does_not_store(self):
        """With learn=True, empty feedback results in no lessons being stored."""

        class LearnFlow(Flow):
            @start()
            @human_feedback(message="Review:", llm="gpt-4o-mini", learn=True)
            def produce(self):
                return "output"

        learn_flow = LearnFlow()
        learn_flow.memory = MagicMock()
        learn_flow.memory.recall.return_value = []

        feedback_patch = patch.object(
            learn_flow, "_request_human_feedback", return_value=""
        )
        with feedback_patch:
            learn_flow.produce()

        # Nothing to distill, so nothing is written.
        learn_flow.memory.remember_many.assert_not_called()

    def test_learn_true_uses_default_llm(self):
        """With learn=True and no llm argument, the config's llm is "gpt-4o-mini"."""
        decorate = human_feedback(message="Review:", learn=True)

        @decorate
        def test_method(self):
            return "output"

        feedback_config = test_method.__human_feedback_config__
        assert feedback_config is not None
        assert feedback_config.learn is True
        # The decorator's own default for llm is expected to show up here.
        assert feedback_config.llm == "gpt-4o-mini"

    def test_pre_review_failure_logs_and_returns_raw_output(self, caplog):
        """A failing pre-review shows the raw output to the human and logs a warning."""
        from crewai.memory.types import MemoryMatch, MemoryRecord

        class LearnFlow(Flow):
            @start()
            @human_feedback(message="Review:", llm="gpt-4o-mini", learn=True)
            def produce(self):
                return "raw draft"

        learn_flow = LearnFlow()
        learn_flow.memory = MagicMock()
        past_lesson = MemoryMatch(
            record=MemoryRecord(content="some lesson", embedding=[]),
            score=0.9,
            match_reasons=["semantic"],
        )
        learn_flow.memory.recall.return_value = [past_lesson]

        seen: dict[str, Any] = {}

        def capture_feedback(message, output, metadata=None, emit=None):
            seen["shown_to_human"] = output
            # Empty reply keeps the distillation path out of this test.
            return ""

        with (
            patch.object(learn_flow, "_request_human_feedback", side_effect=capture_feedback),
            patch("crewai.llm.LLM") as llm_cls,
            caplog.at_level("WARNING", logger="crewai.flow.human_feedback"),
        ):
            fake_llm = MagicMock()
            llm_cls.return_value = fake_llm
            fake_llm.supports_function_calling.return_value = True
            fake_llm.call.side_effect = RuntimeError("simulated pre-review failure")

            learn_flow.produce()

        assert seen["shown_to_human"] == "raw draft"
        # At least one WARNING record must mention the failure and carry
        # exception info.
        failure_records = [
            rec
            for rec in caplog.records
            if "HITL pre-review failed" in rec.message
            and rec.levelname == "WARNING"
            and rec.exc_info is not None
        ]
        assert failure_records

    def test_pre_review_failure_strict_reraises(self):
        """With learn_strict=True a failing pre-review raises instead of falling back."""
        from crewai.memory.types import MemoryMatch, MemoryRecord

        class LearnFlow(Flow):
            @start()
            @human_feedback(
                message="Review:",
                llm="gpt-4o-mini",
                learn=True,
                learn_strict=True,
            )
            def produce(self):
                return "raw draft"

        learn_flow = LearnFlow()
        learn_flow.memory = MagicMock()
        past_lesson = MemoryMatch(
            record=MemoryRecord(content="some lesson", embedding=[]),
            score=0.9,
            match_reasons=["semantic"],
        )
        learn_flow.memory.recall.return_value = [past_lesson]

        with (
            patch.object(learn_flow, "_request_human_feedback", return_value=""),
            patch("crewai.llm.LLM") as llm_cls,
        ):
            fake_llm = MagicMock()
            llm_cls.return_value = fake_llm
            fake_llm.supports_function_calling.return_value = True
            fake_llm.call.side_effect = RuntimeError("simulated pre-review failure")

            # The simulated error must surface to the caller unchanged.
            with pytest.raises(RuntimeError, match="simulated pre-review failure"):
                learn_flow.produce()

    def test_distillation_failure_logs_and_does_not_block_flow(self, caplog):
        """A failing distillation call is logged as a warning and the flow still completes."""

        class LearnFlow(Flow):
            @start()
            @human_feedback(message="Review:", llm="gpt-4o-mini", learn=True)
            def produce(self):
                return "raw draft"

        learn_flow = LearnFlow()
        learn_flow.memory = MagicMock()
        # No recalled lessons, which keeps the pre-review path out of this test.
        learn_flow.memory.recall.return_value = []

        with (
            patch.object(
                learn_flow, "_request_human_feedback", return_value="please add citations"
            ),
            patch("crewai.llm.LLM") as llm_cls,
            caplog.at_level("WARNING", logger="crewai.flow.human_feedback"),
        ):
            fake_llm = MagicMock()
            llm_cls.return_value = fake_llm
            fake_llm.supports_function_calling.return_value = True
            fake_llm.call.side_effect = RuntimeError("simulated distill failure")

            # Completing without an exception is part of what is asserted.
            learn_flow.produce()

        learn_flow.memory.remember_many.assert_not_called()
        failure_records = [
            rec
            for rec in caplog.records
            if "HITL lesson distillation failed" in rec.message
            and rec.levelname == "WARNING"
        ]
        assert failure_records

    def test_learn_strict_config_propagates(self):
        """The learn_strict argument is recorded on the decorator's config object."""
        decorate = human_feedback(message="Review:", learn=True, learn_strict=True)

        @decorate
        def test_method(self):
            return "output"

        feedback_config = test_method.__human_feedback_config__
        assert feedback_config is not None
        assert feedback_config.learn_strict is True


class TestHumanFeedbackFinalOutputPreservation:
    """Tests for preserving method return value as flow's final output when @human_feedback with emit is terminal.

    This addresses the bug where the flow's final output was the collapsed outcome string (e.g., 'approved')
    instead of the method's actual return value when a @human_feedback method with emit is the final method.

    Every test that reads ``flow.last_human_feedback.outcome`` first asserts
    that ``last_human_feedback`` is not None, so a missing result fails with a
    clear assertion instead of an AttributeError.
    """

    @patch("builtins.input", return_value="Looks good!")
    @patch("builtins.print")
    def test_final_output_is_method_return_not_collapsed_outcome(
        self, mock_print, mock_input
    ):
        """When @human_feedback with emit is the final method, flow output is the method's return value."""

        class FinalHumanFeedbackFlow(Flow):
            @start()
            @human_feedback(
                message="Review this content:",
                emit=["approved", "rejected"],
                llm="gpt-4o-mini",
            )
            def generate_and_review(self):
                # This dict should be the final output, NOT the string 'approved'
                return {"title": "My Article", "content": "Article content here", "status": "ready"}

        flow = FinalHumanFeedbackFlow()

        with (
            patch.object(flow, "_request_human_feedback", return_value="Looks great, approved!"),
            patch.object(flow, "_collapse_to_outcome", return_value="approved"),
        ):
            result = flow.kickoff()

        # The final output should be the actual method return value, not the collapsed outcome
        assert isinstance(result, dict), f"Expected dict, got {type(result).__name__}: {result}"
        assert result == {"title": "My Article", "content": "Article content here", "status": "ready"}
        # But the outcome should still be tracked in last_human_feedback
        assert flow.last_human_feedback is not None
        assert flow.last_human_feedback.outcome == "approved"

    @patch("builtins.input", return_value="approved")
    @patch("builtins.print")
    def test_routing_still_works_with_downstream_listener(self, mock_print, mock_input):
        """When @human_feedback has a downstream listener, routing still triggers the listener."""
        # Appended to by publish() so the test can count its invocations.
        publish_called = []

        class RoutingFlow(Flow):
            @start()
            @human_feedback(
                message="Review:",
                emit=["approved", "rejected"],
                llm="gpt-4o-mini",
            )
            def review(self):
                return {"content": "original content"}

            @listen("approved")
            def publish(self):
                publish_called.append(True)
                return {"published": True, "timestamp": "2024-01-01"}

        flow = RoutingFlow()

        with (
            patch.object(flow, "_request_human_feedback", return_value="LGTM"),
            patch.object(flow, "_collapse_to_outcome", return_value="approved"),
        ):
            result = flow.kickoff()

        # The downstream listener should have been triggered
        assert len(publish_called) == 1, "publish() should have been called"
        # The final output should be from the listener, not the human_feedback method
        assert result == {"published": True, "timestamp": "2024-01-01"}

    @patch("builtins.input", return_value="")
    @patch("builtins.print")
    @pytest.mark.asyncio
    async def test_async_human_feedback_final_output_preserved(self, mock_print, mock_input):
        """Async @human_feedback methods also preserve the real return value."""

        class AsyncFinalFlow(Flow):
            @start()
            @human_feedback(
                message="Review async content:",
                emit=["approved", "rejected"],
                llm="gpt-4o-mini",
                default_outcome="approved",
            )
            async def async_generate(self):
                return {"async_data": "value", "computed": 42}

        flow = AsyncFinalFlow()

        with (
            patch.object(flow, "_request_human_feedback", return_value=""),
        ):
            result = await flow.kickoff_async()

        # The final output should be the dict, not "approved"
        assert isinstance(result, dict), f"Expected dict, got {type(result).__name__}: {result}"
        assert result == {"async_data": "value", "computed": 42}
        # Guard first so a missing result fails with a clear assertion.
        assert flow.last_human_feedback is not None
        assert flow.last_human_feedback.outcome == "approved"

    @patch("builtins.input", return_value="feedback")
    @patch("builtins.print")
    def test_method_outputs_contains_real_output(self, mock_print, mock_input):
        """The _method_outputs list should contain the real method output, not the collapsed outcome."""

        class OutputTrackingFlow(Flow):
            @start()
            @human_feedback(
                message="Review:",
                emit=["approved", "rejected"],
                llm="gpt-4o-mini",
            )
            def generate(self):
                return {"data": "real output"}

        flow = OutputTrackingFlow()

        with (
            patch.object(flow, "_request_human_feedback", return_value="approved"),
            patch.object(flow, "_collapse_to_outcome", return_value="approved"),
        ):
            flow.kickoff()

        # _method_outputs should contain the real output
        assert len(flow._method_outputs) == 1
        assert flow._method_outputs[0] == {"data": "real output"}

    @patch("builtins.input", return_value="looks good")
    @patch("builtins.print")
    def test_none_return_value_is_preserved(self, mock_print, mock_input):
        """A method returning None should preserve None as flow output, not the outcome string."""

        class NoneReturnFlow(Flow):
            @start()
            @human_feedback(
                message="Review:",
                emit=["approved", "rejected"],
                llm="gpt-4o-mini",
            )
            def process(self):
                # Method does work but returns None (implicit)
                pass

        flow = NoneReturnFlow()

        with (
            patch.object(flow, "_request_human_feedback", return_value=""),
            patch.object(flow, "_collapse_to_outcome", return_value="approved"),
        ):
            result = flow.kickoff()

        # Final output should be None (the method's real return), not "approved"
        assert result is None, f"Expected None, got {result!r}"
        # Guard first so a missing result fails with a clear assertion.
        assert flow.last_human_feedback is not None
        assert flow.last_human_feedback.outcome == "approved"
