<?xml version="1.0" encoding="UTF-8"?>
<!--
  News sitemap for dreaming.press posts.
  Each <url> carries the post location plus a <news:news> record holding the
  publication name and language, the publication date, and the article title.
  Keep every <news:title> on a single line: whitespace inside text content is
  passed to consumers as part of the title.
-->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:news="http://www.google.com/schemas/sitemap-news/0.9">
  <url>
    <loc>https://dreaming.press/posts/tool-response-design-for-ai-agents.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-29</news:publication_date>
      <news:title>What Should an AI Agent's Tools Return? Designing Tool Results for the Context Window</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/raft-retrieval-augmented-fine-tuning.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-29</news:publication_date>
      <news:title>RAFT vs RAG vs Fine-Tuning: When to Train on the Documents You Retrieve</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/mcp-tasks-long-running-async-work.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-29</news:publication_date>
      <news:title>MCP Tasks: How Long-Running Agent Work Survives a Stateless Server</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/how-to-set-a-timeout-for-an-ai-agent.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-29</news:publication_date>
      <news:title>How to Set a Timeout for an AI Agent: A Per-Call Timeout Won't Bound the Loop</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/how-to-load-test-an-llm-app.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-29</news:publication_date>
      <news:title>How to Load-Test an LLM App: You're Stress-Testing the Rate Limiter, Not the Model</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/cost-aware-agent-evaluation.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-29</news:publication_date>
      <news:title>Cost-Aware Agent Evaluation: Why Your Benchmark Needs a Dollar Axis</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/who-controls-mcp-agentic-ai-foundation.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>Who Controls MCP Now? Inside the Agentic AI Foundation</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/webmcp-vs-mcp.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>WebMCP vs MCP: Why Browser Agents Get Their Tools From the Page</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/vercel-eve-vs-langgraph.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>Vercel eve vs LangGraph: Library You Host, or Harness You Rent</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/trainium-vs-nvidia-gpu-llm-inference.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>AWS Trainium vs NVIDIA GPU for LLM Inference: The Bill Is Cheaper, the Onramp Isn't</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/tool-result-caching-for-ai-agents.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>Tool-Result Caching for AI Agents: The One Cache That Can Be Wrong</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/tau-bench-vs-tau2-bench.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>τ-bench vs τ²-bench: The Agent Benchmark That Scores Whether You Can Guide a Human</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/swe-evo-vs-swe-bench-long-horizon-coding-agents.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>SWE-EVO vs SWE-bench: The Long-Horizon Test Coding Agents Fail</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/swe-bench-pro-vs-swe-bench-verified.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>SWE-bench Pro vs SWE-bench Verified: Why Top Coding Agents Dropped From 70% to 23%</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/reflexion-vs-self-refine-vs-critic-vs-lats.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>Reflexion vs Self-Refine vs CRITIC vs LATS: Who Verifies the Self-Correction?</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/pyannote-vs-nemo-vs-cloud-speaker-diarization.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>Speaker Diarization for Voice Agents: pyannote vs NVIDIA NeMo vs Cloud APIs</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/nvfp4-vs-mxfp4-fp4-quantization.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>NVFP4 vs MXFP4: The Two 4-Bit Floats Fighting Over Your Inference Bill</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/microsoft-agent-framework-build-2026.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>Microsoft Agent Framework at Build 2026: Agent Harness, Hosted Agents, and CodeAct</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/mcp-server-ssrf-cloud-metadata-credentials.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>MCP Server SSRF: How 'Convert This URL' Hands Over Your Cloud Credentials</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/mcp-apps-interactive-ui.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>MCP Apps: When a Tool Stops Returning Text and Starts Returning UI</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/llm-reranker-vs-cross-encoder-vs-listwise.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>LLM Rerankers vs Cross-Encoders vs Listwise: Which Reranking Architecture for RAG?</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/langchain-vs-langgraph-vs-deepagents-harness.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>LangChain vs LangGraph vs Deep Agents: Pick a Rung, Not a Framework</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/kv-cache-eviction-streamingllm-vs-h2o-vs-snapkv-vs-quest.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>KV Cache Eviction: StreamingLLM vs H2O vs SnapKV vs Quest</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/hermes-agent-self-improving-explained.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>Hermes Agent: What 'Self-Improving' Means When the Model Never Changes</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/harness-engineering-for-ai-agents.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>Harness Engineering: The Reliability Layer Around an Unreliable Model</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/gartner-ai-agent-spending-2026.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>AI Agent Software Spending Hits $206B in 2026 — and the Cancellation Forecast Explains Why</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/filesystem-vs-vector-database-agent-memory.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>Filesystem vs Vector Database for Agent Memory: Why 2026 Agents Write to Files</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/claude-agent-sdk-subscription-billing-change.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>Claude Agent SDK Billing: Why the June 15 Subscription Credit Split Was Paused</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/browsecomp-vs-deepresearch-bench.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>How to Evaluate a Deep Research Agent: BrowseComp vs DeepResearch Bench</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/agent-control-specification-acs-runtime-governance.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>The Agent Control Specification (ACS): A Portable Control Plane for AI Agents</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/a2a-vs-acp-vs-agntcy-agent-interop-protocols.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-28</news:publication_date>
      <news:title>A2A vs ACP vs AGNTCY: The Agent Interoperability Protocols, Compared</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/wasm-vs-microvm-vs-v8-isolate-sandbox-ai-code.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>WASM vs MicroVMs vs V8 Isolates: Sandboxing AI-Generated Code</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/terminal-bench-vs-swe-bench.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>Terminal-Bench vs SWE-bench: Why Patching Code and Operating a Shell Are Different Skills</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/ruler-vs-needle-in-a-haystack-context-length.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>RULER vs Needle-in-a-Haystack: How to Measure an LLM's Real Context Length</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/recovery-bench-agent-error-recovery.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>Recovery-Bench: Why Top Agents Still Fail to Recover From Their Own Mistakes</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/record-replay-testing-for-ai-agents.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>Record and Replay Testing for AI Agents: Deterministic Tests Without Live LLM Calls</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/prompt-injection-defense-guardrails-vs-architecture.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>Prompt Injection Defense: Detection Guardrails vs Defending Agents by Design</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/prompt-format-json-vs-xml-vs-markdown-vs-yaml.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>Prompt Format: JSON vs XML vs Markdown vs YAML — and Why Input and Output Want Opposite Things</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/prompt-caching-pricing-anthropic-vs-openai-vs-gemini-vs-bedrock.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>Prompt Caching Pricing in 2026: Anthropic vs OpenAI vs Gemini vs Bedrock</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/owasp-mcp-top-10.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>The OWASP MCP Top 10, Explained: A Security Checklist for Tool-Connected Agents</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/nvidia-nim-vs-vllm-vs-tgi-self-hosting-llm-inference.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>NVIDIA NIM vs vLLM vs TGI: How to Self-Host LLM Inference in 2026</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/mteb-vs-mmteb-vs-rteb-embedding-leaderboard.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>MTEB vs MMTEB vs RTEB: How to Read an Embedding Leaderboard in 2026</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/mcp-2026-stateless-spec-changes.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>MCP Goes Stateless: What the 2026 Spec Changes for Agent Builders</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/llm-serving-capacity-planning.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>How Many GPUs to Serve an LLM: Capacity Planning Is a Memory Problem, Not a FLOPs One</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/langgraph-vs-microsoft-agent-framework.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>LangGraph vs Microsoft Agent Framework: Who Owns the Run Loop in 2026</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/how-to-track-llm-cost-per-customer.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>How to Track LLM Costs Per Customer in a Multi-Tenant App</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/how-to-test-an-ai-agent-with-simulated-users.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>How to Test an AI Agent With Simulated Users (and Why the Fake User Is the Hard Part)</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/how-to-roll-out-a-new-llm-shadow-vs-canary-vs-ab.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>How to Roll Out a New LLM in Production: Shadow vs Canary vs A/B Testing</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/how-to-price-an-ai-agent.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>How to Price an AI Agent: Seat vs Usage vs Outcome</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/how-to-keep-a-vector-database-in-sync.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>How to Keep a Vector Database in Sync With Your Source Data</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/how-to-evaluate-ai-agent-memory.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>How to Evaluate AI Agent Memory: LoCoMo, LongMemEval, and Why Long Context Isn't Enough</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/how-to-evaluate-a-voice-agent.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>How to Evaluate a Voice Agent: Why Text-Agent Metrics Miss the Real Failures</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/how-to-benchmark-llm-inference.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>How to Benchmark LLM Inference: Why One Tokens-Per-Second Number Is Lying to You</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/how-to-add-llm-evals-to-ci-cd.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>How to Add LLM Evals to CI/CD Without Building a Flaky Gate</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/flash-attention-vs-paged-attention.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>FlashAttention vs PagedAttention: Two Different Bottlenecks, Not Two Choices</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/expert-parallelism-moe-serving.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>Expert Parallelism: How Giant MoE Models Are Actually Served</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/elasticsearch-vs-opensearch-vs-vespa-hybrid-search.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>Elasticsearch vs OpenSearch vs Vespa: Choosing a Hybrid Search Engine for RAG</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/circuit-breaker-for-llm-api-calls.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>A Circuit Breaker for LLM API Calls — and Why It Has to Trip on Cost, Not Just Errors</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/amd-mi300x-vs-nvidia-h100-llm-inference.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>AMD MI300X vs NVIDIA H100 for LLM Inference: The Memory Wall and the Software Tax</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/2026-06-27-too-many-tools-tool-search-vs-code-execution.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>Too Many Tools: Tool Search vs Code Execution for Agents at Scale</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/2026-06-27-scale-to-zero-llm-inference-gpu-cold-starts.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>Scale to Zero for LLM Inference: Why Cold Starts Are a Weight-Loading Problem</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/2026-06-27-pass-at-k-vs-pass-hat-k-agent-reliability-evals.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>Pass@k vs Pass^k: Measuring Whether an Agent Is Reliable, Not Just Capable</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/2026-06-27-backpressure-for-ai-agents-bounded-queues-vs-adaptive-concurrency.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>Backpressure for AI Agents: Why Exponential Backoff Makes Fan-Out Worse</news:title>
    </news:news>
  </url>
  <url>
    <loc>https://dreaming.press/posts/2026-06-27-advisory-to-exploit-window-self-hosted-ai-infrastructure.html</loc>
    <news:news>
      <news:publication>
        <news:name>dreaming.press</news:name>
        <news:language>en</news:language>
      </news:publication>
      <news:publication_date>2026-06-27</news:publication_date>
      <news:title>Self-Hosted AI Tools Are Now Exploited in Hours: Inside 2026's Advisory-to-Attack Window</news:title>
    </news:news>
  </url>
</urlset>