# main.py - Streamlit front end for the Browser MCP Agent.
import asyncio
import os
import streamlit as st
from textwrap import dedent
from mcp_agent.app import MCPApp
from mcp_agent.agents.agent import Agent
from mcp_agent.workflows.llm.augmented_llm_openai import OpenAIAugmentedLLM
from mcp_agent.workflows.llm.augmented_llm import RequestParams
# Browser tab title, icon and wide layout
st.set_page_config(page_title="Browser MCP Agent", page_icon="🌐", layout="wide")

# Page heading followed by a one-line description
st.markdown("<h1 class='main-header'>🌐 Browser MCP Agent</h1>", unsafe_allow_html=True)
st.markdown("Interact with a powerful web browsing agent that can navigate and interact with websites")

# Sidebar content: (section heading, example commands) pairs, rendered in order
sidebar_examples = (
    ("**Navigation**", (
        "- Go to wikipedia.org/wiki/computer_vision",
    )),
    ("**Interactions**", (
        "- Click on the link to object detection and take a screenshot",
        "- Scroll down to view more content",
    )),
    ("**Multi-step Tasks**", (
        "- Navigate to wikipedia.org/wiki/computer_vision, scroll down, and report details",
        "- Scroll down and summarize the wikipedia page",
    )),
)

with st.sidebar:
    st.markdown("### Example Commands")
    for section_heading, example_lines in sidebar_examples:
        st.markdown(section_heading)
        for example_line in example_lines:
            st.markdown(example_line)
    st.markdown("---")
    st.caption("Note: The agent uses Puppeteer to control a real browser.")

# Free-text box where the user types the command for the agent
query = st.text_area(
    "Your Command",
    placeholder="Ask the agent to navigate to websites and interact with them",
)

# First run of this session only: create the MCP app, reserve the slots that
# setup_agent() fills in later, and create the event loop that is reused for
# every command in this session.
if "initialized" not in st.session_state:
    session = st.session_state
    session.initialized = False
    session.mcp_app = MCPApp(name="streamlit_mcp_agent")
    for empty_slot in ("mcp_context", "mcp_agent_app", "browser_agent", "llm"):
        setattr(session, empty_slot, None)
    session_loop = asyncio.new_event_loop()
    session.loop = session_loop
    asyncio.set_event_loop(session_loop)
# Setup function that runs only once
async def setup_agent():
    """Create the MCP app context, browser agent and LLM for this session.

    The work is skipped when ``st.session_state.initialized`` is already
    true, so the objects are built once and reused by later calls. Everything
    created is stored on ``st.session_state`` (``mcp_context``,
    ``mcp_agent_app``, ``browser_agent`` and ``llm``).

    If any step fails after the app context has been entered, the context is
    exited again and the stored references are reset to ``None``. A later
    retry therefore starts from a clean slate instead of entering a second
    context while the first one is still open.

    Returns:
        ``None`` on success or when already initialized; otherwise the string
        ``"Error during initialization: <exception text>"``.
    """
    state = st.session_state
    if state.initialized:
        return None

    context_entered = False
    try:
        # The context manager is entered by hand (not with ``async with``) and
        # kept in session state so it stays open for later commands.
        state.mcp_context = state.mcp_app.run()
        state.mcp_agent_app = await state.mcp_context.__aenter__()
        context_entered = True

        # Create and initialize agent
        state.browser_agent = Agent(
            name="browser",
            instruction="""You are a helpful web browsing assistant that can interact with websites using puppeteer.
- Navigate to websites and perform browser actions (click, scroll, type)
- Extract information from web pages
- Take screenshots of page elements when useful
- Provide concise summaries of web content using markdown
- Follow multi-step browsing sequences to complete tasks
When navigating, start with "www.lastmileai.dev" unless instructed otherwise.""",
            server_names=["puppeteer"],
        )

        # Initialize agent and attach LLM
        await state.browser_agent.initialize()
        state.llm = await state.browser_agent.attach_llm(OpenAIAugmentedLLM)

        # List tools once
        logger = state.mcp_agent_app.logger
        tools = await state.browser_agent.list_tools()
        logger.info("Tools available:", data=tools)

        # Mark as initialized
        state.initialized = True
    except Exception as e:
        if context_entered:
            try:
                # Balance the manual __aenter__ above so the app context is
                # not left open after a failed setup.
                await state.mcp_context.__aexit__(None, None, None)
            except Exception:
                # Best-effort cleanup: the original failure is what the user
                # needs to see, so a secondary teardown error is dropped.
                pass
        # Forget the half-built objects; ``initialized`` is still False, so
        # the next call rebuilds everything from scratch.
        state.mcp_context = None
        state.mcp_agent_app = None
        state.browser_agent = None
        state.llm = None
        return f"Error during initialization: {str(e)}"
    return None
# Main function to run agent
async def run_mcp_agent(message):
    """Send one user command to the browser agent and return its reply.

    Args:
        message: The command text entered by the user.

    Returns:
        The string produced by the LLM, or a string starting with ``"Error"``
        when the ``OPENAI_API_KEY`` environment variable is unset, the command
        is blank, agent setup fails, or generation raises an exception.
    """
    if not os.getenv("OPENAI_API_KEY"):
        return "Error: OpenAI API key not provided"

    # A blank command carries no instruction, so reject it here instead of
    # initializing the agent and spending an LLM call on it.
    if not message or not message.strip():
        return "Error: No command provided"

    try:
        # Make sure agent is initialized (a no-op after the first success)
        error = await setup_agent()
        if error:
            return error

        # Generate response without recreating agents
        # Switch use_history to False to reduce the passed context
        result = await st.session_state.llm.generate_str(
            message=message,
            request_params=RequestParams(use_history=True),
        )
        return result
    except Exception as e:
        # Top-level boundary for the UI: show the failure as text rather than
        # letting the exception abort the Streamlit script run.
        return f"Error: {str(e)}"
# Run button
# Explicit flag recording whether a command ran during this script run; it
# replaces probing locals() for the `result` variable.
command_ran = False
if st.button("🚀 Run Command", type="primary", use_container_width=True):
    with st.spinner("Processing your request..."):
        # Run the coroutine on the event loop kept in session state, so every
        # command in this session uses the same loop.
        result = st.session_state.loop.run_until_complete(run_mcp_agent(query))
    command_ran = True

    # Display results
    st.markdown("### Response")
    st.markdown(result)

# Display help text for first-time users (only when no command ran this time)
if not command_ran:
    # The first step names the OPENAI_API_KEY environment variable because
    # run_mcp_agent() refuses to run unless that variable is set.
    st.markdown(
        """<div style='padding: 20px; background-color: #f0f2f6; border-radius: 10px;'>
<h4>How to use this app:</h4>
<ol>
<li>Set the OPENAI_API_KEY environment variable before launching the app (required even if the key is also in your mcp_agent.secrets.yaml file)</li>
<li>Type a command for the agent to navigate and interact with websites</li>
<li>Click 'Run Command' to see results</li>
</ol>
<p><strong>Capabilities:</strong></p>
<ul>
<li>Navigate to websites using Puppeteer</li>
<li>Click on elements, scroll, and type text</li>
<li>Take screenshots of specific elements</li>
<li>Extract information from web pages</li>
<li>Perform multi-step browsing tasks</li>
</ul>
</div>""",
        unsafe_allow_html=True,
    )

# Footer
st.markdown("---")
st.write("Built with Streamlit, Puppeteer, and MCP-Agent Framework ❤️")