pokeclaw-android-ai-agent

安装量: 362
排名: #8918

安装

npx skills add https://github.com/aradotso/trending-skills --skill pokeclaw-android-ai-agent

PokeClaw Android AI Agent Skill by ara.so — Daily 2026 Skills collection. PokeClaw is an open-source Android app that runs Gemma 4 entirely on-device via LiteRT-LM with native tool calling. The LLM reads the screen as a UI tree, selects tools (tap, swipe, type, open app, send message, etc.), executes them through Android Accessibility Services, observes the result, and loops until the task is complete — no cloud, no API key required for local mode. Architecture Overview User prompt │ ▼ TaskOrchestrator ← manages task lifecycle & session history │ ▼ LLMEngine (LiteRT-LM) ← Gemma 4 on-device, tool-call aware │ tool_calls[] ▼ ToolDispatcher ← routes to concrete tool implementations │ ├── AccessibilityTool ← tap / swipe / long_press / input_text ├── AppLaunchTool ← open_app ├── ScreenReaderTool ← get_screen_info / take_screenshot ├── MessagingTool ← send_message / auto_reply └── FinishTool ← finish (signals task done) │ ▼ Android Accessibility Service / UI Automator Installation / Setup 1. Clone the repo git clone https://github.com/agents-io/PokeClaw.git cd PokeClaw 2. Open in Android Studio Android Studio Hedgehog or newer recommended SDK: Android 9+ (API 28), target API 34+ Kotlin 1.9+ 3. Add LiteRT-LM dependency In app/build.gradle.kts : dependencies { // LiteRT-LM for on-device LLM inference with tool calling implementation ( "com.google.ai.edge.litert:litert-lm:1.0.0" ) // Coroutines for async inference implementation ( "org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3" ) // JSON for tool-call serialization implementation ( "org.json:json:20231013" ) } 4. AndroidManifest.xml permissions

<!-- Permissions requested by the app. -->
<!-- NOTE(review): the two BIND_* permissions are normally *required by* a <service> declaration
     (android:permission=...) rather than requested with <uses-permission> — confirm these two
     entries are actually needed. -->
<uses-permission android:name="android.permission.BIND_ACCESSIBILITY_SERVICE" />

<uses-permission android:name="android.permission.BIND_NOTIFICATION_LISTENER_SERVICE" />

<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_DATA_SYNC" />

<!-- Declares the accessibility service; only holders of BIND_ACCESSIBILITY_SERVICE may bind to it. -->
<service
    android:name=".accessibility.PokeAccessibilityService"
    android:permission="android.permission.BIND_ACCESSIBILITY_SERVICE"
    android:exported="false">
    <intent-filter>
        <action android:name="android.accessibilityservice.AccessibilityService" />
    </intent-filter>
    <!-- Points at res/xml/accessibility_service_config.xml -->
    <meta-data
        android:name="android.accessibilityservice"
        android:resource="@xml/accessibility_service_config" />
</service>
res/xml/accessibility_service_config.xml
:
<!-- Capabilities requested by the accessibility service: window content retrieval and gesture dispatch. -->
<accessibility-service
    xmlns:android="http://schemas.android.com/apk/res/android"
    android:accessibilityEventTypes="typeAllMask"
    android:accessibilityFeedbackType="feedbackGeneric"
    android:accessibilityFlags="flagDefault|flagRetrieveInteractiveWindows|flagRequestEnhancedWebAccessibility"
    android:canRetrieveWindowContent="true"
    android:canPerformGestures="true"
    android:notificationTimeout="100"
    android:description="@string/accessibility_service_description" />
5. Build & install APK
# Build the debug APK, then install it on the connected device.
./gradlew assembleDebug
adb install app/build/outputs/apk/debug/app-debug.apk
Or download the latest release APK.
Core Concepts
Tool Definition
Tools are declared as JSON schemas that LiteRT-LM uses for structured output. Define a tool:
// domain/tools/ToolDefinition.kt
/**
 * Declaration of a single tool offered to the model.
 *
 * @property name Identifier the model uses to call the tool.
 * @property description Human-readable explanation of what the tool does.
 * @property parameters Schema of the arguments the tool accepts.
 */
data class ToolDefinition(
    val name: String,
    val description: String,
    val parameters: ToolParameters,
)
/**
 * Argument schema of a tool.
 *
 * @property type Schema type of the argument container; defaults to `"object"`.
 * @property properties Argument name mapped to its [ToolProperty] description.
 * @property required Names of the arguments that must be supplied.
 */
data class ToolParameters(
    val type: String = "object",
    val properties: Map<String, ToolProperty>,
    val required: List<String>,
)
/**
 * Description of one tool argument.
 *
 * @property type Schema type of the argument (the tool schemas in this file use "string" and "number").
 * @property description Human-readable explanation of the argument.
 * @property enum Optional list of allowed values; `null` when the argument is unrestricted.
 */
data class ToolProperty(
    val type: String,
    val description: String,
    val enum: List<String>? = null,
)
Registering Tools with LiteRT-LM
// llm/LLMEngine.kt
import
com
.
google
.
ai
.
edge
.
litert
.
lm
.
LiteRtLm
import
com
.
google
.
ai
.
edge
.
litert
.
lm
.
InferenceOptions
import
com
.
google
.
ai
.
edge
.
litert
.
lm
.
ToolConfig
/**
 * Owns the on-device LiteRT-LM model handle and the JSON tool schemas offered to it.
 *
 * @param context Android context passed to [LiteRtLm.create].
 */
class LLMEngine(private val context: Context) {

    // Assigned in initialize(); reading it earlier throws UninitializedPropertyAccessException.
    private lateinit var lm: LiteRtLm

    /**
     * Creates the model from the file at [modelPath] with fixed inference options
     * (max 2048 tokens, temperature 0.1, top-k 40).
     */
    suspend fun initialize(modelPath: String) {
        val options = InferenceOptions.builder()
            .setMaxTokens(2048)
            .setTemperature(0.1f) // low temp for reliable tool calls
            .setTopK(40)
            .build()
        lm = LiteRtLm.create(
            context = context,
            modelPath = modelPath,
            inferenceOptions = options
        )
    }

    /**
     * Builds one [ToolConfig] per tool schema declared in this class.
     *
     * `swipe` is included so that the schema list matches the tools handled by the dispatcher
     * and named in the orchestrator's system prompt.
     */
    fun buildToolConfigs(): List<ToolConfig> =
        listOf(
            tapToolJson(),
            swipeToolJson(),
            inputTextToolJson(),
            openAppToolJson(),
            getScreenInfoToolJson(),
            sendMessageToolJson(),
            finishToolJson()
        ).map { ToolConfig.fromJson(it) }

    /** Schema for `tap`: a target label/ID plus optional screen coordinates. */
    private fun tapToolJson() = """
        {
          "name": "tap",
          "description": "Tap a UI element by its resource ID, content description, or screen coordinates.",
          "parameters": {
            "type": "object",
            "properties": {
              "target": {
                "type": "string",
                "description": "Resource ID, content-desc, or visible text of the element to tap."
              },
              "x": { "type": "number", "description": "Screen X coordinate (optional)." },
              "y": { "type": "number", "description": "Screen Y coordinate (optional)." }
            },
            "required": ["target"]
          }
        }
    """.trimIndent()

    /** Schema for `swipe`; argument names match what the dispatcher reads. */
    private fun swipeToolJson() = """
        {
          "name": "swipe",
          "description": "Swipe from one screen coordinate to another.",
          "parameters": {
            "type": "object",
            "properties": {
              "start_x": { "type": "number", "description": "Start X coordinate." },
              "start_y": { "type": "number", "description": "Start Y coordinate." },
              "end_x": { "type": "number", "description": "End X coordinate." },
              "end_y": { "type": "number", "description": "End Y coordinate." }
            },
            "required": ["start_x", "start_y", "end_x", "end_y"]
          }
        }
    """.trimIndent()

    /** Schema for `input_text`: the text plus an optional target field. */
    private fun inputTextToolJson() = """
        {
          "name": "input_text",
          "description": "Type text into the currently focused or specified text field.",
          "parameters": {
            "type": "object",
            "properties": {
              "text": { "type": "string", "description": "Text to type." },
              "target": { "type": "string", "description": "Resource ID of the text field to type into (optional)." }
            },
            "required": ["text"]
          }
        }
    """.trimIndent()

    /** Schema for `open_app`: an app label plus an optional package name. */
    private fun openAppToolJson() = """
        {
          "name": "open_app",
          "description": "Launch an installed app by its name or package name.",
          "parameters": {
            "type": "object",
            "properties": {
              "app_name": { "type": "string", "description": "Human-readable app name, e.g. 'WhatsApp'." },
              "package_name": { "type": "string", "description": "Android package name of the app (optional)." }
            },
            "required": ["app_name"]
          }
        }
    """.trimIndent()

    /** Schema for `get_screen_info`; takes no arguments. */
    private fun getScreenInfoToolJson() = """
        {
          "name": "get_screen_info",
          "description": "Return a text representation of all interactive UI elements on the current screen.",
          "parameters": {
            "type": "object",
            "properties": {},
            "required": []
          }
        }
    """.trimIndent()

    /** Schema for `send_message`: app, contact and message are all required. */
    private fun sendMessageToolJson() = """
        {
          "name": "send_message",
          "description": "Send a chat/SMS message to a contact. Handles: open app, find contact, type, send.",
          "parameters": {
            "type": "object",
            "properties": {
              "app": { "type": "string", "description": "Messaging app name, e.g. 'WhatsApp'." },
              "contact": { "type": "string", "description": "Contact name or phone number." },
              "message": { "type": "string", "description": "Message text to send." }
            },
            "required": ["app", "contact", "message"]
          }
        }
    """.trimIndent()

    /** Schema for `finish`: carries the summary returned to the user. */
    private fun finishToolJson() = """
        {
          "name": "finish",
          "description": "Signal that the task is complete. Include a summary of what was done.",
          "parameters": {
            "type": "object",
            "properties": {
              "summary": { "type": "string", "description": "Brief summary of what was done." }
            },
            "required": ["summary"]
          }
        }
    """.trimIndent()
}
Accessibility Service Implementation
// accessibility/PokeAccessibilityService.kt
/**
 * Accessibility service used by the agent to read the active window's node tree and to
 * perform taps, swipes and text entry on it.
 *
 * The connected service publishes itself through [instance] and clears that reference
 * again when it is destroyed.
 */
class PokeAccessibilityService : AccessibilityService() {

    override fun onServiceConnected() {
        super.onServiceConnected()
        instance = this
    }

    override fun onAccessibilityEvent(event: AccessibilityEvent?) {
        /* optional monitoring */
    }

    override fun onInterrupt() {}

    override fun onDestroy() {
        // Do not leave a stale reference behind once the service is gone.
        if (instance === this) instance = null
        super.onDestroy()
    }

    // ── Screen reading ──────────────────────────────────────────────

    /**
     * Renders the active window as text, one line per node that has text or a content description.
     *
     * @return the rendered tree, or `"Screen unavailable"` when there is no active window root.
     */
    fun getScreenInfo(): String {
        val root = rootInActiveWindow ?: return "Screen unavailable"
        return buildString { appendNode(root, 0) }
    }

    /** Appends [node] (if it carries text or a description) and then all of its descendants. */
    private fun StringBuilder.appendNode(node: AccessibilityNodeInfo, depth: Int) {
        val indent = " ".repeat(depth)
        val text = node.text?.toString()?.trim()
        val desc = node.contentDescription?.toString()?.trim()
        val resId = node.viewIdResourceName
        val cls = node.className?.toString()?.substringAfterLast('.')

        if (!text.isNullOrEmpty() || !desc.isNullOrEmpty()) {
            append("${indent}[$cls")
            if (!resId.isNullOrEmpty()) append(" id=$resId")
            if (!text.isNullOrEmpty()) append(" text=\"$text\"")
            if (!desc.isNullOrEmpty()) append(" desc=\"$desc\"")
            if (node.isClickable) append(" clickable=true")
            if (node.isEditable) append(" editable=true")
            appendLine("]")
        }

        // Unlabelled nodes are skipped above, but their children are still visited.
        for (i in 0 until node.childCount) {
            node.getChild(i)?.let { appendNode(it, depth + 1) }
        }
    }

    // ── Tap ─────────────────────────────────────────────────────────

    /**
     * Taps either at explicit coordinates or on the node matching [target].
     *
     * When both [x] and [y] are given, a gesture is dispatched at that point and [target] is ignored.
     * Otherwise the node is looked up by text, then view ID, then content description.
     *
     * @return `true` if the gesture was dispatched or the click action was accepted; `false` when
     *         there is no target, no active window, no matching node, or the action failed.
     */
    fun tap(target: String?, x: Float? = null, y: Float? = null): Boolean {
        if (x != null && y != null) return performTapGesture(x, y)

        val label = target ?: return false
        val root = rootInActiveWindow ?: return false
        val match = findNode(root, label) ?: return false

        // The matched node may be a non-clickable label inside a clickable container, so click
        // the nearest clickable ancestor (or the node itself when none is found).
        val clickTarget = generateSequence(match) { it.parent }.firstOrNull { it.isClickable } ?: match
        return clickTarget.performAction(AccessibilityNodeInfo.ACTION_CLICK)
    }

    /** Dispatches a 50 ms single-point stroke at ([x], [y]). */
    private fun performTapGesture(x: Float, y: Float): Boolean {
        val path = Path().apply { moveTo(x, y) }
        val stroke = GestureDescription.StrokeDescription(path, 0, 50)
        val gesture = GestureDescription.Builder().addStroke(stroke).build()
        return dispatchGesture(gesture, null, null)
    }

    /** Looks [target] up by visible text, then by view ID, then by content description. */
    private fun findNode(root: AccessibilityNodeInfo, target: String): AccessibilityNodeInfo? {
        // Try by text
        root.findAccessibilityNodeInfosByText(target)?.firstOrNull()?.let { return it }
        // Try by viewId
        root.findAccessibilityNodeInfosByViewId(target)?.firstOrNull()?.let { return it }
        // Try by content-desc (recursive)
        return findByContentDesc(root, target)
    }

    /** Depth-first search for the first node whose content description contains [target], ignoring case. */
    private fun findByContentDesc(node: AccessibilityNodeInfo, target: String): AccessibilityNodeInfo? {
        val description = node.contentDescription?.toString()
        if (description != null && description.contains(target, ignoreCase = true)) return node

        for (i in 0 until node.childCount) {
            val child = node.getChild(i) ?: continue
            findByContentDesc(child, target)?.let { return it }
        }
        return null
    }

    // ── Swipe ───────────────────────────────────────────────────────

    /**
     * Dispatches a straight-line swipe from ([startX], [startY]) to ([endX], [endY]).
     *
     * @param durationMs duration of the stroke in milliseconds.
     * @return the result of `dispatchGesture`.
     */
    fun swipe(startX: Float, startY: Float, endX: Float, endY: Float, durationMs: Long = 300): Boolean {
        val path = Path().apply {
            moveTo(startX, startY)
            lineTo(endX, endY)
        }
        val stroke = GestureDescription.StrokeDescription(path, 0, durationMs)
        val gesture = GestureDescription.Builder().addStroke(stroke).build()
        return dispatchGesture(gesture, null, null)
    }

    // ── Type text ───────────────────────────────────────────────────

    /**
     * Replaces the content of a text field with [text].
     *
     * @param targetResId view ID of the field; when `null`, the focused editable field is used,
     *        falling back to the first editable field in the window.
     * @return `true` if the set-text action was accepted; `false` when there is no active window,
     *         no suitable field, or the action failed.
     */
    fun inputText(text: String, targetResId: String? = null): Boolean {
        val root = rootInActiveWindow ?: return false
        val field = if (targetResId != null) {
            root.findAccessibilityNodeInfosByViewId(targetResId)?.firstOrNull()
        } else {
            findFocusedEditText(root)
        } ?: return false

        field.performAction(AccessibilityNodeInfo.ACTION_FOCUS)
        val args = Bundle().apply {
            // The argument is documented as a CharSequence, so store it as one.
            putCharSequence(AccessibilityNodeInfo.ACTION_ARGUMENT_SET_TEXT_CHARSEQUENCE, text)
        }
        return field.performAction(AccessibilityNodeInfo.ACTION_SET_TEXT, args)
    }

    /**
     * Returns the editable node that currently holds input focus under [node], or, when no
     * editable node is focused, the first editable node in depth-first order.
     */
    private fun findFocusedEditText(node: AccessibilityNodeInfo): AccessibilityNodeInfo? =
        node.findFocus(AccessibilityNodeInfo.FOCUS_INPUT)?.takeIf { it.isEditable }
            ?: findFirstEditable(node)

    /** Depth-first search for the first editable node at or below [node]. */
    private fun findFirstEditable(node: AccessibilityNodeInfo): AccessibilityNodeInfo? {
        if (node.isEditable) return node
        for (i in 0 until node.childCount) {
            val child = node.getChild(i) ?: continue
            findFirstEditable(child)?.let { return it }
        }
        return null
    }

    companion object {
        /** The currently connected service, or `null` when the service is not running. */
        var instance: PokeAccessibilityService? = null
            private set
    }
}
Task Orchestrator
// agent/TaskOrchestrator.kt
/**
 * Runs the agent loop: sends the conversation to the model, executes the tool calls it
 * returns, feeds the results back, and repeats until the task ends.
 *
 * NOTE(review): [history] is shared mutable state that [runTask] clears on entry, so
 * overlapping calls on the same instance would interfere — confirm callers serialize tasks.
 */
class TaskOrchestrator(
    private val llmEngine: LLMEngine,
    private val toolDispatcher: ToolDispatcher,
    private val screenReader: ScreenReaderTool
) {
    /** One conversation turn; roles used here are "system", "user", "assistant" and "tool". */
    data class Message(val role: String, val content: String)

    private val history = mutableListOf<Message>()
    private val maxSteps = 20

    /**
     * Executes [userPrompt] as a task on [Dispatchers.IO].
     *
     * @return the model's plain-text reply when it makes no tool call, the `finish` tool's summary,
     *         or a "max steps reached" message after [maxSteps] iterations.
     */
    suspend fun runTask(userPrompt: String): String = withContext(Dispatchers.IO) {
        history.clear()
        history.add(Message("system", buildSystemPrompt()))
        history.add(Message("user", userPrompt))

        // The tool schemas do not change between steps, so build them once per task.
        val toolConfigs = llmEngine.buildToolConfigs()

        for (step in 1..maxSteps) {
            val response = llmEngine.chat(history, toolConfigs = toolConfigs)

            // No tool call → plain text reply, done
            if (response.toolCalls.isEmpty()) {
                return@withContext response.text ?: "Task complete."
            }

            // Execute each tool call
            val toolResults = mutableListOf<String>()
            for (call in response.toolCalls) {
                val result = toolDispatcher.dispatch(call.name, call.arguments)
                toolResults.add("Tool ${call.name} $result")

                // `finish` ends the task immediately; any later calls in this turn are skipped.
                if (call.name == "finish") {
                    return@withContext call.arguments.optString("summary", "Done.")
                }
            }

            // Feed results back as assistant + tool turn
            history.add(Message("assistant", response.text ?: "(tool calls)"))
            history.add(Message("tool", toolResults.joinToString("\n")))
        }

        "Task stopped: max steps ($maxSteps) reached."
    }

    /** System prompt describing the available tools, the expected workflow and the rules. */
    private fun buildSystemPrompt() = """
        You are PokeClaw, an AI agent that controls an Android phone.
        You have access to tools: tap, swipe, input_text, open_app, get_screen_info, send_message, finish.
        Workflow:
        1. Call get_screen_info to understand the current screen before acting.
        2. Pick the most direct tool to make progress.
        3. After each action, verify with get_screen_info if needed.
        4. Call finish when the task is complete with a brief summary.
        Rules:
        - Never guess coordinates; use element IDs or text labels when possible.
        - If an action fails, read the screen and adapt.
        - Do not loop more than 3 times on the same element.
        - Keep responses concise; the phone has limited memory.
    """.trimIndent()
}
Tool Dispatcher
// agent/ToolDispatcher.kt
/**
 * Routes a tool call (name plus JSON arguments) to its implementation and reports the
 * outcome as a plain string that is fed back to the model.
 *
 * @param accessibilityService service used for screen reading, taps, swipes and text input.
 * @param context context used to resolve and launch apps.
 */
class ToolDispatcher(
    private val accessibilityService: PokeAccessibilityService,
    private val context: Context
) {
    /**
     * Executes [toolName] with [args].
     *
     * Never throws: any exception raised by a handler (for example a missing required
     * argument) is converted into an `"Error in <tool>: <message>"` string.
     */
    fun dispatch(toolName: String, args: JSONObject): String =
        try {
            when (toolName) {
                "tap" -> handleTap(args)
                "swipe" -> handleSwipe(args)
                "input_text" -> handleInputText(args)
                "open_app" -> handleOpenApp(args)
                "get_screen_info" -> accessibilityService.getScreenInfo()
                "send_message" -> handleSendMessage(args)
                "finish" -> "FINISH: ${args.optString("summary")}"
                else -> "Unknown tool: $toolName"
            }
        } catch (e: Exception) {
            "Error in $toolName: ${e.message}"
        }

    /** Taps by coordinates when both `x` and `y` are present, otherwise by `target`. */
    private fun handleTap(args: JSONObject): String {
        val target = args.optString("target").takeIf { it.isNotEmpty() }
        // optDouble yields NaN for a missing key; map that to null.
        val x = args.optDouble("x").takeIf { !it.isNaN() }?.toFloat()
        val y = args.optDouble("y").takeIf { !it.isNaN() }?.toFloat()

        val success = accessibilityService.tap(target, x, y)

        // Coordinates take precedence in tap(), so report whichever locator was actually used
        // instead of printing 'null' for a coordinate-only tap.
        val locator = if (x != null && y != null) "($x, $y)" else "'$target'"
        return if (success) "Tapped $locator" else "Tap failed for $locator"
    }

    /** Swipes between the required `start_x`/`start_y` and `end_x`/`end_y` coordinates. */
    private fun handleSwipe(args: JSONObject): String {
        val sx = args.getDouble("start_x").toFloat()
        val sy = args.getDouble("start_y").toFloat()
        val ex = args.getDouble("end_x").toFloat()
        val ey = args.getDouble("end_y").toFloat()
        val success = accessibilityService.swipe(sx, sy, ex, ey)
        return if (success) "Swiped ($sx,$sy)→($ex,$ey)" else "Swipe failed"
    }

    /** Types the required `text` into the optional `target` field. */
    private fun handleInputText(args: JSONObject): String {
        val text = args.getString("text")
        val target = args.optString("target").takeIf { it.isNotEmpty() }
        val success = accessibilityService.inputText(text, target)
        return if (success) "Typed: \"$text\"" else "Input failed"
    }

    /** Launches an app by explicit `package_name`, or by resolving `app_name` to a package. */
    private fun handleOpenApp(args: JSONObject): String {
        val appName = args.getString("app_name")
        val pkgName = args.optString("package_name").takeIf { it.isNotEmpty() }
            ?: resolvePackageName(appName)
            ?: return "App '$appName' not found"

        val intent = context.packageManager.getLaunchIntentForPackage(pkgName)
            ?: return "Cannot launch '$pkgName'"
        intent.addFlags(Intent.FLAG_ACTIVITY_NEW_TASK)
        context.startActivity(intent)
        return "Launched $appName ($pkgName)"
    }

    /** Sends `message` to `contact` in `app`; all three arguments are required. */
    private fun handleSendMessage(args: JSONObject): String {
        val app = args.getString("app")
        val contact = args.getString("contact")
        val message = args.getString("message")
        // Delegates to MessagingSkill which orchestrates the multi-step flow
        return MessagingSkill(accessibilityService, context).sendMessage(app, contact, message)
    }

    /**
     * Finds the package whose application label equals [appName], ignoring case.
     *
     * NOTE(review): on recent Android versions the list of installed applications may be
     * filtered by package-visibility rules — confirm the manifest declares what is needed.
     */
    private fun resolvePackageName(appName: String): String? {
        val pm = context.packageManager
        return pm.getInstalledApplications(PackageManager.GET_META_DATA)
            .firstOrNull { pm.getApplicationLabel(it).toString().equals(appName, ignoreCase = true) }
            ?.packageName
    }
}
Skills System
Skills are reusable multi-step workflows. Write a skill as a Kotlin class or (upcoming) a plain-text .skill file.
Built-in skill example: Auto-Reply
// skills/AutoReplySkill.kt
/**
 * Skill that answers an incoming message from [contact] in [app] with an LLM-generated reply.
 *
 * @param accessibility service used to read the screen; also passed to [ToolDispatcher] as its context.
 * @param llmEngine engine used to generate the reply text.
 * @param contact contact the reply is sent to.
 * @param app messaging app to open; defaults to "WhatsApp".
 */
class AutoReplySkill(
    private val accessibility: PokeAccessibilityService,
    private val llmEngine: LLMEngine,
    private val contact: String,
    private val app: String = "WhatsApp"
) {
    /**
     * Called when a new message notification arrives from contact.
     *
     * @param notificationText text of the incoming notification.
     * @return a confirmation containing the reply, or the dispatcher's error string when
     *         sending failed with an exception.
     */
    suspend fun handleIncomingMessage(notificationText: String): String {
        // Step 1: Open the chat
        val dispatcher = ToolDispatcher(accessibility, accessibility)
        dispatcher.dispatch("open_app", JSONObject().put("app_name", app))
        delay(1500) // fixed pause before reading the screen after launching the app

        // Step 2: Read the full conversation visible on screen
        val screenInfo = accessibility.getScreenInfo()

        // Step 3: Generate a context-aware reply using the LLM
        val reply = llmEngine.generateReply(
            systemPrompt = "You are replying on behalf of the user. Be brief and natural.",
            context = "Conversation visible on screen:\n$screenInfo\n\nLatest message: $notificationText",
            instruction = "Write a short, friendly reply."
        )

        // Step 4: Send the reply
        val sendArgs = JSONObject()
            .put("app", app)
            .put("contact", contact)
            .put("message", reply)
        val sendResult = dispatcher.dispatch("send_message", sendArgs)

        // dispatch() converts handler exceptions into "Error in <tool>: ..." strings;
        // surface those instead of claiming the reply was sent.
        if (sendResult.startsWith("Error in")) return sendResult

        return "Auto-replied to $contact \"$reply\""
    }
}

Skill as a text recipe (upcoming format)

morning-briefing.skill

name: Morning Briefing
description: Summarize weather, calendar, and email every morning.
steps:
  1. open_app(app_name="Weather")
  2. get_screen_info() -> weather_info
  3. open_app(app_name="Calendar")
  4. get_screen_info() -> calendar_info
  5. open_app(app_name="Gmail")
  6. get_screen_info() -> email_info
  7. finish(summary="Weather: {weather_info}\nCalendar: {calendar_info}\nEmail: {email_info}")

Cloud Mode (Optional)
When stronger reasoning is needed, swap the LLM backend. The tool interface stays identical.

// llm/CloudLLMEngine.kt — example with OpenAI-compatible endpoint
/**
 * Skeleton of a cloud-backed [LLMBackend]; the HTTP call is intentionally left unimplemented.
 *
 * @param apiKey API key, read from the POKECLAW_CLOUD_API_KEY environment variable by default.
 * @param endpoint base URL, read from POKECLAW_CLOUD_ENDPOINT by default.
 */
class CloudLLMEngine(
    private val apiKey: String = System.getenv("POKECLAW_CLOUD_API_KEY") ?: "",
    private val endpoint: String = System.getenv("POKECLAW_CLOUD_ENDPOINT") ?: "https://api.openai.com/v1"
) : LLMBackend {
    override suspend fun chat(
        messages: List<TaskOrchestrator.Message>,
        toolConfigs: List<ToolConfig>
    ): LLMResponse {
        // Build request body with tool schemas, call endpoint, parse response
        // The ToolDispatcher and orchestrator are unchanged
        TODO("Implement HTTP call with OkHttp or Ktor")
    }
}

Switch backends in your DI setup — everything else is identical because ToolDispatcher is backend-agnostic.

Model Download (Local Mode)

// model/ModelManager.kt
/** Downloads the default model into the app's files directory and reports where it lives. */
class ModelManager(private val context: Context) {
    // Default model: Gemma 4 E2B LiteRT (~2.6 GB)
    private val modelUrl = "https://huggingface.co/google/gemma-4-e2b-it-litert/resolve/main/model.litertlm"
    private val modelFile get() = File(context.filesDir, "gemma4_e2b.litertlm")

    /** `true` when the model file exists and is larger than 1 GB. */
    val isDownloaded get() = modelFile.exists() && modelFile.length() > 1_000_000_000L

    /**
     * Downloads the model on [Dispatchers.IO], reporting progress in the range 0..1.
     *
     * The data is written to a temporary ".part" file and only renamed to the final name once the
     * transfer has finished, so an interrupted download is never mistaken for a complete model
     * by [isDownloaded].
     *
     * @param onProgress called after each chunk; skipped when the server does not report a length.
     * @throws IllegalStateException if the server does not answer with HTTP 200 or the final rename fails.
     */
    suspend fun downloadModel(onProgress: (Float) -> Unit) = withContext(Dispatchers.IO) {
        val connection = URL(modelUrl).openConnection() as HttpURLConnection
        try {
            val status = connection.responseCode
            check(status == HttpURLConnection.HTTP_OK) { "Model download failed: HTTP $status" }

            val total = connection.contentLengthLong // -1 when the length is unknown
            val partFile = File(modelFile.parentFile, modelFile.name + ".part")
            var downloaded = 0L
            connection.inputStream.use { input ->
                partFile.outputStream().use { output ->
                    val buffer = ByteArray(8192)
                    var bytes: Int
                    while (input.read(buffer).also { bytes = it } != -1) {
                        output.write(buffer, 0, bytes)
                        downloaded += bytes
                        if (total > 0) onProgress(downloaded.toFloat() / total)
                    }
                }
            }

            if (modelFile.exists()) modelFile.delete()
            check(partFile.renameTo(modelFile)) { "Could not move downloaded model into place" }
        } finally {
            connection.disconnect()
        }
    }

    fun getModelPath(): String = modelFile.absolutePath
}

To use a custom .litertlm model, place it in context.filesDir and call llmEngine.initialize(customPath).

ViewModel Integration

// ui/TaskViewModel.kt
/** Exposes the chat transcript and a running flag, and submits prompts to the orchestrator. */
@HiltViewModel
class TaskViewModel @Inject constructor(
    private val orchestrator: TaskOrchestrator
) : ViewModel() {
    private val _messages = MutableStateFlow<List<ChatMessage>>(emptyList())
    val messages: StateFlow<List<ChatMessage>> = _messages.asStateFlow()

    private val _isRunning = MutableStateFlow(false)
    val isRunning: StateFlow<Boolean> = _isRunning.asStateFlow()

    /** Appends the user prompt, runs the task, then appends the agent's result or error text. */
    fun submitTask(prompt: String) {
        viewModelScope.launch {
            _isRunning.value = true
            _messages.update { it + ChatMessage("user", prompt) }
            try {
                val result = try {
                    orchestrator.runTask(prompt)
                } catch (e: kotlin.coroutines.cancellation.CancellationException) {
                    throw e // cancellation must propagate, not be shown as an error message
                } catch (e: Exception) {
                    "Error: ${e.message}"
                }
                _messages.update { it + ChatMessage("agent", result) }
            } finally {
                // Reset the flag even when the coroutine is cancelled.
                _isRunning.value = false
            }
        }
    }
}

data class ChatMessage(val role: String, val text: String)

Quick-Task Cards
Surface pre-built tasks in the UI:

// ui/QuickTaskCard.kt
val quickTasks = listOf(
    QuickTask("📋 Summarize notifications", "Read my recent notifications and summarize them."),
    QuickTask("🔋 Battery report", "Check battery level, temperature, and charging state."),
    QuickTask("💾 Storage analysis", "Analyze storage usage and suggest apps to clean up."),
    QuickTask("📱 Installed apps", "List all installed apps grouped by category."),
    QuickTask("🔵 Bluetooth state", "Check if Bluetooth is on and list paired devices.")
)

data class QuickTask(val label: String, val prompt: String)

// In your composable:
@Composable
fun QuickTaskRow(onSelect: (String) -> Unit) {
    LazyRow(horizontalArrangement = Arrangement.spacedBy(8.dp)) {
        items(quickTasks) { task ->
            AssistChip(
                onClick = { onSelect(task.prompt) },
                label = { Text(task.label) }
            )
        }
    }
}

Common Patterns

Pattern: Read screen → act → verify

// Always read screen state before acting on unknown screens
suspend fun navigateToContact(contact: String): Boolean {
    // 1. Understand current state
    val screen = accessibilityService.getScreenInfo()
    // 2. If already in the right place, proceed
    if (screen.contains(contact)) return true
    // 3. Otherwise search
    accessibilityService.tap("search")
    delay(500)
    accessibilityService.inputText(contact)
    delay(1000)
    // 4. Verify
    val updated = accessibilityService.getScreenInfo()
    return updated.contains(contact)
}

Pattern: Retry with backoff

/**
 * Runs [block] up to [times] times, doubling the delay (starting at [initialDelay] ms) after
 * each failed attempt. The last attempt's exception propagates to the caller.
 */
suspend fun <T> retryWithBackoff(
    times: Int = 3,
    initialDelay: Long = 500,
    block: suspend () -> T
): T {
    var currentDelay = initialDelay
    repeat(times - 1) {
        try {
            return block()
        } catch (e: kotlin.coroutines.cancellation.CancellationException) {
            throw e // a cancelled coroutine must stop, not retry
        } catch (e: Exception) {
            /* continue */
        }
        delay(currentDelay)
        currentDelay *= 2
    }
    return block() // last attempt, let exception propagate
}

// Usage — tap() reports failure by returning false, so turn that into an exception to trigger a retry
retryWithBackoff { check(accessibilityService.tap("Send")) { "Tap on 'Send' failed" } }

Pattern: Wait for UI element

suspend fun waitForElement(target: String, timeoutMs: Long = 5000, pollMs: Long = 200): Boolean {
    val deadline = System.currentTimeMillis() + timeoutMs
    while (System.currentTimeMillis() < deadline) {
        if (accessibilityService.getScreenInfo().contains(target)) return true
        delay(pollMs)
    }
    return false
}

Troubleshooting
Problem | Cause | Fix
Screen unavailable from getScreenInfo() | Accessibility service not connected | Check Settings → Accessibility → PokeClaw is enabled
Tap does nothing | Element not in view or ID mismatch | Call getScreenInfo() first; scroll if needed
Model OOM crash | Not enough free RAM | Close background apps; need ≥8 GB device RAM
Model download

返回排行榜