# PokeClaw Android AI Agent

Skill by ara.so — Daily 2026 Skills collection.

PokeClaw is an open-source Android app that runs Gemma 4 entirely on-device via LiteRT-LM with native tool calling. The LLM reads the screen as a UI tree, selects tools (tap, swipe, type, open app, send message, etc.), executes them through Android Accessibility Services, observes the result, and loops until the task is complete — no cloud, no API key required for local mode.

## Architecture Overview

```
User prompt
    │
    ▼
TaskOrchestrator            ← manages task lifecycle & session history
    │
    ▼
LLMEngine (LiteRT-LM)       ← Gemma 4 on-device, tool-call aware
    │  tool_calls[]
    ▼
ToolDispatcher              ← routes to concrete tool implementations
    │
    ├── AccessibilityTool   ← tap / swipe / long_press / input_text
    ├── AppLaunchTool       ← open_app
    ├── ScreenReaderTool    ← get_screen_info / take_screenshot
    ├── MessagingTool       ← send_message / auto_reply
    └── FinishTool          ← finish (signals task done)
    │
    ▼
Android Accessibility Service / UI Automator
```

## Installation / Setup

### 1. Clone the repo

```bash
git clone https://github.com/agents-io/PokeClaw.git
cd PokeClaw
```

### 2. Open in Android Studio

- Android Studio Hedgehog or newer recommended
- SDK: Android 9+ (API 28), target API 34+
- Kotlin 1.9+

### 3. Add LiteRT-LM dependency

In `app/build.gradle.kts`:

```kotlin
dependencies {
    // LiteRT-LM for on-device LLM inference with tool calling
    implementation("com.google.ai.edge.litert:litert-lm:1.0.0")
    // Coroutines for async inference
    implementation("org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3")
    // JSON for tool-call serialization
    implementation("org.json:json:20231013")
}
```

### 4. AndroidManifest.xml permissions
```xml
<uses-permission android:name="android.permission.BIND_ACCESSIBILITY_SERVICE" />
<uses-permission android:name="android.permission.BIND_NOTIFICATION_LISTENER_SERVICE" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_DATA_SYNC" />
```
```xml
<service
    android:name=".accessibility.PokeAccessibilityService"
    android:permission="android.permission.BIND_ACCESSIBILITY_SERVICE"
    android:exported="false">
    <intent-filter>
        <action android:name="android.accessibilityservice.AccessibilityService" />
    </intent-filter>
    <meta-data
        android:name="android.accessibilityservice"
        android:resource="@xml/accessibility_service_config" />
</service>
```
`res/xml/accessibility_service_config.xml`:

```xml
<accessibility-service
    xmlns:android="http://schemas.android.com/apk/res/android"
    android:accessibilityEventTypes="typeAllMask"
    android:accessibilityFeedbackType="feedbackGeneric"
    android:accessibilityFlags="flagDefault|flagRetrieveInteractiveWindows|flagRequestEnhancedWebAccessibility"
    android:canRetrieveWindowContent="true"
    android:canPerformGestures="true"
    android:notificationTimeout="100"
    android:description="@string/accessibility_service_description" />
```
### 5. Build & install APK

```bash
./gradlew assembleDebug
adb install app/build/outputs/apk/debug/app-debug.apk
```

Or download the latest release APK.
- Core Concepts
- Tool Definition
- Tools are declared as JSON schemas that LiteRT-LM uses for structured output. Define a tool:
- // domain/tools/ToolDefinition.kt
/**
 * Declaration of a single tool that can be offered to the model.
 *
 * @property name Identifier of the tool.
 * @property description Human-readable explanation of what the tool does.
 * @property parameters Schema describing the arguments the tool accepts.
 */
data class ToolDefinition(
    val name: String,
    val description: String,
    val parameters: ToolParameters,
)
/**
 * Argument schema of a tool.
 *
 * [properties] and [required] default to empty so that a tool taking no
 * arguments can be declared as `ToolParameters()`; callers that pass both
 * values explicitly are unaffected.
 *
 * @property type Schema type; defaults to `"object"`.
 * @property properties Argument name mapped to its [ToolProperty] description.
 * @property required Names of the arguments that must be supplied.
 */
data class ToolParameters(
    val type: String = "object",
    val properties: Map<String, ToolProperty> = emptyMap(),
    val required: List<String> = emptyList(),
)
/**
 * Description of one argument inside a tool's parameter schema.
 *
 * @property type Schema type of the argument (for example `"string"`).
 * @property description Human-readable explanation of the argument.
 * @property enum Optional list of allowed values; `null` when unrestricted.
 */
data class ToolProperty(
    val type: String,
    val description: String,
    val enum: List<String>? = null,
)
- Registering Tools with LiteRT-LM
- // llm/LLMEngine.kt
import com.google.ai.edge.litert.lm.LiteRtLm
import com.google.ai.edge.litert.lm.InferenceOptions
import com.google.ai.edge.litert.lm.ToolConfig

/**
 * Creates the on-device [LiteRtLm] instance and declares the JSON tool
 * schemas handed to it.
 *
 * NOTE(review): `lm` is assigned in [initialize] but not read anywhere in this
 * snippet; other parts of the document call `chat(...)` and
 * `generateReply(...)` on this class, which are not shown here.
 *
 * @param context Android context passed to `LiteRtLm.create`.
 */
class LLMEngine(private val context: Context) {

    private lateinit var lm: LiteRtLm

    /**
     * Loads the model stored at [modelPath] with fixed inference options
     * (2048 max tokens, temperature 0.1, top-k 40).
     */
    suspend fun initialize(modelPath: String) {
        lm = LiteRtLm.create(
            context = context,
            modelPath = modelPath,
            inferenceOptions = InferenceOptions.builder()
                .setMaxTokens(2048)
                .setTemperature(0.1f) // low temp for reliable tool calls
                .setTopK(40)
                .build(),
        )
    }

    /**
     * Builds one [ToolConfig] per tool schema declared below.
     *
     * `swipe` is included so the registered tools match the tool names the
     * system prompt advertises and the dispatcher handles.
     */
    fun buildToolConfigs(): List<ToolConfig> =
        listOf(
            tapToolJson(),
            swipeToolJson(),
            inputTextToolJson(),
            openAppToolJson(),
            getScreenInfoToolJson(),
            sendMessageToolJson(),
            finishToolJson(),
        ).map { ToolConfig.fromJson(it) }

    private fun tapToolJson() = """
        {
          "name": "tap",
          "description": "Tap a UI element by its resource ID, content description, or screen coordinates.",
          "parameters": {
            "type": "object",
            "properties": {
              "target": {
                "type": "string",
                "description": "Resource ID, content-desc, or visible text of the element to tap."
              },
              "x": { "type": "number", "description": "Screen X coordinate (optional)." },
              "y": { "type": "number", "description": "Screen Y coordinate (optional)." }
            },
            "required": ["target"]
          }
        }
    """.trimIndent()

    // Argument names mirror the keys the dispatcher's swipe handler reads.
    private fun swipeToolJson() = """
        {
          "name": "swipe",
          "description": "Swipe across the screen from a start point to an end point.",
          "parameters": {
            "type": "object",
            "properties": {
              "start_x": { "type": "number", "description": "Start X coordinate." },
              "start_y": { "type": "number", "description": "Start Y coordinate." },
              "end_x": { "type": "number", "description": "End X coordinate." },
              "end_y": { "type": "number", "description": "End Y coordinate." }
            },
            "required": ["start_x", "start_y", "end_x", "end_y"]
          }
        }
    """.trimIndent()

    private fun inputTextToolJson() = """
        {
          "name": "input_text",
          "description": "Type text into the currently focused or specified text field.",
          "parameters": {
            "type": "object",
            "properties": {
              "text": { "type": "string", "description": "Text to type." },
              "target": { "type": "string", "description": "Resource ID of the text field (optional)." }
            },
            "required": ["text"]
          }
        }
    """.trimIndent()

    private fun openAppToolJson() = """
        {
          "name": "open_app",
          "description": "Launch an installed app by its name or package name.",
          "parameters": {
            "type": "object",
            "properties": {
              "app_name": { "type": "string", "description": "Human-readable app name, e.g. 'WhatsApp'." },
              "package_name": { "type": "string", "description": "Android package name (optional)." }
            },
            "required": ["app_name"]
          }
        }
    """.trimIndent()

    private fun getScreenInfoToolJson() = """
        {
          "name": "get_screen_info",
          "description": "Return a text representation of all interactive UI elements on the current screen.",
          "parameters": {
            "type": "object",
            "properties": {},
            "required": []
          }
        }
    """.trimIndent()

    private fun sendMessageToolJson() = """
        {
          "name": "send_message",
          "description": "Send a chat/SMS message to a contact. Handles: open app, find contact, type, send.",
          "parameters": {
            "type": "object",
            "properties": {
              "app": { "type": "string", "description": "Messaging app name, e.g. 'WhatsApp'." },
              "contact": { "type": "string", "description": "Contact name or phone number." },
              "message": { "type": "string", "description": "Message text to send." }
            },
            "required": ["app", "contact", "message"]
          }
        }
    """.trimIndent()

    private fun finishToolJson() = """
        {
          "name": "finish",
          "description": "Signal that the task is complete. Include a summary of what was done.",
          "parameters": {
            "type": "object",
            "properties": {
              "summary": { "type": "string", "description": "Brief summary of what was done." }
            },
            "required": ["summary"]
          }
        }
    """.trimIndent()
}
- Accessibility Service Implementation
- // accessibility/PokeAccessibilityService.kt
/**
 * Accessibility service that reads the active window's node tree and performs
 * taps, swipes and text entry on it.
 *
 * The connected service publishes itself through [instance]; the reference is
 * cleared again in [onDestroy] so callers never hold a destroyed service.
 */
class PokeAccessibilityService : AccessibilityService() {

    override fun onServiceConnected() {
        super.onServiceConnected()
        instance = this
    }

    override fun onAccessibilityEvent(event: AccessibilityEvent?) {
        /* optional monitoring */
    }

    override fun onInterrupt() {}

    override fun onDestroy() {
        // Only clear the slot if it still points at this instance.
        if (instance === this) instance = null
        super.onDestroy()
    }

    // ── Screen reading ──────────────────────────────────────────────

    /**
     * Returns a text dump of the active window, one line per node that has
     * text or a content description, or `"Screen unavailable"` when there is
     * no active window root.
     */
    fun getScreenInfo(): String {
        val root = rootInActiveWindow ?: return "Screen unavailable"
        return buildString { appendNode(root, 0) }
    }

    /**
     * Appends [node] (if it carries text or a content description) and then
     * all of its children, indenting by [depth].
     */
    private fun StringBuilder.appendNode(node: AccessibilityNodeInfo, depth: Int) {
        val indent = " ".repeat(depth)
        val text = node.text?.toString()?.trim()
        val desc = node.contentDescription?.toString()?.trim()
        val resId = node.viewIdResourceName
        val cls = node.className?.toString()?.substringAfterLast('.')

        if (!text.isNullOrEmpty() || !desc.isNullOrEmpty()) {
            append("$indent[$cls")
            if (!resId.isNullOrEmpty()) append(" id=$resId")
            if (!text.isNullOrEmpty()) append(" text=\"$text\"")
            if (!desc.isNullOrEmpty()) append(" desc=\"$desc\"")
            if (node.isClickable) append(" clickable=true")
            if (node.isEditable) append(" editable=true")
            appendLine("]")
        }
        for (i in 0 until node.childCount) {
            node.getChild(i)?.let { appendNode(it, depth + 1) }
        }
    }

    // ── Tap ─────────────────────────────────────────────────────────

    /**
     * Taps at ([x], [y]) when both coordinates are given; otherwise looks up
     * [target] by text, view id, then content description and clicks it.
     *
     * If the matched node is not itself clickable, the nearest clickable
     * ancestor is clicked instead; when no ancestor is clickable the matched
     * node is clicked as before.
     *
     * @return `true` when the gesture was dispatched or the click action
     *   succeeded; `false` when there is no window, no target, or no match.
     */
    fun tap(target: String?, x: Float? = null, y: Float? = null): Boolean {
        if (x != null && y != null) return performTapGesture(x, y)
        if (target == null) return false
        val root = rootInActiveWindow ?: return false
        val match = findNode(root, target) ?: return false
        val clickTarget = generateSequence(match) { it.parent }
            .firstOrNull { it.isClickable }
            ?: match
        return clickTarget.performAction(AccessibilityNodeInfo.ACTION_CLICK)
    }

    /** Dispatches a 50 ms single-point stroke at ([x], [y]). */
    private fun performTapGesture(x: Float, y: Float): Boolean =
        dispatchStroke(Path().apply { moveTo(x, y) }, 50)

    /** Wraps [path] in a single stroke starting at 0 ms and dispatches it. */
    private fun dispatchStroke(path: Path, durationMs: Long): Boolean {
        val stroke = GestureDescription.StrokeDescription(path, 0, durationMs)
        val gesture = GestureDescription.Builder().addStroke(stroke).build()
        return dispatchGesture(gesture, null, null)
    }

    /** Tries text match, then view id, then a recursive content-description search. */
    private fun findNode(root: AccessibilityNodeInfo, target: String): AccessibilityNodeInfo? =
        root.findAccessibilityNodeInfosByText(target).firstOrNull()
            ?: root.findAccessibilityNodeInfosByViewId(target).firstOrNull()
            ?: findByContentDesc(root, target)

    /**
     * Depth-first search for the first node whose content description contains
     * [target], ignoring case.
     */
    private fun findByContentDesc(node: AccessibilityNodeInfo, target: String): AccessibilityNodeInfo? {
        if (node.contentDescription?.toString()?.contains(target, ignoreCase = true) == true) return node
        return (0 until node.childCount).firstNotNullOfOrNull { index ->
            node.getChild(index)?.let { findByContentDesc(it, target) }
        }
    }

    // ── Swipe ───────────────────────────────────────────────────────

    /**
     * Dispatches a straight-line swipe from ([startX], [startY]) to
     * ([endX], [endY]) lasting [durationMs] milliseconds.
     */
    fun swipe(
        startX: Float,
        startY: Float,
        endX: Float,
        endY: Float,
        durationMs: Long = 300,
    ): Boolean {
        val path = Path().apply {
            moveTo(startX, startY)
            lineTo(endX, endY)
        }
        return dispatchStroke(path, durationMs)
    }

    // ── Type text ───────────────────────────────────────────────────

    /**
     * Sets [text] on the node with view id [targetResId], or, when no id is
     * given, on the input-focused editable node (falling back to the first
     * editable node in the window).
     *
     * @return result of the set-text action, or `false` when no window or no
     *   suitable node is available.
     */
    fun inputText(text: String, targetResId: String? = null): Boolean {
        val root = rootInActiveWindow ?: return false
        val field = when (targetResId) {
            null -> findFocusedEditText(root)
            else -> root.findAccessibilityNodeInfosByViewId(targetResId).firstOrNull()
        } ?: return false

        field.performAction(AccessibilityNodeInfo.ACTION_FOCUS)
        val args = Bundle().apply {
            putString(AccessibilityNodeInfo.ACTION_ARGUMENT_SET_TEXT_CHARSEQUENCE, text)
        }
        return field.performAction(AccessibilityNodeInfo.ACTION_SET_TEXT, args)
    }

    /**
     * Returns the editable node holding input focus under [node]; if none has
     * focus, returns the first editable node in depth-first order.
     */
    private fun findFocusedEditText(node: AccessibilityNodeInfo): AccessibilityNodeInfo? {
        val focused = node.findFocus(AccessibilityNodeInfo.FOCUS_INPUT)
        if (focused != null && focused.isEditable) return focused
        return findFirstEditable(node)
    }

    /** Depth-first search for the first editable node at or below [node]. */
    private fun findFirstEditable(node: AccessibilityNodeInfo): AccessibilityNodeInfo? {
        if (node.isEditable) return node
        return (0 until node.childCount).firstNotNullOfOrNull { index ->
            node.getChild(index)?.let { findFirstEditable(it) }
        }
    }

    companion object {
        /** The currently connected service, or `null` when none is running. */
        var instance: PokeAccessibilityService? = null
            private set
    }
}
- Task Orchestrator
- // agent/TaskOrchestrator.kt
/**
 * Runs the agent loop: sends the conversation to the LLM, executes the tool
 * calls it returns, feeds the results back, and repeats until the model
 * answers in plain text, calls `finish`, or the step limit is hit.
 *
 * NOTE(review): `screenReader` is accepted but not used in this snippet.
 */
class TaskOrchestrator(
    private val llmEngine: LLMEngine,
    private val toolDispatcher: ToolDispatcher,
    private val screenReader: ScreenReaderTool,
) {
    /** One conversation turn; [role] is "system", "user", "assistant" or "tool". */
    data class Message(val role: String, val content: String)

    private val history = mutableListOf<Message>()
    private val maxSteps = 20

    /**
     * Executes [userPrompt] as a fresh task (previous history is cleared).
     *
     * @return the model's plain-text reply, the `summary` argument of a
     *   `finish` call, or a "max steps" notice after [maxSteps] iterations.
     */
    suspend fun runTask(userPrompt: String): String = withContext(Dispatchers.IO) {
        history.clear()
        history.add(Message("system", buildSystemPrompt()))
        history.add(Message("user", userPrompt))

        // The tool schemas do not change between steps, so build them once.
        val toolConfigs = llmEngine.buildToolConfigs()

        for (step in 1..maxSteps) {
            val response = llmEngine.chat(history, toolConfigs = toolConfigs)

            // No tool call → plain text reply, done
            if (response.toolCalls.isEmpty()) {
                return@withContext response.text ?: "Task complete."
            }

            // Execute each tool call
            val toolResults = mutableListOf<String>()
            for (call in response.toolCalls) {
                val result = toolDispatcher.dispatch(call.name, call.arguments)
                toolResults.add("Tool ${call.name} → $result")
                if (call.name == "finish") {
                    return@withContext call.arguments.optString("summary", "Done.")
                }
            }

            // Feed results back as assistant + tool turn
            history.add(Message("assistant", response.text ?: "(tool calls)"))
            history.add(Message("tool", toolResults.joinToString("\n")))
        }

        "Task stopped: max steps ($maxSteps) reached."
    }

    /** System prompt describing the agent's role, workflow and rules. */
    private fun buildSystemPrompt() = """
        You are PokeClaw, an AI agent that controls an Android phone.
        You have access to tools: tap, swipe, input_text, open_app, get_screen_info, send_message, finish.
        Workflow:
        1. Call get_screen_info to understand the current screen before acting.
        2. Pick the most direct tool to make progress.
        3. After each action, verify with get_screen_info if needed.
        4. Call finish when the task is complete with a brief summary.
        Rules:
        - Never guess coordinates; use element IDs or text labels when possible.
        - If an action fails, read the screen and adapt.
        - Do not loop more than 3 times on the same element.
        - Keep responses concise; the phone has limited memory.
    """.trimIndent()
}
- Tool Dispatcher
- // agent/ToolDispatcher.kt
/**
 * Routes a tool call (name plus JSON arguments) to its implementation and
 * returns a short text result for the model.
 *
 * @param accessibilityService Service used for taps, swipes, text entry and
 *   screen reading.
 * @param context Context used to resolve and launch apps.
 */
class ToolDispatcher(
    private val accessibilityService: PokeAccessibilityService,
    private val context: Context,
) {
    /**
     * Executes [toolName] with [args].
     *
     * Never throws: any exception raised by a handler (including a missing
     * required argument) is converted into an `"Error in …"` result string.
     */
    fun dispatch(toolName: String, args: JSONObject): String =
        try {
            when (toolName) {
                "tap" -> handleTap(args)
                "swipe" -> handleSwipe(args)
                "input_text" -> handleInputText(args)
                "open_app" -> handleOpenApp(args)
                "get_screen_info" -> accessibilityService.getScreenInfo()
                "send_message" -> handleSendMessage(args)
                "finish" -> "FINISH: ${args.optString("summary")}"
                else -> "Unknown tool: $toolName"
            }
        } catch (e: Exception) {
            // Some exceptions carry no message; fall back to the class name
            // so the model never sees "Error in x: null".
            "Error in $toolName: ${e.message ?: e.javaClass.simpleName}"
        }

    /**
     * Taps by coordinates when both `x` and `y` are present, otherwise by
     * `target`. The result names whichever of the two was actually used.
     */
    private fun handleTap(args: JSONObject): String {
        val target = args.optString("target").takeIf { it.isNotEmpty() }
        val x = args.optDouble("x").takeIf { !it.isNaN() }?.toFloat()
        val y = args.optDouble("y").takeIf { !it.isNaN() }?.toFloat()

        val byCoordinates = x != null && y != null
        if (target == null && !byCoordinates) {
            return "Tap failed: provide 'target' or both 'x' and 'y'"
        }
        val label = if (byCoordinates) "($x, $y)" else target
        val success = accessibilityService.tap(target, x, y)
        return if (success) "Tapped '$label'" else "Tap failed for '$label'"
    }

    /** Swipes between the required `start_x/start_y` and `end_x/end_y` arguments. */
    private fun handleSwipe(args: JSONObject): String {
        val sx = args.getDouble("start_x").toFloat()
        val sy = args.getDouble("start_y").toFloat()
        val ex = args.getDouble("end_x").toFloat()
        val ey = args.getDouble("end_y").toFloat()
        val success = accessibilityService.swipe(sx, sy, ex, ey)
        return if (success) "Swiped ($sx, $sy)→($ex, $ey)" else "Swipe failed"
    }

    /** Types the required `text` into the optional `target` field. */
    private fun handleInputText(args: JSONObject): String {
        val text = args.getString("text")
        val target = args.optString("target").takeIf { it.isNotEmpty() }
        val success = accessibilityService.inputText(text, target)
        return if (success) "Typed: \"$text\"" else "Input failed"
    }

    /**
     * Launches the app given by `package_name`, or by looking up the required
     * `app_name` among installed application labels.
     */
    private fun handleOpenApp(args: JSONObject): String {
        val appName = args.getString("app_name")
        val pkgName = args.optString("package_name").takeIf { it.isNotEmpty() }
            ?: resolvePackageName(appName)
            ?: return "App '$appName' not found"

        val intent = context.packageManager.getLaunchIntentForPackage(pkgName)
            ?: return "Cannot launch '$pkgName'"
        intent.addFlags(Intent.FLAG_ACTIVITY_NEW_TASK)
        context.startActivity(intent)
        return "Launched $appName ($pkgName)"
    }

    /** Sends the required `message` to `contact` in `app`. */
    private fun handleSendMessage(args: JSONObject): String {
        val app = args.getString("app")
        val contact = args.getString("contact")
        val message = args.getString("message")
        // Delegates to MessagingSkill which orchestrates the multi-step flow
        return MessagingSkill(accessibilityService, context).sendMessage(app, contact, message)
    }

    /**
     * Returns the package whose application label equals [appName]
     * (case-insensitive), or `null` when none matches.
     *
     * NOTE(review): on Android 11+ the installed-application list is subject
     * to package-visibility filtering — confirm the manifest declares the
     * needed `<queries>` entries or permission.
     */
    private fun resolvePackageName(appName: String): String? {
        val pm = context.packageManager
        return pm.getInstalledApplications(PackageManager.GET_META_DATA)
            .firstOrNull { pm.getApplicationLabel(it).toString().equals(appName, ignoreCase = true) }
            ?.packageName
    }
}
- Skills System
- Skills are reusable multi-step workflows. Write a skill as a Kotlin class or (upcoming) a plain-text
- .skill
- file.
- Built-in skill example: Auto-Reply
- // skills/AutoReplySkill.kt
/**
 * Built-in skill that answers an incoming message from [contact] in [app]
 * with an LLM-generated reply.
 */
class AutoReplySkill(
    private val accessibility: PokeAccessibilityService,
    private val llmEngine: LLMEngine,
    private val contact: String,
    private val app: String = "WhatsApp",
) {
    /**
     * Called when a new message notification arrives from `contact`.
     *
     * Opens [app], reads the screen, asks the LLM for a reply and sends it.
     * If the app cannot be opened, the dispatcher's failure message is
     * returned and nothing is read or sent.
     *
     * @param notificationText Text of the incoming message.
     * @return a confirmation containing the reply, or the open-app failure
     *   message.
     */
    suspend fun handleIncomingMessage(notificationText: String): String {
        // Step 1: Open the messaging app. The service doubles as the Context.
        val dispatcher = ToolDispatcher(accessibility, accessibility)
        val openResult = dispatcher.dispatch("open_app", JSONObject().put("app_name", app))
        // The dispatcher reports a successful launch as "Launched …"; anything
        // else means the screen below would belong to some other app.
        if (!openResult.startsWith("Launched")) return openResult
        delay(1500)

        // Step 2: Read whatever is visible on screen after the app opened
        val screenInfo = accessibility.getScreenInfo()

        // Step 3: Generate a context-aware reply using the LLM
        val reply = llmEngine.generateReply(
            systemPrompt = "You are replying on behalf of the user. Be brief and natural.",
            context = "Conversation visible on screen:\n$screenInfo\n\nLatest message: $notificationText",
            instruction = "Write a short, friendly reply.",
        )

        // Step 4: Send the reply
        dispatcher.dispatch(
            "send_message",
            JSONObject()
                .put("app", app)
                .put("contact", contact)
                .put("message", reply),
        )
        return "Auto-replied to $contact \"$reply\""
    }
}

Skill as a text recipe (upcoming format)
`morning-briefing.skill`

```
name: Morning Briefing
description: Summarize weather, calendar, and email every morning.
steps:
  1. open_app(app_name="Weather")
  2. get_screen_info() -> weather_info
  3. open_app(app_name="Calendar")
  4. get_screen_info() -> calendar_info
  5. open_app(app_name="Gmail")
  6. get_screen_info() -> email_info
  7. finish(summary="Weather: {weather_info}\nCalendar: {calendar_info}\nEmail: {email_info}")
```

## Cloud Mode (Optional)

When stronger reasoning is needed, swap the LLM backend. The tool interface stays identical.

```kotlin
// llm/CloudLLMEngine.kt — example with OpenAI-compatible endpoint
class CloudLLMEngine(
    private val apiKey: String = System.getenv("POKECLAW_CLOUD_API_KEY") ?: "",
    private val endpoint: String = System.getenv("POKECLAW_CLOUD_ENDPOINT") ?: "https://api.openai.com/v1"
) : LLMBackend {
    override suspend fun chat(
        messages: List<TaskOrchestrator.Message>,
        toolConfigs: List<ToolConfig>
    ): LLMResponse {
        // Build request body with tool schemas, call endpoint, parse response
        // The ToolDispatcher and orchestrator are unchanged
        TODO("Implement HTTP call with OkHttp or Ktor")
    }
}
```

Switch backends in your DI setup — everything else is identical because ToolDispatcher is backend-agnostic.

## Model Download (Local Mode)

```kotlin
// model/ModelManager.kt
class ModelManager(private val context: Context) {
    // Default model: Gemma 4 E2B LiteRT (~2.6 GB)
    private val modelUrl = "https://huggingface.co/google/gemma-4-e2b-it-litert/resolve/main/model.litertlm"
    private val modelFile get() = File(context.filesDir, "gemma4_e2b.litertlm")

    val isDownloaded get() = modelFile.exists() && modelFile.length() > 1_000_000_000L

    suspend fun downloadModel(onProgress: (Float) -> Unit) = withContext(Dispatchers.IO) {
        val connection = URL(modelUrl).openConnection() as HttpURLConnection
        val total = connection.contentLengthLong
        var downloaded = 0L
        connection.inputStream.use { input ->
            modelFile.outputStream().use { output ->
                val buffer = ByteArray(8192)
                var bytes: Int
                while (input.read(buffer).also { bytes = it } != -1) {
                    output.write(buffer, 0, bytes)
                    downloaded += bytes
                    onProgress(downloaded.toFloat() / total)
                }
            }
        }
    }

    fun getModelPath(): String = modelFile.absolutePath
}
```

To use a custom `.litertlm` model, place it in `context.filesDir` and call `llmEngine.initialize(customPath)`.

## ViewModel Integration

```kotlin
// ui/TaskViewModel.kt
@HiltViewModel
class TaskViewModel @Inject constructor(
    private val orchestrator: TaskOrchestrator
) : ViewModel() {
    private val _messages = MutableStateFlow<List<ChatMessage>>(emptyList())
    val messages: StateFlow<List<ChatMessage>> = _messages.asStateFlow()

    private val _isRunning = MutableStateFlow(false)
    val isRunning: StateFlow<Boolean> = _isRunning.asStateFlow()

    fun submitTask(prompt: String) {
        viewModelScope.launch {
            _isRunning.value = true
            _messages.update { it + ChatMessage("user", prompt) }
            val result = try {
                orchestrator.runTask(prompt)
            } catch (e: Exception) {
                "Error: ${e.message}"
            }
            _messages.update { it + ChatMessage("agent", result) }
            _isRunning.value = false
        }
    }
}

data class ChatMessage(val role: String, val text: String)
```

## Quick-Task Cards

Surface pre-built tasks in the UI:

```kotlin
// ui/QuickTaskCard.kt
val quickTasks = listOf(
    QuickTask("📋 Summarize notifications", "Read my recent notifications and summarize them."),
    QuickTask("🔋 Battery report", "Check battery level, temperature, and charging state."),
    QuickTask("💾 Storage analysis", "Analyze storage usage and suggest apps to clean up."),
    QuickTask("📱 Installed apps", "List all installed apps grouped by category."),
    QuickTask("🔵 Bluetooth state", "Check if Bluetooth is on and list paired devices.")
)

data class QuickTask(val label: String, val prompt: String)

// In your composable:
@Composable
fun QuickTaskRow(onSelect: (String) -> Unit) {
    LazyRow(horizontalArrangement = Arrangement.spacedBy(8.dp)) {
        items(quickTasks) { task ->
            AssistChip(
                onClick = { onSelect(task.prompt) },
                label = { Text(task.label) }
            )
        }
    }
}
```

## Common Patterns

### Pattern: Read screen → act → verify

```kotlin
// Always read screen state before acting on unknown screens
suspend fun navigateToContact(contact: String): Boolean {
    // 1. Understand current state
    val screen = accessibilityService.getScreenInfo()
    // 2. If already in the right place, proceed
    if (screen.contains(contact)) return true
    // 3. Otherwise search
    accessibilityService.tap("search")
    delay(500)
    accessibilityService.inputText(contact)
    delay(1000)
    // 4. Verify
    val updated = accessibilityService.getScreenInfo()
    return updated.contains(contact)
}
```

### Pattern: Retry with backoff

```kotlin
suspend fun <T> retryWithBackoff(
    times: Int = 3,
    initialDelay: Long = 500,
    block: suspend () -> T
): T {
    var currentDelay = initialDelay
    repeat(times - 1) {
        try {
            return block()
        } catch (e: Exception) { /* continue */ }
        delay(currentDelay)
        currentDelay *= 2
    }
    return block() // last attempt, let exception propagate
}

// Usage
retryWithBackoff { accessibilityService.tap("Send") }
```

### Pattern: Wait for UI element

```kotlin
suspend fun waitForElement(
    target: String,
    timeoutMs: Long = 5000,
    pollMs: Long = 200
): Boolean {
    val deadline = System.currentTimeMillis() + timeoutMs
    while (System.currentTimeMillis() < deadline) {
        if (accessibilityService.getScreenInfo().contains(target)) return true
        delay(pollMs)
    }
    return false
}
```

## Troubleshooting

| Problem | Cause | Fix |
|---|---|---|
| `Screen unavailable` from `getScreenInfo()` | Accessibility service not connected | Check Settings → Accessibility → PokeClaw is enabled |
| Tap does nothing | Element not in view or ID mismatch | Call `getScreenInfo()` first; scroll if needed |
| Model OOM crash | Not enough free RAM | Close background apps; need ≥8 GB device RAM |

Model download