% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gridworld.R
\name{gridworld}
\alias{gridworld}
\alias{gridworld_init}
\alias{gridworld_maze_MDP}
\alias{gridworld_s2rc}
\alias{gridworld_rc2s}
\alias{gridworld_matrix}
\alias{gridworld_plot_policy}
\alias{gridworld_plot_transition_graph}
\alias{gridworld_animate}
\title{Helper Functions for Gridworld MDPs}
\usage{
gridworld_init(
  dim,
  action_labels = c("up", "right", "down", "left"),
  unreachable_states = NULL,
  absorbing_states = NULL,
  labels = NULL
)

gridworld_maze_MDP(
  dim,
  start,
  goal,
  walls = NULL,
  action_labels = c("up", "right", "down", "left"),
  goal_reward = 1,
  step_cost = 0,
  restart = FALSE,
  discount = 0.9,
  horizon = Inf,
  info = NULL,
  name = NA
)

gridworld_s2rc(s)

gridworld_rc2s(rc)

gridworld_matrix(model, epoch = 1L, what = "states")

gridworld_plot_policy(
  model,
  epoch = 1L,
  actions = "character",
  states = FALSE,
  labels = TRUE,
  absorbing_state_action = FALSE,
  main = NULL,
  cex = 1,
  offset = 0.5,
  lines = TRUE,
  ...
)

gridworld_plot_transition_graph(
  x,
  hide_unreachable_states = TRUE,
  remove.loops = TRUE,
  vertex.color = "gray",
  vertex.shape = "square",
  vertex.size = 10,
  vertex.label = NA,
  edge.arrow.size = 0.3,
  margin = 0.2,
  main = NULL,
  ...
)

gridworld_animate(x, method, n, zlim = NULL, ...)
}
\arguments{
\item{dim}{vector of length two with the number of rows and columns of the gridworld.}

\item{action_labels}{vector with four action labels that move the agent up, right, down,
and left.}

\item{unreachable_states}{a vector with state labels for unreachable states.
These states will be excluded.}

\item{absorbing_states}{a vector with state labels for absorbing states.}

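\item{labels}{for \code{gridworld_init()}: a named list that maps state names to
labels (e.g., \code{list("s(4,4)" = "Black Hole")}); for \code{gridworld_plot_policy()}:
logical; show state labels.}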

\item{start, goal}{labels for the start state and the goal state.}

\item{walls}{a vector with state labels for walls. Walls will
become unreachable states.}

\item{goal_reward}{reward to transition to the goal state.}

\item{step_cost}{cost of each action that does not lead to the goal state.}

\item{restart}{logical; if \code{TRUE} then the problem automatically restarts when
the agent reaches the goal state.}

\item{discount, horizon}{MDP discount factor, and horizon.}

\item{info}{A list with additional information. Has to contain the gridworld
dimensions as element \code{gridworld_dim}.}

\item{name}{a string to identify the MDP problem.}

\item{s}{a state label.}

\item{rc}{a vector of length two with the row and column coordinate of a
state in the gridworld matrix.}

\item{model, x}{a gridworld MDP. A solved MDP is needed to show state values, actions, or the policy.}

\item{epoch}{epoch for unconverged finite-horizon solutions.}

\item{what}{What should be returned in the matrix. Options are:
\code{"states"}, \code{"labels"}, \code{"values"}, \code{"actions"}, \code{"absorbing"}, and
\code{"reachable"}.}

\item{actions}{how to show actions. Options are:
simple \code{"character"}, \code{"unicode"} arrows (needs to be supported by the used font),
\code{"label"} of the action, and  \code{"none"} to suppress showing the action.}

\item{states}{logical; show state names.}

\item{absorbing_state_action}{logical; show the value and the action for absorbing states.}

\item{main}{a main title for the plot. Defaults to the name of the problem.}

\item{cex}{expansion factor for the action.}

\item{offset}{move the state labels out of the way (in fractions of a character width).}

\item{lines}{logical; draw lines to separate states.}

\item{...}{further arguments are passed on to \code{igraph::plot.igraph()}.}

\item{hide_unreachable_states}{logical; do not show unreachable states.}

\item{remove.loops}{logical; do not show transitions from a state back to itself.}

\item{vertex.color, vertex.shape, vertex.size, vertex.label, edge.arrow.size}{see \code{igraph::igraph.plotting} for details. Set \code{vertex.label = NULL} to show the
state labels on the graph.}

\item{margin}{a single number specifying the margin of the plot. Can be used if the
graph does not fit inside the plotting area.}

\item{method}{an MDP solution method for \code{\link[=solve_MDP]{solve_MDP()}}.}

\item{n}{number of iterations to animate.}

\item{zlim}{limits for visualizing the state value.}
}
\description{
Helper functions for gridworld MDPs to convert between state names and
gridworld positions, and for visualizing policies.
}
\details{
Gridworlds are implemented with state names \code{s(row,col)}, where
\code{row} and \code{col} are locations in the matrix representing the gridworld.
The actions are \code{"up"}, \code{"right"},  \code{"down"}, and  \code{"left"}.

\code{gridworld_init()} initializes a new gridworld creating a matrix
of states with the given dimensions. Other action names
can be specified, but they must have the same effects in the same order
as above. Unreachable states (walls) and absorbing states can be defined.
This information can be used to build a custom gridworld MDP.

Several helper functions are provided
to use states, look at the state layout, and plot policies on the
gridworld.

\code{gridworld_maze_MDP()} helps to easily define maze-like gridworld MDPs.
By default, the goal state is absorbing, but with \code{restart = TRUE}, the
agent restarts the problem at the start state every time it reaches the goal
and receives the reward. Note that this implies that the goal state itself
becomes unreachable.

\code{gridworld_animate()} applies algorithms from \code{\link[=solve_MDP]{solve_MDP()}} iteration
by iteration and visualizes the state utilities. This helps to understand
how the algorithms work.
}
\examples{
# Define the states, actions, and transition model of a standard 7 x 7
# gridworld with three unreachable states and one labeled absorbing state.
gw <- gridworld_init(
  dim = c(7, 7),
  unreachable_states = c("s(2,2)", "s(7,3)", "s(3,6)"),
  absorbing_states = "s(4,4)",
  labels = list("s(4,4)" = "Black Hole")
)

# components of the gridworld description
gw$states
gw$actions
gw$info

# display the gridworld layout as a matrix: state names, state labels,
# and which states are reachable or absorbing
gridworld_matrix(gw)
gridworld_matrix(gw, what = "labels")
gridworld_matrix(gw, what = "reachable")
gridworld_matrix(gw, what = "absorbing")

# a transition function for regular moves in the gridworld is provided
gw$transition_prob("right", "s(1,1)", "s(1,2)")
gw$transition_prob("right", "s(2,1)", "s(2,2)")  ### we cannot move into an unreachable state
gw$transition_prob("right", "s(2,1)", "s(2,1)")  ### but the agent stays in place

# convert between state names and row/column indices
gridworld_s2rc("s(1,1)")
gridworld_rc2s(c(1,1))

# The information in gw can be used to build a custom MDP.

# We modify the standard transition function so there is a 50\% chance that
# you will get sucked into the black hole from the adjacent squares.
trans_black_hole <- function(action = NA, start.state = NA, end.state = NA) {
  # States adjacent to the black hole at s(4,4): with probability 0.5 the agent
  # is sucked into the black hole, otherwise the chosen action is executed
  # using the standard gridworld movement.
  if (start.state \%in\% c("s(3,3)", "s(3,4)", "s(3,5)", "s(4,3)", "s(4,5)",
                         "s(5,3)", "s(5,4)", "s(5,5)")) {
    p_move <- gw$transition_prob(action, start.state, end.state) * .5

    # The black hole can also be the regular target of the action, so the
    # movement share is added. Otherwise the probabilities for an action that
    # moves into the black hole would only sum to 0.5.
    if (end.state == "s(4,4)")
      return(.5 + p_move)

    return(p_move)
  }

  # everywhere else: use the standard gridworld movement
  gw$transition_prob(action, start.state, end.state)
}

# Assemble the custom MDP from the gridworld components and the modified
# transition function. The reward table specifies +1 in general and -100
# for transitions that end in the black hole s(4,4).
black_hole <- MDP(
  states = gw$states,
  actions = gw$actions,
  transition_prob = trans_black_hole,
  reward = rbind(
    R_(value = +1),
    R_(end.state = "s(4,4)", value = -100)
  ),
  info = gw$info,
  name = "Black hole"
)

# print the problem description
black_hole

# show the transitions between the states as a graph
gridworld_plot_transition_graph(black_hole)

# solve the problem and inspect the state values and the resulting policy
sol <- solve_MDP(black_hole)
gridworld_matrix(sol, what = "values")
gridworld_plot_policy(sol)
# The optimal policy is to fly around, but avoid the black hole.

# Build a Maze: The Dyna Maze from Chapter 8 in the RL book

# A maze with 6 rows and 9 columns. With restart = TRUE, the problem restarts
# at the start state every time the agent reaches the goal.
DynaMaze <- gridworld_maze_MDP(
  dim = c(6, 9),
  start = "s(3,1)",
  goal = "s(1,9)",
  walls = c("s(2,3)", "s(3,3)", "s(4,3)",
            "s(5,6)",
            "s(1,8)", "s(2,8)", "s(3,8)"),
  restart = TRUE,
  discount = 0.95,
  name = "Dyna Maze"
)
DynaMaze

# show the state names and the state labels in the maze layout
gridworld_matrix(DynaMaze)
gridworld_matrix(DynaMaze, what = "labels")

gridworld_plot_transition_graph(DynaMaze)
# Note that the problem resets whenever the goal state would be reached.

sol <- solve_MDP(DynaMaze)

gridworld_matrix(sol, what = "values")
gridworld_matrix(sol, what = "actions")
gridworld_plot_policy(sol)
gridworld_plot_policy(sol, actions = "label", cex = 1, states = FALSE)

# visualize the first 3 iterations of value iteration
gridworld_animate(DynaMaze, method = "value", n = 3)
}
\seealso{
Other gridworld: 
\code{\link{Cliff_walking}},
\code{\link{DynaMaze}},
\code{\link{Maze}},
\code{\link{Windy_gridworld}}

Other MDP: 
\code{\link{MDP}()},
\code{\link{MDP2POMDP}},
\code{\link{MDP_policy_functions}},
\code{\link{accessors}},
\code{\link{actions}()},
\code{\link{add_policy}()},
\code{\link{reachable_and_absorbing}},
\code{\link{regret}()},
\code{\link{simulate_MDP}()},
\code{\link{solve_MDP}()},
\code{\link{transition_graph}()},
\code{\link{value_function}()}
}
\concept{MDP}
\concept{gridworld}
