Value Iteration in MDPs

Value Iteration in MDPs preview image

1 collaborator

Default-person Larry Lin (Author)

Tags

mdp 

Tagged by Marzieh Jahanbazi over 9 years ago

value iteration 

Tagged by Marzieh Jahanbazi over 9 years ago

Visible to everyone | Changeable by everyone
Model was written in NetLogo 5.0.5 • Viewed 537 times • Downloaded 61 times • Run 0 times
Download the 'Value Iteration in MDPs' model | Download this model | Embed this model

Do you have questions or comments about this model? Ask them here! (You'll first need to log in.)


Comments and Questions

Please start the discussion about this model! (You'll first need to log in.)

Click to Run Model

;; patches-plabel-list: list of patch labels (plabel values). It is emptied in
;; `setup` and refilled at the start of every `go` with the labels as they were
;; before that step's sweep, so `go` can compare them with the labels afterwards.
globals[patches-plabel-list]

;; Initialises the model: clears the world, empties the label snapshot list,
;; zeroes every patch label and paints the grid via `reset-patch-color`.
;;
;; `reset-ticks` is deliberately the LAST command: it starts the tick counter
;; and triggers the initial plot setup/update, so it should run only after the
;; world has been fully initialised.
to setup
  clear-all

  ;; No snapshot exists yet; `go` fills this list at the start of each step.
  set patches-plabel-list []

  ;; Every patch starts with a label of 0; `reset-patch-color` then overwrites
  ;; the labels of the two special patches it colors green and red.
  ask patches [ set plabel 0 ]

  reset-patch-color

  reset-ticks
end

;; Runs one step: advances the tick counter, records the labels of the tracked
;; patches, runs a recursive `value-iteration` sweep starting at patch 2 1,
;; repaints the grid, and stops with a message once a sweep leaves every
;; tracked label unchanged.
to go
  tick

  ;; Coordinates of the patches whose labels are tracked (x-major, y-minor order).
  let tracked-xs [0 1 2 3]
  let tracked-ys [0 1 2]

  ;; Labels before the sweep, stored in the global list.
  let labels-before []
  foreach tracked-xs [
    let col ?
    foreach tracked-ys [
      set labels-before lput ([plabel] of patch col ?) labels-before
    ]
  ]
  set patches-plabel-list labels-before

  ;; The sweep marks visited patches yellow; repainting afterwards makes them
  ;; black again so the next step's sweep can visit them.
  ask patch 2 1 [ value-iteration ]
  reset-patch-color

  ;; Labels after the sweep, collected in the same order as the snapshot.
  let labels-after []
  foreach tracked-xs [
    let col ?
    foreach tracked-ys [
      set labels-after lput ([plabel] of patch col ?) labels-after
    ]
  ]

  ;; Both lists have the same length and order, so whole-list equality is the
  ;; same as finding no differing position.
  if (patches-plabel-list = labels-after) [
    output-print "Equilibrium reached"
    stop
  ]
end

;; Patch procedure. Updates this patch's label and then recursively does the
;; same for every 4-neighbor that is still black.
;;
;; Yellow doubles as a "visited" marker: a patch is painted yellow before the
;; recursion continues, so it is never entered twice within one sweep. Patches
;; that are any other non-black color are likewise never entered.
to value-iteration
  
  ;; Mark as visited BEFORE recursing so neighbors do not call back into us.
  set pcolor yellow
  
  ;; New label for this patch, computed from its current neighborhood.
  set plabel compute-plabel pxcor pycor
  
  ask neighbors4[
    
    ;; The color test must stay inside the ask body: an earlier neighbor's
    ;; recursion may already have visited (yellowed) this one by the time
    ;; its turn comes.
    if (pcolor = black)[
      value-iteration
    ]
    
  ]
end 

;; Reports the updated label (utility) for the patch at (input-pxcor, input-pycor).
;;
;; Transition model used by the code: an action moves in the intended direction
;; with weight 0.8 and in each of the two perpendicular directions with weight
;; 0.1. A move that would leave the world or enter a blue patch leaves the agent
;; where it is, so the patch's own label is used for that outcome.
;;
;; The reported value is
;;     max over the four actions of (expected next label) + R
;; rounded to 3 decimal places. The maximum is taken over the actions'
;; EXPECTED values, not over the raw neighbor labels: heading for the
;; highest-labelled neighbor is not always the best action once the 0.1 slips
;; into the side neighbors are counted.
;;
;; R is not defined in this procedure; presumably an interface widget holding
;; the per-step reward -- confirm against the model's Interface tab.
to-report compute-plabel [input-pxcor input-pycor]
  let own-plabel [plabel] of patch input-pxcor input-pycor

  ;; Label the agent ends up with after an actual move in each compass direction.
  let north-plabel compute-plabel-landing-value input-pxcor (input-pycor + 1) own-plabel
  let east-plabel  compute-plabel-landing-value (input-pxcor + 1) input-pycor own-plabel
  let south-plabel compute-plabel-landing-value input-pxcor (input-pycor - 1) own-plabel
  let west-plabel  compute-plabel-landing-value (input-pxcor - 1) input-pycor own-plabel

  ;; Expected next label for each action (intended direction plus two side slips).
  let q-north (0.8 * north-plabel) + (0.1 * east-plabel) + (0.1 * west-plabel)
  let q-east  (0.8 * east-plabel) + (0.1 * north-plabel) + (0.1 * south-plabel)
  let q-south (0.8 * south-plabel) + (0.1 * east-plabel) + (0.1 * west-plabel)
  let q-west  (0.8 * west-plabel) + (0.1 * north-plabel) + (0.1 * south-plabel)

  ;; Bellman backup: value of the best action plus the step reward.
  report precision ((max (list q-north q-east q-south q-west)) + R) 3
end

;; Private helper for compute-plabel.
;; Reports the label of the patch at (target-pxcor, target-pycor), or
;; fallback-plabel when `patch` reports nobody for those coordinates or the
;; patch is blue (i.e. the move is blocked).
to-report compute-plabel-landing-value [target-pxcor target-pycor fallback-plabel]
  let landing-patch patch target-pxcor target-pycor
  ;; Test for nobody first: asking nobody for its pcolor would be a runtime error.
  if (landing-patch = nobody) [ report fallback-plabel ]
  if ([pcolor] of landing-patch = blue) [ report fallback-plabel ]
  report [plabel] of landing-patch
end

;; Repaints the grid to its base layout and re-pins the labels of the two
;; special patches:
;;   - every patch black, then
;;   - patch 3 1 red,   label = losing-state-value
;;   - patch 3 2 green, label = winning-state-value
;;   - patch 1 1 blue  (label untouched)
;; winning-state-value and losing-state-value are defined outside this procedure.
to reset-patch-color
  ;; Blanket reset first; the special patches are recolored afterwards.
  ask patches [ set pcolor black ]

  ask patch 3 1 [
    set plabel losing-state-value
    set pcolor red
  ]

  ask patch 3 2 [
    set plabel winning-state-value
    set pcolor green
  ]

  ask patch 1 1 [ set pcolor blue ]
end

There are 2 versions of this model.

Uploaded by When Description Download
Larry Lin about 10 years ago Value Iteration in MDPs Download this version
Larry Lin about 10 years ago Initial upload Download this version

Attached files

File Type Description Last updated
Value Iteration in MDPs.png preview Preview for 'Value Iteration in MDPs' about 10 years ago, by Larry Lin Download

This model does not have any ancestors.

This model does not have any descendants.