Value Iteration in MDPs

1 collaborator

Larry Lin (Author)

Comments and Questions

Please start the discussion about this model! (You'll first need to log in.)

Click to Run Model

;; patches-plabel-list: flat list of the grid's plabel values captured at the
;; start of each `go` step; `go` compares it with the values after the sweep
;; to detect convergence.
;; NOTE(review): R, winning-state-value and losing-state-value are used by the
;; procedures below but not declared here -- presumably interface widgets; confirm.
globals[patches-plabel-list]

;; Initialises the model: wipes the world, zeroes every patch label, empties the
;; convergence snapshot and paints the grid (wall and the two terminal cells).
to setup
  clear-all
  reset-ticks

  ;; clear-all leaves plabel empty, so give every cell a numeric starting utility.
  ask patches [
    set plabel 0
  ]

  ;; No sweep has run yet, so there is nothing to compare against.
  set patches-plabel-list []

  ;; Colours the cells and overwrites the labels of the terminal states.
  reset-patch-color
end

;; Runs one value-iteration sweep over the grid and stops once a sweep leaves
;; every plabel in patches (0..3, 0..2) unchanged.
to go
  tick

  ;; Remember each cell's label as it was before this sweep.
  set patches-plabel-list grid-plabels

  ;; The sweep spreads recursively from patch (2, 1) through black cells.
  ask patch 2 1 [
    value-iteration
  ]

  ;; Remove the yellow "visited" marks and re-pin the terminal cells.
  reset-patch-color

  ;; Lists are compared element by element; equal lists mean no label moved.
  if (grid-plabels = patches-plabel-list) [
    output-print "Equilibrium reached"
    stop
  ]
end

;; Reports the plabels of patches (0..3, 0..2) as one flat list, ordered by
;; x first and, within each x, by increasing y.
to-report grid-plabels
  let snapshot []
  let grid-x 0
  while [grid-x <= 3] [
    let grid-y 0
    while [grid-y <= 2] [
      set snapshot lput ([plabel] of patch grid-x grid-y) snapshot
      set grid-y (grid-y + 1)
    ]
    set grid-x (grid-x + 1)
  ]
  report snapshot
end

;; Patch procedure. Updates this patch's plabel, marks the patch as visited
;; (yellow) and then recurses into each 4-neighbour that is still black.
to value-iteration
  ;; compute-plabel only inspects the colour of neighbouring patches, so the
  ;; label can be refreshed before this patch is recoloured.
  set plabel (compute-plabel pxcor pycor)
  set pcolor yellow

  ask neighbors4 [
    ;; The colour is tested when the neighbour is actually reached, because an
    ;; earlier recursive call may already have turned it yellow.
    if pcolor = black [ value-iteration ]
  ]
end

;; Reports the updated utility for the patch at (input-pxcor, input-pycor),
;; rounded to 3 decimal places.
;;
;; Each of the four moves (north, east, south, west) succeeds with probability
;; 0.8 and slips to either perpendicular direction with probability 0.1 each.
;; A move into the edge of the world or into a blue patch leaves the agent
;; where it is, so that direction contributes the patch's own current plabel.
;;
;; The result is R plus the largest EXPECTED value over the four moves. The
;; maximum must be taken over the expected values, not over the raw neighbour
;; labels: a neighbour with the highest label can still be the worse choice
;; when one of its perpendicular slips is very costly.
;;
;; R is defined outside this procedure (presumably an interface widget holding
;; the per-step reward -- confirm).
to-report compute-plabel [input-pxcor input-pycor]
  let own-plabel [plabel] of patch input-pxcor input-pycor

  ;; Utility of the cell the agent ends up in for each direction of travel.
  let north-plabel reachable-plabel input-pxcor (input-pycor + 1) own-plabel
  let east-plabel reachable-plabel (input-pxcor + 1) input-pycor own-plabel
  let south-plabel reachable-plabel input-pxcor (input-pycor - 1) own-plabel
  let west-plabel reachable-plabel (input-pxcor - 1) input-pycor own-plabel

  ;; Expected utility of attempting each move.
  let value-if-north ((0.8 * north-plabel) + (0.1 * east-plabel) + (0.1 * west-plabel))
  let value-if-east ((0.8 * east-plabel) + (0.1 * north-plabel) + (0.1 * south-plabel))
  let value-if-south ((0.8 * south-plabel) + (0.1 * east-plabel) + (0.1 * west-plabel))
  let value-if-west ((0.8 * west-plabel) + (0.1 * north-plabel) + (0.1 * south-plabel))

  let best-value max (list value-if-north value-if-east value-if-south value-if-west)

  report precision (best-value + R) 3
end

;; Reports the plabel of the patch at (target-pxcor, target-pycor), or
;; fallback-plabel when that location is outside the world (patch reports
;; nobody) or is a blue patch.
to-report reachable-plabel [target-pxcor target-pycor fallback-plabel]
  let target patch target-pxcor target-pycor

  ;; Check for nobody first: asking nobody for its pcolor is a runtime error.
  if (target = nobody) [
    report fallback-plabel
  ]
  if ([pcolor] of target = blue) [
    report fallback-plabel
  ]

  report [plabel] of target
end

;; Repaints the grid: every patch black, then the blue cell at (1, 1), the red
;; cell at (3, 1) and the green cell at (3, 2). The red and green cells also
;; have their plabel overwritten with losing-state-value and
;; winning-state-value respectively (both defined outside this procedure).
to reset-patch-color
  ask patches [
    set pcolor black
  ]

  ;; Blue cell: compute-plabel treats blue like the edge of the world.
  ask patch 1 1 [
    set pcolor blue
  ]

  ;; Red cell. It is not black, so value-iteration never recurses into it and
  ;; its label stays at the value pinned here.
  ask patch 3 1 [
    set plabel losing-state-value
    set pcolor red
  ]

  ;; Green cell, pinned in the same way.
  ask patch 3 2 [
    set plabel winning-state-value
    set pcolor green
  ]
end

There are 2 versions of this model.

		Uploaded by	When	Description	Download
		Larry Lin	over 11 years ago	Value Iteration in MDPs	Download this version
		Larry Lin	over 11 years ago	Initial upload	Download this version

Attached files

File	Type	Description	Last updated
Value Iteration in MDPs.png	preview	Preview for 'Value Iteration in MDPs'	over 11 years ago, by Larry Lin	Download

This model does not have any ancestors.

This model does not have any descendants.

NetLogo

Value Iteration in MDPs

1 collaborator

Close

Tags

Close

Comments and Questions

Attached files