utf8バイト列を1文字ずつ処理する

function each_utf8(text)
  local i=1
  return function()
    if i>#text then
      -- 終了
      return
    end
    local b=string.byte(text, i)
    if b==0 then
      -- eof
      return
    elseif b<128 then
      -- ascii
      local m=string.sub(text, i, i)
      i=i+1
      return m
    elseif b<192 then
      assert(false, 'invalid byte')
      return
    elseif b<224 then
      -- 2bytes
      local m=string.sub(text, i, i+1)
      i=i+2
      return m
    elseif b<240 then
      -- 3bytes(japanese multibyte)
      local m=string.sub(text, i, i+2)
      i=i+3
      return m
    elseif b<248 then
      -- 4bytes
      local m=string.sub(text, i, i+3)
      i=i+4
      return m
    elseif b<252 then
      -- 5bytes
      local m=string.sub(text, i, i+4)
      i=i+5
      return m
    elseif b<254 then
      -- 6bytes
      local m=string.sub(text, i, i+5)
      i=i+6
      return m
    else
      assert(false, 'unknown')
      return
    end
  end
end

-- 使う
for v in each_utf8('Abあソ漢ｱ') do
  print(v)
end

UTF8文字列を1文字ずつ処理するイテレータ関数。
無さそうだったので作ってみた。
string.gmatchでもっと短く書けそうな気がしたのだができなかったのでとりあえず動くもの。