行分解器 - 三次元日誌

mkm読み込み向けに途中まで作ったmqoローダの部品を切り出してきた。
テキストフォーマットのファイルの読み込みはだいたい行毎に
読み込んでトークンにバラす処理になるのだが
フォーマット毎に一から作るのも面倒だし、
モデルデータにboost::spiritとかを使うのも微妙なので作った。

\nがくるまでをベクタに突っ込んで、
splitが呼ばれたら行をトークンに区切る。
ダブルクォートが来たら次のダブルクォートが来るまでは通常の区切りを休止する。

トークンは、\0で終端された文字列を期待すると読み込んだ行からさらに
コピーして\0でフタしてやる必要が出てよろしくないので
const char*で先頭と\0の来るべき位置を指してやることにした
(boost::spiritのパクリ)。

ポリゴンデータをバラすのにstd::stringは向いてないのだが、
strlen, strcmpがなんか嫌なので作った。
あとatoi, atofも隠した。

main.cpp

#include <iostream>
#include <fstream>
#include "linereader.h"

// Token-delimiter predicate used by the mkm loading experiment in main().
// Blanks, tabs, carriage returns and both parentheses separate tokens.
struct Delemeter
{
  // Returns true when `letter` is a separator character.
  // const so the predicate can also be invoked on a const instance.
  bool operator()(char letter) const
  {
    switch(letter)
    {
      // every label below shares the single `return true`
      case ' ':
      case '\t':
      case '\r':
      case '(':
      case ')':
        return true;

      default:
        return false;
    }
  }
};

// Reads the file named by argv[1] line by line and prints the "name" entry
// and the three values of every line whose first token is "0".
//
// Returns 1 when no file name was given, 2 when the file cannot be opened,
// and 0 otherwise.
int main(int argc, char **argv)
{
  if(argc<2){
    std::cout << "require argment" << std::endl;
    return 1;
  }

  std::ifstream io(argv[1], std::ios::binary);
  if(!io){
    return 2;
  }

  // mkm loading experiment
  LineReader<Delemeter> reader(io);
  while(reader.nextLine()){
    const int tokenCount=reader.split();
    if(tokenCount==0){
      // blank line, or a line holding only delimiters
      continue;
    }

    if(reader.get(0)=="name"){
      // the value is printed from token 2; skip lines that are too short
      // instead of indexing past the token list
      if(tokenCount>2){
        std::cout 
          << "name: " << reader.get(2)
          << std::endl;
      }
    }
    else if(reader.get(0)=="0"){
      // three numeric tokens must follow the leading "0"
      if(tokenCount>3){
        std::cout 
          << "0 frame: " 
          << reader.getFloat(1) 
          << ',' << reader.getFloat(2) 
          << ',' << reader.getFloat(3)
          << std::endl;
      }
    }
  }

  return 0;
}

linereader.h

#ifndef _LINEREADER_H
#define _LINEREADER_H

#include <stdlib.h>

#include <cassert>
#include <fstream>
#include <istream>
#include <ostream>
#include <string>
#include <vector>

//------------------------------------------------------------//
// string utility
//------------------------------------------------------------//
// Non-owning view of the characters in [head, tail).
// The range is NOT NUL-terminated; `tail` points one past the last character.
// The view does not manage the memory it points into.
struct CharacterSequence
{
  char *head;  // first character of the range (may be NULL for an empty view)
  char *tail;  // one past the last character of the range

  CharacterSequence(char *_head, char *_tail)
  : head(_head), tail(_tail)
  {}

  // Compares the range with the NUL-terminated string `rhs`.
  // Returns true only when both have the same length and the same characters.
  bool operator==(const char *rhs)const
  {
    for(const char *p=head; p!=tail; ++p, ++rhs)
    {
      // Stop as soon as rhs is exhausted: without this check a '\0' stored
      // inside the range would match rhs's terminator and the loop would
      // keep reading beyond the end of rhs.
      if(*rhs=='\0' || *p!=*rhs)
      {
        return false;
      }
    }
    // the whole range matched; equal only if rhs ends here as well
    return *rhs=='\0';
  }

  // Negation of operator==.
  bool operator!=(const char *rhs)const
  {
    return !(*this==rhs);
  }

  // Number of characters in the range.
  int bytes()const{ return static_cast<int>(tail-head); }
};
// Writes the characters of the sequence to `os`, wrapped in double quotes.
inline std::ostream& operator<<(std::ostream& os, const CharacterSequence &rhs)
{
  const std::string text(rhs.head, rhs.tail);
  os << '"' << text << '"';
  return os;
}

//------------------------------------------------------------//
// tokenizer
//------------------------------------------------------------//
template<typename DELEMETER>
class LineReader
{
  std::istream &io_;

  std::vector<char> lineBuffer_;
  std::vector<char*> heads_;
  std::vector<char*> tails_;

  int lineCount_;
  int offset_;

  public:
  LineReader(std::istream &io)
    : io_(io), lineCount_(0)
  {}

  bool nextLine()
  {
    if(io_.eof())
      return false;

    lineBuffer_.clear();
    heads_.clear();
    tails_.clear();
    offset_=0;

    readline_();

    if(io_.eof() && lineBuffer_.empty()){
      return false;
    }
    
    ++lineCount_;
    return true;
  }

  int lineCount(){ return lineCount_; }

  int split(){ 
    DELEMETER isDelemeter;

    size_t pos=0;
    size_t end=lineBuffer_.size();
    while(pos<end)
    {
      // skip space
      while(pos<end && isDelemeter(lineBuffer_[pos])){
        ++pos;
      }
      if(pos>=end)
      {
        break;
      }

      if(lineBuffer_[pos]=='"'){
        // quoted string
        ++pos;
        heads_.push_back(&lineBuffer_[pos]);
        while(lineBuffer_[pos]!='"'){
          ++pos;
          if(pos>=end){
            assert(false);
            break;
          }
        }
        tails_.push_back(&lineBuffer_[pos]);
        ++pos;
      }
      else{
        heads_.push_back(&lineBuffer_[pos]);
        while(pos<end && !isDelemeter(lineBuffer_[pos])){
          ++pos;
        }
        tails_.push_back(&lineBuffer_[pos]);
      }
    }

    return static_cast<int>(heads_.size()); 
  }

  int splitCount(){ return static_cast<int>(heads_.size())-offset_; }

  void shift(int count)
  {
    offset_+=count;
  }

  CharacterSequence get(int _index)
  { 
    if(heads_.empty()){
      return CharacterSequence(NULL, NULL);
    }
    else{
      int index=offset_+_index;
      return CharacterSequence(heads_[index], tails_[index]); 
    }
  }

  int getInt(int _index)
  {
    int index=offset_+_index;
    return atoi(heads_[index]);
  }

  double getFloat(int _index)
  {
    int index=offset_+_index;
    return atof(heads_[index]);
  }

  CharacterSequence getLine()
  { 
    if(lineBuffer_.empty()){
      return CharacterSequence(NULL, NULL);
    }
    else{
      return CharacterSequence(&lineBuffer_[0], (&lineBuffer_.back())+1);
    }
  }

  private:
  void readline_()
  {
    while(!io_.eof())
    {
      char letter=io_.get(); 

      switch(letter){
        case '\r':
          continue;

        case '\n':
          return;

        default:
          lineBuffer_.push_back(letter);
      }
    }
  }
};

//------------------------------------------------------------//
// token delemeter
//------------------------------------------------------------//
// Token-delimiter predicate that separates on blanks, tabs and carriage
// returns only.
struct SpaceDelemeter
{
  // Returns true when `letter` is a separator character.
  // const so the predicate can also be invoked on a const instance.
  bool operator()(char letter) const
  {
    switch(letter)
    {
      // every label below shares the single `return true`
      case ' ':
      case '\t':
      case '\r':
        return true;

      default:
        return false;
    }
  }
};
#endif // _LINEREADER_H

例によって本題に入る前に手間がかかる・・・