src/ls-ascii-helper.cc
author Benjamin Lindner <lindnerb@users.sourceforge.net>
Wed Mar 18 15:23:14 2009 +0100 (2009-03-18)
changeset 7685 34b75a47e712
permissions -rw-r--r--
fix leaving stray '\r' in stream when reading from CRLF data file
* * *
fix CRLF issues with text-mode reading in windows when loading ascii data
lindnerb@7685
     1
/*
lindnerb@7685
     2
lindnerb@7685
     3
Copyright (C) 2003, 2005, 2006, 2007 John W. Eaton
lindnerb@7685
     4
lindnerb@7685
     5
This file is part of Octave.
lindnerb@7685
     6
lindnerb@7685
     7
Octave is free software; you can redistribute it and/or modify it
lindnerb@7685
     8
under the terms of the GNU General Public License as published by the
lindnerb@7685
     9
Free Software Foundation; either version 3 of the License, or (at your
lindnerb@7685
    10
option) any later version.
lindnerb@7685
    11
lindnerb@7685
    12
Octave is distributed in the hope that it will be useful, but WITHOUT
lindnerb@7685
    13
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
lindnerb@7685
    14
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
lindnerb@7685
    15
for more details.
lindnerb@7685
    16
lindnerb@7685
    17
You should have received a copy of the GNU General Public License
lindnerb@7685
    18
along with Octave; see the file COPYING.  If not, see
lindnerb@7685
    19
<http://www.gnu.org/licenses/>.
lindnerb@7685
    20
lindnerb@7685
    21
*/
lindnerb@7685
    22
lindnerb@7685
    23
lindnerb@7685
    24
#include "ls-ascii-helper.h"
lindnerb@7685
    25
lindnerb@7685
    26
#include <iostream>
lindnerb@7685
    27
#include <sstream>
lindnerb@7685
    28
lindnerb@7685
    29
// Helper functions when reading from ascii files.
lindnerb@7685
    30
// These functions take care of CR/LF issues when files are opened in text-mode for reading
lindnerb@7685
    31
lindnerb@7685
    32
// Skip characters from stream IS until a newline (LF, CR or CRLF) or the
// end of the stream is reached.
//
// If KEEP_NEWLINE is false, the newline is consumed as well; a CR that is
// immediately followed by an LF is treated as a single newline, so data
// with CRLF line endings read in binary mode is handled.  If KEEP_NEWLINE
// is true, the newline is left unread in the stream.
//
// Running out of input before a newline is found is not treated as a read
// error: the loop stops as soon as peek() reports EOF instead of trying to
// extract a character that is not there.

void
skip_until_newline (std::istream& is, bool keep_newline)
{
  if (! is)
    return;

  while (is)
    {
      // Keep the peeked value as int_type so that EOF stays distinguishable
      // from every real character value.
      const std::istream::int_type next = is.peek ();

      if (next == std::istream::traits_type::eof ())
        break;

      const bool is_cr = (next == '\r');
      const bool at_newline = (is_cr || next == '\n');

      // The caller wants the newline left in the stream.
      if (at_newline && keep_newline)
        break;

      // Consume the peeked character (ordinary character, CR or LF).
      char eaten;
      is.get (eaten);

      if (at_newline)
        {
          // For CRLF line endings, also eat the LF that follows the CR.
          if (is_cr && is.peek () == '\n')
            is.get (eaten);

          break;
        }
    }
}
lindnerb@7685
    74
lindnerb@7685
    75
lindnerb@7685
    76
// Discard any run of newline characters (LF, CR or CRLF, typically left
// over from a previous read) at the current position of stream IS.
// Afterwards the next character in IS, if any, is not a newline.  If IS
// does not start with a newline, nothing is consumed.

void
skip_preceeding_newline (std::istream& is)
{
  if (! is)
    return;

  for (;;)
    {
      // Look at the next character without consuming it.
      const int ahead = is.peek ();

      // Stop at the first character that is neither LF nor CR.
      if (ahead != '\n' && ahead != '\r')
        return;

      // Eat the CR or LF character.
      char eaten;
      is.get (eaten);

      // For CRLF line endings read in binary mode, the LF following the
      // CR belongs to the same newline, so eat it too.
      if (ahead == '\r' && is.peek () == '\n')
        is.get (eaten);
    }
}
lindnerb@7685
   112
lindnerb@7685
   113
lindnerb@7685
   114
// Read characters from stream IS until a newline (LF, CR or CRLF) or the
// end of the stream is reached, and return them as a std::string.  The
// newline itself is never part of the returned string.
//
// If KEEP_NEWLINE is false, the newline is consumed from the stream; a CR
// that is immediately followed by an LF is treated as a single newline, so
// data with CRLF line endings read in binary mode is handled.  If
// KEEP_NEWLINE is true, the newline is left unread in the stream.
//
// If IS is already in a failed state, an empty string is returned.

std::string
read_until_newline (std::istream& is, bool keep_newline)
{
  std::string line;

  if (! is)
    return line;

  while (is)
    {
      // Keep the peeked value as int_type so that EOF stays distinguishable
      // from every real character value.
      const std::istream::int_type next = is.peek ();

      // Stop at end of input.  Attempting to extract here would fail and
      // leave the target character unchanged, which must not be appended
      // to the result.
      if (next == std::istream::traits_type::eof ())
        break;

      const bool is_cr = (next == '\r');
      const bool at_newline = (is_cr || next == '\n');

      // The caller wants the newline left in the stream.
      if (at_newline && keep_newline)
        break;

      // Consume the peeked character (ordinary character, CR or LF).
      char got;
      if (! is.get (got))
        break;

      if (at_newline)
        {
          // For CRLF line endings, also eat the LF that follows the CR.
          if (is_cr && is.peek () == '\n')
            is.get (got);

          break;
        }

      // Ordinary character: store it and proceed to the next one.
      line += got;
    }

  return line;
}