fix leaving stray '\r' in stream when reading from CRLF data file
author Benjamin Lindner <lindnerb@users.sourceforge.net>
Wed Mar 18 15:23:14 2009 +0100 (11 months ago)
changeset 768534b75a47e712
parent 76844e43255734fb
child 76866f568bdb1bf3
fix leaving stray '\r' in stream when reading from CRLF data file
* * *
fix CRLF issues with text-mode reading in windows when loading ascii data
src/ChangeLog
src/Makefile.in
src/load-save.cc
src/ls-ascii-helper.cc
src/ls-ascii-helper.h
src/ls-mat-ascii.cc
src/ls-oct-ascii.cc
src/ls-oct-ascii.h
src/ov-fcn-handle.cc
src/ov-fcn-inline.cc
src/ov-range.cc
src/ov-str-mat.cc
       1 --- a/src/ChangeLog	Thu Mar 26 07:29:25 2009 +0100
       2 +++ b/src/ChangeLog	Wed Mar 18 15:23:14 2009 +0100
       3 @@ -1,3 +1,21 @@
       4 +2009-03-18  Benjamin Lindner <lindnerb@users.sourceforge.net>
       5 +
       6 +	* ls-oct-ascii.cc (extract_keyword): fix leaving stray '\r' in stream
       7 +	when reading from CRLF data file by replacing loop with call to
       8 +	read_until_newline()
       9 +
      10 +2009-03-03  Benjamin Lindner  <lindnerb@users.sourceforge.net>
      11 +
      12 +	* ls-ascii-helper.h ls-ascii-helper.cc: New files, provide helper 
      13 +	functions skip_until_newline(), skip_preceeding_newline() and
      14 +	read_until_newline() that take care of CR/LF handling.
      15 +	* Makefile.in: add new files
      16 +	* load-save.cc: Open files always in binary mode in Fload
      17 +	* ls-mat-ascii.cc (get_mat_data_input_line), ls-oct-ascii.cc 
      18 +	(extract_keyword, read_ascii_data), ls-oct-ascii.h (extract_keyword), 
      19 +	ov-fcn-handle.cc, ov-fcn-inline.cc, ov-range.cc, ov-str-mat.cc 
      20 +	(load_ascii): Use helper functions 
      21 +	
      22  2009-02-25  Marco Caliari <marco.caliari@univr.it>
      23  
      24  	* graphics.cc (base_properties::remove_child): Fix order of dims.
     1.1 --- a/src/Makefile.in	Thu Mar 26 07:29:25 2009 +0100
     1.2 +++ b/src/Makefile.in	Wed Mar 18 15:23:14 2009 +0100
     1.3 @@ -116,7 +116,7 @@
     1.4  	comment-list.h debug.h defun-dld.h defun-int.h defun.h \
     1.5  	dirfns.h dynamic-ld.h error.h file-io.h gripes.h help.h \
     1.6  	input.h lex.h load-path.h load-save.h ls-hdf5.h \
     1.7 -	ls-mat-ascii.h ls-mat4.h ls-mat5.h ls-oct-ascii.h \
     1.8 +	ls-mat-ascii.h ls-mat4.h ls-mat5.h ls-oct-ascii.h ls-ascii-helper.h \
     1.9  	ls-oct-binary.h ls-utils.h mex.h mexproto.h oct-errno.h \
    1.10  	oct-fstrm.h oct-hist.h oct-iostrm.h oct-map.h oct-obj.h \
    1.11  	oct-prcstrm.h oct-procbuf.h oct-stdstrm.h oct-stream.h \
    1.12 @@ -186,7 +186,7 @@
    1.13  	cutils.c data.cc debug.cc defaults.cc defun.cc dirfns.cc \
    1.14  	dynamic-ld.cc error.cc file-io.cc graphics.cc gripes.cc \
    1.15  	help.cc input.cc lex.l load-path.cc load-save.cc ls-hdf5.cc \
    1.16 -	ls-mat-ascii.cc ls-mat4.cc ls-mat5.cc ls-oct-ascii.cc \
    1.17 +	ls-mat-ascii.cc ls-mat4.cc ls-mat5.cc ls-oct-ascii.cc ls-ascii-helper.cc \
    1.18  	ls-oct-binary.cc ls-utils.cc main.c mappers.cc matherr.c \
    1.19  	mex.cc oct-fstrm.cc oct-hist.cc oct-iostrm.cc oct-map.cc \
    1.20  	oct-obj.cc oct-prcstrm.cc oct-procbuf.cc oct-stream.cc \
     2.1 --- a/src/load-save.cc	Thu Mar 26 07:29:25 2009 +0100
     2.2 +++ b/src/load-save.cc	Wed Mar 18 15:23:14 2009 +0100
     2.3 @@ -906,15 +906,12 @@
     2.4  
     2.5  	  std::ios::openmode mode = std::ios::in;
     2.6  
     2.7 -	  if (format == LS_BINARY
     2.8 -#ifdef HAVE_HDF5
     2.9 -	      || format == LS_HDF5
    2.10 -#endif
    2.11 -	      || format == LS_MAT_BINARY
    2.12 -	      || format == LS_MAT5_BINARY
    2.13 -	      || format == LS_MAT7_BINARY)
    2.14 -	    mode |= std::ios::binary;
    2.15 -
    2.16 +	  // Open in binary mode in any case, to fix annoying bug that
    2.17 +	  // text-mode opened streams cannot be seekg'ed/tellg'ed with
    2.18 +	  // mingw32 (See http://oldwiki.mingw.org/index.php/Known%20Problems )
    2.19 +	  // The CR/LF issues are handled in ls-ascii-helper.cc
    2.20 +	  mode |= std::ios::binary;
    2.21 +	  
    2.22  #ifdef HAVE_ZLIB
    2.23  	  if (use_zlib)
    2.24  	    {
     3.1 --- a/src/ls-mat-ascii.cc	Thu Mar 26 07:29:25 2009 +0100
     3.2 +++ b/src/ls-mat-ascii.cc	Wed Mar 18 15:23:14 2009 +0100
     3.3 @@ -65,6 +65,7 @@
     3.4  #include "dMatrix.h"
     3.5  
     3.6  #include "ls-mat-ascii.h"
     3.7 +#include "ls-ascii-helper.h"
     3.8  
     3.9  static std::string
    3.10  get_mat_data_input_line (std::istream& is)
    3.11 @@ -81,14 +82,16 @@
    3.12        while (is.get (c))
    3.13  	{
    3.14  	  if (c == '\n' || c == '\r')
    3.15 -	    break;
    3.16 +	    {
    3.17 +	      // Let skip_until_newline handle CR/LF issues...
    3.18 +	      skip_until_newline (is, false);
    3.19 +	      break;
    3.20 +	    }
    3.21  
    3.22  	  if (c == '%' || c == '#')
    3.23  	    {
    3.24  	      // skip to end of line
    3.25 -	      while (is.get (c))
    3.26 -		if (c == '\n' || c == '\r')
    3.27 -		  break;
    3.28 +	      skip_until_newline (is, false);
    3.29  
    3.30  	      break;
    3.31  	    }
     4.1 --- a/src/ls-oct-ascii.cc	Thu Mar 26 07:29:25 2009 +0100
     4.2 +++ b/src/ls-oct-ascii.cc	Wed Mar 18 15:23:14 2009 +0100
     4.3 @@ -108,14 +108,8 @@
     4.4  	      while (is.get (c) && (c == ' ' || c == '\t' || c == ':'))
     4.5  		; // Skip whitespace and the colon.
     4.6  
     4.7 -	      if (c != '\n' && c != '\r')
     4.8 -		{
     4.9 -		  value << c;
    4.10 -		  while (is.get (c) && c != '\n' && c != '\r')
    4.11 -		    value << c;
    4.12 -		}
    4.13 -
    4.14 -	      retval = value.str ();
    4.15 +	      is.putback(c);
    4.16 +	      retval = read_until_newline (is, false);
    4.17  	      break;
    4.18  	    }
    4.19  	  else if (next_only)
     5.1 --- a/src/ls-oct-ascii.h	Thu Mar 26 07:29:25 2009 +0100
     5.2 +++ b/src/ls-oct-ascii.h	Wed Mar 18 15:23:14 2009 +0100
     5.3 @@ -29,6 +29,7 @@
     5.4  #include <string>
     5.5  
     5.6  #include "str-vec.h"
     5.7 +#include "ls-ascii-helper.h"
     5.8  
     5.9  // Flag for cell elements
    5.10  #define CELL_ELT_TAG "<cell-element>"
    5.11 @@ -103,8 +104,8 @@
    5.12  		is >> value;
    5.13  	      if (is)
    5.14  		status = true;
    5.15 -	      while (is.get (c) && c != '\n' && c != '\r')
    5.16 -		; // Skip to beginning of next line;
    5.17 +	      // Skip to beginning of next line;
    5.18 +	      skip_until_newline (is, false);
    5.19  	      break;
    5.20  	    }
    5.21  	  else if (next_only)
    5.22 @@ -165,8 +166,8 @@
    5.23  		    is >> value;
    5.24  		  if (is)
    5.25  		    status = true;
    5.26 -		  while (is.get (c) && c != '\n' && c != '\r')
    5.27 -		    ; // Skip to beginning of next line;
    5.28 +		  // Skip to beginning of next line;
    5.29 +		  skip_until_newline (is, false);
    5.30  		  return status;
    5.31  		}
    5.32  	    }
     6.1 --- a/src/ov-fcn-handle.cc	Thu Mar 26 07:29:25 2009 +0100
     6.2 +++ b/src/ov-fcn-handle.cc	Wed Mar 18 15:23:14 2009 +0100
     6.3 @@ -56,6 +56,7 @@
     6.4  #include "ls-oct-binary.h"
     6.5  #include "ls-hdf5.h"
     6.6  #include "ls-utils.h"
     6.7 +#include "ls-ascii-helper.h"
     6.8  
     6.9  DEFINE_OCTAVE_ALLOCATOR (octave_fcn_handle);
    6.10  
    6.11 @@ -330,26 +331,18 @@
    6.12      {
    6.13        octave_idx_type len = 0;
    6.14        char c;
    6.15 -      std::ostringstream buf;
    6.16 +      std::string buf;
    6.17  
    6.18        // Skip preceeding newline(s).
    6.19 -      while (is.get (c) && c == '\n')
    6.20 -	/* do nothing */;
    6.21 +      skip_preceeding_newline (is);
    6.22  
    6.23        if (is)
    6.24  	{
    6.25 -	  buf << c;
    6.26  
    6.27  	  // Get a line of text whitespace characters included, leaving
    6.28  	  // newline in the stream.
    6.29 +	  buf = read_until_newline (is, true);
    6.30  
    6.31 -	  while (is.peek () != '\n')
    6.32 -	    {
    6.33 -	      is.get (c);
    6.34 -	      if (! is)
    6.35 -		break;
    6.36 -	      buf << c;
    6.37 -	    }
    6.38  	}
    6.39  
    6.40        pos = is.tellg ();
    6.41 @@ -408,7 +401,7 @@
    6.42  
    6.43  	  int parse_status;
    6.44  	  octave_value anon_fcn_handle = 
    6.45 -	    eval_string (buf.str (), true, parse_status);
    6.46 +	    eval_string (buf, true, parse_status);
    6.47  
    6.48  	  if (parse_status == 0)
    6.49  	    {
     7.1 --- a/src/ov-fcn-inline.cc	Thu Mar 26 07:29:25 2009 +0100
     7.2 +++ b/src/ov-fcn-inline.cc	Wed Mar 18 15:23:14 2009 +0100
     7.3 @@ -47,6 +47,7 @@
     7.4  #include "ls-oct-ascii.h"
     7.5  #include "ls-hdf5.h"
     7.6  #include "ls-utils.h"
     7.7 +#include "ls-ascii-helper.h"
     7.8  
     7.9  DEFINE_OCTAVE_ALLOCATOR (octave_fcn_inline);
    7.10  
    7.11 @@ -139,27 +140,20 @@
    7.12  	nm = "";
    7.13  
    7.14        char c;
    7.15 -      std::ostringstream buf;
    7.16 +      std::string buf;
    7.17  
    7.18        // Skip preceeding newline(s)
    7.19 -      while (is.get (c) && c == '\n');
    7.20 +      skip_preceeding_newline (is);
    7.21  
    7.22        if (is)
    7.23  	{
    7.24 -	  buf << c;
    7.25  
    7.26  	  // Get a line of text whitespace characters included, leaving
    7.27  	  // newline in the stream
    7.28 -	  while (is.peek () != '\n')
    7.29 -	    {
    7.30 -	      is.get (c);
    7.31 -	      if (! is)
    7.32 -		break;
    7.33 -	      buf << c;
    7.34 -	    }
    7.35 +	  buf = read_until_newline (is, true);
    7.36  	}
    7.37  
    7.38 -      iftext = buf.str ();
    7.39 +      iftext = buf;
    7.40  
    7.41        octave_fcn_inline tmp (iftext, ifargs, nm);
    7.42        fcn = tmp.fcn;
     8.1 --- a/src/ov-range.cc	Thu Mar 26 07:29:25 2009 +0100
     8.2 +++ b/src/ov-range.cc	Wed Mar 18 15:23:14 2009 +0100
     8.3 @@ -41,6 +41,7 @@
     8.4  #include "byte-swap.h"
     8.5  #include "ls-hdf5.h"
     8.6  #include "ls-utils.h"
     8.7 +#include "ls-ascii-helper.h"
     8.8  
     8.9  DEFINE_OCTAVE_ALLOCATOR (octave_range);
    8.10  
    8.11 @@ -274,14 +275,9 @@
    8.12  	break;
    8.13      }
    8.14  
    8.15 -  for (;;)
    8.16 -    {
    8.17 -      if (is && (c == '%' || c == '#'))
    8.18 -	while (is.get (c) && c != '\n')
    8.19 -	  ; // Skip to beginning of next line, ignoring everything.
    8.20 -      else
    8.21 -	break;
    8.22 -    }
    8.23 +  // Skip to beginning of next line, ignoring everything.
    8.24 +  skip_until_newline (is, false);
    8.25 +  
    8.26  }
    8.27  
    8.28  bool 
     9.1 --- a/src/ov-str-mat.cc	Thu Mar 26 07:29:25 2009 +0100
     9.2 +++ b/src/ov-str-mat.cc	Wed Mar 18 15:23:14 2009 +0100
     9.3 @@ -48,6 +48,7 @@
     9.4  #include "pr-output.h"
     9.5  #include "pt-mat.h"
     9.6  #include "utils.h"
     9.7 +#include "ls-ascii-helper.h"
     9.8  
     9.9  DEFINE_OCTAVE_ALLOCATOR (octave_char_matrix_str);
    9.10  DEFINE_OCTAVE_ALLOCATOR (octave_char_matrix_sq_str);
    9.11 @@ -344,8 +345,7 @@
    9.12  		      char *ftmp = tmp.fortran_vec ();
    9.13  
    9.14  		      // Skip the return line
    9.15 -		      if (! is.read (ftmp, 1))
    9.16 -			return false;
    9.17 +		      skip_preceeding_newline (is);
    9.18  
    9.19  		      if (! is.read (ftmp, dv.numel ()) || !is)
    9.20  			{
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/src/ls-ascii-helper.cc	Wed Mar 18 15:23:14 2009 +0100
    10.3 @@ -0,0 +1,160 @@
    10.4 +/*
    10.5 +
    10.6 +Copyright (C) 2003, 2005, 2006, 2007 John W. Eaton
    10.7 +
    10.8 +This file is part of Octave.
    10.9 +
   10.10 +Octave is free software; you can redistribute it and/or modify it
   10.11 +under the terms of the GNU General Public License as published by the
   10.12 +Free Software Foundation; either version 3 of the License, or (at your
   10.13 +option) any later version.
   10.14 +
   10.15 +Octave is distributed in the hope that it will be useful, but WITHOUT
   10.16 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   10.17 +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   10.18 +for more details.
   10.19 +
   10.20 +You should have received a copy of the GNU General Public License
   10.21 +along with Octave; see the file COPYING.  If not, see
   10.22 +<http://www.gnu.org/licenses/>.
   10.23 +
   10.24 +*/
   10.25 +
   10.26 +
   10.27 +#include "ls-ascii-helper.h"
   10.28 +
   10.29 +#include <iostream>
   10.30 +#include <sstream>
   10.31 +
   10.32 +// Helper functions when reading from ascii files.
   10.33 +// These functions take care of CR/LF issues when files are opened in text-mode for reading
   10.34 +
   10.35 +// Skip characters from stream IS until a newline (LF, CR or CR LF) is reached.
   10.36 +// If KEEP_NEWLINE is false, eat that newline from the stream; otherwise
   10.37 +// keep it unread.  Returns immediately if IS is already in a failed state.
   10.38 +
   10.39 +void
   10.40 +skip_until_newline( std::istream& is, bool keep_newline )
   10.41 +{
   10.42 +  if (!is)
   10.43 +    return;
   10.44 +  
   10.45 +  char c,d;
   10.46 +  
   10.47 +  while (is)
   10.48 +  {
   10.49 +      c = is.peek();
   10.50 +      if (c == '\n' || c == '\r')
   10.51 +      {
   10.52 +	  // reached newline
   10.53 +	  if (keep_newline == false)
   10.54 +	  {
   10.55 +	      // eat the CR or LF character
   10.56 +	      is.get(d);
   10.57 +	      
   10.58 +	      // make sure that for binary-mode opened ascii files containing CRLF line endings
   10.59 +	      // we skip the LF after CR...
   10.60 +	      if (c == '\r' && is.peek()=='\n')
   10.61 +	      {
   10.62 +		  // yes, LF following CR, eat it...
   10.63 +		  is.get(d);
   10.64 +	      }
   10.65 +	  }
   10.66 +	  
   10.67 +	  // Newline was found (and eaten above if keep_newline==false), so exit loop
   10.68 +	  break;
   10.69 +      }
   10.70 +      else
   10.71 +	  // no newline character peeked, so read it and proceed to next character (at end of input this get fails, which ends the loop)
   10.72 +	  is.get(d);
   10.73 +  }
   10.74 +  
   10.75 +  return;
   10.76 +}
   10.77 +
   10.78 +
   10.79 +// If the next character in stream IS is a newline (a leftover from a previous read)
   10.80 +// then eat newline(s) (LF, CR or CR LF) until a non-newline character is found.  Returns immediately if IS is already in a failed state.
   10.81 +
   10.82 +void
   10.83 +skip_preceeding_newline( std::istream& is )
   10.84 +{
   10.85 +  if (!is)
   10.86 +    return;
   10.87 +  
   10.88 +  char c,d;
   10.89 +  
   10.90 +  // Check if the next character in IS is a newline character (peek does not consume it)
   10.91 +  c = is.peek();
   10.92 +  if (c == '\n' || c == '\r')
   10.93 +  {
   10.94 +      // Yes, at newline
   10.95 +      do {
   10.96 +	  // eat the CR or LF character
   10.97 +	  is.get(d);
   10.98 +	  
   10.99 +	  // make sure that for binary-mode opened ascii files containing CRLF line endings
  10.100 +	  // we skip the LF after CR...
  10.101 +	  if (c == '\r' && is.peek() == '\n')
  10.102 +	  {
  10.103 +	      // yes, LF following CR, eat it...
  10.104 +	      is.get(d);
  10.105 +	  }
  10.106 +	  
  10.107 +	  // Peek at the next character without consuming it
  10.108 +	  c = is.peek();
  10.109 +      // Loop while still a newline ahead
  10.110 +      } while( c == '\n' || c == '\r' );
  10.111 +  }
  10.112 +  
  10.113 +  return;
  10.114 +}
  10.115 +
  10.116 +
  10.117 +// Read characters from stream IS until a newline (LF, CR or CR LF) is reached.
  10.118 +// If KEEP_NEWLINE is false, eat that newline from the stream; otherwise
  10.119 +// keep it unread.  Reading also stops at end of input.
  10.120 +// Characters read (without the newline) are returned as std::string; an empty string is returned if IS is already in a failed state.
  10.121 +
  10.122 +std::string
  10.123 +read_until_newline( std::istream& is, bool keep_newline )
  10.124 +{
  10.125 +  if (!is)
  10.126 +    return std::string();
  10.127 +  
  10.128 +  char c,d;
  10.129 +  std::ostringstream buf;
  10.130 +  
  10.131 +  while (is)
  10.132 +  {
  10.133 +      c = is.peek();
  10.134 +      if (c == '\n' || c == '\r')
  10.135 +      {
  10.136 +	  // reached newline
  10.137 +	  if (keep_newline == false)
  10.138 +	  {
  10.139 +	      // eat the CR or LF character
  10.140 +	      is.get(d);
  10.141 +	      
  10.142 +	      // make sure that for binary-mode opened ascii files containing CRLF line endings
  10.143 +	      // we skip the LF after CR...
  10.144 +	      if (c == '\r' && is.peek() == '\n')
  10.145 +	      {
  10.146 +		  // yes, LF following CR, eat it...
  10.147 +		  is.get(d);
  10.148 +	      }
  10.149 +	  }
  10.150 +	  
  10.151 +	  // Newline was found (and eaten above if keep_newline==false), so exit loop
  10.152 +	  break;
  10.153 +      }
  10.154 +      else
  10.155 +      {
  10.156 +	  // no newline character peeked, so read it and store it only if the read succeeded: at end of input get() fails and leaves d unchanged, so storing it unconditionally would duplicate the last character
  10.157 +	  if (is.get(d))
  10.158 +	    buf << d;
  10.159 +      }
  10.160 +  }
  10.161 +  
  10.162 +  return buf.str();
  10.163 +}
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/src/ls-ascii-helper.h	Wed Mar 18 15:23:14 2009 +0100
    11.3 @@ -0,0 +1,40 @@
    11.4 +/*
    11.5 +
    11.6 +Copyright (C) 2003, 2005, 2006, 2007 John W. Eaton
    11.7 +
    11.8 +This file is part of Octave.
    11.9 +
   11.10 +Octave is free software; you can redistribute it and/or modify it
   11.11 +under the terms of the GNU General Public License as published by the
   11.12 +Free Software Foundation; either version 3 of the License, or (at your
   11.13 +option) any later version.
   11.14 +
   11.15 +Octave is distributed in the hope that it will be useful, but WITHOUT
   11.16 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11.17 +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   11.18 +for more details.
   11.19 +
   11.20 +You should have received a copy of the GNU General Public License
   11.21 +along with Octave; see the file COPYING.  If not, see
   11.22 +<http://www.gnu.org/licenses/>.
   11.23 +
   11.24 +*/
   11.25 +
   11.26 +#if !defined (octave_ls_ascii_helper_h)
   11.27 +#define octave_ls_ascii_helper_h 1
   11.28 +
   11.29 +#include <iosfwd>
   11.30 +#include <string>
   11.31 +
   11.32 +#include "oct-dlldefs.h"
   11.33 +
   11.34 +extern OCTINTERP_API void
   11.35 +skip_until_newline( std::istream& is, bool keep_newline = false );
   11.36 +
   11.37 +extern OCTINTERP_API void
   11.38 +skip_preceeding_newline( std::istream& is );
   11.39 +
   11.40 +extern OCTINTERP_API std::string
   11.41 +read_until_newline( std::istream& is, bool keep_newline = false );
   11.42 +
   11.43 +#endif  // !defined (octave_ls_ascii_helper_h)