fix leaving stray '\r' in stream when reading from CRLF data file
author Benjamin Lindner <lindnerb@users.sourceforge.net>
Wed Mar 18 15:23:14 2009 +0100 (2009-03-18)
changeset 7685 34b75a47e712
parent 7684 4e43255734fb
child 7686 6f568bdb1bf3
fix leaving stray '\r' in stream when reading from CRLF data file
* * *
fix CRLF issues with text-mode reading in windows when loading ascii data
src/ChangeLog
src/Makefile.in
src/load-save.cc
src/ls-ascii-helper.cc
src/ls-ascii-helper.h
src/ls-mat-ascii.cc
src/ls-oct-ascii.cc
src/ls-oct-ascii.h
src/ov-fcn-handle.cc
src/ov-fcn-inline.cc
src/ov-range.cc
src/ov-str-mat.cc
     1.1 --- a/src/ChangeLog	Thu Mar 26 07:29:25 2009 +0100
     1.2 +++ b/src/ChangeLog	Wed Mar 18 15:23:14 2009 +0100
     1.3 @@ -1,3 +1,21 @@
     1.4 +2009-03-18  Benjamin Lindner <lindnerb@users.sourceforge.net>
     1.5 +
     1.6 +	* ls-oct-ascii.cc (extract_keyword): fix leaving stray '\r' in stream
     1.7 +	when reading from CRLF data file by replacing loop with call to
     1.8 +	read_until_newline()
     1.9 +
    1.10 +2009-03-03  Benjamin Lindner  <lindnerb@users.sourceforge.net>
    1.11 +
    1.12 +	* ls-ascii-helper.h ls-ascii-helper.cc: New files, provide helper 
    1.13 +	functions skip_until_newline(), skip_preceeding_newline() and
    1.14 +	read_until_newline() that take care of CR/LF handling.
    1.15 +	* Makefile.in: add new files
    1.16 +	* load-save.cc: Open files always in binary mode in Fload
    1.17 +	* ls-mat-ascii.cc (get_mat_data_input_line), ls-oct-ascii.cc 
    1.18 +	(extract_keyword, read_ascii_data), ls-oct-ascii.h (extract_keyword), 
    1.19 +	ov-fcn-handle.cc, ov-fcn-inline.cc, ov-range.cc, ov-str-mat.cc 
    1.20 +	(load_ascii): Use helper functions 
    1.21 +	
    1.22  2009-02-25  Marco Caliari <marco.caliari@univr.it>
    1.23  
    1.24  	* graphics.cc (base_properties::remove_child): Fix order of dims.
     2.1 --- a/src/Makefile.in	Thu Mar 26 07:29:25 2009 +0100
     2.2 +++ b/src/Makefile.in	Wed Mar 18 15:23:14 2009 +0100
     2.3 @@ -116,7 +116,7 @@
     2.4  	comment-list.h debug.h defun-dld.h defun-int.h defun.h \
     2.5  	dirfns.h dynamic-ld.h error.h file-io.h gripes.h help.h \
     2.6  	input.h lex.h load-path.h load-save.h ls-hdf5.h \
     2.7 -	ls-mat-ascii.h ls-mat4.h ls-mat5.h ls-oct-ascii.h \
     2.8 +	ls-mat-ascii.h ls-mat4.h ls-mat5.h ls-oct-ascii.h ls-ascii-helper.h \
     2.9  	ls-oct-binary.h ls-utils.h mex.h mexproto.h oct-errno.h \
    2.10  	oct-fstrm.h oct-hist.h oct-iostrm.h oct-map.h oct-obj.h \
    2.11  	oct-prcstrm.h oct-procbuf.h oct-stdstrm.h oct-stream.h \
    2.12 @@ -186,7 +186,7 @@
    2.13  	cutils.c data.cc debug.cc defaults.cc defun.cc dirfns.cc \
    2.14  	dynamic-ld.cc error.cc file-io.cc graphics.cc gripes.cc \
    2.15  	help.cc input.cc lex.l load-path.cc load-save.cc ls-hdf5.cc \
    2.16 -	ls-mat-ascii.cc ls-mat4.cc ls-mat5.cc ls-oct-ascii.cc \
    2.17 +	ls-mat-ascii.cc ls-mat4.cc ls-mat5.cc ls-oct-ascii.cc ls-ascii-helper.cc \
    2.18  	ls-oct-binary.cc ls-utils.cc main.c mappers.cc matherr.c \
    2.19  	mex.cc oct-fstrm.cc oct-hist.cc oct-iostrm.cc oct-map.cc \
    2.20  	oct-obj.cc oct-prcstrm.cc oct-procbuf.cc oct-stream.cc \
     3.1 --- a/src/load-save.cc	Thu Mar 26 07:29:25 2009 +0100
     3.2 +++ b/src/load-save.cc	Wed Mar 18 15:23:14 2009 +0100
     3.3 @@ -906,15 +906,12 @@
     3.4  
     3.5  	  std::ios::openmode mode = std::ios::in;
     3.6  
     3.7 -	  if (format == LS_BINARY
     3.8 -#ifdef HAVE_HDF5
     3.9 -	      || format == LS_HDF5
    3.10 -#endif
    3.11 -	      || format == LS_MAT_BINARY
    3.12 -	      || format == LS_MAT5_BINARY
    3.13 -	      || format == LS_MAT7_BINARY)
    3.14 -	    mode |= std::ios::binary;
    3.15 -
    3.16 +	  // Open in binary mode in any case, to fix annoying bug that
    3.17 +	  // text-mode opened streams cannot be seekg'ed/tellg'ed with
    3.18 +	  // mingw32 (See http://oldwiki.mingw.org/index.php/Known%20Problems )
    3.19 +	  // The CR/LF issues are handled in ls-ascii-helper.cc
    3.20 +	  mode |= std::ios::binary;
    3.21 +	  
    3.22  #ifdef HAVE_ZLIB
    3.23  	  if (use_zlib)
    3.24  	    {
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/src/ls-ascii-helper.cc	Wed Mar 18 15:23:14 2009 +0100
     4.3 @@ -0,0 +1,160 @@
     4.4 +/*
     4.5 +
     4.6 +Copyright (C) 2003, 2005, 2006, 2007 John W. Eaton
     4.7 +
     4.8 +This file is part of Octave.
     4.9 +
    4.10 +Octave is free software; you can redistribute it and/or modify it
    4.11 +under the terms of the GNU General Public License as published by the
    4.12 +Free Software Foundation; either version 3 of the License, or (at your
    4.13 +option) any later version.
    4.14 +
    4.15 +Octave is distributed in the hope that it will be useful, but WITHOUT
    4.16 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    4.17 +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    4.18 +for more details.
    4.19 +
    4.20 +You should have received a copy of the GNU General Public License
    4.21 +along with Octave; see the file COPYING.  If not, see
    4.22 +<http://www.gnu.org/licenses/>.
    4.23 +
    4.24 +*/
    4.25 +
    4.26 +
    4.27 +#include "ls-ascii-helper.h"
    4.28 +
    4.29 +#include <iostream>
    4.30 +#include <sstream>
    4.31 +
    4.32 +// Helper functions when reading from ascii files.
    4.33 +// These functions take care of CR/LF issues when files are opened in text-mode for reading 
    4.34 +
    4.35 +// Skip characters from stream IS up to the next newline (CR, LF or CRLF).
    4.36 +// If KEEP_NEWLINE is false the newline is consumed as well; otherwise
    4.37 +// it is left unread in the stream.
    4.38 +
    4.39 +void
    4.40 +skip_until_newline( std::istream& is, bool keep_newline )
    4.41 +{
    4.42 +  if (!is)
    4.43 +    return;
    4.44 +  
    4.45 +  char c,d;
    4.46 +  
    4.47 +  while (is)
    4.48 +  {
    4.49 +      c = is.peek();
    4.50 +      if (c == '\n' || c == '\r')
    4.51 +      {
    4.52 +	  // reached newline
    4.53 +	  if (keep_newline == false)
    4.54 +	  {
    4.55 +	      // eat the CR or LF character
    4.56 +	      is.get(d);
    4.57 +	      
    4.58 +	      // make sure that for binary-mode opened ascii files containing CRLF line endings
    4.59 +	      // we skip the LF after CR...
    4.60 +	      if (c == '\r' && is.peek()=='\n')
    4.61 +	      {
    4.62 +		  // yes, LF following CR, eat it...
    4.63 +		  is.get(d);
    4.64 +	      }
    4.65 +	  }
    4.66 +	  
    4.67 +	  // Newline was found (and consumed unless keep_newline==true), so exit loop
    4.68 +	  break;
    4.69 +      }
    4.70 +      else
    4.71 +	  // no newline character peeked, so read it and proceed to next character
    4.72 +	  is.get(d);
    4.73 +  }
    4.74 +  
    4.75 +  return;
    4.76 +}
    4.77 +
    4.78 +
    4.79 +// If the next character in stream IS is a newline (CR or LF, e.g. a leftover
    4.80 +// from a previous read), eat newline characters until a non-newline is found
    4.81 +
    4.82 +void
    4.83 +skip_preceeding_newline( std::istream& is )
    4.84 +{
    4.85 +  if (!is)
    4.86 +    return;
    4.87 +  
    4.88 +  char c,d;
    4.89 +  
    4.90 +  // Check if the next character in IS is a newline character
    4.91 +  c = is.peek();
    4.92 +  if (c == '\n' || c == '\r')
    4.93 +  {
    4.94 +      // Yes, at newline
    4.95 +      do {
    4.96 +	  // eat the CR or LF character
    4.97 +	  is.get(d);
    4.98 +	  
    4.99 +	  // make sure that for binary-mode opened ascii files containing CRLF line endings
   4.100 +	  // we skip the LF after CR...
   4.101 +	  if (c == '\r' && is.peek() == '\n')
   4.102 +	  {
   4.103 +	      // yes, LF following CR, eat it...
   4.104 +	      is.get(d);
   4.105 +	  }
   4.106 +	  
   4.107 +	  // Peek into next character
   4.108 +	  c = is.peek();
   4.109 +      // Loop while there is still a newline character ahead
   4.110 +      } while( c == '\n' || c == '\r' );
   4.111 +  }
   4.112 +  
   4.113 +  return;
   4.114 +}
   4.115 +
   4.116 +
   4.117 +// Read characters from stream IS until a newline (CR, LF or CRLF) is reached.
   4.118 +// If KEEP_NEWLINE is false the newline is consumed, otherwise it stays unread.
   4.119 +// NOTE(review): get() is not checked before "buf << d"; confirm behavior at EOF.
   4.120 +// Characters read are stored and returned as std::string
   4.121 +
   4.122 +std::string
   4.123 +read_until_newline( std::istream& is, bool keep_newline )
   4.124 +{
   4.125 +  if (!is)
   4.126 +    return std::string();
   4.127 +  
   4.128 +  char c,d;
   4.129 +  std::ostringstream buf;
   4.130 +  
   4.131 +  while (is)
   4.132 +  {
   4.133 +      c = is.peek();
   4.134 +      if (c == '\n' || c == '\r')
   4.135 +      {
   4.136 +	  // reached newline
   4.137 +	  if (keep_newline == false)
   4.138 +	  {
   4.139 +	      // eat the CR or LF character
   4.140 +	      is.get(d);
   4.141 +	      
   4.142 +	      // make sure that for binary-mode opened ascii files containing CRLF line endings
   4.143 +	      // we skip the LF after CR...
   4.144 +	      if (c == '\r' && is.peek() == '\n')
   4.145 +	      {
   4.146 +		  // yes, LF following CR, eat it...
   4.147 +		  is.get(d);
   4.148 +	      }
   4.149 +	  }
   4.150 +	  
   4.151 +	  // Newline was found (and consumed unless keep_newline==true), so exit loop
   4.152 +	  break;
   4.153 +      }
   4.154 +      else
   4.155 +      {
   4.156 +	  // no newline character peeked, so read it, store it, and proceed to next
   4.157 +	  is.get(d);
   4.158 +	  buf << d;
   4.159 +      }
   4.160 +  }
   4.161 +  
   4.162 +  return buf.str();
   4.163 +}
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/src/ls-ascii-helper.h	Wed Mar 18 15:23:14 2009 +0100
     5.3 @@ -0,0 +1,40 @@
     5.4 +/*
     5.5 +
     5.6 +Copyright (C) 2003, 2005, 2006, 2007 John W. Eaton
     5.7 +
     5.8 +This file is part of Octave.
     5.9 +
    5.10 +Octave is free software; you can redistribute it and/or modify it
    5.11 +under the terms of the GNU General Public License as published by the
    5.12 +Free Software Foundation; either version 3 of the License, or (at your
    5.13 +option) any later version.
    5.14 +
    5.15 +Octave is distributed in the hope that it will be useful, but WITHOUT
    5.16 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    5.17 +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    5.18 +for more details.
    5.19 +
    5.20 +You should have received a copy of the GNU General Public License
    5.21 +along with Octave; see the file COPYING.  If not, see
    5.22 +<http://www.gnu.org/licenses/>.
    5.23 +
    5.24 +*/
    5.25 +
    5.26 +#if !defined (octave_ls_ascii_helper_h)
    5.27 +#define octave_ls_ascii_helper_h 1
    5.28 +
    5.29 +#include <iosfwd>
    5.30 +#include <string>
    5.31 +
    5.32 +#include "oct-dlldefs.h"
    5.33 +
    5.34 +extern OCTINTERP_API void
    5.35 +skip_until_newline( std::istream& is, bool keep_newline = false );
    5.36 +
    5.37 +extern OCTINTERP_API void
    5.38 +skip_preceeding_newline( std::istream& is );
    5.39 +
    5.40 +extern OCTINTERP_API std::string
    5.41 +read_until_newline( std::istream& is, bool keep_newline = false );
    5.42 +
    5.43 +#endif  // !defined (octave_ls_ascii_helper_h)
     6.1 --- a/src/ls-mat-ascii.cc	Thu Mar 26 07:29:25 2009 +0100
     6.2 +++ b/src/ls-mat-ascii.cc	Wed Mar 18 15:23:14 2009 +0100
     6.3 @@ -65,6 +65,7 @@
     6.4  #include "dMatrix.h"
     6.5  
     6.6  #include "ls-mat-ascii.h"
     6.7 +#include "ls-ascii-helper.h"
     6.8  
     6.9  static std::string
    6.10  get_mat_data_input_line (std::istream& is)
    6.11 @@ -81,14 +82,16 @@
    6.12        while (is.get (c))
    6.13  	{
    6.14  	  if (c == '\n' || c == '\r')
    6.15 -	    break;
    6.16 +	    {
    6.17 +	      // Let skip_until_newline handle CR/LF issues...
    6.18 +	      skip_until_newline (is, false);
    6.19 +	      break;
    6.20 +	    }
    6.21  
    6.22  	  if (c == '%' || c == '#')
    6.23  	    {
    6.24  	      // skip to end of line
    6.25 -	      while (is.get (c))
    6.26 -		if (c == '\n' || c == '\r')
    6.27 -		  break;
    6.28 +	      skip_until_newline (is, false);
    6.29  
    6.30  	      break;
    6.31  	    }
     7.1 --- a/src/ls-oct-ascii.cc	Thu Mar 26 07:29:25 2009 +0100
     7.2 +++ b/src/ls-oct-ascii.cc	Wed Mar 18 15:23:14 2009 +0100
     7.3 @@ -108,14 +108,8 @@
     7.4  	      while (is.get (c) && (c == ' ' || c == '\t' || c == ':'))
     7.5  		; // Skip whitespace and the colon.
     7.6  
     7.7 -	      if (c != '\n' && c != '\r')
     7.8 -		{
     7.9 -		  value << c;
    7.10 -		  while (is.get (c) && c != '\n' && c != '\r')
    7.11 -		    value << c;
    7.12 -		}
    7.13 -
    7.14 -	      retval = value.str ();
    7.15 +	      is.putback(c);
    7.16 +	      retval = read_until_newline (is, false);
    7.17  	      break;
    7.18  	    }
    7.19  	  else if (next_only)
     8.1 --- a/src/ls-oct-ascii.h	Thu Mar 26 07:29:25 2009 +0100
     8.2 +++ b/src/ls-oct-ascii.h	Wed Mar 18 15:23:14 2009 +0100
     8.3 @@ -29,6 +29,7 @@
     8.4  #include <string>
     8.5  
     8.6  #include "str-vec.h"
     8.7 +#include "ls-ascii-helper.h"
     8.8  
     8.9  // Flag for cell elements
    8.10  #define CELL_ELT_TAG "<cell-element>"
    8.11 @@ -103,8 +104,8 @@
    8.12  		is >> value;
    8.13  	      if (is)
    8.14  		status = true;
    8.15 -	      while (is.get (c) && c != '\n' && c != '\r')
    8.16 -		; // Skip to beginning of next line;
    8.17 +	      // Skip to beginning of next line;
    8.18 +	      skip_until_newline (is, false);
    8.19  	      break;
    8.20  	    }
    8.21  	  else if (next_only)
    8.22 @@ -165,8 +166,8 @@
    8.23  		    is >> value;
    8.24  		  if (is)
    8.25  		    status = true;
    8.26 -		  while (is.get (c) && c != '\n' && c != '\r')
    8.27 -		    ; // Skip to beginning of next line;
    8.28 +		  // Skip to beginning of next line;
    8.29 +		  skip_until_newline (is, false);
    8.30  		  return status;
    8.31  		}
    8.32  	    }
     9.1 --- a/src/ov-fcn-handle.cc	Thu Mar 26 07:29:25 2009 +0100
     9.2 +++ b/src/ov-fcn-handle.cc	Wed Mar 18 15:23:14 2009 +0100
     9.3 @@ -56,6 +56,7 @@
     9.4  #include "ls-oct-binary.h"
     9.5  #include "ls-hdf5.h"
     9.6  #include "ls-utils.h"
     9.7 +#include "ls-ascii-helper.h"
     9.8  
     9.9  DEFINE_OCTAVE_ALLOCATOR (octave_fcn_handle);
    9.10  
    9.11 @@ -330,26 +331,18 @@
    9.12      {
    9.13        octave_idx_type len = 0;
    9.14        char c;
    9.15 -      std::ostringstream buf;
    9.16 +      std::string buf;
    9.17  
    9.18        // Skip preceeding newline(s).
    9.19 -      while (is.get (c) && c == '\n')
    9.20 -	/* do nothing */;
    9.21 +      skip_preceeding_newline (is);
    9.22  
    9.23        if (is)
    9.24  	{
    9.25 -	  buf << c;
    9.26  
    9.27  	  // Get a line of text whitespace characters included, leaving
    9.28  	  // newline in the stream.
    9.29 +	  buf = read_until_newline (is, true);
    9.30  
    9.31 -	  while (is.peek () != '\n')
    9.32 -	    {
    9.33 -	      is.get (c);
    9.34 -	      if (! is)
    9.35 -		break;
    9.36 -	      buf << c;
    9.37 -	    }
    9.38  	}
    9.39  
    9.40        pos = is.tellg ();
    9.41 @@ -408,7 +401,7 @@
    9.42  
    9.43  	  int parse_status;
    9.44  	  octave_value anon_fcn_handle = 
    9.45 -	    eval_string (buf.str (), true, parse_status);
    9.46 +	    eval_string (buf, true, parse_status);
    9.47  
    9.48  	  if (parse_status == 0)
    9.49  	    {
    10.1 --- a/src/ov-fcn-inline.cc	Thu Mar 26 07:29:25 2009 +0100
    10.2 +++ b/src/ov-fcn-inline.cc	Wed Mar 18 15:23:14 2009 +0100
    10.3 @@ -47,6 +47,7 @@
    10.4  #include "ls-oct-ascii.h"
    10.5  #include "ls-hdf5.h"
    10.6  #include "ls-utils.h"
    10.7 +#include "ls-ascii-helper.h"
    10.8  
    10.9  DEFINE_OCTAVE_ALLOCATOR (octave_fcn_inline);
   10.10  
   10.11 @@ -139,27 +140,20 @@
   10.12  	nm = "";
   10.13  
   10.14        char c;
   10.15 -      std::ostringstream buf;
   10.16 +      std::string buf;
   10.17  
   10.18        // Skip preceeding newline(s)
   10.19 -      while (is.get (c) && c == '\n');
   10.20 +      skip_preceeding_newline (is);
   10.21  
   10.22        if (is)
   10.23  	{
   10.24 -	  buf << c;
   10.25  
   10.26  	  // Get a line of text whitespace characters included, leaving
   10.27  	  // newline in the stream
   10.28 -	  while (is.peek () != '\n')
   10.29 -	    {
   10.30 -	      is.get (c);
   10.31 -	      if (! is)
   10.32 -		break;
   10.33 -	      buf << c;
   10.34 -	    }
   10.35 +	  buf = read_until_newline (is, true);
   10.36  	}
   10.37  
   10.38 -      iftext = buf.str ();
   10.39 +      iftext = buf;
   10.40  
   10.41        octave_fcn_inline tmp (iftext, ifargs, nm);
   10.42        fcn = tmp.fcn;
    11.1 --- a/src/ov-range.cc	Thu Mar 26 07:29:25 2009 +0100
    11.2 +++ b/src/ov-range.cc	Wed Mar 18 15:23:14 2009 +0100
    11.3 @@ -41,6 +41,7 @@
    11.4  #include "byte-swap.h"
    11.5  #include "ls-hdf5.h"
    11.6  #include "ls-utils.h"
    11.7 +#include "ls-ascii-helper.h"
    11.8  
    11.9  DEFINE_OCTAVE_ALLOCATOR (octave_range);
   11.10  
   11.11 @@ -274,14 +275,9 @@
   11.12  	break;
   11.13      }
   11.14  
   11.15 -  for (;;)
   11.16 -    {
   11.17 -      if (is && (c == '%' || c == '#'))
   11.18 -	while (is.get (c) && c != '\n')
   11.19 -	  ; // Skip to beginning of next line, ignoring everything.
   11.20 -      else
   11.21 -	break;
   11.22 -    }
   11.23 +  // Skip to beginning of next line, ignoring everything.
   11.24 +  skip_until_newline (is, false);
   11.25 +  
   11.26  }
   11.27  
   11.28  bool 
    12.1 --- a/src/ov-str-mat.cc	Thu Mar 26 07:29:25 2009 +0100
    12.2 +++ b/src/ov-str-mat.cc	Wed Mar 18 15:23:14 2009 +0100
    12.3 @@ -48,6 +48,7 @@
    12.4  #include "pr-output.h"
    12.5  #include "pt-mat.h"
    12.6  #include "utils.h"
    12.7 +#include "ls-ascii-helper.h"
    12.8  
    12.9  DEFINE_OCTAVE_ALLOCATOR (octave_char_matrix_str);
   12.10  DEFINE_OCTAVE_ALLOCATOR (octave_char_matrix_sq_str);
   12.11 @@ -344,8 +345,7 @@
   12.12  		      char *ftmp = tmp.fortran_vec ();
   12.13  
   12.14  		      // Skip the return line
   12.15 -		      if (! is.read (ftmp, 1))
   12.16 -			return false;
   12.17 +		      skip_preceeding_newline (is);
   12.18  
   12.19  		      if (! is.read (ftmp, dv.numel ()) || !is)
   12.20  			{