src/ls-oct-ascii.cc
author Benjamin Lindner <lindnerb@users.sourceforge.net>
Wed Mar 18 15:23:14 2009 +0100 (2009-03-18)
changeset 7685 34b75a47e712
parent 7566 fd669d081438
permissions -rw-r--r--
fix leaving stray '\r' in stream when reading from CRLF data file
* * *
fix CRLF issues with text-mode reading in windows when loading ascii data
     1 /*
     2 
     3 Copyright (C) 1996, 1997, 2003, 2004, 2005, 2006, 2007 John W. Eaton
     4 
     5 This file is part of Octave.
     6 
     7 Octave is free software; you can redistribute it and/or modify it
     8 under the terms of the GNU General Public License as published by the
     9 Free Software Foundation; either version 3 of the License, or (at your
    10 option) any later version.
    11 
    12 Octave is distributed in the hope that it will be useful, but WITHOUT
    13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    15 for more details.
    16 
    17 You should have received a copy of the GNU General Public License
    18 along with Octave; see the file COPYING.  If not, see
    19 <http://www.gnu.org/licenses/>.
    20 
    21 */
    22 
    23 // Author: John W. Eaton.
    24 
    25 #ifdef HAVE_CONFIG_H
    26 #include <config.h>
    27 #endif
    28 
    29 #include <cstring>
    30 #include <cctype>
    31 
    32 #include <fstream>
    33 #include <iomanip>
    34 #include <iostream>
    35 #include <sstream>
    36 #include <string>
    37 
    38 #include "byte-swap.h"
    39 #include "data-conv.h"
    40 #include "file-ops.h"
    41 #include "glob-match.h"
    42 #include "lo-mappers.h"
    43 #include "mach-info.h"
    44 #include "oct-env.h"
    45 #include "oct-time.h"
    46 #include "quit.h"
    47 #include "str-vec.h"
    48 
    49 #include "Cell.h"
    50 #include "defun.h"
    51 #include "error.h"
    52 #include "gripes.h"
    53 #include "load-save.h"
    54 #include "oct-obj.h"
    55 #include "oct-map.h"
    56 #include "ov-cell.h"
    57 #include "pager.h"
    58 #include "pt-exp.h"
    59 #include "symtab.h"
    60 #include "sysdep.h"
    61 #include "unwind-prot.h"
    62 #include "utils.h"
    63 #include "variables.h"
    64 #include "version.h"
    65 #include "dMatrix.h"
    66 
    67 #include "ls-oct-ascii.h"
    68 
// The number of decimal digits to use when writing ascii data.
// save_ascii_data falls back to this value when it is called with a
// precision of zero.
static int Vsave_precision = 16;
    71 
    72 // Functions for reading ascii data.
    73 
    74 // Extract a KEYWORD and its value from stream IS, returning the
    75 // associated value in a new string.
    76 //
    77 // Input should look something like:
    78 //
    79 //  [%#][ \t]*keyword[ \t]*:[ \t]*string-value[ \t]*\n
    80 
    81 std::string
    82 extract_keyword (std::istream& is, const char *keyword, const bool next_only)
    83 {
    84   std::string retval;
    85 
    86   char c;
    87   while (is.get (c))
    88     {
    89       if (c == '%' || c == '#')
    90 	{
    91 	  std::ostringstream buf;
    92 	
    93 	  while (is.get (c) && (c == ' ' || c == '\t' || c == '%' || c == '#'))
    94 	    ; // Skip whitespace and comment characters.
    95 
    96 	  if (isalpha (c))
    97 	    buf << c;
    98 
    99 	  while (is.get (c) && isalpha (c))
   100 	    buf << c;
   101 
   102 	  std::string tmp = buf.str ();
   103 	  bool match = (tmp.compare (0, strlen (keyword), keyword) == 0);
   104 
   105 	  if (match)
   106 	    {
   107 	      std::ostringstream value;
   108 	      while (is.get (c) && (c == ' ' || c == '\t' || c == ':'))
   109 		; // Skip whitespace and the colon.
   110 
   111 	      is.putback(c);
   112 	      retval = read_until_newline (is, false);
   113 	      break;
   114 	    }
   115 	  else if (next_only)
   116 	    break;
   117 	  else
   118 	    {
   119 	      while (is.get (c) && c != '\n' && c != '\r')
   120 		; // Skip to end of line.
   121 	    }
   122 	}
   123     }
   124 
   125   int len = retval.length ();
   126 
   127   if (len > 0)
   128     {
   129       while (len)
   130 	{
   131 	  c = retval[len-1];
   132 
   133 	  if (c == ' ' || c == '\t')
   134 	    len--;
   135 	  else
   136 	    {
   137 	      retval.resize (len);
   138 	      break;
   139 	    }
   140 	}
   141     }
   142 
   143   return retval;
   144 }
   145 
   146 // Extract one value (scalar, matrix, string, etc.) from stream IS and
   147 // place it in TC, returning the name of the variable.  If the value
   148 // is tagged as global in the file, return TRUE in GLOBAL.
   149 //
   150 // Each type supplies its own function to load the data, and so this
   151 // function is extensible.
   152 //
   153 // FILENAME is used for error messages.
   154 //
   155 // The data is expected to be in the following format:
   156 //
   157 // The input file must have a header followed by some data.
   158 //
   159 // All lines in the header must begin with a `#' character.
   160 //
   161 // The header must contain a list of keyword and value pairs with the
   162 // keyword and value separated by a colon.
   163 //
   164 // Keywords must appear in the following order:
   165 //
   166 // # name: <name>
   167 // # type: <type>
   168 // # <info>
   169 //
// Where, for the built-in types, the fields are:
   171 //
   172 //  <name> : a valid identifier
   173 //
   174 //  <type> : <typename>
   175 //         | global <typename>
   176 //
   177 //  <typename> : scalar
   178 //             | complex scalar
   179 //             | matrix
   180 //             | complex matrix
   181 //             | bool
   182 //             | bool matrix
   183 //             | string
   184 //             | range
   185 //
   186 //  <info> : <matrix info>
   187 //         | <string info>
   188 //
   189 //  <matrix info> : # rows: <integer>
   190 //                : # columns: <integer>
   191 //
   192 //  <string info> : # elements: <integer>
   193 //                : # length: <integer> (once before each string)
   194 //
   195 //  For backward compatibility the type "string array" is treated as a
   196 // "string" type. Also "string" can have a single element with no elements
   197 // line such that
   198 //
   199 //  <string info> : # length: <integer>
   200 //
   201 // Formatted ASCII data follows the header.
   202 //
   203 // Example:
   204 //
   205 //  # name: foo
   206 //  # type: matrix
   207 //  # rows: 2
   208 //  # columns: 2
   209 //    2  4
   210 //    1  3
   211 //
   212 // Example:
   213 //
   214 //  # name: foo
   215 //  # type: string
   216 //  # elements: 5
   217 //  # length: 4
   218 //  this
   219 //  # length: 2
   220 //  is
   221 //  # length: 1
   222 //  a
   223 //  # length: 6
   224 //  string
   225 //  # length: 5
   226 //  array
   227 //
   228 // FIXME -- this format is fairly rigid, and doesn't allow for
   229 // arbitrary comments.  Someone should fix that. It does allow arbitrary
   230 // types however.
   231 
   232 // Ugh.  The signature of the compare method is not standard in older
   233 // versions of the GNU libstdc++.  Do this instead:
   234 
   235 #define SUBSTRING_COMPARE_EQ(s, pos, n, t) (s.substr (pos, n) == t)
   236 
   237 std::string
   238 read_ascii_data (std::istream& is, const std::string& filename, bool& global,
   239 		 octave_value& tc, octave_idx_type count)
   240 {
   241   // Read name for this entry or break on EOF.
   242 
   243   std::string name = extract_keyword (is, "name");
   244 
   245   if (name.empty ())
   246     {
   247       if (count == 0)
   248 	error ("load: empty name keyword or no data found in file `%s'",
   249 	       filename.c_str ());
   250 
   251       return std::string ();
   252     }
   253 
   254   if (name == CELL_ELT_TAG)
   255     {
   256       // This is OK -- name won't be used.
   257     }
   258   else if (! valid_identifier (name))
   259     {
   260       error ("load: bogus identifier `%s' found in file `%s'",
   261 	     name.c_str (), filename.c_str ());
   262       return std::string ();
   263     }
   264 
   265   // Look for type keyword.
   266 
   267   std::string tag = extract_keyword (is, "type");
   268 
   269   if (! tag.empty ())
   270     {
   271       std::string typ;
   272       size_t pos = tag.rfind (' ');
   273 
   274       if (pos != NPOS)
   275 	{
   276 	  global = SUBSTRING_COMPARE_EQ (tag, 0, 6, "global");
   277 
   278 	  typ = global ? tag.substr (7) : tag;
   279 	}
   280       else
   281 	typ = tag;
   282 
   283       // Special case for backward compatiablity. A small bit of cruft
   284       if (SUBSTRING_COMPARE_EQ (typ, 0, 12, "string array"))
   285 	tc = octave_value (charMatrix (), true);
   286       else
   287 	tc = octave_value_typeinfo::lookup_type (typ);
   288 
   289       if (! tc.load_ascii (is))
   290 	error ("load: trouble reading ascii file `%s'", filename.c_str ());
   291     }
   292   else
   293     error ("load: failed to extract keyword specifying value type");
   294 
   295   if (error_state)
   296     {
   297       error ("load: reading file %s", filename.c_str ());
   298       return std::string ();
   299     }
   300 
   301   return name;
   302 }
   303 
   304 // Save the data from TC along with the corresponding NAME, and global
   305 // flag MARK_AS_GLOBAL on stream OS in the plain text format described
   306 // above for load_ascii_data.  If NAME is empty, the name: line is not
   307 // generated.  PRECISION specifies the number of decimal digits to print. 
   308 //
   309 // Assumes ranges and strings cannot contain Inf or NaN values.
   310 //
   311 // Returns 1 for success and 0 for failure.
   312 
   313 // FIXME -- should probably write the help string here too.
   314 
   315 bool
   316 save_ascii_data (std::ostream& os, const octave_value& val_arg,
   317 		 const std::string& name, bool mark_as_global,
   318 		 int precision)
   319 {
   320   bool success = true;
   321 
   322   if (! name.empty ())
   323     os << "# name: " << name << "\n";
   324 
   325   octave_value val = val_arg;
   326 
   327   if (mark_as_global)
   328     os << "# type: global " << val.type_name () << "\n";
   329   else
   330     os << "# type: " << val.type_name() << "\n";
   331 
   332   if (! precision)
   333     precision = Vsave_precision;
   334 
   335   long old_precision = os.precision ();
   336   os.precision (precision);
   337 
   338   success = val.save_ascii (os);
   339 
   340   os.precision (old_precision);
   341 
   342   return (os && success);
   343 }
   344 
   345 bool
   346 save_ascii_data_for_plotting (std::ostream& os, const octave_value& t,
   347 			      const std::string& name)
   348 {
   349   return save_ascii_data (os, t, name, false, 6);
   350 }
   351 
   352 // Maybe this should be a static function in tree-plot.cc?
   353 
   354 // If TC is matrix, save it on stream OS in a format useful for
   355 // making a 3-dimensional plot with gnuplot.  If PARAMETRIC is
   356 // TRUE, assume a parametric 3-dimensional plot will be generated.
   357 
   358 bool
   359 save_three_d (std::ostream& os, const octave_value& tc, bool parametric)
   360 {
   361   bool fail = false;
   362 
   363   octave_idx_type nr = tc.rows ();
   364   octave_idx_type nc = tc.columns ();
   365 
   366   if (tc.is_real_matrix ())
   367     {
   368       os << "# 3D data...\n"
   369 	 << "# type: matrix\n"
   370 	 << "# total rows: " << nr << "\n"
   371 	 << "# total columns: " << nc << "\n";
   372 
   373       long old_precision = os.precision ();
   374       os.precision (6);
   375 
   376       if (parametric)
   377 	{
   378 	  octave_idx_type extras = nc % 3;
   379 	  if (extras)
   380 	    warning ("ignoring last %d columns", extras);
   381 
   382 	  Matrix tmp = tc.matrix_value ();
   383 	  nr = tmp.rows ();
   384 
   385 	  for (octave_idx_type i = 0; i < nc-extras; i += 3)
   386 	    {
   387 	      os << tmp.extract (0, i, nr-1, i+2);
   388 	      if (i+3 < nc-extras)
   389 		os << "\n";
   390 	    }
   391 	}
   392       else
   393 	{
   394 	  Matrix tmp = tc.matrix_value ();
   395 	  nr = tmp.rows ();
   396 
   397 	  for (octave_idx_type i = 0; i < nc; i++)
   398 	    {
   399 	      os << tmp.extract (0, i, nr-1, i);
   400 	      if (i+1 < nc)
   401 		os << "\n";
   402 	    }
   403 	}
   404 
   405       os.precision (old_precision);
   406     }
   407   else
   408     {
   409       ::error ("for now, I can only save real matrices in 3D format");
   410       fail = true;
   411     }
   412 
   413   return (os && ! fail);
   414 }
   415 
// Interpreter-level save_precision function.  The string below is its
// texinfo help text.  The body forwards to
// SET_INTERNAL_VARIABLE_WITH_LIMITS with the limits -1 and INT_MAX;
// presumably that macro reads and updates Vsave_precision -- confirm
// against its definition.
DEFUN (save_precision, args, nargout,
    "-*- texinfo -*-\n\
@deftypefn {Built-in Function} {@var{val} =} save_precision ()\n\
@deftypefnx {Built-in Function} {@var{old_val} =} save_precision (@var{new_val})\n\
Query or set the internal variable that specifies the number of\n\
digits to keep when saving data in text format.\n\
@end deftypefn")
{
  return SET_INTERNAL_VARIABLE_WITH_LIMITS (save_precision, -1, INT_MAX);
}
   426 
   427 /*
   428 ;;; Local Variables: ***
   429 ;;; mode: C++ ***
   430 ;;; End: ***
   431 */
   432