From 60c57b572fd8ee90f99aaf19a41ddc856f7c060b Mon Sep 17 00:00:00 2001
From: Eric Phipps <etphipp@sandia.gov>
Date: Wed, 18 Jan 2023 17:46:40 -0700
Subject: [PATCH] Remove old utilities

---
 tools/CMakeLists.txt              |   8 -
 tools/read_binary_tensor.cpp      | 157 -----------------
 tools/sparse_tensor_to_binary.cpp | 284 ------------------------------
 3 files changed, 449 deletions(-)
 delete mode 100644 tools/read_binary_tensor.cpp
 delete mode 100644 tools/sparse_tensor_to_binary.cpp
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 0683e89ced..e1f8bfbe1c 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -1,11 +1,3 @@
-ADD_EXECUTABLE(sptn_to_binary
-  ${Genten_SOURCE_DIR}/tools/sparse_tensor_to_binary.cpp)
-INSTALL(TARGETS sptn_to_binary)
-
-ADD_EXECUTABLE(read_binary_tensor
-  ${Genten_SOURCE_DIR}/tools/read_binary_tensor.cpp)
-INSTALL(TARGETS read_binary_tensor)
-
 ADD_EXECUTABLE(convert_tensor
   ${Genten_SOURCE_DIR}/tools/convert_tensor.cpp)
 TARGET_LINK_LIBRARIES (convert_tensor ${GENTEN_LINK_LIBS})
diff --git a/tools/read_binary_tensor.cpp b/tools/read_binary_tensor.cpp
deleted file mode 100644
index 0216bf369f..0000000000
--- a/tools/read_binary_tensor.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-#include <cstdint>
-#include <fstream>
-#include <iostream>
-#include <stdexcept>
-#include <vector>
-#include <sstream>
-
-struct SpHeader {
-  std::uint32_t ndims;
-  std::uint32_t float_bits;
-  std::vector<std::uint64_t> dim_length;
-  std::vector<std::uint64_t> dim_bits;
-  std::uint64_t nnz;
-};
-
-std::ostream &operator<<(std::ostream &os, SpHeader const &h) {
-  std::cout << "Sparse Tensor Header\n";
-  std::cout << "ndims         : " << h.ndims << "\n";
-  std::cout << "bits for float: " << h.float_bits << "\n";
-  std::cout << "Sizes         : ";
-  for (auto d : h.dim_length) {
-    std::cout << d << " ";
-  }
-  std::cout << "\nBits          : ";
-  for (auto b : h.dim_bits) {
-    std::cout << b << " ";
-  }
-  std::cout << "\nNNZ           : " << h.nnz;
-
-  return os;
-}
-
-SpHeader readHeader(std::istream &is) {
-  {
-    std::string hi = "xxxx";
-    is.read(&hi[0], 4 * sizeof(char));
-    if (hi != "sptn") {
-      std::cout << "First 4 bytes are not sptn\n";
-      std::terminate();
-    }
-  }
-
-  SpHeader h;
-  // Ndims
-  is.read(reinterpret_cast<char *>(&h.ndims), sizeof h.ndims);
-
-  // Num bits in float
-  is.read(reinterpret_cast<char *>(&h.float_bits), sizeof h.float_bits);
-
-  // Size of each dimension
-  h.dim_length.resize(h.ndims);
-  for (auto &d : h.dim_length) {
-    is.read(reinterpret_cast<char *>(&d), sizeof(std::uint64_t));
-  }
-
-  // Bits for each dimension index
-  h.dim_bits.resize(h.ndims);
-  for (auto &b : h.dim_bits) {
-    is.read(reinterpret_cast<char *>(&b), sizeof(std::uint64_t));
-  }
-
-  // NNZ
-  is.read(reinterpret_cast<char *>(&h.nnz), sizeof h.nnz);
-
-  return h;
-}
-
-std::uint64_t readNbitIntValue(std::istream &is, int bits) {
-  std::uint16_t ui16;
-  std::uint32_t ui32;
-  std::uint64_t ui64;
-  switch (bits) {
-  case 16:
-    is.read(reinterpret_cast<char *>(&ui16), sizeof ui16);
-    ui64 = ui16;
-    return ui64;
-  case 32:
-    is.read(reinterpret_cast<char *>(&ui32), sizeof ui32);
-    ui64 = ui32;
-    return ui64;
-  case 64:
-    is.read(reinterpret_cast<char *>(&ui64), sizeof ui64);
-    return ui64;
-  default:
-    std::cout << "Can't read Nbit unsigned int value(" << bits
-              << ") that isn't in {16, 32, 64}\n";
-    std::terminate();
-  }
-}
-
-double readNbitFloatValue(std::istream &is, int bits) {
-  float fp;
-  double dp;
-  switch (bits) {
-  case 32:
-    is.read(reinterpret_cast<char *>(&fp), sizeof fp);
-    dp = fp;
-    return dp;
-  case 64:
-    is.read(reinterpret_cast<char *>(&dp), sizeof dp);
-    return dp;
-  default:
-    std::cout << "Can't read Nbit float value(" << bits
-              << ") that isn't in {32, 64}\n";
-    std::terminate();
-  }
-}
-
-void readValue(std::istream &is, std::ostream &os, SpHeader const &h) {
-  const auto ndims = h.ndims;
-  for(auto i = 0; i < ndims; ++i){
-    os << readNbitIntValue(is, h.dim_bits[i]) << " ";
-  }
-  os << readNbitFloatValue(is, h.float_bits) << "\n";
-}
-
-/*
-The output file will have the following form without the newlines or -> comments
-73 70 74 6e                   -> 4 char 'sptn'
-ndims                         -> uint32_t
-bits_for_float_type          -> uint32_t
-size0 size1 size2 size3 size4 -> ndims uint64_t
-bits0 bits1 bits2 bits3 bits4 -> number of bits used for each index
-number_non_zero               -> uint64_t
-* the elements depend on the size of each mode to make the file size smaller we
-* will use the smallest of uint8_t uint16_t uint32_t uint64_t that holds all
-* the elements from the size field above, for now all floats are stored as
-* described above.  unlike the textual format we will always use zero based
-* indexing
-1 1 1 1049 156 1.000000 -> uint16_t uint16_t uint16_t uint16_t uint32_t
-float_type
- */
-int main(int argc, char **argv) {
-  if (argc != 2) {
-    std::cout << "You must supply a binary file\n";
-    return 1;
-  }
-
-  std::ifstream input_file(argv[1], std::ios::binary);
-  if (!input_file.is_open()) {
-    std::cout << "Could not open input file\n";
-    return 1;
-  }
-
-  auto head = readHeader(input_file);
-  std::cout << head << "\n";
-  
-  std::stringstream ss;
-  for(auto i = 0; i < head.nnz; ++i){
-    readValue(input_file, ss, head);
-  }
-
-  std::ofstream test("test.txt");
-  test << ss.rdbuf();
-
-  return 0;
-}
diff --git a/tools/sparse_tensor_to_binary.cpp b/tools/sparse_tensor_to_binary.cpp
deleted file mode 100644
index bc779d19bf..0000000000
--- a/tools/sparse_tensor_to_binary.cpp
+++ /dev/null
@@ -1,284 +0,0 @@
-//@HEADER
-// ************************************************************************
-//     Genten: Software for Generalized Tensor Decompositions
-//     by Sandia National Laboratories
-//
-// Sandia National Laboratories is a multimission laboratory managed
-// and operated by National Technology and Engineering Solutions of Sandia,
-// LLC, a wholly owned subsidiary of Honeywell International, Inc., for the
-// U.S. Department of Energy's National Nuclear Security Administration under
-// contract DE-NA0003525.
-//
-// Copyright 2017 National Technology & Engineering Solutions of Sandia, LLC
-// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S.
-// Government retains certain rights in this software.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// ************************************************************************
-//@HEADER
-
-#include <cstring>
-#include <exception>
-#include <fstream>
-#include <iostream>
-#include <limits>
-#include <sstream>
-#include <string>
-#include <vector>
-
-bool input_is_zero_based = false;
-
-const std::string header_string =
-    R"delimiter(sptensor
-ndims
-size0 size1 ... size(N-1)
-number non zero
-ind0 ind1 ... ind(N-1) value
-...
-)delimiter";
-
-struct SpTensorHeader {
-  uint32_t ndims;
-  std::vector<uint64_t> dim_sizes;
-  std::vector<uint64_t> dim_data_sizes;
-  uint64_t nnz;
-  uint64_t float_data_size;
-};
-
-// I don't know if there is a smart way to do this or not
-uint64_t smallestBuiltinThatHolds(uint64_t val) {
-  if (val <= uint64_t(std::numeric_limits<uint16_t>::max())) {
-    return 16;
-  }
-
-  if (val <= uint64_t(std::numeric_limits<uint32_t>::max())) {
-    return 32;
-  }
-
-  return 64; // We didn't have a better option
-}
-
-void computeDimDataSizes(SpTensorHeader &h) {
-  for (auto d : h.dim_sizes) {
-    h.dim_data_sizes.emplace_back(smallestBuiltinThatHolds(d));
-    std::cout << "Smallest: " << smallestBuiltinThatHolds(d) << "\n";
-  }
-}
-
-std::vector<uint64_t> lineToVec(std::string const &line, int size) {
-  std::vector<uint64_t> out(size);
-  std::stringstream ss(line);
-  for (auto i = 0; i < size; ++i) {
-    ss >> out[i];
-  }
-  return out;
-}
-
-// Leave file at first data point
-SpTensorHeader readHeader(std::ifstream &inFile, uint64_t float_data_size) {
-  SpTensorHeader header;
-  header.float_data_size = float_data_size;
-
-  std::string line;
-  std::getline(inFile, line);
-  if (line != "sptensor") {
-    throw std::invalid_argument("Input file did not start with 'sptensor'");
-  }
-
-  // This line should be the number of dimensions
-  std::getline(inFile, line);
-  header.ndims = std::stoi(line);
-
-  std::getline(inFile, line);
-  header.dim_sizes = lineToVec(line, header.ndims);
-
-  std::getline(inFile, line);
-  header.nnz = std::stoull(line);
-
-  computeDimDataSizes(header);
-  return header;
-}
-
-void writeIndexValue(std::ostream &outFile, SpTensorHeader const &header,
-                     uint64_t value, int position) {
-  uint16_t i16;
-  uint32_t i32;
-  switch (header.dim_data_sizes[position]) {
-  case 16:
-    i16 = value;
-    outFile.write(reinterpret_cast<char *>(&i16), sizeof(uint16_t));
-    break;
-  case 32:
-    i32 = value;
-    outFile.write(reinterpret_cast<char *>(&i32), sizeof(uint32_t));
-    break;
-  default:
-    outFile.write(reinterpret_cast<char *>(&value), sizeof(uint64_t));
-  }
-}
-
-void writeDataValue(std::ostream &outFile, SpTensorHeader const &header,
-                    double value) {
-  float fp32;
-  switch (header.float_data_size) {
-  case 16:
-    std::cout << "fp16 support not yet implemented\n";
-    std::terminate();
-    // fp16 = value;
-    // outFile.write(reinterpret_cast<char *>(&fp16), sizeof(_Float16));
-  case 32:
-    fp32 = value;
-    outFile.write(reinterpret_cast<char *>(&fp32), sizeof(float));
-    break;
-  default:
-    outFile.write(reinterpret_cast<char *>(&value), sizeof(double));
-  }
-}
-
-void writeRestOfTheData(std::ifstream &inFile, std::ofstream &outFile,
-                        SpTensorHeader const &header) {
-  const auto ndims = header.ndims;
-  // This loop reads a line at a time
-  while (!inFile.eof()) {
-    for (auto i = 0; i < ndims; ++i) {
-      uint64_t value;
-      inFile >> value;
-      if (!input_is_zero_based) {
-        --value; // For 0 based indexing
-      }
-      writeIndexValue(outFile, header, value, i);
-    }
-    double data_value;
-    inFile >> data_value;
-    writeDataValue(outFile, header, data_value);
-  }
-}
-
-/*
- * Takes a sparse tensor file text file and converts it to a binary file for
- * faster IO (Allows easier MPI_IO)
- *
- * The header for the text files needs to be in the form
------------------------------------------------------------------
-sptensor                   -> Type
-5                          -> Number of dimensions
-1605 4198 1631 4209 868131 -> Sizes of each dimension
-1698825                    -> Number nonzero
-1 1 1 1049 156 1.000000    -> This is the first nonzero element
-...                        -> More nonzero elements
------------------------------------------------------------------
-
-
-The output file will have the following form without the newlines or -> comments
-73 70 74 6e                   -> 4 char 'sptn'
-ndims                         -> uint32_t
-bits_for_float_type           -> uint32_t
-size0 size1 size2 size3 size4 -> ndims uint64_t
-bits0 bits1 bits2 bits3 bits4 -> number of bits used for each index
-number_non_zero               -> uint64_t
-* the elements depend on the size of each mode to make the file size smaller we
-* will use the smallest of uint8_t uint16_t uint32_t uint64_t that holds all
-* the elements from the size field above, for now all floats are stored as
-* described above.  unlike the textual format we will always use zero based
-* indexing
-1 1 1 1049 156 1.000000 -> uint16_t uint16_t uint16_t uint16_t uint32_t
-float_type
- */
-
-/*
- * Input should be 1 argument which is the tensor file name output will be in
- * the local directory with the basename(filename).bin
- */
-int main(int argc, char **argv) {
-  if (argc < 2 || argc > 4) {
-    std::cout << "Input is a file name to a tensor file and optionally the "
-                 "size you want to store the floating point data in in bits "
-                 "for now assume only {16,32,64(default)} are valid.  The "
-                 "tensor file should have the following format\n"
-              << header_string << "\n";
-    return 1;
-  }
-
-  uint32_t float_data_size = 64;
-  if (argc >= 3) {
-    float_data_size = std::stoul(argv[2]);
-    if (float_data_size == 16 || float_data_size == 32) {
-      std::cout << "Using non-default floating point size of "
-                << float_data_size << "\n";
-    } else if (float_data_size != 64) {
-      throw std::invalid_argument(
-          "2nd argumanet for float data point size must be 16, 32, or 64.");
-    }
-  }
-
-  if (argc == 4) {
-    input_is_zero_based = std::stoi(argv[3]);
-  }
-  if (input_is_zero_based) {
-    std::cout << "Assuming tensor indexing is zero based.\n";
-  } else {
-    std::cout << "Assuming tensor indexing is one based, will write zero based "
-                 "result.\n";
-  }
-
-  std::ifstream input_file(argv[1]);
-  std::string input_base = basename(argv[1]);
-  std::string outfile_name = input_base + ".bin";
-  std::cout << "Input filename: " << input_base << std::endl;
-  std::cout << "Output filename: " << outfile_name << std::endl;
-
-  auto header = readHeader(input_file, float_data_size);
-
-  std::ofstream outfile(outfile_name, std::ios::binary);
-  outfile.write("sptn", 4);
-  outfile.write(reinterpret_cast<char *>(&header.ndims),
-                sizeof(decltype(header.ndims)));
-  outfile.write(reinterpret_cast<char *>(&float_data_size),
-                sizeof(decltype(float_data_size)));
-  for (auto i = 0; i < header.ndims; ++i) {
-    auto value = header.dim_sizes[i];
-    std::cout << "Writing: " << value << "\n";
-    outfile.write(reinterpret_cast<char *>(&value), sizeof(uint64_t));
-  }
-  for (auto i = 0; i < header.ndims; ++i) {
-    auto value = header.dim_data_sizes[i];
-    outfile.write(reinterpret_cast<char *>(&value), sizeof(uint64_t));
-  }
-  outfile.write(reinterpret_cast<char *>(&(header.nnz)), sizeof(std::uint64_t));
-  uint64_t total_bits = 32 /*sptn*/ + 64 /*ndims*/ + 64 /*float_data_size*/ +
-                        header.ndims * 64 /* each dim size */ + 64 /* nnz */;
-
-  uint64_t line_size = header.float_data_size;
-  for (auto i = 0; i < header.ndims; ++i) {
-    line_size += header.dim_data_sizes[i];
-  }
-  total_bits += line_size * header.nnz;
-
-  const auto storage_in_MB = double(total_bits / 8.0) * 1e-6;
-  std::cout << "Storage of output file should be: " << storage_in_MB << "MB\n";
-  std::cout << "\tLine size in bytes: " << line_size / 8 << "\n";
-
-  writeRestOfTheData(input_file, outfile, header);
-  return 0;
-}