Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improving gdocs2latex.gs #21

Open
professor opened this issue Apr 1, 2018 · 2 comments
Open

Improving gdocs2latex.gs #21

professor opened this issue Apr 1, 2018 · 2 comments

Comments

@professor
Copy link

Hello,

I would be interested in improving gdocs2latex.gs to handle these situations:
bullet lists, numbered lists, subsubsections, and quotes.

However, I'm not aware of how to easily debug the script and would need guidance in understanding the data structure element.

Thanks!

Hola,

Me interesaría mejorar gdocs2latex.gs para manejar estas situaciones:
listas de viñetas, listas numeradas, subsubsecciones y citas.

Sin embargo, no estoy al tanto de cómo depurar fácilmente el script y necesitaría una guía para comprender el elemento de estructura de datos.

¡Gracias!

@jjconti
Copy link
Owner

jjconti commented Apr 4, 2018

I have this unpublished version of the file, which handles bullet lists and numbered lists, and also subsections. It doesn't handle quotes yet. What I'd really like to add is footnotes.

That said, I don't have a good way to debug this. What I've done so far is develop by trial and error with a very small doc file.

// Based on http://github.com/mangini/gdocs2md

/**
 * Converts the active Google Doc to simplified LaTeX and e-mails the result
 * to the active user as a plain-text attachment.
 *
 * Each top-level child of the document is passed to processParagraph():
 *   - a null result is emitted as a 1cm vertical space;
 *   - a string starting with '{itemize}' or '{enumerate}' becomes an \item
 *     inside the matching LaTeX list environment;
 *   - any other non-empty string is emitted as-is;
 *   - empty / non-string results are skipped.
 *
 * List environments are opened on the first item and closed as soon as a
 * non-list element, a list of the other kind, a blank element or the end of
 * the document is reached, so every \begin has a matching \end.
 */
function ConvertToSimpleLatex() {
  var ITEMIZE_MARK = '{itemize}';
  var ENUMERATE_MARK = '{enumerate}';

  var doc = DocumentApp.getActiveDocument();
  var section = doc.getActiveSection();
  var numChildren = section.getNumChildren();
  var text = '';
  var attachments = [];
  // Name of the LaTeX list environment currently open, or null when none is.
  var openList = null;

  // Emits \end{...} for the currently open list environment, if any.
  function closeOpenList() {
    if (openList !== null) {
      text += '\\end{' + openList + '}\n';
      openList = null;
    }
  }

  // Makes sure a list environment of the given kind is open, closing a
  // list of the other kind first.
  function ensureList(kind) {
    if (openList !== kind) {
      closeOpenList();
      text += '\\begin{' + kind + '}\n';
      openList = kind;
    }
  }

  // Walk through all the child elements of the doc.
  for (var i = 9; i < numChildren; i++) {  // start at 9 to skip the first 2 pages
    var child = section.getChild(i);
    var result = processParagraph(i, child);

    if (result === null) {
      // Blank element: end any running list before adding vertical space.
      closeOpenList();
      text += '\n\n\\vspace{1cm}\n\n';
      continue;
    }
    if (!result || !(result.length > 0)) {
      // Nothing printable came back for this element.
      continue;
    }

    if (starts(result, ITEMIZE_MARK)) {
      ensureList('itemize');
      text += '\\item ' + result.substring(ITEMIZE_MARK.length);
    } else if (starts(result, ENUMERATE_MARK)) {
      ensureList('enumerate');
      text += '\\item ' + result.substring(ENUMERATE_MARK.length);
    } else {
      closeOpenList();
      text += result;
    }
    text += '\n';
  }

  // A document that ends with a list still needs its closing \end{...}.
  closeOpenList();

  attachments.push({'fileName': doc.getName() + '.txt', 'mimeType': 'text/plain', 'content': text});

  MailApp.sendEmail(Session.getActiveUser().getEmail(),
                    '[Automágica] ' + doc.getName(),
                    'Convertiste el adjunto a Latex simplificado para usar con Automágica (' + doc.getUrl() + ')' +
                    '\n\nMás información en http://www.juanjoconti.com/automagica/\n',
                    { 'attachments': attachments });
}

/**
 * Tells whether `string` begins with `prefix`.
 *
 * @param {string} string Text to inspect.
 * @param {string} prefix Leading text to look for.
 * @return {boolean} true when `prefix` occurs at position 0 of `string`.
 */
function starts(string, prefix) {
  // Searching backwards from index 0 can only ever match at index 0,
  // so the rest of the string is never scanned.
  var matchPosition = string.lastIndexOf(prefix, 0);
  return matchPosition === 0;
}

/**
 * Converts one top-level document element (not just paragraphs) to LaTeX.
 *
 * @param {number} index Position of the element in the document. Not used by
 *     the conversion; kept so existing callers keep working.
 * @param {Object} element Document element exposing getNumChildren(),
 *     getType() and getChild(i).
 * @return {?string} null for elements that are skipped outright (no
 *     children, table of contents, table); an empty string when the element
 *     has children but none of them is a TEXT element; otherwise the
 *     converted text wrapped in the prefix from findPrefix() and the suffix
 *     that closes it.
 */
function processParagraph(index, element) {
  // First, check for things that require no processing.
  if (element.getNumChildren() === 0) {
    return null;
  }
  var elementType = element.getType();
  // Tables of contents and tables are skipped.
  if (elementType === DocumentApp.ElementType.TABLE_OF_CONTENTS ||
      elementType === DocumentApp.ElementType.TABLE) {
    return null;
  }

  // Only TEXT children contribute to the output.
  var textElements = [];
  for (var i = 0; i < element.getNumChildren(); i++) {
    var child = element.getChild(i);
    if (child.getType() === DocumentApp.ElementType.TEXT) {
      textElements.push(child);
    }
  }

  if (textElements.length === 0) {
    // Always hand back a string here so callers can concatenate the result
    // without ending up with "[object Object]".
    return '';
  }

  // Pick the suffix that closes whatever the prefix opened.
  var prefix = findPrefix(element);
  var suffix = '';
  if (starts(prefix, '\\afterpage')) {
    // \afterpage{\includepdf{ opens two groups.
    suffix = '}}';
  } else if (starts(prefix, '\\')) {
    // Any other command prefix opens a single group.
    suffix = '}';
  } else if (prefix == '\n\n\\vspace{0.5cm}\\hrulefill \\hspace{0.1cm}\\decofourleft\\hspace{0.2cm} ') {
    suffix = ' \\hspace{0.2cm}\\decofourright \\hspace{0.1cm}\\hrulefill \\nopagebreak \\vspace{0.5cm} \\nopagebreak';
  } else if (prefix == '\n\n') {
    suffix = '\n\n';
  }

  var pOut = '';
  for (var j = 0; j < textElements.length; j++) {
    pOut += processTextElement(textElements[j]);
  }

  return prefix + pOut + suffix;
}

/**
 * Returns the text that must precede an element's converted content.
 *
 * Paragraphs are mapped by heading level:
 *   HEADING4 -> '\subsection*{'
 *   HEADING3 -> '\afterpage{\includepdf{'
 *   HEADING2 -> a decorated rule opener
 *   HEADING1 -> a blank line
 *   any other heading value -> ''
 * List items get a '{itemize}' marker for bullet glyphs (BULLET,
 * HOLLOW_BULLET, SQUARE_BULLET) and '{enumerate}' for every other glyph.
 * The nesting level of a list item is not taken into account.
 * Every other element type gets an empty prefix.
 *
 * @param {Object} element Document element exposing getType() and, depending
 *     on its type, getHeading() or getGlyphType().
 * @return {string} The prefix; '' when none applies.
 */
function findPrefix(element) {
  var elementType = element.getType();

  if (elementType === DocumentApp.ElementType.PARAGRAPH) {
    switch (element.getHeading()) {
      case DocumentApp.ParagraphHeading.HEADING4:
        return '\\subsection*{';
      case DocumentApp.ParagraphHeading.HEADING3:
        return '\\afterpage{\\includepdf{';
      case DocumentApp.ParagraphHeading.HEADING2:
        return '\n\n\\vspace{0.5cm}\\hrulefill \\hspace{0.1cm}\\decofourleft\\hspace{0.2cm} ';
      case DocumentApp.ParagraphHeading.HEADING1:
        return '\n\n';
      default:
        return '';
    }
  }

  if (elementType === DocumentApp.ElementType.LIST_ITEM) {
    var glyph = element.getGlyphType();
    // Bullet list (<ul>) when the glyph is one of the bullet shapes,
    // ordered list (<ol>) otherwise.
    var isBullet = glyph === DocumentApp.GlyphType.BULLET ||
        glyph === DocumentApp.GlyphType.HOLLOW_BULLET ||
        glyph === DocumentApp.GlyphType.SQUARE_BULLET;
    return isBullet ? '{itemize}' : '{enumerate}';
  }

  return '';
}

/**
 * Renders a text element as LaTeX, wrapping each formatted run in the
 * matching command.
 *
 * Plain strings are returned untouched, as is the text of objects that do
 * not expose getTextAttributeIndices(). For each run, the first matching
 * rule wins: bold (plus italic when both are set), italic, "Courier New"
 * font, strikethrough. A result consisting of a lone '*' is replaced by a
 * centred asterisk.
 *
 * @param {(string|Object)} txt A string, or a text element exposing
 *     getText() and the formatting getters used below.
 * @return {string} The converted text.
 */
function processTextElement(txt) {
  if (typeof txt === 'string') {
    return txt;
  }

  var rendered = txt.getText();
  if (!txt.getTextAttributeIndices) {
    return rendered;
  }

  var runStarts = txt.getTextAttributeIndices();
  var runEnd = rendered.length;

  // Go through the runs back to front so that text inserted for a later run
  // never shifts the offsets of the runs still to be handled.
  for (var k = runStarts.length - 1; k >= 0; k--) {
    var runStart = runStarts[k];
    var opener = '';
    var closer = '';

    if (txt.isBold(runStart)) {
      if (txt.isItalic(runStart)) {
        opener = '\\textbf{\\textit{';
        closer = '}}';
      } else {
        opener = '\\textbf{';
        closer = '}';
      }
    } else if (txt.isItalic(runStart)) {
      opener = '\\textit{';
      closer = '}';
    } else if (txt.getFontFamily(runStart) == "Courier New") {
      opener = '\\small{\\texttt{';
      closer = '}}';
    } else if (txt.isStrikethrough(runStart)) {
      opener = '\\st{';
      closer = '}';
    }

    if (opener !== '') {
      var before = rendered.substring(0, runStart);
      var run = rendered.substring(runStart, runEnd);
      var after = rendered.substring(runEnd);
      rendered = before + opener + run + closer + after;
    }
    runEnd = runStart;
  }

  if (rendered == '*') {
    return '\\begin{center} * \\end{center}';
  }
  return rendered;
}

@professor
Copy link
Author

Thank you for providing me with your latest version. For my Google document, all the section headers are coming out as \vspace{1cm}.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants