diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..1a07bf75bf --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +*.[oa] +*.so +*.obj +*.lib +*.exp +*.dll +*.exe +*.manifest +*.dmp +*.swp +.tags diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..673470416e --- /dev/null +++ b/Makefile @@ -0,0 +1,84 @@ +############################################################################## +# LuaJIT top level Makefile for installation. Requires GNU Make. +# +# Suitable for POSIX platforms (Linux, *BSD, OSX etc.). +# Note: src/Makefile has many more configurable options. +# +# ##### This Makefile is NOT useful for installation on Windows! ##### +# For MSVC, please follow the instructions given in src/msvcbuild.bat. +# For MinGW and Cygwin, cd to src and run make with the Makefile there. +# NYI: add wininstall.bat +# +# Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +############################################################################## + +BASEVER= 2.0.0 +VERSION= 2.0.0-beta1 + +############################################################################## +# +# Change the installation path as needed and modify src/luaconf.h accordingly. +# Note: PREFIX must be an absolute path! +# +PREFIX= /usr/local +############################################################################## + +INSTALL_BIN= $(PREFIX)/bin +INSTALL_NAME= luajit-$(VERSION) +INSTALL_T= $(INSTALL_BIN)/$(INSTALL_NAME) +INSTALL_TSYM= $(INSTALL_BIN)/luajit +INSTALL_INC= $(PREFIX)/include/luajit-$(BASEVER) +INSTALL_JITLIB= $(PREFIX)/share/luajit-$(VERSION)/jit + +MKDIR= mkdir -p +SYMLINK= ln -f -s +INSTALL_X= install -m 0755 +INSTALL_F= install -m 0644 + +FILES_T= luajit +FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h +FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua vmdef.lua + +############################################################################## + +INSTALL_DEP= src/luajit + +all $(INSTALL_DEP): + @echo "==== Building LuaJIT $(VERSION) ====" + $(MAKE) -C src + @echo "==== Successfully built LuaJIT $(VERSION) ====" + +install: $(INSTALL_DEP) + @echo "==== Installing LuaJIT $(VERSION) to $(PREFIX) ====" + $(MKDIR) $(INSTALL_BIN) $(INSTALL_INC) $(INSTALL_JITLIB) + cd src && $(INSTALL_X) $(FILES_T) $(INSTALL_T) + cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) + cd lib && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) + @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" + @echo "" + @echo "Note: the beta releases deliberately do NOT install a symlink for luajit" + @echo "You can do this now by running this command (with sudo):" + @echo "" + @echo " $(SYMLINK) $(INSTALL_NAME) $(INSTALL_TSYM)" + @echo "" + +############################################################################## + +amalg: + @echo "Building LuaJIT $(VERSION)" + $(MAKE) -C src amalg + +clean: + $(MAKE) -C src clean + +cleaner: + $(MAKE) -C src cleaner + +distclean: + $(MAKE) -C src distclean + +SUB_TARGETS= amalg clean cleaner distclean + +.PHONY: all install $(SUB_TARGETS) + +############################################################################## diff --git a/README b/README new file mode 100644 index 0000000000..43caf78e8e --- /dev/null +++ b/README @@ -0,0 +1,16 @@ +README for LuaJIT 2.0.0-beta1 +----------------------------- + +LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. + +Project Homepage: http://luajit.org/ + +LuaJIT is Copyright (C) 2005-2009 Mike Pall. 
+LuaJIT is free software, released under the MIT/X license. +See full Copyright Notice in src/luajit.h + +Documentation for LuaJIT is available in HTML format. +Please point your favorite browser to: + + doc/luajit.html + diff --git a/doc/api.html b/doc/api.html new file mode 100644 index 0000000000..79788d956b --- /dev/null +++ b/doc/api.html @@ -0,0 +1,203 @@ + + + +API Extensions + + + + + + + + +
+Lua +
+ + +
+

+LuaJIT is fully upwards-compatible with Lua 5.1. It supports all +» standard Lua +library functions and the full set of +» Lua/C API +functions. +

+

+LuaJIT is also fully ABI-compatible with Lua 5.1 at the linker/dynamic
+loader level. This means you can compile a C module against the
+standard Lua headers and load the same shared library from either Lua
+or LuaJIT.
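+For example, a C module can be compiled once against the standard Lua
+headers and then loaded by both VMs. A minimal sketch (mymodule.c is a
+placeholder; the include path assumes the default PREFIX of the
+top-level Makefile):
+
+gcc -O2 -fPIC -shared -o mymodule.so mymodule.c -I/usr/local/include/luajit-2.0.0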

+ +

bit.* — Bitwise Operations

+

+LuaJIT supports all bitwise operations as defined by +» Lua BitOp: +

+
+bit.tobit  bit.tohex  bit.bnot    bit.band bit.bor  bit.bxor
+bit.lshift bit.rshift bit.arshift bit.rol  bit.ror  bit.bswap
+
+

+This module is a LuaJIT built-in — you don't need to download or +install Lua BitOp. The Lua BitOp site has full documentation for all +» Lua BitOp API functions. +

+

+Please make sure to require the module before using any of +its functions: +

+
+local bit = require("bit")
+
+

+An already installed Lua BitOp module is ignored by LuaJIT. +This way you can use bit operations from both Lua and LuaJIT on a +shared installation. +
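+A brief usage sketch with some of these functions (the printed result
+is an 8-digit hex string):
+
+local bit = require("bit")
+print(bit.tohex(bit.bor(bit.lshift(1, 8), 0xff)))  --> 000001ff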

+ +

jit.* — JIT compiler control

+

+The functions in this built-in module control the behavior +of the JIT compiler engine. +

+ +

jit.on()
+jit.off()

+

+Turns the whole JIT compiler on (default) or off. +

+

+These functions are typically used with the command line options +-j on or -j off. +

+ +

jit.flush()

+

+Flushes the whole cache of compiled code. +

+ +

jit.flush(tr)

+

+Flushes the code for the specified root trace and all of its +side traces from the cache. +

+ +

jit.on(func|true [,true|false])
+jit.off(func|true [,true|false])
+jit.flush(func|true [,true|false])

+

+jit.on enables JIT compilation for a Lua function (this is +the default). +

+

+jit.off disables JIT compilation for a Lua function and +flushes any already compiled code from the code cache. +

+

+jit.flush flushes the code, but doesn't affect the +enable/disable status. +

+

+The current function, i.e. the Lua function calling this library +function, can also be specified by passing true as the first +argument. +

+

+If the second argument is true, JIT compilation is also +enabled, disabled or flushed recursively for all subfunctions of a +function. With false only the subfunctions are affected. +

+

+The jit.on and jit.off functions only set a flag +which is checked when the function is about to be compiled. They do +not trigger immediate compilation. +

+

+Typical usage is jit.off(true, true) in the main chunk +of a module to turn off JIT compilation for the whole module for +debugging purposes. +
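+A short sketch of these calling conventions (myfunc is a placeholder):
+
+local function myfunc() return 1 end  -- placeholder function
+jit.off(myfunc)       -- disable compilation for this function only
+jit.off(true, true)   -- in a main chunk: whole module off (for debugging)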

+ +

jit.version

+

+Contains the LuaJIT version string. +

+ +

jit.version_num

+

+Contains the version number of the LuaJIT core. Version xx.yy.zz +is represented by the decimal number xxyyzz. +
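+For example, version 2.0.0 is represented as 20000, so a minimal
+version check might look like this (a sketch):
+
+if jit.version_num < 20000 then error("LuaJIT 2.0 or later required") end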

+ +

jit.arch

+

+Contains the target architecture name (CPU and optional ABI). +

+ +

jit.opt.* — JIT compiler optimization control

+

+This module provides the backend for the -O command line +option. +

+

+You can also use it programmatically, e.g.: +

+
+jit.opt.start(2) -- same as -O2
+jit.opt.start("-dce")
+jit.opt.start("hotloop=10", "hotexit=2")
+
+

+Unlike in LuaJIT 1.x, the module is built-in and +optimization is turned on by default! +It's no longer necessary to run require("jit.opt").start(), +which was one of the ways to enable optimization. +

+ +

jit.util.* — JIT compiler introspection

+

+This module holds functions to introspect the bytecode, generated +traces, the IR and the generated machine code. The functionality +provided by this module is still in flux and therefore undocumented. +

+

+The debug modules -jbc, -jv and -jdump make
+extensive use of these functions. Please check out their source code
+if you want to know more.

+
+
+ + + diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css new file mode 100644 index 0000000000..00a6b15412 --- /dev/null +++ b/doc/bluequad-print.css @@ -0,0 +1,166 @@ +/* Copyright (C) 2004-2009 Mike Pall. + * + * You are welcome to use the general ideas of this design for your own sites. + * But please do not steal the stylesheet, the layout or the color scheme. + */ +body { + font-family: serif; + font-size: 11pt; + margin: 0 3em; + padding: 0; + border: none; +} +a:link, a:visited, a:hover, a:active { + text-decoration: none; + background: transparent; + color: #0000ff; +} +h1, h2, h3 { + font-family: sans-serif; + font-weight: bold; + text-align: left; + margin: 0.5em 0; + padding: 0; +} +h1 { + font-size: 200%; +} +h2 { + font-size: 150%; +} +h3 { + font-size: 125%; +} +p { + margin: 0 0 0.5em 0; + padding: 0; +} +ul, ol { + margin: 0.5em 0; + padding: 0 0 0 2em; +} +ul { + list-style: outside square; +} +ol { + list-style: outside decimal; +} +li { + margin: 0; + padding: 0; +} +dl { + margin: 1em 0; + padding: 1em; + border: 1px solid black; +} +dt { + font-weight: bold; + margin: 0; + padding: 0; +} +dt sup { + float: right; + margin-left: 1em; +} +dd { + margin: 0.5em 0 0 2em; + padding: 0; +} +table { + table-layout: fixed; + width: 100%; + margin: 1em 0; + padding: 0; + border: 1px solid black; + border-spacing: 0; + border-collapse: collapse; +} +tr { + margin: 0; + padding: 0; + border: none; +} +td { + text-align: left; + margin: 0; + padding: 0.2em 0.5em; + border-top: 1px solid black; + border-bottom: 1px solid black; +} +tr.separate td { + border-top: double; +} +tt, pre, code, kbd, samp { + font-family: monospace; + font-size: 75%; +} +kbd { + font-weight: bolder; +} +blockquote, pre { + margin: 1em 2em; + padding: 0; +} +img { + border: none; + vertical-align: baseline; + margin: 0; + padding: 0; +} +img.left { + float: left; + margin: 0.5em 1em 0.5em 0; +} +img.right { + float: right; + margin: 0.5em 0 0.5em 1em; +} +.flush { + clear: both; + visibility: hidden; +} +.hide, .noprint, #nav { + display: none !important; +} +.pagebreak { + page-break-before: always; +} +#site { + text-align: right; + font-family: sans-serif; + font-weight: bold; + margin: 0 1em; + border-bottom: 1pt solid black; +} +#site a { + font-size: 1.2em; +} +#site a:link, #site a:visited { + text-decoration: none; + font-weight: bold; + background: transparent; + color: #ffffff; +} +#logo { + color: #ff8000; +} +#head { + clear: both; + margin: 0 1em; +} +#main { + line-height: 1.3; + text-align: justify; + margin: 1em; +} +#foot { + clear: both; + font-size: 80%; + text-align: center; + margin: 0 1.25em; + padding: 0.5em 0 0 0; + border-top: 1pt solid black; + page-break-before: avoid; + page-break-after: avoid; +} diff --git a/doc/bluequad.css b/doc/bluequad.css new file mode 100644 index 0000000000..7e52102f31 --- /dev/null +++ b/doc/bluequad.css @@ -0,0 +1,303 @@ +/* Copyright (C) 2004-2009 Mike Pall. + * + * You are welcome to use the general ideas of this design for your own sites. + * But please do not steal the stylesheet, the layout or the color scheme. 
+ */ +/* colorscheme: + * + * site | head #4162bf/white | #6078bf/#e6ecff + * ------+------ ----------------+------------------- + * nav | main #bfcfff | #e6ecff/black + * + * nav: hiback loback #c5d5ff #b9c9f9 + * hiborder loborder #e6ecff #97a7d7 + * link hover #2142bf #ff0000 + * + * link: link visited hover #2142bf #8122bf #ff0000 + * + * main: boxback boxborder #f0f4ff #bfcfff + */ +body { + font-family: Verdana, Arial, Helvetica, sans-serif; + font-size: 10pt; + margin: 0; + padding: 0; + border: none; + background: #e0e0e0; + color: #000000; +} +a:link { + text-decoration: none; + background: transparent; + color: #2142bf; +} +a:visited { + text-decoration: none; + background: transparent; + color: #8122bf; +} +a:hover, a:active { + text-decoration: underline; + background: transparent; + color: #ff0000; +} +h1, h2, h3 { + font-weight: bold; + text-align: left; + margin: 0.5em 0; + padding: 0; + background: transparent; +} +h1 { + font-size: 200%; + line-height: 3em; /* really 6em relative to body, match #site span */ + margin: 0; +} +h2 { + font-size: 150%; + color: #606060; +} +h3 { + font-size: 125%; + color: #404040; +} +p { + max-width: 600px; + margin: 0 0 0.5em 0; + padding: 0; +} +b { + color: #404040; +} +ul, ol { + max-width: 600px; + margin: 0.5em 0; + padding: 0 0 0 2em; +} +ul { + list-style: outside square; +} +ol { + list-style: outside decimal; +} +li { + margin: 0; + padding: 0; +} +dl { + max-width: 600px; + margin: 1em 0; + padding: 1em; + border: 1px solid #bfcfff; + background: #f0f4ff; +} +dt { + font-weight: bold; + margin: 0; + padding: 0; +} +dt sup { + float: right; + margin-left: 1em; + color: #808080; +} +dt a:visited { + text-decoration: none; + color: #2142bf; +} +dt a:hover, dt a:active { + text-decoration: none; + color: #ff0000; +} +dd { + margin: 0.5em 0 0 2em; + padding: 0; +} +div.tablewrap { /* for IE *sigh* */ + max-width: 600px; +} +table { + table-layout: fixed; + border-spacing: 0; + border-collapse: collapse; + max-width: 600px; + width: 100%; + margin: 1em 0; + padding: 0; + border: 1px solid #bfcfff; +} +tr { + margin: 0; + padding: 0; + border: none; +} +tr.odd { + background: #f0f4ff; +} +tr.separate td { + border-top: 1px solid #bfcfff; +} +td { + text-align: left; + margin: 0; + padding: 0.2em 0.5em; + border: none; +} +tt, code, kbd, samp { + font-family: Courier New, Courier, monospace; + line-height: 1.2; + font-size: 110%; +} +kbd { + font-weight: bolder; +} +blockquote, pre { + max-width: 600px; + margin: 1em 2em; + padding: 0; +} +pre { + line-height: 1.1; +} +pre.code { + line-height: 1.4; + margin: 0.5em 0 1em 0.5em; + padding: 0.5em 1em; + border: 1px solid #bfcfff; + background: #f0f4ff; +} +img { + border: none; + vertical-align: baseline; + margin: 0; + padding: 0; +} +img.left { + float: left; + margin: 0.5em 1em 0.5em 0; +} +img.right { + float: right; + margin: 0.5em 0 0.5em 1em; +} +.indent { + padding-left: 1em; +} +.flush { + clear: both; + visibility: hidden; +} +.hide, .noscreen { + display: none !important; +} +.ext { + color: #ff8000; +} +#site { + clear: both; + float: left; + width: 13em; + text-align: center; + font-weight: bold; + margin: 0; + padding: 0; + background: transparent; + color: #ffffff; +} +#site a { + font-size: 200%; +} +#site a:link, #site a:visited { + text-decoration: none; + font-weight: bold; + background: transparent; + color: #ffffff; +} +#site span { + line-height: 3em; /* really 6em relative to body, match h1 */ +} +#logo { + color: #ffb380; +} +#head { + margin: 0; + padding: 0 0 0 
2em; + border-left: solid 13em #4162bf; + border-right: solid 3em #6078bf; + background: #6078bf; + color: #e6ecff; +} +#nav { + clear: both; + float: left; + overflow: hidden; + text-align: left; + line-height: 1.5; + width: 13em; + padding-top: 1em; + background: transparent; +} +#nav ul { + list-style: none outside; + margin: 0; + padding: 0; +} +#nav li { + margin: 0; + padding: 0; +} +#nav a { + display: block; + text-decoration: none; + font-weight: bold; + margin: 0; + padding: 2px 1em; + border-top: 1px solid transparent; + border-bottom: 1px solid transparent; + background: transparent; + color: #2142bf; +} +#nav a:hover, #nav a:active { + text-decoration: none; + border-top: 1px solid #97a7d7; + border-bottom: 1px solid #e6ecff; + background: #b9c9f9; + color: #ff0000; +} +#nav a.current, #nav a.current:hover, #nav a.current:active { + border-top: 1px solid #e6ecff; + border-bottom: 1px solid #97a7d7; + background: #c5d5ff; + color: #2142bf; +} +#nav ul ul a { + padding: 0 1em 0 2em; +} +#main { + line-height: 1.5; + text-align: left; + margin: 0; + padding: 1em 2em; + border-left: solid 13em #bfcfff; + border-right: solid 3em #e6ecff; + background: #e6ecff; +} +#foot { + clear: both; + font-size: 80%; + text-align: center; + margin: 0; + padding: 0.5em; + background: #6078bf; + color: #ffffff; +} +#foot a:link, #foot a:visited { + text-decoration: underline; + background: transparent; + color: #ffffff; +} +#foot a:hover, #foot a:active { + text-decoration: underline; + background: transparent; + color: #bfcfff; +} diff --git a/doc/changes.html b/doc/changes.html new file mode 100644 index 0000000000..6c34b8befd --- /dev/null +++ b/doc/changes.html @@ -0,0 +1,281 @@ + + + +LuaJIT Change History + + + + + + + + + +
+Lua +
+ + +
+

+This is a list of changes between the released versions of LuaJIT.
+The current development version is LuaJIT 2.0.0-beta1.
+The current stable version is LuaJIT 1.1.5. +

+

+Please check the +» Online Change History +to see whether newer versions are available. +

+ +
+

LuaJIT 2.0.0-beta1 — 2009-10-31

+ +
+ +
+

LuaJIT 1.1.5 — 2008-10-25

+ + +

LuaJIT 1.1.4 — 2008-02-05

+ + +

LuaJIT 1.1.3 — 2007-05-24

+ + +

LuaJIT 1.1.2 — 2006-06-24

+ + +

LuaJIT 1.1.1 — 2006-06-20

+ +

+This release is in sync with Coco 1.1.1 (see the +» Coco Change History). +

+ +

LuaJIT 1.1.0 — 2006-03-13

+ +

+This release is in sync with Coco 1.1.0 (see the +» Coco Change History). +

+
+ +
+

LuaJIT 1.0.3 — 2005-09-08

+ +

+This is the first public release of LuaJIT. +

+ +

LuaJIT 1.0.2 — 2005-09-02

+ +

+Not released because Lua 5.1 alpha came out today. +

+ +

LuaJIT 1.0.1 — 2005-08-31

+ +

+Interim non-public release. +Special thanks to Adam D. Moss for reporting most of the bugs. +

+ +

LuaJIT 1.0.0 — 2005-08-29

+

+This is the initial non-public release of LuaJIT. +

+
+
+
+ + + diff --git a/doc/contact.html b/doc/contact.html new file mode 100644 index 0000000000..36d5a825c0 --- /dev/null +++ b/doc/contact.html @@ -0,0 +1,84 @@ + + + +Contact + + + + + + + + +
+Lua +
+ + +
+

+Please send general questions to the +» Lua mailing list. +You can also send any questions you have directly to me: +

+ + + + + +

Copyright

+

+All documentation is +Copyright © 2005-2009 Mike Pall. +

+ + +
+
+ + + diff --git a/doc/faq.html b/doc/faq.html new file mode 100644 index 0000000000..6f62e1eb08 --- /dev/null +++ b/doc/faq.html @@ -0,0 +1,141 @@ + + + +Frequently Asked Questions (FAQ) + + + + + + + + + +
+Lua +
+ + +
+
+
Q: Where can I learn more about Lua and LuaJIT?
+
+ +
+ +
+
Q: Where can I learn more about the compiler technology used by LuaJIT?
+
+I'm planning to write more documentation about the internals of LuaJIT. +In the meantime, please use the following Google Scholar searches +to find relevant papers:
+Search for: » Trace Compiler
+Search for: » JIT Compiler
+Search for: » Dynamic Language Optimizations
+Search for: » SSA Form
+Search for: » Linear Scan Register Allocation
+And, you know, reading the source is of course the only way to enlightenment. :-) +
+
+ +
+
Q: Why do I get this error: "attempt to index global 'arg' (a nil value)"?
+Q: My vararg functions fail after switching to LuaJIT!
+
LuaJIT is compatible with the Lua 5.1 language standard. It doesn't
+support the implicit arg parameter for old-style vararg
+functions from Lua 5.0.
Please convert your code to the +» Lua 5.1 +vararg syntax.
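+A minimal sketch of the conversion:
+
+-- Lua 5.0 style (implicit arg table; fails under LuaJIT):
+function f(...) print(arg.n, arg[1]) end
+-- Lua 5.1 style (explicit vararg expressions):
+function f(...) print(select("#", ...), (...)) end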
+
+ +
+
Q: Sometimes Ctrl-C fails to stop my Lua program. Why?
+
The interrupt signal handler sets a Lua debug hook. But this is +currently ignored by compiled code (this will eventually be fixed). If +your program is running in a tight loop and never falls back to the +interpreter, the debug hook never runs and can't throw the +"interrupted!" error.
In the meantime, you have to press Ctrl-C
+twice to stop your program. That's similar to when it's stuck
+running inside a C function under the Lua interpreter.
+
+ +
+
Q: Why doesn't my favorite power-patch for Lua apply against LuaJIT?
+
Because it's a completely redesigned VM and has very little code +in common with Lua anymore. Also, if the patch introduces changes to +the Lua semantics, this would need to be reflected everywhere in the +VM, from the interpreter up to all stages of the compiler.
Please
+use only standard Lua language constructs. For many common needs you
+can use source transformations, wrapper functions or proxy functions.
+The compiler will happily optimize away such indirections.
+
+ +
+
Q: Lua runs everywhere. Why doesn't LuaJIT support my CPU?
+
Because it's a compiler — it needs to generate native +machine code. This means the code generator must be ported to each +architecture. And the fast interpreter is written in assembler and +must be ported, too. This is quite an undertaking.
Currently only +x86 CPUs are supported. x64 support is in the works. Other +architectures will follow with sufficient demand and/or +sponsoring.
+
+ +
+
Q: When will feature X be added? When will the next version be released?
+
When it's ready.
+C'mon, it's open source — I'm doing it on my own time and you're +getting it for free. You can either contribute a patch or sponsor +the development of certain features, if they are important to you. +
+
+
+
+ + + diff --git a/doc/img/contact.png b/doc/img/contact.png new file mode 100644 index 0000000000..9c73dc594e Binary files /dev/null and b/doc/img/contact.png differ diff --git a/doc/install.html b/doc/install.html new file mode 100644 index 0000000000..b7211d2155 --- /dev/null +++ b/doc/install.html @@ -0,0 +1,216 @@ + + + +Installation + + + + + + + + +
+Lua +
+ + +
+

+LuaJIT is only distributed as a source package. This page explains +how to build and install LuaJIT with different operating systems +and C compilers. +

+

+For the impatient (on POSIX systems): +

+
+make && sudo make install
+
+

+LuaJIT currently builds out of the box on all popular x86 systems
+(Linux, Windows, OSX etc.). It builds and runs fine as a 32 bit
+application under x64-based systems, too.

+ +

Configuring LuaJIT

+

+The standard configuration should work fine for most installations. +Usually there is no need to tweak the settings, except when you want to +install to a non-standard path. The following three files hold all +user-configurable settings: +

+ +

+Please read the instructions given in these files before changing
+any settings.

+ +

POSIX Systems (Linux, OSX, *BSD etc.)

+

Prerequisites

+

+Depending on your distribution, you may need to install a package for +GCC (GCC 3.4 or later required), the development headers and/or a +complete SDK. +

+

+E.g. on a current Debian/Ubuntu, install libc6-dev +with the package manager. Currently LuaJIT only builds as a 32 bit +application, so you actually need to install libc6-dev-i386 +when building on an x64 OS. +

+

+Download the current source package (pick the .tar.gz), if you haven't +already done so. Move it to a directory of your choice, open a +terminal window and change to this directory. Now unpack the archive +and change to the newly created directory: +

+
+tar zxf LuaJIT-2.0.0-beta1.tar.gz
+cd LuaJIT-2.0.0-beta1
+
+

Building LuaJIT

+

+The supplied Makefiles try to auto-detect the settings needed for your +operating system and your compiler. They need to be run with GNU Make, +which is probably the default on your system, anyway. Simply run: +

+
+make
+
+

Installing LuaJIT

+

+The top-level Makefile installs LuaJIT by default under +/usr/local, i.e. the executable ends up in +/usr/local/bin and so on. You need to have root privileges +to write to this path. So, assuming sudo is installed on your system, +run the following command and enter your sudo password: +

+
+sudo make install
+
+

+Otherwise specify the directory prefix as an absolute path, e.g.: +

+
+sudo make install PREFIX=/opt/lj2
+
+

+But note that the installation prefix and the prefix for the module paths +(configured in src/luaconf.h) must match. +

+

+Note: to avoid overwriting a previous version, the beta test releases +only install the LuaJIT executable under the versioned name (i.e. +luajit-2.0.0-beta1). You probably want to create a symlink +for convenience, with a command like this: +

+
+sudo ln -sf luajit-2.0.0-beta1 /usr/local/bin/luajit
+
+ +

Windows Systems

+

Prerequisites

+

+Either install one of the open source SDKs +(» MinGW or +» Cygwin) which come with modified +versions of GCC plus the required development headers. +

+

+Or install Microsoft's Visual C++ (MSVC) — the freely downloadable +» Express Edition +works just fine. +

+

+Next, download the source package and unpack it using an archive manager +(e.g. the Windows Explorer) to a directory of your choice. +

+

Building with MSVC

+

+Open a "Visual Studio .NET Command Prompt" and cd to the +directory where you've unpacked the sources. Then run this command: +

+
+cd src
+msvcbuild
+
+

+Then follow the installation instructions below. +

+

Building with MinGW or Cygwin

+

+Open a command prompt window and make sure the MinGW or Cygwin programs +are in your path. Then cd to the directory where +you've unpacked the sources and run this command for MinGW: +

+
+cd src
+mingw32-make
+
+

+Or this command for Cygwin: +

+
+cd src
+make
+
+

+Then follow the installation instructions below. +

+

Installing LuaJIT

+

+Copy luajit.exe and lua51.dll +to a newly created directory (any location is ok). Add lua +and lua\jit directories below it and copy all Lua files +from the lib directory of the distribution to the latter directory. +
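+For example, the resulting layout might look like this (a sketch;
+C:\LuaJIT is a placeholder for the directory you created):
+
+C:\LuaJIT\luajit.exe
+C:\LuaJIT\lua51.dll
+C:\LuaJIT\lua\jit\bc.lua   (plus the other Lua files from lib)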

+

+There are no hardcoded +absolute path names — all modules are loaded relative to the +directory where luajit.exe is installed +(see src/luaconf.h). +

+
+
+ + + diff --git a/doc/luajit.html b/doc/luajit.html new file mode 100644 index 0000000000..9b16ea3794 --- /dev/null +++ b/doc/luajit.html @@ -0,0 +1,120 @@ + + + +LuaJIT + + + + + + + + +
+Lua +
+ + +
+

+LuaJIT is a Just-In-Time Compiler for the Lua* +programming language. +

+

+LuaJIT is Copyright © 2005-2009 Mike Pall.
+LuaJIT is open source software, released under the
+» MIT/X license.

+

+* Lua is a powerful, dynamic and light-weight programming language +designed for extending applications. Lua is also frequently used as a +general-purpose, stand-alone language. More information about +Lua can be found at: » http://www.lua.org/ +

+

Compatibility

+

+LuaJIT implements the full set of language features defined by Lua 5.1. +The virtual machine (VM) is API- and ABI-compatible to the +standard Lua interpreter and can be deployed as a drop-in replacement. +

+

+LuaJIT offers more performance, at the expense of portability. It +currently runs on all popular operating systems based on x86 CPUs +(Linux, Windows, OSX etc.). It will be ported to x64 CPUs and other +platforms in the future, based on user demand and sponsoring. +

+ +

Overview

+

+LuaJIT has been successfully used as a scripting middleware in +games, 3D modellers, numerical simulations, trading platforms and many +other specialty applications. It combines high flexibility with high +performance and an unmatched low memory footprint: less than +120K for the VM plus less than 80K for the JIT compiler. +

+

+LuaJIT has been in continuous development since 2005. It's widely
+considered to be one of the fastest dynamic language
+implementations. It has outperformed other dynamic languages on many
+cross-language benchmarks since its first release — often by a
+substantial margin. Only now, in 2009, other dynamic language VMs are
+starting to catch up with the performance of LuaJIT 1.x …

+

+2009 also marks the first release of the long-awaited LuaJIT 2.0. +The whole VM has been rewritten from the ground up and relentlessly +optimized for performance. It combines a high-speed interpreter, +written in assembler, with a state-of-the-art JIT compiler. +

+

+An innovative trace compiler is integrated with advanced, +SSA-based optimizations and a highly tuned code generation backend. This +allows a substantial reduction of the overhead associated with dynamic +language features. It's destined to break into the performance range +traditionally reserved for offline, static language compilers. +

+ +

More ...

+

+Click on the LuaJIT sub-topics in the navigation bar to learn more +about LuaJIT. +

+

+Click on the Logo in the upper left corner to visit +the LuaJIT project page on the web. All other links to online +resources are marked with a '»'. +

+
+
+ + + diff --git a/doc/running.html b/doc/running.html new file mode 100644 index 0000000000..db69578c6a --- /dev/null +++ b/doc/running.html @@ -0,0 +1,233 @@ + + + +Running LuaJIT + + + + + + + + + +
+Lua +
+ + +
+

+LuaJIT has only a single stand-alone executable, called luajit on +POSIX systems or luajit.exe on Windows. It can be used to run simple +Lua statements or whole Lua applications from the command line. It has an +interactive mode, too. +
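+For example, to run a short statement, a whole application, or start
+the interactive mode (a sketch; hello.lua is a placeholder):
+
+luajit -e "print('hello')"
+luajit hello.lua
+luajit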

+

+Note: the beta test releases only install under the versioned name on +POSIX systems (to avoid overwriting a previous version). You either need +to type luajit-2.0.0-beta1 to start it or create a symlink +with a command like this: +

+
+sudo ln -sf luajit-2.0.0-beta1 /usr/local/bin/luajit
+
+

+Unlike previous versions, optimization is turned on by default in
+LuaJIT 2.0!
It's no longer necessary to use luajit -O. +

+ +

Command Line Options

+

+The luajit stand-alone executable is just a slightly modified +version of the regular lua stand-alone executable. +It supports the same basic options, too. luajit -h +prints a short list of the available options. Please have a look at the +» Lua manual +for details. +

+

+Two additional options control the behavior of LuaJIT: +

+ +

-j cmd[=arg[,arg...]]

+

+This option performs a LuaJIT control command or activates one of the +loadable extension modules. The command is first looked up in the +jit.* library. If no matching function is found, a module +named jit.<cmd> is loaded and the start() +function of the module is called with the specified arguments (if +any). The space between -j and cmd is optional. +

+

+Here are the available LuaJIT control commands: +

+ +

+The -jv and -jdump commands are extension modules +written in Lua. They are mainly used for debugging the JIT compiler +itself. For a description of their options and output format, please +read the comment block at the start of their source. +They can be found in the lib directory of the source +distribution or installed under the jit directory. By default +this is /usr/local/share/luajit-2.0.0-beta1/jit on POSIX +systems. +
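+For example, to watch the compiler's progress while a script runs
+(a sketch; test.lua is a placeholder):
+
+luajit -jv test.lua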

+ +

-O[level]
+-O[+]flag -O-flag
+-Oparam=value

+

+This option allows fine-tuned control of the optimizations used by
+the JIT compiler. This is mainly intended for debugging LuaJIT itself.
+Please note that the JIT compiler is extremely fast (we are talking
+about the microsecond to millisecond range). Disabling optimizations
+doesn't have any visible impact on its overhead, but usually generates
+code that runs slower.

+

+The first form sets an optimization level — this enables a +specific mix of optimization flags. -O0 turns off all +optimizations and higher numbers enable more optimizations. Omitting +the level (i.e. just -O) sets the default optimization level, +which is -O3 in the current version. +

+

+The second form adds or removes individual optimization flags. +The third form sets a parameter for the VM or the JIT compiler +to a specific value. +

+

+You can either use this option multiple times (like -Ocse +-O-dce -Ohotloop=10) or separate several settings with a comma +(like -O+cse,-dce,hotloop=10). The settings are applied from +left to right and later settings override earlier ones. You can freely +mix the three forms, but note that setting an optimization level +overrides all earlier flags. +
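+For example, these two invocations apply the same settings (a sketch;
+test.lua is a placeholder):
+
+luajit -Ocse -O-dce -Ohotloop=10 test.lua
+luajit -O+cse,-dce,hotloop=10 test.lua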

+

+Here are the available flags and at what optimization levels they +are enabled: +

+ + + + + + + + + + + + + + + + + + + + + + + + +
Flag     -O1  -O2  -O3   Description
fold      •    •    •    Constant Folding, Simplifications and Reassociation
cse       •    •    •    Common-Subexpression Elimination
dce       •    •    •    Dead-Code Elimination
narrow         •    •    Narrowing of numbers to integers
loop           •    •    Loop Optimizations (code hoisting)
fwd                 •    Load Forwarding (L2L) and Store Forwarding (S2L)
dse                 •    Dead-Store Elimination
fuse                •    Fusion of operands into instructions
+

+Here are the parameters and their default settings: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Parameter    Default   Description
maxtrace     1000      Max. number of traces in the cache
maxrecord    2000      Max. number of recorded IR instructions
maxirconst   500       Max. number of IR constants of a trace
maxside      100       Max. number of side traces of a root trace
maxsnap      100       Max. number of snapshots for a trace
hotloop      57        Number of iterations to detect a hot loop
hotexit      10        Number of taken exits to start a side trace
tryside      4         Number of attempts to compile a side trace
instunroll   4         Max. unroll factor for unstable loops
loopunroll   7         Max. unroll factor for loop ops in side traces
callunroll   3         Max. unroll factor for pseudo-recursive calls
sizemcode    32        Size of each machine code area in KBytes (Windows: 64K)
maxmcode     512       Max. total size of all machine code areas in KBytes
+
+
+ + + diff --git a/doc/status.html b/doc/status.html new file mode 100644 index 0000000000..23c14c7616 --- /dev/null +++ b/doc/status.html @@ -0,0 +1,235 @@ + + + +Status & Roadmap + + + + + + + + + +
+Lua +
+ + +
+

+The LuaJIT 1.x series represents
+the current stable branch. As of
+this writing there have been no open bugs for about a year. So, if
+you need a rock-solid VM, you are encouraged to fetch the latest
+release of LuaJIT 1.x from the » Download
+page.

+

+LuaJIT 2.0 is the currently active
+development branch.
+It has Beta Test status and is still undergoing
+substantial changes. It's expected to quickly mature within the next
+few months. You should definitely start to evaluate it for new projects
+right now. But deploying it in production environments is not yet
+recommended.

+ +

Current Status

+

+This is a list of the things you should know about the LuaJIT 2.0 beta test: +

+ + +

Roadmap

+

+Rather than stating exact release dates (I'm well known for making +spectacularly wrong guesses), this roadmap lists the general project +plan, sorted by priority, as well as ideas for the future: +

+ +
+
+ + + diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h new file mode 100644 index 0000000000..94d9a9e28e --- /dev/null +++ b/dynasm/dasm_proto.h @@ -0,0 +1,69 @@ +/* +** DynASM encoding engine prototypes. +** Copyright (C) 2005-2009 Mike Pall. All rights reserved. +** Released under the MIT/X license. See dynasm.lua for full copyright notice. +*/ + +#ifndef _DASM_PROTO_H +#define _DASM_PROTO_H + +#include +#include + +#define DASM_IDENT "DynASM 1.2.1" +#define DASM_VERSION 10201 /* 1.2.1 */ + +#ifndef Dst_DECL +#define Dst_DECL dasm_State *Dst +#endif + +#ifndef Dst_GET +#define Dst_GET (Dst) +#endif + +#ifndef DASM_FDEF +#define DASM_FDEF extern +#endif + + +/* Internal DynASM encoder state. */ +typedef struct dasm_State dasm_State; + +/* Action list type. */ +typedef const unsigned char *dasm_ActList; + + +/* Initialize and free DynASM state. */ +DASM_FDEF void dasm_init(Dst_DECL, int maxsection); +DASM_FDEF void dasm_free(Dst_DECL); + +/* Setup global array. Must be called before dasm_setup(). */ +DASM_FDEF void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl); + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +DASM_FDEF void dasm_growpc(Dst_DECL, unsigned int maxpc); + +/* Setup encoder. */ +DASM_FDEF void dasm_setup(Dst_DECL, dasm_ActList actionlist); + +/* Feed encoder with actions. Calls are generated by pre-processor. */ +DASM_FDEF void dasm_put(Dst_DECL, int start, ...); + +/* Link sections and return the resulting size. */ +DASM_FDEF int dasm_link(Dst_DECL, size_t *szp); + +/* Encode sections into buffer. */ +DASM_FDEF int dasm_encode(Dst_DECL, void *buffer); + +/* Get PC label offset. */ +DASM_FDEF int dasm_getpclabel(Dst_DECL, unsigned int pc); + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +DASM_FDEF int dasm_checkstep(Dst_DECL, int secmatch); +#else +#define dasm_checkstep(a, b) 0 +#endif + + +#endif /* _DASM_PROTO_H */ diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h new file mode 100644 index 0000000000..dab33e5ae4 --- /dev/null +++ b/dynasm/dasm_x86.h @@ -0,0 +1,467 @@ +/* +** DynASM x86 encoding engine. +** Copyright (C) 2005-2009 Mike Pall. All rights reserved. +** Released under the MIT/X license. See dynasm.lua for full copyright notice. +*/ + +#include +#include +#include +#include + +#define DASM_ARCH "x86" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. DASM_STOP must be 255. */ +enum { + DASM_DISP = 233, + DASM_IMM_S, DASM_IMM_B, DASM_IMM_W, DASM_IMM_D, DASM_IMM_WB, DASM_IMM_DB, + DASM_VREG, DASM_SPACE, DASM_SETLABEL, DASM_REL_A, DASM_REL_LG, DASM_REL_PC, + DASM_IMM_LG, DASM_IMM_PC, DASM_LABEL_LG, DASM_LABEL_PC, DASM_ALIGN, + DASM_EXTERN, DASM_ESC, DASM_MARK, DASM_SECTION, DASM_STOP +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. */ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_VREG 0x15000000 +#define DASM_S_UNDEF_L 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). 
*/ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. 
*/ +void dasm_setup(Dst_DECL, dasm_ActList actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs, mrm = 4; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + int action = *p++; + if (action < DASM_DISP) { + ofs++; + } else if (action <= DASM_REL_A) { + int n = va_arg(ap, int); + b[pos++] = n; + switch (action) { + case DASM_DISP: + if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; } + case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; + case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ + case DASM_IMM_D: ofs += 4; break; + case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob; + case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break; + case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; + case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; + case DASM_SPACE: p++; ofs += n; break; + case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ + case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG); + if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue; + } + mrm = 4; + } else { + int *pl, n; + switch (action) { + case DASM_REL_LG: + case DASM_IMM_LG: + n = *p++; pl = D->lglabels + n; + if (n <= 246) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */ + pl -= 246; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + case DASM_IMM_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + ofs += 4; /* Maximum offset needed. */ + if (action == DASM_REL_LG || action == DASM_REL_PC) + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. 
*/ + break; + case DASM_ALIGN: + ofs += *p++; /* Maximum alignment needed (arg is 2**n-1). */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_EXTERN: p += 2; ofs += 4; break; + case DASM_ESC: p++; ofs++; break; + case DASM_MARK: mrm = p[-2]; break; + case DASM_SECTION: + n = *p; CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n]; + case DASM_STOP: goto stop; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink branches/aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + int op, action = *p++; + switch (action) { + case DASM_REL_LG: p++; op = p[-3]; goto rel_pc; + case DASM_REL_PC: op = p[-2]; rel_pc: { + int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0); + if (shrink) { /* Shrinkable branch opcode? */ + int lofs, lpos = b[pos]; + if (lpos < 0) goto noshrink; /* Ext global? */ + lofs = *DASM_POS2PTR(D, lpos); + if (lpos > pos) { /* Fwd label: add cumulative section offsets. */ + int i; + for (i = secnum; i < DASM_POS2SEC(lpos); i++) + lofs += D->sections[i].ofs; + } else { + lofs -= ofs; /* Bkwd label: unfix offset. */ + } + lofs -= b[pos+1]; /* Short branch ok? */ + if (lofs >= -128-shrink && lofs <= 127) ofs -= shrink; /* Yes. */ + else { noshrink: shrink = 0; } /* No, cannot shrink op. */ + } + b[pos+1] = shrink; + pos += 2; + break; + } + case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++; + case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W: + case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB: + case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break; + case DASM_LABEL_LG: p++; + case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */ + case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */ + case DASM_EXTERN: p += 2; break; + case DASM_ESC: p++; break; + case DASM_MARK: break; + case DASM_SECTION: case DASM_STOP: goto stop; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#define dasmb(x) *cp++ = (unsigned char)(x) +#ifndef DASM_ALIGNED_WRITES +#define dasmw(x) \ + do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0) +#define dasmd(x) \ + do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0) +#else +#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0) +#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0) +#endif + +/* Pass 3: Encode sections. 
*/ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + unsigned char *base = (unsigned char *)buffer; + unsigned char *cp = base; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + unsigned char *mark = NULL; + while (1) { + int action = *p++; + int n = (action >= DASM_DISP && action <= DASM_ALIGN) ? *b++ : 0; + switch (action) { + case DASM_DISP: if (!mark) mark = cp; { + unsigned char *mm = mark; + if (*p != DASM_IMM_DB && *p != DASM_IMM_WB) mark = NULL; + if (n == 0) { int mrm = mm[-1]&7; if (mrm == 4) mrm = mm[0]&7; + if (mrm != 5) { mm[-1] -= 0x80; break; } } + if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40; + } + case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break; + case DASM_IMM_DB: if (((n+128)&-256) == 0) { + db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb; + } else mark = NULL; + case DASM_IMM_D: wd: dasmd(n); break; + case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; + case DASM_IMM_W: dasmw(n); break; + case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; } + case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; + b++; n = (int)(ptrdiff_t)D->globals[-n]; + case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ + case DASM_REL_PC: rel_pc: { + int shrink = *b++; + int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; } + n = *pb - ((int)(cp-base) + 4-shrink); + if (shrink == 0) goto wd; + if (shrink == 4) { cp--; cp[-1] = *cp-0x10; } else cp[-1] = 0xeb; + goto wb; + } + case DASM_IMM_LG: + p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; } + case DASM_IMM_PC: { + int *pb = DASM_POS2PTR(D, n); + n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base); + goto wd; + } + case DASM_LABEL_LG: { + int idx = *p++; + if (idx >= 10) + D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n)); + break; + } + case DASM_LABEL_PC: case DASM_SETLABEL: break; + case DASM_SPACE: { int fill = *p++; while (n--) *cp++ = fill; break; } + case DASM_ALIGN: + n = *p++; + while (((cp-base) & n)) *cp++ = 0x90; /* nop */ + break; + case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd; + case DASM_MARK: mark = cp; break; + case DASM_ESC: action = *p++; + default: *cp++ = action; break; + case DASM_SECTION: case DASM_STOP: goto stop; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. 
*/ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_L|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua new file mode 100644 index 0000000000..8221080677 --- /dev/null +++ b/dynasm/dasm_x86.lua @@ -0,0 +1,1799 @@ +------------------------------------------------------------------------------ +-- DynASM x86 module. +-- +-- Copyright (C) 2005-2009 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "x86", + description = "DynASM x86 (i386) module", + version = "1.2.1", + vernum = 10201, + release = "2009-04-16", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, unpack = assert, unpack +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub +local concat, sort = table.concat, table.sort +local char, unpack = string.char, unpack + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + -- int arg, 1 buffer pos: + "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", + -- action arg (1 byte), int arg, 1 buffer pos (reg/num): + "VREG", "SPACE", + -- ptrdiff_t arg, 1 buffer pos (address): !x64 + "SETLABEL", "REL_A", + -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): + "REL_LG", "REL_PC", + -- action arg (1 byte) or int arg, 1 buffer pos (link): + "IMM_LG", "IMM_PC", + -- action arg (1 byte) or int arg, 1 buffer pos (offset): + "LABEL_LG", "LABEL_PC", + -- action arg (1 byte), 1 buffer pos (offset): + "ALIGN", + -- action args (2 bytes), no buffer pos. + "EXTERN", + -- action arg (1 byte), no buffer pos. + "ESC", + -- no action arg, no buffer pos. + "MARK", + -- action arg (1 byte), no buffer pos, terminal action: + "SECTION", + -- no args, no buffer pos, terminal action: + "STOP" +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number (dynamically generated below). +local map_action = {} +-- First action number. Everything below does not need to be escaped. +local actfirst = 256-#action_names + +-- Action list buffer and string (only used to remove dupes). +local actlist = {} +local actstr = "" + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Compute action numbers for action names. +for n,name in ipairs(action_names) do + local num = actfirst + n - 1 + map_action[name] = num +end + +-- Dump action names and numbers. 
+local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + local last = actlist[nn] or 255 + actlist[nn] = nil -- Remove last byte. + if nn == 0 then nn = 1 end + out:write("static const unsigned char ", name, "[", nn, "] = {\n") + local s = " " + for n,b in ipairs(actlist) do + s = s..b.."," + if #s >= 75 then + assert(out:write(s, "\n")) + s = " " + end + end + out:write(s, last, "\n};\n\n") -- Add last byte back. +end + +------------------------------------------------------------------------------ + +-- Add byte to action list. +local function wputxb(n) + assert(n >= 0 and n <= 255 and n % 1 == 0, "byte out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, a, num) + wputxb(assert(map_action[action], "bad action name `"..action.."'")) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Add call to embedded DynASM C code. +local function wcall(func, args) + wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) +end + +-- Delete duplicate action list chunks. A tad slow, but so what. +local function dedupechunk(offset) + local al, as = actlist, actstr + local chunk = char(unpack(al, offset+1, #al)) + local orig = find(as, chunk, 1, true) + if orig then + actargs[1] = orig-1 -- Replace with original offset. + for i=offset+1,#al do al[i] = nil end -- Kill dupe. + else + actstr = as..chunk + end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + local offset = actargs[1] + if #actlist == offset then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + dedupechunk(offset) + wcall("put", actargs) -- Add call to dasm_put(). + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped byte. +local function wputb(n) + if n >= actfirst then waction("ESC") end -- Need to escape byte. + wputxb(n) +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 10 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 246 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=10,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=10,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. 
+local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=10,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = -1 +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n < -256 then werror("too many extern labels") end + next_extern = n - 1 + t[name] = n + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + local t = {} + for name, n in pairs(map_extern) do t[-n] = name end + out:write("Extern labels:\n") + for i=1,-next_extern-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + local t = {} + for name, n in pairs(map_extern) do t[-n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=1,-next_extern-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. +local map_archdef = {} -- Ext. register name -> int. name. +local map_reg_rev = {} -- Int. register name -> ext. name. +local map_reg_num = {} -- Int. register name -> register number. +local map_reg_opsize = {} -- Int. register name -> operand size. +local map_reg_valid_base = {} -- Int. register name -> valid base register? +local map_reg_valid_index = {} -- Int. register name -> valid index register? +local reg_list = {} -- Canonical list of int. register names. + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for _PTx macros). + +local addrsize = "d" -- Size for address operands. !x64 + +-- Helper function to fill register maps. +local function mkrmap(sz, cl, names) + local cname = format("@%s", sz) + reg_list[#reg_list+1] = cname + map_archdef[cl] = cname + map_reg_rev[cname] = cl + map_reg_num[cname] = -1 + map_reg_opsize[cname] = sz + if sz == addrsize then + map_reg_valid_base[cname] = true + map_reg_valid_index[cname] = true + end + for n,name in ipairs(names) do + local iname = format("@%s%x", sz, n-1) + reg_list[#reg_list+1] = iname + map_archdef[name] = iname + map_reg_rev[iname] = name + map_reg_num[iname] = n-1 + map_reg_opsize[iname] = sz + if sz == addrsize then + map_reg_valid_base[iname] = true + map_reg_valid_index[iname] = true + end + end + reg_list[#reg_list+1] = "" +end + +-- Integer registers (dword, word and byte sized). +mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"}) +map_reg_valid_index[map_archdef.esp] = false +mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"}) +mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) +map_archdef["Ra"] = "@"..addrsize + +-- FP registers (internally tword sized, but use "f" as operand size). +mkrmap("f", "Rf", {"st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7"}) + +-- SSE registers (oword sized, but qword and dword accessible). +mkrmap("o", "xmm", {"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7"}) + +-- Operand size prefixes to codes. 
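+-- E.g. the "dword" override maps to code "d" (4 bytes, see map_opsizenum +-- below), and mkrmap() above registered map_archdef.eax = "@d0" with +-- register number 0, while the variable-register class Rd maps to "@d" +-- with the placeholder number -1.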
+local map_opsize = { + byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t", + aword = addrsize, +} + +-- Operand size code to number. +local map_opsizenum = { + b = 1, w = 2, d = 4, q = 8, o = 16, t = 10, +} + +-- Operand size code to name. +local map_opsizename = { + b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword", + f = "fpword", +} + +-- Valid index register scale factors. +local map_xsc = { + ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3, +} + +-- Condition codes. +local map_cc = { + o = 0, no = 1, b = 2, nb = 3, e = 4, ne = 5, be = 6, nbe = 7, + s = 8, ns = 9, p = 10, np = 11, l = 12, nl = 13, le = 14, nle = 15, + c = 2, nae = 2, nc = 3, ae = 3, z = 4, nz = 5, na = 6, a = 7, + pe = 10, po = 11, nge = 12, ge = 13, ng = 14, g = 15, +} + + +-- Reverse defines for registers. +function _M.revdef(s) + return gsub(s, "@%w+", map_reg_rev) +end + +-- Dump register names and numbers +local function dumpregs(out) + out:write("Register names, sizes and internal numbers:\n") + for _,reg in ipairs(reg_list) do + if reg == "" then + out:write("\n") + else + local name = map_reg_rev[reg] + local num = map_reg_num[reg] + local opsize = map_opsizename[map_reg_opsize[reg]] + out:write(format(" %-5s %-8s %s\n", name, opsize, + num < 0 and "(variable)" or num)) + end + end +end + +------------------------------------------------------------------------------ + +-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC). +local function wputlabel(aprefix, imm, num) + if type(imm) == "number" then + if imm < 0 then + waction("EXTERN") + wputxb(aprefix == "IMM_" and 0 or 1) + imm = -imm-1 + else + waction(aprefix.."LG", nil, num); + end + wputxb(imm) + else + waction(aprefix.."PC", imm, num) + end +end + +-- Put signed byte or arg. +local function wputsbarg(n) + if type(n) == "number" then + if n < -128 or n > 127 then + werror("signed immediate byte out of range") + end + if n < 0 then n = n + 256 end + wputb(n) + else waction("IMM_S", n) end +end + +-- Put unsigned byte or arg. +local function wputbarg(n) + if type(n) == "number" then + if n < 0 or n > 255 then + werror("unsigned immediate byte out of range") + end + wputb(n) + else waction("IMM_B", n) end +end + +-- Put unsigned word or arg. +local function wputwarg(n) + if type(n) == "number" then + if n < 0 or n > 65535 then + werror("unsigned immediate word out of range") + end + local r = n%256; n = (n-r)/256; wputb(r); wputb(n); + else waction("IMM_W", n) end +end + +-- Put signed or unsigned dword or arg. +local function wputdarg(n) + local tn = type(n) + if tn == "number" then + if n < 0 then n = n + 4294967296 end + local r = n%256; n = (n-r)/256; wputb(r); + r = n%256; n = (n-r)/256; wputb(r); + r = n%256; n = (n-r)/256; wputb(r); wputb(n); + elseif tn == "table" then + wputlabel("IMM_", n[1], 1) + else + waction("IMM_D", n) + end +end + +-- Put operand-size dependent number or arg (defaults to dword). +local function wputszarg(sz, n) + if not sz or sz == "d" then wputdarg(n) + elseif sz == "w" then wputwarg(n) + elseif sz == "b" then wputbarg(n) + elseif sz == "s" then wputsbarg(n) + else werror("bad operand size") end +end + +-- Put multi-byte opcode with operand-size dependent modifications. 
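+-- E.g. wputop("w", 0x0FAF) emits 66 0F AF (o16 prefix plus both opcode +-- bytes), while wputop("b", 0x89) emits 88: the byte variant is opcode-1, +-- unless the pattern disabled the size mods with "n" (sz passed as nil).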
+local function wputop(sz, op) + local r + if sz == "w" then wputb(102) end + -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] + if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end + if op >= 16777216 then r = op % 16777216 wputb((op-r) / 16777216) op = r end + if op >= 65536 then r = op % 65536 wputb((op-r) / 65536) op = r end + if op >= 256 then r = op % 256 wputb((op-r) / 256) op = r end + if sz == "b" then op = op - 1 end + wputb(op) +end + +-- Put ModRM or SIB formatted byte. +local function wputmodrm(m, s, rm, vs, vrm) + assert(m < 4 and s < 8 and rm < 8, "bad modrm operands") + wputb(64*m + 8*s + rm) +end + +-- Put ModRM/SIB plus optional displacement. +local function wputmrmsib(t, imark, s, vsreg) + local vreg, vxreg + local reg, xreg = t.reg, t.xreg + if reg and reg < 0 then reg = 0; vreg = t.vreg end + if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end + if s < 0 then s = 0 end + + -- Register mode. + if sub(t.mode, 1, 1) == "r" then + wputmodrm(3, s, reg) + if vsreg then waction("VREG", vsreg); wputxb(2) end + if vreg then waction("VREG", vreg); wputxb(0) end + return + end + + local disp = t.disp + local tdisp = type(disp) + -- No base register? + if not reg then + if xreg then + -- Indexed mode with index register only. + -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) + wputmodrm(0, s, 4) + if imark then waction("MARK") end + if vsreg then waction("VREG", vsreg); wputxb(2) end + wputmodrm(t.xsc, xreg, 5) + if vxreg then waction("VREG", vxreg); wputxb(3) end + else + -- Pure displacement. + wputmodrm(0, s, 5) -- [disp] -> (0, s, ebp) + if imark then waction("MARK") end + if vsreg then waction("VREG", vsreg); wputxb(2) end + end + wputdarg(disp) + return + end + + local m + if tdisp == "number" then -- Check displacement size at assembly time. + if disp == 0 and reg ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too) + if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0] + elseif disp >= -128 and disp <= 127 then m = 1 + else m = 2 end + elseif tdisp == "table" then + m = 2 + end + + -- Index register present or esp as base register: need SIB encoding. + if xreg or reg == 4 then + wputmodrm(m or 2, s, 4) -- ModRM. + if m == nil or imark then waction("MARK") end + if vsreg then waction("VREG", vsreg); wputxb(2) end + wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. + if vxreg then waction("VREG", vxreg); wputxb(3) end + if vreg then waction("VREG", vreg); wputxb(1) end + else + wputmodrm(m or 2, s, reg) -- ModRM. + if (imark and (m == 1 or m == 2)) or + (m == nil and (vsreg or vreg)) then waction("MARK") end + if vsreg then waction("VREG", vsreg); wputxb(2) end + if vreg then waction("VREG", vreg); wputxb(1) end + end + + -- Put displacement. + if m == 1 then wputsbarg(disp) + elseif m == 2 then wputdarg(disp) + elseif m == nil then waction("DISP", disp) end +end + +------------------------------------------------------------------------------ + +-- Return human-readable operand mode string. +local function opmodestr(op, args) + local m = {} + for i=1,#args do + local a = args[i] + m[#m+1] = sub(a.mode, 1, 1)..(a.opsize or "?") + end + return op.." "..concat(m, ",") +end + +-- Convert number to valid integer or nil. +local function toint(expr) + local n = tonumber(expr) + if n then + if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then + werror("bad integer number `"..expr.."'") + end + return n + end +end + +-- Parse immediate expression. 
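+-- Examples (foo and idx are hypothetical): immexpr("&foo") returns "iPJ" +-- with "(ptrdiff_t)(foo)"; immexpr("=>idx") returns "iJ" with "idx"; the +-- first ->name reference returns "iJ" with global number 10; ">3" (a +-- forward local label) returns "iJ" with 3+246 = 249; anything else is +-- returned as an "iI" immediate expression.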
+local function immexpr(expr) + -- &expr (pointer) + if sub(expr, 1, 1) == "&" then + return "iPJ", format("(ptrdiff_t)(%s)", sub(expr,2)) + end + + local prefix = sub(expr, 1, 2) + -- =>expr (pc label reference) + if prefix == "=>" then + return "iJ", sub(expr, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "iJ", map_global[sub(expr, 3)] + end + + -- [<>][1-9] (local label reference) + local dir, lnum = match(expr, "^([<>])([1-9])$") + if dir then -- Fwd: 247-255, Bkwd: 1-9. + return "iJ", lnum + (dir == ">" and 246 or 0) + end + + local extname = match(expr, "^extern%s+(%S+)$") + if extname then + return "iJ", map_extern[extname] + end + + -- expr (interpreted as immediate) + return "iI", expr +end + +-- Parse displacement expression: +-num, +-expr, +-opsize*num +local function dispexpr(expr) + local disp = expr == "" and 0 or toint(expr) + if disp then return disp end + local c, dispt = match(expr, "^([+-])%s*(.+)$") + if c == "+" then + expr = dispt + elseif not c then + werror("bad displacement expression `"..expr.."'") + end + local opsize, tailops = match(dispt, "^(%w+)%s*%*%s*(.+)$") + local ops, imm = map_opsize[opsize], toint(tailops) + if ops and imm then + if c == "-" then imm = -imm end + return imm*map_opsizenum[ops] + end + local mode, iexpr = immexpr(dispt) + if mode == "iJ" then + if c == "-" then werror("cannot invert label reference") end + return { iexpr } + end + return expr -- Need to return original signed expression. +end + +-- Parse register or type expression. +local function rtexpr(expr) + if not expr then return end + local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + local rnum = map_reg_num[reg] + if not rnum then + werror("type `"..(tname or expr).."' needs a register override") + end + if not map_reg_valid_base[reg] then + werror("bad base register override `"..(map_reg_rev[reg] or reg).."'") + end + return reg, rnum, tp + end + return expr, map_reg_num[expr] +end + +-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }. +local function parseoperand(param) + local t = {} + + local expr = param + local opsize, tailops = match(param, "^(%w+)%s*(.+)$") + if opsize then + t.opsize = map_opsize[opsize] + if t.opsize then expr = tailops end + end + + local br = match(expr, "^%[%s*(.-)%s*%]$") + repeat + if br then + t.mode = "xm" + + -- [disp] + t.disp = toint(br) + if t.disp then + t.mode = "xmO" + break + end + + -- [reg...] + local tp + local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$") + reg, t.reg, tp = rtexpr(reg) + if not t.reg then + -- [expr] + t.mode = "xmO" + t.disp = dispexpr("+"..br) + break + end + + if t.reg == -1 then + t.vreg, tailr = match(tailr, "^(%b())(.*)$") + if not t.vreg then werror("bad variable register expression") end + end + + -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr] + local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$") + if xsc then + if not map_reg_valid_index[reg] then + werror("bad index register `"..map_reg_rev[reg].."'") + end + t.xsc = map_xsc[xsc] + t.xreg = t.reg + t.vxreg = t.vreg + t.reg = nil + t.vreg = nil + t.disp = dispexpr(tailsc) + break + end + if not map_reg_valid_base[reg] then + werror("bad base register `"..map_reg_rev[reg].."'") + end + + -- [reg] or [reg+-disp] + t.disp = toint(tailr) or (tailr == "" and 0) + if t.disp then break end + + -- [reg+xreg...] 
+ local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$") + xreg, t.xreg, tp = rtexpr(xreg) + if not t.xreg then + -- [reg+-expr] + t.disp = dispexpr(tailr) + break + end + if not map_reg_valid_index[xreg] then + werror("bad index register `"..map_reg_rev[xreg].."'") + end + + if t.xreg == -1 then + t.vxreg, tailx = match(tailx, "^(%b())(.*)$") + if not t.vxreg then werror("bad variable register expression") end + end + + -- [reg+xreg*xsc...] + local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$") + if xsc then + t.xsc = map_xsc[xsc] + tailx = tailsc + end + + -- [...] or [...+-disp] or [...+-expr] + t.disp = dispexpr(tailx) + else + -- imm or opsize*imm + local imm = toint(expr) + if not imm and sub(expr, 1, 1) == "*" and t.opsize then + imm = toint(sub(expr, 2)) + if imm then + imm = imm * map_opsizenum[t.opsize] + t.opsize = nil + end + end + if imm then + if t.opsize then werror("bad operand size override") end + local m = "i" + if imm == 1 then m = m.."1" end + if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end + if imm >= -128 and imm <= 127 then m = m.."S" end + t.imm = imm + t.mode = m + break + end + + local tp + local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$") + reg, t.reg, tp = rtexpr(reg) + if t.reg then + if t.reg == -1 then + t.vreg, tailr = match(tailr, "^(%b())(.*)$") + if not t.vreg then werror("bad variable register expression") end + end + -- reg + if tailr == "" then + if t.opsize then werror("bad operand size override") end + t.opsize = map_reg_opsize[reg] + if t.opsize == "f" then + t.mode = t.reg == 0 and "fF" or "f" + else + if reg == "@w4" then wwarn("bad idea, try again with `esp'") end + t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm") + end + break + end + + -- type[idx], type[idx].field, type->field -> [reg+offset_expr] + if not tp then werror("bad operand `"..param.."'") end + t.mode = "xm" + t.disp = format(tp.ctypefmt, tailr) + else + t.mode, t.imm = immexpr(expr) + if sub(t.mode, -1) == "J" then + if t.opsize and t.opsize ~= addrsize then + werror("bad operand size override") + end + t.opsize = addrsize + end + end + end + until true + return t +end + +------------------------------------------------------------------------------ +-- x86 Template String Description +-- =============================== +-- +-- Each template string is a list of [match:]pattern pairs, +-- separated by "|". The first match wins. No match means a +-- bad or unsupported combination of operand modes or sizes. +-- +-- The match part and the ":" is omitted if the operation has +-- no operands. Otherwise the first N characters are matched +-- against the mode strings of each of the N operands. +-- +-- The mode string for each operand type is (see parseoperand()): +-- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl +-- FP register: "f", +"F" for st0 +-- Index operand: "xm", +"O" for [disp] (pure offset) +-- Immediate: "i", +"S" for signed 8 bit, +"1" for 1, +-- +"I" for arg, +"P" for pointer +-- Any: +"J" for valid jump targets +-- +-- So a match character "m" (mixed) matches both an integer register +-- and an index operand (to be encoded with the ModRM/SIB scheme). +-- But "r" matches only a register and "x" only an index operand +-- (e.g. for FP memory access operations). +-- +-- The operand size match string starts right after the mode match +-- characters and ends before the ":". "dwb" is assumed, if empty. +-- The effective data size of the operation is matched against this list. 
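+-- (For instance, in the entry inc_1 = "rdw:40r|m:FF0m" defined below, the +-- first mode match is "r" with size match "dw": one register operand of +-- dword or word size.)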
+-- +-- If only the regular "b", "w", "d", "q", "t" operand sizes are +-- present, then all operands must be the same size. Unspecified sizes +-- are ignored, but at least one operand must have a size or the pattern +-- won't match (use the "byte", "word", "dword", "qword", "tword" +-- operand size overrides. E.g.: mov dword [eax], 1). +-- +-- If the list has a "1" or "2" prefix, the operand size is taken +-- from the respective operand and any other operand sizes are ignored. +-- If the list contains only ".", all operand sizes are ignored. +-- If the list has a "/" prefix, the concatenated (mixed) operand sizes +-- are compared to the match. +-- +-- E.g. "rrdw" matches either two dword registers or two word +-- registers. "Fx2dq" matches an st0 operand plus an index operand +-- pointing to a dword (float) or qword (double). +-- +-- Every character after the ":" is part of the pattern string: +-- Hex chars are accumulated to form the opcode (left to right). +-- "n" disables the standard opcode mods +-- (otherwise: -1 for "b", o16 prefix for "w") +-- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode. +-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. +-- The spare 3 bits are either filled with the last hex digit or +-- the result from a previous "r"/"R". The opcode is restored. +-- +-- All of the following characters force a flush of the opcode: +-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. +-- "S" stores a signed 8 bit immediate from the last operand. +-- "U" stores an unsigned 8 bit immediate from the last operand. +-- "W" stores an unsigned 16 bit immediate from the last operand. +-- "i" stores an operand sized immediate from the last operand. +-- "I" ditto, but generates an action code to optionally modify +-- the opcode (+2) for a signed 8 bit immediate. +-- "J" generates one of the REL action codes from the last operand. +-- +------------------------------------------------------------------------------ + +-- Template strings for x86 instructions. Ordered by first opcode byte. +-- Unimplemented opcodes (deliberate omissions) are marked with *. +local map_op = { + -- 00-05: add... + -- 06: *push es + -- 07: *pop es + -- 08-0D: or... + -- 0E: *push cs + -- 0F: two byte opcode prefix + -- 10-15: adc... + -- 16: *push ss + -- 17: *pop ss + -- 18-1D: sbb... + -- 1E: *push ds + -- 1F: *pop ds + -- 20-25: and... + es_0 = "26", + -- 27: *daa + -- 28-2D: sub... + cs_0 = "2E", + -- 2F: *das + -- 30-35: xor... + ss_0 = "36", + -- 37: *aaa + -- 38-3D: cmp... + ds_0 = "3E", + -- 3F: *aas + inc_1 = "rdw:40r|m:FF0m", + dec_1 = "rdw:48r|m:FF1m", + push_1 = "rdw:50r|mdw:FF6m|S.:6AS|ib:n6Ai|i.:68i", + pop_1 = "rdw:58r|mdw:8F0m", + -- 60: *pusha, *pushad, *pushaw + -- 61: *popa, *popad, *popaw + -- 62: *bound rdw,x + -- 63: *arpl mw,rw + fs_0 = "64", + gs_0 = "65", + o16_0 = "66", + a16_0 = "67", + -- 68: push idw + -- 69: imul rdw,mdw,idw + -- 6A: push ib + -- 6B: imul rdw,mdw,S + -- 6C: *insb + -- 6D: *insd, *insw + -- 6E: *outsb + -- 6F: *outsd, *outsw + -- 70-7F: jcc lb + -- 80: add... mb,i + -- 81: add... mdw,i + -- 82: *undefined + -- 83: add... 
mdw,S + test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi", + -- 86: xchg rb,mb + -- 87: xchg rdw,mdw + -- 88: mov mb,r + -- 89: mov mdw,r + -- 8A: mov r,mb + -- 8B: mov r,mdw + -- 8C: *mov mdw,seg + lea_2 = "rxd:8DrM", + -- 8E: *mov seg,mdw + -- 8F: pop mdw + nop_0 = "90", + xchg_2 = "Rrdw:90R|rRdw:90r|rm:87rM|mr:87Rm", + cbw_0 = "6698", + cwde_0 = "98", + cwd_0 = "6699", + cdq_0 = "99", + -- 9A: *call iw:idw + wait_0 = "9B", + fwait_0 = "9B", + pushf_0 = "9C", + pushfw_0 = "669C", + pushfd_0 = "9C", + popf_0 = "9D", + popfw_0 = "669D", + popfd_0 = "9D", + sahf_0 = "9E", + lahf_0 = "9F", + mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi", + movsb_0 = "A4", + movsw_0 = "66A5", + movsd_0 = "A5", + cmpsb_0 = "A6", + cmpsw_0 = "66A7", + cmpsd_0 = "A7", + -- A8: test Rb,i + -- A9: test Rdw,i + stosb_0 = "AA", + stosw_0 = "66AB", + stosd_0 = "AB", + lodsb_0 = "AC", + lodsw_0 = "66AD", + lodsd_0 = "AD", + scasb_0 = "AE", + scasw_0 = "66AF", + scasd_0 = "AF", + -- B0-B7: mov rb,i + -- B8-BF: mov rdw,i + -- C0: rol... mb,i + -- C1: rol... mdw,i + ret_1 = "i.:nC2W", + ret_0 = "C3", + -- C4: *les rdw,mq + -- C5: *lds rdw,mq + -- C6: mov mb,i + -- C7: mov mdw,i + -- C8: *enter iw,ib + leave_0 = "C9", + -- CA: *retf iw + -- CB: *retf + int3_0 = "CC", + int_1 = "i.:nCDU", + into_0 = "CE", + -- CF: *iret + -- D0: rol... mb,1 + -- D1: rol... mdw,1 + -- D2: rol... mb,cl + -- D3: rol... mb,cl + -- D4: *aam ib + -- D5: *aad ib + -- D6: *salc + -- D7: *xlat + -- D8-DF: floating point ops + -- E0: *loopne + -- E1: *loope + -- E2: *loop + -- E3: *jcxz, *jecxz + -- E4: *in Rb,ib + -- E5: *in Rdw,ib + -- E6: *out ib,Rb + -- E7: *out ib,Rdw + call_1 = "md:FF2m|J.:E8J", + jmp_1 = "md:FF4m|J.:E9J", -- short: EB + -- EA: *jmp iw:idw + -- EB: jmp ib + -- EC: *in Rb,dx + -- ED: *in Rdw,dx + -- EE: *out dx,Rb + -- EF: *out dx,Rdw + -- F0: *lock + int1_0 = "F1", + repne_0 = "F2", + repnz_0 = "F2", + rep_0 = "F3", + repe_0 = "F3", + repz_0 = "F3", + -- F4: *hlt + cmc_0 = "F5", + -- F6: test... mb,i; div... mb + -- F7: test... mdw,i; div... mdw + clc_0 = "F8", + stc_0 = "F9", + -- FA: *cli + cld_0 = "FC", + std_0 = "FD", + -- FE: inc... mb + -- FF: inc... mdw + + -- misc ops + not_1 = "m:F72m", + neg_1 = "m:F73m", + mul_1 = "m:F74m", + imul_1 = "m:F75m", + div_1 = "m:F76m", + idiv_1 = "m:F77m", + + imul_2 = "rmdw:0FAFrM|rIdw:69rmI|rSdw:6BrmS|ridw:69rmi", + imul_3 = "rmIdw:69rMI|rmSdw:6BrMS|rmidw:69rMi", + + movzx_2 = "rm/db:0FB6rM|rm/wb:0FB6rM|rm/dw:0FB7rM", + movsx_2 = "rm/db:0FBErM|rm/wb:0FBErM|rm/dw:0FBFrM", + + bswap_1 = "rd:0FC8r", + bsf_2 = "rmdw:0FBCrM", + bsr_2 = "rmdw:0FBDrM", + bt_2 = "mrdw:0FA3Rm|midw:0FBA4mU", + btc_2 = "mrdw:0FBBRm|midw:0FBA7mU", + btr_2 = "mrdw:0FB3Rm|midw:0FBA6mU", + bts_2 = "mrdw:0FABRm|midw:0FBA5mU", + + rdtsc_0 = "0F31", -- P1+ + cpuid_0 = "0FA2", -- P1+ + + -- floating point ops + fst_1 = "ff:DDD0r|xd:D92m|xq:DD2m", + fstp_1 = "ff:DDD8r|xd:D93m|xq:DD3m|xt:DB7m", + fld_1 = "ff:D9C0r|xd:D90m|xq:DD0m|xt:DB5m", + + fpop_0 = "DDD8", -- Alias for fstp st0. 
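+ -- (Illustrative reading of the templates below: in "xw:nDF2m" the "n" + -- suppresses the o16 prefix for the word-sized memory operand, DF is the + -- opcode byte and the trailing digit 2 fills the ModRM spare field, /2.)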
+ + fist_1 = "xw:nDF2m|xd:DB2m", + fistp_1 = "xw:nDF3m|xd:DB3m|xq:DF7m", + fild_1 = "xw:nDF0m|xd:DB0m|xq:DF5m", + + fxch_0 = "D9C9", + fxch_1 = "ff:D9C8r", + fxch_2 = "fFf:D9C8r|Fff:D9C8R", + + fucom_1 = "ff:DDE0r", + fucom_2 = "Fff:DDE0R", + fucomp_1 = "ff:DDE8r", + fucomp_2 = "Fff:DDE8R", + fucomi_1 = "ff:DBE8r", -- P6+ + fucomi_2 = "Fff:DBE8R", -- P6+ + fucomip_1 = "ff:DFE8r", -- P6+ + fucomip_2 = "Fff:DFE8R", -- P6+ + fcomi_1 = "ff:DBF0r", -- P6+ + fcomi_2 = "Fff:DBF0R", -- P6+ + fcomip_1 = "ff:DFF0r", -- P6+ + fcomip_2 = "Fff:DFF0R", -- P6+ + fucompp_0 = "DAE9", + fcompp_0 = "DED9", + + fldcw_1 = "xw:nD95m", + fstcw_1 = "xw:n9BD97m", + fnstcw_1 = "xw:nD97m", + fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m", + fnstsw_1 = "Rw:nDFE0|xw:nDD7m", + fclex_0 = "9BDBE2", + fnclex_0 = "DBE2", + + fnop_0 = "D9D0", + -- D9D1-D9DF: unassigned + + fchs_0 = "D9E0", + fabs_0 = "D9E1", + -- D9E2: unassigned + -- D9E3: unassigned + ftst_0 = "D9E4", + fxam_0 = "D9E5", + -- D9E6: unassigned + -- D9E7: unassigned + fld1_0 = "D9E8", + fldl2t_0 = "D9E9", + fldl2e_0 = "D9EA", + fldpi_0 = "D9EB", + fldlg2_0 = "D9EC", + fldln2_0 = "D9ED", + fldz_0 = "D9EE", + -- D9EF: unassigned + + f2xm1_0 = "D9F0", + fyl2x_0 = "D9F1", + fptan_0 = "D9F2", + fpatan_0 = "D9F3", + fxtract_0 = "D9F4", + fprem1_0 = "D9F5", + fdecstp_0 = "D9F6", + fincstp_0 = "D9F7", + fprem_0 = "D9F8", + fyl2xp1_0 = "D9F9", + fsqrt_0 = "D9FA", + fsincos_0 = "D9FB", + frndint_0 = "D9FC", + fscale_0 = "D9FD", + fsin_0 = "D9FE", + fcos_0 = "D9FF", + + -- SSE, SSE2 + andnpd_2 = "rmo:660F55rM", + andnps_2 = "rmo:0F55rM", + andpd_2 = "rmo:660F54rM", + andps_2 = "rmo:0F54rM", + clflush_1 = "x.:0FAE7m", + cmppd_3 = "rmio:660FC2rMU", + cmpps_3 = "rmio:0FC2rMU", + cmpsd_3 = "rmio:F20FC2rMU", + cmpss_3 = "rmio:F30FC2rMU", + comisd_2 = "rmo:660F2FrM", + comiss_2 = "rmo:0F2FrM", + cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:", + cvtdq2ps_2 = "rmo:0F5BrM", + cvtpd2dq_2 = "rmo:F20FE6rM", + cvtpd2ps_2 = "rmo:660F5ArM", + cvtpi2pd_2 = "rx/oq:660F2ArM", + cvtpi2ps_2 = "rx/oq:0F2ArM", + cvtps2dq_2 = "rmo:660F5BrM", + cvtps2pd_2 = "rro:0F5ArM|rx/oq:", + cvtsd2si_2 = "rr/do:F20F2DrM|rx/dq:", + cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:", + cvtsi2sd_2 = "rm/od:F20F2ArM", + cvtsi2ss_2 = "rm/od:F30F2ArM", + cvtss2sd_2 = "rro:F30F5ArM|rx/od:", + cvtss2si_2 = "rr/do:F20F2CrM|rx/dd:", + cvttpd2dq_2 = "rmo:660FE6rM", + cvttps2dq_2 = "rmo:F30F5BrM", + cvttsd2si_2 = "rr/do:F20F2CrM|rx/dq:", + cvttss2si_2 = "rr/do:F30F2CrM|rx/dd:", + ldmxcsr_1 = "xd:0FAE2m", + lfence_0 = "0FAEE8", + maskmovdqu_2 = "rro:660FF7rM", + mfence_0 = "0FAEF0", + movapd_2 = "rmo:660F28rM|mro:660F29Rm", + movaps_2 = "rmo:0F28rM|mro:0F29Rm", + movd_2 = "rm/od:660F6ErM|mr/do:660F7ERm", + movdqa_2 = "rmo:660F6FrM|mro:660F7FRm", + movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm", + movhlps_2 = "rro:0F12rM", + movhpd_2 = "rx/oq:660F16rM|xr/qo:660F17Rm", + movhps_2 = "rx/oq:0F16rM|xr/qo:0F17Rm", + movlhps_2 = "rro:0F16rM", + movlpd_2 = "rx/oq:660F12rM|xr/qo:660F13Rm", + movlps_2 = "rx/oq:0F12rM|xr/qo:0F13Rm", + movmskpd_2 = "rr/do:660F50rM", + movmskps_2 = "rr/do:0F50rM", + movntdq_2 = "xro:660FE7Rm", + movnti_2 = "xrd:0FC3Rm", + movntpd_2 = "xro:660F2BRm", + movntps_2 = "xro:0F2BRm", + movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:660FD6Rm", + movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:F20F11Rm", + movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm", + movupd_2 = "rmo:660F10rM|mro:660F11Rm", + movups_2 = "rmo:0F10rM|mro:0F11Rm", + orpd_2 = "rmo:660F56rM", + orps_2 = "rmo:0F56rM", + packssdw_2 = "rmo:660F6BrM", + packsswb_2 = "rmo:660F63rM", + packuswb_2 = "rmo:660F67rM", 
+ paddb_2 = "rmo:660FFCrM", + paddd_2 = "rmo:660FFErM", + paddq_2 = "rmo:660FD4rM", + paddsb_2 = "rmo:660FECrM", + paddsw_2 = "rmo:660FEDrM", + paddusb_2 = "rmo:660FDCrM", + paddusw_2 = "rmo:660FDDrM", + paddw_2 = "rmo:660FFDrM", + pand_2 = "rmo:660FDBrM", + pandn_2 = "rmo:660FDFrM", + pause_0 = "F390", + pavgb_2 = "rmo:660FE0rM", + pavgw_2 = "rmo:660FE3rM", + pcmpeqb_2 = "rmo:660F74rM", + pcmpeqd_2 = "rmo:660F76rM", + pcmpeqw_2 = "rmo:660F75rM", + pcmpgtb_2 = "rmo:660F64rM", + pcmpgtd_2 = "rmo:660F66rM", + pcmpgtw_2 = "rmo:660F65rM", + pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only. + pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", + pmaddwd_2 = "rmo:660FF5rM", + pmaxsw_2 = "rmo:660FEErM", + pmaxub_2 = "rmo:660FDErM", + pminsw_2 = "rmo:660FEArM", + pminub_2 = "rmo:660FDArM", + pmovmskb_2 = "rr/do:660FD7rM", + pmulhuw_2 = "rmo:660FE4rM", + pmulhw_2 = "rmo:660FE5rM", + pmullw_2 = "rmo:660FD5rM", + pmuludq_2 = "rmo:660FF4rM", + por_2 = "rmo:660FEBrM", + prefetchnta_1 = "xb:n0F180m", + prefetcht0_1 = "xb:n0F181m", + prefetcht1_1 = "xb:n0F182m", + prefetcht2_1 = "xb:n0F183m", + psadbw_2 = "rmo:660FF6rM", + pshufd_3 = "rmio:660F70rMU", + pshufhw_3 = "rmio:F30F70rMU", + pshuflw_3 = "rmio:F20F70rMU", + pslld_2 = "rmo:660FF2rM|rio:660F726mU", + pslldq_2 = "rio:660F737mU", + psllq_2 = "rmo:660FF3rM|rio:660F736mU", + psllw_2 = "rmo:660FF1rM|rio:660F716mU", + psrad_2 = "rmo:660FE2rM|rio:660F724mU", + psraw_2 = "rmo:660FE1rM|rio:660F714mU", + psrld_2 = "rmo:660FD2rM|rio:660F722mU", + psrldq_2 = "rio:660F733mU", + psrlq_2 = "rmo:660FD3rM|rio:660F732mU", + psrlw_2 = "rmo:660FD1rM|rio:660F712mU", + psubb_2 = "rmo:660FF8rM", + psubd_2 = "rmo:660FFArM", + psubq_2 = "rmo:660FFBrM", + psubsb_2 = "rmo:660FE8rM", + psubsw_2 = "rmo:660FE9rM", + psubusb_2 = "rmo:660FD8rM", + psubusw_2 = "rmo:660FD9rM", + psubw_2 = "rmo:660FF9rM", + punpckhbw_2 = "rmo:660F68rM", + punpckhdq_2 = "rmo:660F6ArM", + punpckhqdq_2 = "rmo:660F6DrM", + punpckhwd_2 = "rmo:660F69rM", + punpcklbw_2 = "rmo:660F60rM", + punpckldq_2 = "rmo:660F62rM", + punpcklqdq_2 = "rmo:660F6CrM", + punpcklwd_2 = "rmo:660F61rM", + pxor_2 = "rmo:660FEFrM", + rcpps_2 = "rmo:0F53rM", + rcpss_2 = "rmo:F30F53rM", + rsqrtps_2 = "rmo:0F52rM", + rsqrtss_2 = "rmo:F30F52rM", + sfence_0 = "0FAEF8", + shufpd_3 = "rmio:660FC6rMU", + shufps_3 = "rmio:0FC6rMU", + stmxcsr_1 = "xd:0FAE3m", + ucomisd_2 = "rmo:660F2ErM", + ucomiss_2 = "rmo:0F2ErM", + unpckhpd_2 = "rmo:660F15rM", + unpckhps_2 = "rmo:0F15rM", + unpcklpd_2 = "rmo:660F14rM", + unpcklps_2 = "rmo:0F14rM", + xorpd_2 = "rmo:660F57rM", + xorps_2 = "rmo:0F57rM", + + -- SSE3 ops + fisttp_1 = "xw:nDF1m|xd:DB1m|xq:DD1m", + addsubpd_2 = "rmo:660FD0rM", + addsubps_2 = "rmo:F20FD0rM", + haddpd_2 = "rmo:660F7CrM", + haddps_2 = "rmo:F20F7CrM", + hsubpd_2 = "rmo:660F7DrM", + hsubps_2 = "rmo:F20F7DrM", + lddqu_2 = "rxo:F20FF0rM", + movddup_2 = "rmo:F20F12rM", + movshdup_2 = "rmo:F30F16rM", + movsldup_2 = "rmo:F30F12rM", + + -- SSSE3 ops + pabsb_2 = "rmo:660F381CrM", + pabsd_2 = "rmo:660F381ErM", + pabsw_2 = "rmo:660F381DrM", + palignr_3 = "rmio:660F3A0FrMU", + phaddd_2 = "rmo:660F3802rM", + phaddsw_2 = "rmo:660F3803rM", + phaddw_2 = "rmo:660F3801rM", + phsubd_2 = "rmo:660F3806rM", + phsubsw_2 = "rmo:660F3807rM", + phsubw_2 = "rmo:660F3805rM", + pmaddubsw_2 = "rmo:660F3804rM", + pmulhrsw_2 = "rmo:660F380BrM", + pshufb_2 = "rmo:660F3800rM", + psignb_2 = "rmo:660F3808rM", + psignd_2 = "rmo:660F380ArM", + psignw_2 = "rmo:660F3809rM", + + -- SSE4.1 ops + blendpd_3 = "rmio:660F3A0DrMU", + blendps_3 = 
"rmio:660F3A0CrMU", + blendvpd_3 = "rmRo:660F3815rM", + blendvps_3 = "rmRo:660F3814rM", + dppd_3 = "rmio:660F3A41rMU", + dpps_3 = "rmio:660F3A40rMU", + extractps_3 = "mri/do:660F3A17RmU", + insertps_3 = "rrio:660F3A41rMU|rxi/od:", + movntdqa_2 = "rmo:660F382ArM", + mpsadbw_3 = "rmio:660F3A42rMU", + packusdw_2 = "rmo:660F382BrM", + pblendvb_3 = "rmRo:660F3810rM", + pblendw_3 = "rmio:660F3A0ErMU", + pcmpeqq_2 = "rmo:660F3829rM", + pextrb_3 = "rri/do:660F3A14nRmU|xri/bo:", + pextrd_3 = "mri/do:660F3A16RmU", + -- x64: pextrq + -- pextrw is SSE2, mem operand is SSE4.1 only + phminposuw_2 = "rmo:660F3841rM", + pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:", + pinsrd_3 = "rmi/od:660F3A22rMU", + -- x64: pinsrq + pmaxsb_2 = "rmo:660F383CrM", + pmaxsd_2 = "rmo:660F383DrM", + pmaxud_2 = "rmo:660F383FrM", + pmaxuw_2 = "rmo:660F383ErM", + pminsb_2 = "rmo:660F3838rM", + pminsd_2 = "rmo:660F3839rM", + pminud_2 = "rmo:660F383BrM", + pminuw_2 = "rmo:660F383ArM", + pmovsxbd_2 = "rro:660F3821rM|rx/od:", + pmovsxbq_2 = "rro:660F3822rM|rx/ow:", + pmovsxbw_2 = "rro:660F3820rM|rx/oq:", + pmovsxdq_2 = "rro:660F3825rM|rx/oq:", + pmovsxwd_2 = "rro:660F3823rM|rx/oq:", + pmovsxwq_2 = "rro:660F3824rM|rx/od:", + pmovzxbd_2 = "rro:660F3831rM|rx/od:", + pmovzxbq_2 = "rro:660F3832rM|rx/ow:", + pmovzxbw_2 = "rro:660F3830rM|rx/oq:", + pmovzxdq_2 = "rro:660F3835rM|rx/oq:", + pmovzxwd_2 = "rro:660F3833rM|rx/oq:", + pmovzxwq_2 = "rro:660F3834rM|rx/od:", + pmuldq_2 = "rmo:660F3828rM", + pmulld_2 = "rmo:660F3840rM", + ptest_2 = "rmo:660F3817rM", + roundpd_3 = "rmio:660F3A09rMU", + roundps_3 = "rmio:660F3A08rMU", + roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:", + roundss_3 = "rrio:660F3A0ArMU|rxi/od:", + + -- SSE4.2 ops + crc32_2 = "rmd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0nrM", + pcmpestri_3 = "rmio:660F3A61rMU", + pcmpestrm_3 = "rmio:660F3A60rMU", + pcmpgtq_2 = "rmo:660F3837rM", + pcmpistri_3 = "rmio:660F3A63rMU", + pcmpistrm_3 = "rmio:660F3A62rMU", + popcnt_2 = "rmdw:F30FB8rM", + + -- SSE4a + extrq_2 = "rro:660F79rM", + extrq_3 = "riio:660F780mUU", + insertq_2 = "rro:F20F79rM", + insertq_4 = "rriio:F20F78rMUU", + lzcnt_2 = "rmdw:F30FBDrM", + movntsd_2 = "xr/qo:F20F2BRm", + movntss_2 = "xr/do:F30F2BRm", + -- popcnt is also in SSE4.2 +} + +------------------------------------------------------------------------------ + +-- Arithmetic ops. +for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3, + ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do + local n8 = n * 8 + map_op[name.."_2"] = format( + "mr:%02XRm|rm:%02XrM|mI1dw:81%XmI|mS1dw:83%XmS|Ri1dwb:%02Xri|mi1dwb:81%Xmi", + 1+n8, 3+n8, n, n, 5+n8, n) +end + +-- Shift ops. +for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3, + shl = 4, shr = 5, sar = 7, sal = 4 } do + map_op[name.."_2"] = format("m1:D1%Xm|mC1dwb:D3%Xm|mi:C1%XmU", n, n, n) +end + +-- Conditional ops. +for cc,n in pairs(map_cc) do + map_op["j"..cc.."_1"] = format("J.:0F8%XJ", n) -- short: 7%X + map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n) + map_op["cmov"..cc.."_2"] = format("rmdw:0F4%XrM", n) -- P6+ +end + +-- FP arithmetic ops. 
+for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3, + sub = 4, subr = 5, div = 6, divr = 7 } do + local nc = 192 + n * 8 + local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8)) + local fn = "f"..name + map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:DC%Xm", nc, n, n) + if n == 2 or n == 3 then + map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:DC%XM", nc, n, n) + else + map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:DC%XM", nc, nr, n, n) + map_op[fn.."p_1"] = format("ff:DE%02Xr", nr) + map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr) + end + map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n) +end + +-- FP conditional moves. +for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do + local n4 = n % 4 + local nc = 56000 + n4 * 8 + (n-n4) * 64 + map_op["fcmov"..cc.."_1"] = format("ff:%04Xr", nc) -- P6+ + map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+ +end + +-- SSE FP arithmetic ops. +for name,n in pairs{ sqrt = 1, add = 8, mul = 9, + sub = 12, min = 13, div = 14, max = 15 } do + map_op[name.."ps_2"] = format("rmo:0F5%XrM", n) + map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n) + map_op[name.."pd_2"] = format("rmo:660F5%XrM", n) + map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n) +end + +------------------------------------------------------------------------------ + +-- Process pattern string. +local function dopattern(pat, args, sz, op) + local digit, addin + local opcode = 0 + local szov = sz + local narg = 1 + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 2 positions. !x64 + if secpos+2 > maxsecpos then wflush() end + + -- Process each character. + for c in gmatch(pat.."|", ".") do + if match(c, "%x") then -- Hex digit. + digit = byte(c) - 48 + if digit > 48 then digit = digit - 39 + elseif digit > 16 then digit = digit - 7 end + opcode = opcode*16 + digit + addin = nil + elseif c == "n" then -- Disable operand size mods for opcode. + szov = nil + elseif c == "r" then -- Merge 1st operand regno. into opcode. + addin = args[1]; opcode = opcode + addin.reg + if narg < 2 then narg = 2 end + elseif c == "R" then -- Merge 2nd operand regno. into opcode. + addin = args[2]; opcode = opcode + addin.reg + narg = 3 + elseif c == "m" or c == "M" then -- Encode ModRM/SIB. + local s + if addin then + s = addin.reg + opcode = opcode - s -- Undo regno opcode merge. + else + s = opcode % 16 -- Undo last digit. + opcode = (opcode - s) / 16 + end + wputop(szov, opcode); opcode = nil + local imark = (sub(pat, -1) == "I") -- Force a mark (ugly). + -- Put ModRM/SIB with regno/last digit as spare. + local nn = c == "m" and 1 or 2 + wputmrmsib(args[nn], imark, s, addin and addin.vreg) + if narg <= nn then narg = nn + 1 end + addin = nil + else + if opcode then -- Flush opcode. + if addin and addin.reg == -1 then + wputop(szov, opcode + 1) + waction("VREG", addin.vreg); wputxb(0) + else + wputop(szov, opcode) + end + opcode = nil + end + if c == "|" then break end + if c == "o" then -- Offset (pure 32 bit displacement). + wputdarg(args[1].disp); if narg < 2 then narg = 2 end + elseif c == "O" then + wputdarg(args[2].disp); narg = 3 + else + -- Anything else is an immediate operand. 
+ local a = args[narg] + narg = narg + 1 + local mode, imm = a.mode, a.imm + if mode == "iJ" and not match("iIJ", c) then + werror("bad operand size for label") + end + if c == "S" then + wputsbarg(imm) + elseif c == "U" then + wputbarg(imm) + elseif c == "W" then + wputwarg(imm) + elseif c == "i" or c == "I" then + if mode == "iJ" then + wputlabel("IMM_", imm, 1) + elseif mode == "iI" and c == "I" then + waction(sz == "w" and "IMM_WB" or "IMM_DB", imm) + else + wputszarg(sz, imm) + end + elseif c == "J" then + if mode == "iPJ" then + waction("REL_A", imm) -- !x64 (secpos) + else + wputlabel("REL_", imm, 2) + end + else + werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'") + end + end + end + end +end + +------------------------------------------------------------------------------ + +-- Mapping of operand modes to short names. Suppress output with '#'. +local map_modename = { + r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm", + f = "stx", F = "st0", J = "lbl", ["1"] = "1", + I = "#", S = "#", O = "#", +} + +-- Return a table/string showing all possible operand modes. +local function templatehelp(template, nparams) + if nparams == 0 then return "" end + local t = {} + for tm in gmatch(template, "[^%|]+") do + local s = map_modename[sub(tm, 1, 1)] + s = s..gsub(sub(tm, 2, nparams), ".", function(c) + return ", "..map_modename[c] + end) + if not match(s, "#") then t[#t+1] = s end + end + return t +end + +-- Match operand modes against mode match part of template. +local function matchtm(tm, args) + for i=1,#args do + if not match(args[i].mode, sub(tm, i, i)) then return end + end + return true +end + +-- Handle opcodes defined with template strings. +map_op[".template__"] = function(params, template, nparams) + if not params then return templatehelp(template, nparams) end + local args = {} + + -- Zero-operand opcodes have no match part. + if #params == 0 then + dopattern(template, args, "d", params.op) + return + end + + -- Determine common operand size (coerce undefined size) or flag as mixed. + local sz, szmix + for i,p in ipairs(params) do + args[i] = parseoperand(p) + local nsz = args[i].opsize + if nsz then + if sz and sz ~= nsz then szmix = true else sz = nsz end + end + end + + -- Try all match:pattern pairs (separated by '|'). + local gotmatch, lastpat + for tm in gmatch(template, "[^%|]+") do + -- Split off size match (starts after mode match) and pattern string. + local szm, pat = match(tm, "^(.-):(.*)$", #args+1) + if pat == "" then pat = lastpat else lastpat = pat end + if matchtm(tm, args) then + local prefix = sub(szm, 1, 1) + if prefix == "/" then -- Match both operand sizes. + if args[1].opsize == sub(szm, 2, 2) and + args[2].opsize == sub(szm, 3, 3) then + dopattern(pat, args, sz, params.op) -- Process pattern string. + return + end + else -- Match common operand size. + local szp = sz + if szm == "" then szm = "dwb" end -- Default size match. + if prefix == "1" then szp = args[1].opsize; szmix = nil + elseif prefix == "2" then szp = args[2].opsize; szmix = nil end + if not szmix and (prefix == "." or match(szm, szp or "#")) then + dopattern(pat, args, szp, params.op) -- Process pattern string. + return + end + end + gotmatch = true + end + end + + local msg = "bad operand mode" + if gotmatch then + if szmix then + msg = "mixed operand size" + else + msg = sz and "bad operand size" or "missing operand size" + end + end + + werror(msg.." 
in `"..opmodestr(params.op, args).."'") +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +local function op_data(params) + if not params then return "imm..." end + local sz = sub(params.op, 2, 2) + if sz == "a" then sz = addrsize end + for _,p in ipairs(params) do + local a = parseoperand(p) + if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then + werror("bad mode or size in `"..p.."'") + end + if a.mode == "iJ" then + wputlabel("IMM_", a.imm, 1) + else + wputszarg(sz, a.imm) + end + end +end + +map_op[".byte_*"] = op_data +map_op[".sbyte_*"] = op_data +map_op[".word_*"] = op_data +map_op[".dword_*"] = op_data +map_op[".aword_*"] = op_data + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_2"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end + local a = parseoperand(params[1]) + local mode, imm = a.mode, a.imm + if type(imm) == "number" and (mode == "iJ" or (imm >= 1 and imm <= 9)) then + -- Local label (1: ... 9:) or global label (->global:). + waction("LABEL_LG", nil, 1) + wputxb(imm) + elseif mode == "iJ" then + -- PC label (=>pcexpr:). + waction("LABEL_PC", imm) + else + werror("bad label definition") + end + -- SETLABEL must immediately follow LABEL_LG/LABEL_PC. + local addr = params[2] + if addr then + local a = parseoperand(params[2]) + if a.mode == "iPJ" then + waction("SETLABEL", a.imm) -- !x64 (secpos) + else + werror("bad label assignment") + end + end +end +map_op[".label_1"] = map_op[".label_2"] + +------------------------------------------------------------------------------ + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]] + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", nil, 1) + wputxb(align-1) -- Action byte is 2**n-1. + return + end + end + end + werror("bad alignment") +end + +-- Spacing pseudo-opcode. 
+map_op[".space_2"] = function(params) + if not params then return "num [, filler]" end + waction("SPACE", params[1]) + local fill = params[2] + if fill then + fill = tonumber(fill) + if not fill or fill < 0 or fill > 255 then werror("bad filler") end + end + wputxb(fill or 0) +end +map_op[".space_1"] = map_op[".space_2"] + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). +map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + if reg and not map_reg_valid_base[reg] then + werror("bad base register `"..(map_reg_rev[reg] or reg).."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. + map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg and map_reg_rev[tp.reg] or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION") + wputxb(num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpregs(out) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua new file mode 100644 index 0000000000..20ff9cf5a7 --- /dev/null +++ b/dynasm/dynasm.lua @@ -0,0 +1,1070 @@ +------------------------------------------------------------------------------ +-- DynASM. A dynamic assembler for code generation engines. +-- Originally designed and implemented for LuaJIT. +-- +-- Copyright (C) 2005-2009 Mike Pall. All rights reserved. +-- See below for full copyright notice. 
+------------------------------------------------------------------------------ + +-- Application information. +local _info = { + name = "DynASM", + description = "A dynamic assembler for code generation engines", + version = "1.2.1", + vernum = 10201, + release = "2009-04-16", + author = "Mike Pall", + url = "http://luajit.org/dynasm.html", + license = "MIT", + copyright = [[ +Copyright (C) 2005-2009 Mike Pall. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +[ MIT license: http://www.opensource.org/licenses/mit-license.php ] +]], +} + +-- Cache library functions. +local type, pairs, ipairs = type, pairs, ipairs +local pcall, error, assert = pcall, error, assert +local _s = string +local sub, match, gmatch, gsub = _s.sub, _s.match, _s.gmatch, _s.gsub +local format, rep, upper = _s.format, _s.rep, _s.upper +local _t = table +local insert, remove, concat, sort = _t.insert, _t.remove, _t.concat, _t.sort +local exit = os.exit +local io = io +local stdin, stdout, stderr = io.stdin, io.stdout, io.stderr + +------------------------------------------------------------------------------ + +-- Program options. +local g_opt = {} + +-- Global state for current file. +local g_fname, g_curline, g_indent, g_lineno, g_synclineno, g_arch +local g_errcount = 0 + +-- Write buffer for output file. +local g_wbuffer, g_capbuffer + +------------------------------------------------------------------------------ + +-- Write an output line (or callback function) to the buffer. +local function wline(line, needindent) + local buf = g_capbuffer or g_wbuffer + buf[#buf+1] = needindent and g_indent..line or line + g_synclineno = g_synclineno + 1 +end + +-- Write assembler line as a comment, if requestd. +local function wcomment(aline) + if g_opt.comment then + wline(g_opt.comment..aline..g_opt.endcomment, true) + end +end + +-- Resync CPP line numbers. +local function wsync() + if g_synclineno ~= g_lineno and g_opt.cpp then + wline("# "..g_lineno..' "'..g_fname..'"') + g_synclineno = g_lineno + end +end + +-- Dummy action flush function. Replaced with arch-specific function later. +local function wflush(term) +end + +-- Dump all buffered output lines. +local function wdumplines(out, buf) + for _,line in ipairs(buf) do + if type(line) == "string" then + assert(out:write(line, "\n")) + else + -- Special callback to dynamically insert lines after end of processing. + line(out) + end + end +end + +------------------------------------------------------------------------------ + +-- Emit an error. 
Processing continues with next statement. +local function werror(msg) + error(format("%s:%s: error: %s:\n%s", g_fname, g_lineno, msg, g_curline), 0) +end + +-- Emit a fatal error. Processing stops. +local function wfatal(msg) + g_errcount = "fatal" + werror(msg) +end + +-- Print a warning. Processing continues. +local function wwarn(msg) + stderr:write(format("%s:%s: warning: %s:\n%s\n", + g_fname, g_lineno, msg, g_curline)) +end + +-- Print caught error message. But suppress excessive errors. +local function wprinterr(...) + if type(g_errcount) == "number" then + -- Regular error. + g_errcount = g_errcount + 1 + if g_errcount < 21 then -- Seems to be a reasonable limit. + stderr:write(...) + elseif g_errcount == 21 then + stderr:write(g_fname, + ":*: warning: too many errors (suppressed further messages).\n") + end + else + -- Fatal error. + stderr:write(...) + return true -- Stop processing. + end +end + +------------------------------------------------------------------------------ + +-- Map holding all option handlers. +local opt_map = {} +local opt_current + +-- Print error and exit with error status. +local function opterror(...) + stderr:write("dynasm.lua: ERROR: ", ...) + stderr:write("\n") + exit(1) +end + +-- Get option parameter. +local function optparam(args) + local argn = args.argn + local p = args[argn] + if not p then + opterror("missing parameter for option `", opt_current, "'.") + end + args.argn = argn + 1 + return p +end + +------------------------------------------------------------------------------ + +-- Core pseudo-opcodes. +local map_coreop = {} +-- Dummy opcode map. Replaced by arch-specific map. +local map_op = {} + +-- Forward declarations. +local dostmt +local readfile + +------------------------------------------------------------------------------ + +-- Map for defines (initially empty, chains to arch-specific map). +local map_def = {} + +-- Pseudo-opcode to define a substitution. +map_coreop[".define_2"] = function(params, nparams) + if not params then return nparams == 1 and "name" or "name, subst" end + local name, def = params[1], params[2] or "1" + if not match(name, "^[%a_][%w_]*$") then werror("bad or duplicate define") end + map_def[name] = def +end +map_coreop[".define_1"] = map_coreop[".define_2"] + +-- Define a substitution on the command line. +function opt_map.D(args) + local namesubst = optparam(args) + local name, subst = match(namesubst, "^([%a_][%w_]*)=(.*)$") + if name then + map_def[name] = subst + elseif match(namesubst, "^[%a_][%w_]*$") then + map_def[namesubst] = "1" + else + opterror("bad define") + end +end + +-- Undefine a substitution on the command line. +function opt_map.U(args) + local name = optparam(args) + if match(name, "^[%a_][%w_]*$") then + map_def[name] = nil + else + opterror("bad define") + end +end + +-- Helper for definesubst. +local gotsubst + +local function definesubst_one(word) + local subst = map_def[word] + if subst then gotsubst = word; return subst else return word end +end + +-- Iteratively substitute defines. +local function definesubst(stmt) + -- Limit number of iterations. + for i=1,100 do + gotsubst = false + stmt = gsub(stmt, "#?[%w_]+", definesubst_one) + if not gotsubst then break end + end + if gotsubst then wfatal("recursive define involving `"..gotsubst.."'") end + return stmt +end + +-- Dump all defines. 
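+-- E.g. (illustrative names) after ".define FRAME, 64" a later statement +-- "sub esp, FRAME" is rewritten to "sub esp, 64". Chained defines resolve +-- iteratively (at most 100 rounds); a direct cycle triggers the +-- "recursive define" fatal error above.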
+local function dumpdefines(out, lvl) + local t = {} + for name in pairs(map_def) do + t[#t+1] = name + end + sort(t) + out:write("Defines:\n") + for _,name in ipairs(t) do + local subst = map_def[name] + if g_arch then subst = g_arch.revdef(subst) end + out:write(format(" %-20s %s\n", name, subst)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Support variables for conditional assembly. +local condlevel = 0 +local condstack = {} + +-- Evaluate condition with a Lua expression. Substitutions already performed. +local function cond_eval(cond) + local func, err = loadstring("return "..cond) + if func then + setfenv(func, {}) -- No globals. All unknown identifiers evaluate to nil. + local ok, res = pcall(func) + if ok then + if res == 0 then return false end -- Oh well. + return not not res + end + err = res + end + wfatal("bad condition: "..err) +end + +-- Skip statements until next conditional pseudo-opcode at the same level. +local function stmtskip() + local dostmt_save = dostmt + local lvl = 0 + dostmt = function(stmt) + local op = match(stmt, "^%s*(%S+)") + if op == ".if" then + lvl = lvl + 1 + elseif lvl ~= 0 then + if op == ".endif" then lvl = lvl - 1 end + elseif op == ".elif" or op == ".else" or op == ".endif" then + dostmt = dostmt_save + dostmt(stmt) + end + end +end + +-- Pseudo-opcodes for conditional assembly. +map_coreop[".if_1"] = function(params) + if not params then return "condition" end + local lvl = condlevel + 1 + local res = cond_eval(params[1]) + condlevel = lvl + condstack[lvl] = res + if not res then stmtskip() end +end + +map_coreop[".elif_1"] = function(params) + if not params then return "condition" end + if condlevel == 0 then wfatal(".elif without .if") end + local lvl = condlevel + local res = condstack[lvl] + if res then + if res == "else" then wfatal(".elif after .else") end + else + res = cond_eval(params[1]) + if res then + condstack[lvl] = res + return + end + end + stmtskip() +end + +map_coreop[".else_0"] = function(params) + if condlevel == 0 then wfatal(".else without .if") end + local lvl = condlevel + local res = condstack[lvl] + condstack[lvl] = "else" + if res then + if res == "else" then wfatal(".else after .else") end + stmtskip() + end +end + +map_coreop[".endif_0"] = function(params) + local lvl = condlevel + if lvl == 0 then wfatal(".endif without .if") end + condlevel = lvl - 1 +end + +-- Check for unfinished conditionals. +local function checkconds() + if g_errcount ~= "fatal" and condlevel ~= 0 then + wprinterr(g_fname, ":*: error: unbalanced conditional\n") + end +end + +------------------------------------------------------------------------------ + +-- Search for a file in the given path and open it for reading. +local function pathopen(path, name) + local dirsep = match(package.path, "\\") and "\\" or "/" + for _,p in ipairs(path) do + local fullname = p == "" and name or p..dirsep..name + local fin = io.open(fullname, "r") + if fin then + g_fname = fullname + return fin + end + end +end + +-- Include a file. +map_coreop[".include_1"] = function(params) + if not params then return "filename" end + local name = params[1] + -- Save state. Ugly, I know. but upvalues are fast. + local gf, gl, gcl, gi = g_fname, g_lineno, g_curline, g_indent + -- Read the included file. + local fatal = readfile(pathopen(g_opt.include, name) or + wfatal("include file `"..name.."' not found")) + -- Restore state. 
+ g_synclineno = -1 + g_fname, g_lineno, g_curline, g_indent = gf, gl, gcl, gi + if fatal then wfatal("in include file") end +end + +-- Make .include initially available, too. +map_op[".include_1"] = map_coreop[".include_1"] + +------------------------------------------------------------------------------ + +-- Support variables for macros. +local mac_capture, mac_lineno, mac_name +local mac_active = {} +local mac_list = {} + +-- Pseudo-opcode to define a macro. +map_coreop[".macro_*"] = function(mparams) + if not mparams then return "name [, params...]" end + -- Split off and validate macro name. + local name = remove(mparams, 1) + if not name then werror("missing macro name") end + if not (match(name, "^[%a_][%w_%.]*$") or match(name, "^%.[%w_%.]+$")) then + wfatal("bad macro name `"..name.."'") + end + -- Validate macro parameter names. + local mdup = {} + for _,mp in ipairs(mparams) do + if not match(mp, "^[%a_][%w_]*$") then + wfatal("bad macro parameter name `"..mp.."'") + end + if mdup[mp] then wfatal("duplicate macro parameter name `"..mp.."'") end + mdup[mp] = true + end + -- Check for duplicate or recursive macro definitions. + local opname = name.."_"..#mparams + if map_op[opname] or map_op[name.."_*"] then + wfatal("duplicate macro `"..name.."' ("..#mparams.." parameters)") + end + if mac_capture then wfatal("recursive macro definition") end + + -- Enable statement capture. + local lines = {} + mac_lineno = g_lineno + mac_name = name + mac_capture = function(stmt) -- Statement capture function. + -- Stop macro definition with .endmacro pseudo-opcode. + if not match(stmt, "^%s*.endmacro%s*$") then + lines[#lines+1] = stmt + return + end + mac_capture = nil + mac_lineno = nil + mac_name = nil + mac_list[#mac_list+1] = opname + -- Add macro-op definition. + map_op[opname] = function(params) + if not params then return mparams, lines end + -- Protect against recursive macro invocation. + if mac_active[opname] then wfatal("recursive macro invocation") end + mac_active[opname] = true + -- Setup substitution map. + local subst = {} + for i,mp in ipairs(mparams) do subst[mp] = params[i] end + local mcom + if g_opt.maccomment and g_opt.comment then + mcom = " MACRO "..name.." ("..#mparams..")" + wcomment("{"..mcom) + end + -- Loop through all captured statements + for _,stmt in ipairs(lines) do + -- Substitute macro parameters. + local st = gsub(stmt, "[%w_]+", subst) + st = definesubst(st) + st = gsub(st, "%s*%.%.%s*", "") -- Token paste a..b. + if mcom and sub(st, 1, 1) ~= "|" then wcomment(st) end + -- Emit statement. Use a protected call for better diagnostics. + local ok, err = pcall(dostmt, st) + if not ok then + -- Add the captured statement to the error. + wprinterr(err, "\n", g_indent, "| ", stmt, + "\t[MACRO ", name, " (", #mparams, ")]\n") + end + end + if mcom then wcomment("}"..mcom) end + mac_active[opname] = nil + end + end +end + +-- An .endmacro pseudo-opcode outside of a macro definition is an error. +map_coreop[".endmacro_0"] = function(params) + wfatal(".endmacro without .macro") +end + +-- Dump all macros and their contents (with -PP only). 
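+-- (Hypothetical example of a definition that would be listed here:
+--   |.macro checknz, reg
+--   |  test reg, reg
+--   |  jz >1
+--   |.endmacro
+-- It is stored under the key "checknz_1" and listed as "checknz" with
+-- parameter "reg".)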
+local function dumpmacros(out, lvl)
+  sort(mac_list)
+  out:write("Macros:\n")
+  for _,opname in ipairs(mac_list) do
+    local name = sub(opname, 1, -3)
+    local params, lines = map_op[opname]()
+    out:write(format(" %-20s %s\n", name, concat(params, ", ")))
+    if lvl > 1 then
+      for _,line in ipairs(lines) do
+        out:write(" |", line, "\n")
+      end
+      out:write("\n")
+    end
+  end
+  out:write("\n")
+end
+
+-- Check for unfinished macro definitions.
+local function checkmacros()
+  if mac_capture then
+    wprinterr(g_fname, ":", mac_lineno,
+      ": error: unfinished .macro `", mac_name ,"'\n")
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Support variables for captures.
+local cap_lineno, cap_name
+local cap_buffers = {}
+local cap_used = {}
+
+-- Start a capture.
+map_coreop[".capture_1"] = function(params)
+  if not params then return "name" end
+  wflush()
+  local name = params[1]
+  if not match(name, "^[%a_][%w_]*$") then
+    wfatal("bad capture name `"..name.."'")
+  end
+  if cap_name then
+    wfatal("already capturing to `"..cap_name.."' since line "..cap_lineno)
+  end
+  cap_name = name
+  cap_lineno = g_lineno
+  -- Create or continue a capture buffer and start the output line capture.
+  local buf = cap_buffers[name]
+  if not buf then buf = {}; cap_buffers[name] = buf end
+  g_capbuffer = buf
+  g_synclineno = 0
+end
+
+-- Stop a capture.
+map_coreop[".endcapture_0"] = function(params)
+  wflush()
+  if not cap_name then wfatal(".endcapture without a valid .capture") end
+  cap_name = nil
+  cap_lineno = nil
+  g_capbuffer = nil
+  g_synclineno = 0
+end
+
+-- Dump a capture buffer.
+map_coreop[".dumpcapture_1"] = function(params)
+  if not params then return "name" end
+  wflush()
+  local name = params[1]
+  if not match(name, "^[%a_][%w_]*$") then
+    wfatal("bad capture name `"..name.."'")
+  end
+  cap_used[name] = true
+  wline(function(out)
+    local buf = cap_buffers[name]
+    if buf then wdumplines(out, buf) end
+  end)
+  g_synclineno = 0
+end
+
+-- Dump all captures and their buffers (with -PP only).
+local function dumpcaptures(out, lvl)
+  out:write("Captures:\n")
+  for name,buf in pairs(cap_buffers) do
+    out:write(format(" %-20s %4s)\n", name, "("..#buf))
+    if lvl > 1 then
+      local bar = rep("=", 76)
+      out:write(" ", bar, "\n")
+      for _,line in ipairs(buf) do
+        out:write(" ", line, "\n")
+      end
+      out:write(" ", bar, "\n\n")
+    end
+  end
+  out:write("\n")
+end
+
+-- Check for unfinished or unused captures.
+local function checkcaptures()
+  if cap_name then
+    wprinterr(g_fname, ":", cap_lineno,
+      ": error: unfinished .capture `", cap_name,"'\n")
+    return
+  end
+  for name in pairs(cap_buffers) do
+    if not cap_used[name] then
+      wprinterr(g_fname, ":*: error: missing .dumpcapture ", name ,"\n")
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Section names.
+local map_sections = {}
+
+-- Pseudo-opcode to define code sections.
+-- TODO: Data sections, BSS sections. Needs extra C code and API.
+map_coreop[".section_*"] = function(params)
+  if not params then return "name..."
end + if #map_sections > 0 then werror("duplicate section definition") end + wflush() + for sn,name in ipairs(params) do + local opname = "."..name.."_0" + if not match(name, "^[%a][%w_]*$") or + map_op[opname] or map_op["."..name.."_*"] then + werror("bad section name `"..name.."'") + end + map_sections[#map_sections+1] = name + wline(format("#define DASM_SECTION_%s\t%d", upper(name), sn-1)) + map_op[opname] = function(params) g_arch.section(sn-1) end + end + wline(format("#define DASM_MAXSECTION\t\t%d", #map_sections)) +end + +-- Dump all sections. +local function dumpsections(out, lvl) + out:write("Sections:\n") + for _,name in ipairs(map_sections) do + out:write(format(" %s\n", name)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Load architecture-specific module. +local function loadarch(arch) + if not match(arch, "^[%w_]+$") then return "bad arch name" end + local ok, m_arch = pcall(require, "dasm_"..arch) + if not ok then return "cannot load module: "..m_arch end + g_arch = m_arch + wflush = m_arch.passcb(wline, werror, wfatal, wwarn) + m_arch.setup(arch, g_opt) + map_op, map_def = m_arch.mergemaps(map_coreop, map_def) +end + +-- Dump architecture description. +function opt_map.dumparch(args) + local name = optparam(args) + if not g_arch then + local err = loadarch(name) + if err then opterror(err) end + end + + local t = {} + for name in pairs(map_coreop) do t[#t+1] = name end + for name in pairs(map_op) do t[#t+1] = name end + sort(t) + + local out = stdout + local _arch = g_arch._info + out:write(format("%s version %s, released %s, %s\n", + _info.name, _info.version, _info.release, _info.url)) + g_arch.dumparch(out) + + local pseudo = true + out:write("Pseudo-Opcodes:\n") + for _,sname in ipairs(t) do + local name, nparam = match(sname, "^(.+)_([0-9%*])$") + if name then + if pseudo and sub(name, 1, 1) ~= "." then + out:write("\nOpcodes:\n") + pseudo = false + end + local f = map_op[sname] + local s + if nparam ~= "*" then nparam = nparam + 0 end + if nparam == 0 then + s = "" + elseif type(f) == "string" then + s = map_op[".template__"](nil, f, nparam) + else + s = f(nil, nparam) + end + if type(s) == "table" then + for _,s2 in ipairs(s) do + out:write(format(" %-12s %s\n", name, s2)) + end + else + out:write(format(" %-12s %s\n", name, s)) + end + end + end + out:write("\n") + exit(0) +end + +-- Pseudo-opcode to set the architecture. +-- Only initially available (map_op is replaced when called). +map_op[".arch_1"] = function(params) + if not params then return "name" end + local err = loadarch(params[1]) + if err then wfatal(err) end +end + +-- Dummy .arch pseudo-opcode to improve the error report. +map_coreop[".arch_1"] = function(params) + if not params then return "name" end + wfatal("duplicate .arch statement") +end + +------------------------------------------------------------------------------ + +-- Dummy pseudo-opcode. Don't confuse '.nop' with 'nop'. +map_coreop[".nop_*"] = function(params) + if not params then return "[ignored...]" end +end + +-- Pseudo-opcodes to raise errors. +map_coreop[".error_1"] = function(params) + if not params then return "message" end + werror(params[1]) +end + +map_coreop[".fatal_1"] = function(params) + if not params then return "message" end + wfatal(params[1]) +end + +-- Dump all user defined elements. 
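+-- (For instance, "dynasm -P foo.dasc" lists the names of all defines,
+-- macros, captures and sections; repeating the option ("-PP") also dumps
+-- the captured macro bodies and capture buffers via the lvl > 1 branches
+-- in dumpmacros() and dumpcaptures() above. foo.dasc is a stand-in for
+-- any input file.)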
+local function dumpdef(out) + local lvl = g_opt.dumpdef + if lvl == 0 then return end + dumpsections(out, lvl) + dumpdefines(out, lvl) + if g_arch then g_arch.dumpdef(out, lvl) end + dumpmacros(out, lvl) + dumpcaptures(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Helper for splitstmt. +local splitlvl + +local function splitstmt_one(c) + if c == "(" then + splitlvl = ")"..splitlvl + elseif c == "[" then + splitlvl = "]"..splitlvl + elseif c == ")" or c == "]" then + if sub(splitlvl, 1, 1) ~= c then werror("unbalanced () or []") end + splitlvl = sub(splitlvl, 2) + elseif splitlvl == "" then + return " \0 " + end + return c +end + +-- Split statement into (pseudo-)opcode and params. +local function splitstmt(stmt) + -- Convert label with trailing-colon into .label statement. + local label = match(stmt, "^%s*(.+):%s*$") + if label then return ".label", {label} end + + -- Split at commas and equal signs, but obey parentheses and brackets. + splitlvl = "" + stmt = gsub(stmt, "[,%(%)%[%]]", splitstmt_one) + if splitlvl ~= "" then werror("unbalanced () or []") end + + -- Split off opcode. + local op, other = match(stmt, "^%s*([^%s%z]+)%s*(.*)$") + if not op then werror("bad statement syntax") end + + -- Split parameters. + local params = {} + for p in gmatch(other, "%s*(%Z+)%z?") do + params[#params+1] = gsub(p, "%s+$", "") + end + if #params > 16 then werror("too many parameters") end + + params.op = op + return op, params +end + +-- Process a single statement. +dostmt = function(stmt) + -- Ignore empty statements. + if match(stmt, "^%s*$") then return end + + -- Capture macro defs before substitution. + if mac_capture then return mac_capture(stmt) end + stmt = definesubst(stmt) + + -- Emit C code without parsing the line. + if sub(stmt, 1, 1) == "|" then + local tail = sub(stmt, 2) + wflush() + if sub(tail, 1, 2) == "//" then wcomment(tail) else wline(tail, true) end + return + end + + -- Split into (pseudo-)opcode and params. + local op, params = splitstmt(stmt) + + -- Get opcode handler (matching # of parameters or generic handler). + local f = map_op[op.."_"..#params] or map_op[op.."_*"] + if not f then + if not g_arch then wfatal("first statement must be .arch") end + -- Improve error report. + for i=0,16 do + if map_op[op.."_"..i] then + werror("wrong number of parameters for `"..op.."'") + end + end + werror("unknown statement `"..op.."'") + end + + -- Call opcode handler or special handler for template strings. + if type(f) == "string" then + map_op[".template__"](params, f) + else + f(params) + end +end + +-- Process a single line. +local function doline(line) + if g_opt.flushline then wflush() end + + -- Assembler line? + local indent, aline = match(line, "^(%s*)%|(.*)$") + if not aline then + -- No, plain C code line, need to flush first. + wflush() + wsync() + wline(line, false) + return + end + + g_indent = indent -- Remember current line indentation. + + -- Emit C code (even from macros). Avoids echo and line parsing. + if sub(aline, 1, 1) == "|" then + if not mac_capture then + wsync() + elseif g_opt.comment then + wsync() + wcomment(aline) + end + dostmt(aline) + return + end + + -- Echo assembler line as a comment. + if g_opt.comment then + wsync() + wcomment(aline) + end + + -- Strip assembler comments. + aline = gsub(aline, "//.*$", "") + + -- Split line into statements at semicolons. 
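+  -- (e.g. the assembler line "mov eax, 1; ret" is processed as the two
+  -- statements "mov eax, 1" and " ret")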
+ if match(aline, ";") then + for stmt in gmatch(aline, "[^;]+") do dostmt(stmt) end + else + dostmt(aline) + end +end + +------------------------------------------------------------------------------ + +-- Write DynASM header. +local function dasmhead(out) + out:write(format([[ +/* +** This file has been pre-processed with DynASM. +** %s +** DynASM version %s, DynASM %s version %s +** DO NOT EDIT! The original file is in "%s". +*/ + +#if DASM_VERSION != %d +#error "Version mismatch between DynASM and included encoding engine" +#endif + +]], _info.url, + _info.version, g_arch._info.arch, g_arch._info.version, + g_fname, _info.vernum)) +end + +-- Read input file. +readfile = function(fin) + g_indent = "" + g_lineno = 0 + g_synclineno = -1 + + -- Process all lines. + for line in fin:lines() do + g_lineno = g_lineno + 1 + g_curline = line + local ok, err = pcall(doline, line) + if not ok and wprinterr(err, "\n") then return true end + end + wflush() + + -- Close input file. + assert(fin == stdin or fin:close()) +end + +-- Write output file. +local function writefile(outfile) + local fout + + -- Open output file. + if outfile == nil or outfile == "-" then + fout = stdout + else + fout = assert(io.open(outfile, "w")) + end + + -- Write all buffered lines + wdumplines(fout, g_wbuffer) + + -- Close output file. + assert(fout == stdout or fout:close()) + + -- Optionally dump definitions. + dumpdef(fout == stdout and stderr or stdout) +end + +-- Translate an input file to an output file. +local function translate(infile, outfile) + g_wbuffer = {} + g_indent = "" + g_lineno = 0 + g_synclineno = -1 + + -- Put header. + wline(dasmhead) + + -- Read input file. + local fin + if infile == "-" then + g_fname = "(stdin)" + fin = stdin + else + g_fname = infile + fin = assert(io.open(infile, "r")) + end + readfile(fin) + + -- Check for errors. + if not g_arch then + wprinterr(g_fname, ":*: error: missing .arch directive\n") + end + checkconds() + checkmacros() + checkcaptures() + + if g_errcount ~= 0 then + stderr:write(g_fname, ":*: info: ", g_errcount, " error", + (type(g_errcount) == "number" and g_errcount > 1) and "s" or "", + " in input file -- no output file generated.\n") + dumpdef(stderr) + exit(1) + end + + -- Write output file. + writefile(outfile) +end + +------------------------------------------------------------------------------ + +-- Print help text. +function opt_map.help() + stdout:write("DynASM -- ", _info.description, ".\n") + stdout:write("DynASM ", _info.version, " ", _info.release, " ", _info.url, "\n") + stdout:write[[ + +Usage: dynasm [OPTION]... INFILE.dasc|- + + -h, --help Display this help text. + -V, --version Display version and copyright information. + + -o, --outfile FILE Output file name (default is stdout). + -I, --include DIR Add directory to the include search path. + + -c, --ccomment Use /* */ comments for assembler lines. + -C, --cppcomment Use // comments for assembler lines (default). + -N, --nocomment Suppress assembler lines in output. + -M, --maccomment Show macro expansions as comments (default off). + + -L, --nolineno Suppress CPP line number information in output. + -F, --flushline Flush action list for every line. + + -D NAME[=SUBST] Define a substitution. + -U NAME Undefine a substitution. + + -P, --dumpdef Dump defines, macros, etc. Repeat for more output. + -A, --dumparch ARCH Load architecture ARCH and dump description. +]] + exit(0) +end + +-- Print version information. 
+function opt_map.version() + stdout:write(format("%s version %s, released %s\n%s\n\n%s", + _info.name, _info.version, _info.release, _info.url, _info.copyright)) + exit(0) +end + +-- Misc. options. +function opt_map.outfile(args) g_opt.outfile = optparam(args) end +function opt_map.include(args) insert(g_opt.include, 1, optparam(args)) end +function opt_map.ccomment() g_opt.comment = "/*|"; g_opt.endcomment = " */" end +function opt_map.cppcomment() g_opt.comment = "//|"; g_opt.endcomment = "" end +function opt_map.nocomment() g_opt.comment = false end +function opt_map.maccomment() g_opt.maccomment = true end +function opt_map.nolineno() g_opt.cpp = false end +function opt_map.flushline() g_opt.flushline = true end +function opt_map.dumpdef() g_opt.dumpdef = g_opt.dumpdef + 1 end + +------------------------------------------------------------------------------ + +-- Short aliases for long options. +local opt_alias = { + h = "help", ["?"] = "help", V = "version", + o = "outfile", I = "include", + c = "ccomment", C = "cppcomment", N = "nocomment", M = "maccomment", + L = "nolineno", F = "flushline", + P = "dumpdef", A = "dumparch", +} + +-- Parse single option. +local function parseopt(opt, args) + opt_current = #opt == 1 and "-"..opt or "--"..opt + local f = opt_map[opt] or opt_map[opt_alias[opt]] + if not f then + opterror("unrecognized option `", opt_current, "'. Try `--help'.\n") + end + f(args) +end + +-- Parse arguments. +local function parseargs(args) + -- Default options. + g_opt.comment = "//|" + g_opt.endcomment = "" + g_opt.cpp = true + g_opt.dumpdef = 0 + g_opt.include = { "" } + + -- Process all option arguments. + args.argn = 1 + repeat + local a = args[args.argn] + if not a then break end + local lopt, opt = match(a, "^%-(%-?)(.+)") + if not opt then break end + args.argn = args.argn + 1 + if lopt == "" then + -- Loop through short options. + for o in gmatch(opt, ".") do parseopt(o, args) end + else + -- Long option. + parseopt(opt, args) + end + until false + + -- Check for proper number of arguments. + local nargs = #args - args.argn + 1 + if nargs ~= 1 then + if nargs == 0 then + if g_opt.dumpdef > 0 then return dumpdef(stdout) end + end + opt_map.help() + end + + -- Translate a single input file to a single output file + -- TODO: Handle multiple files? + translate(args[args.argn], g_opt.outfile) +end + +------------------------------------------------------------------------------ + +-- Add the directory dynasm.lua resides in to the Lua module search path. +local arg = arg +if arg and arg[0] then + local prefix = match(arg[0], "^(.*[/\\])") + if prefix then package.path = prefix.."?.lua;"..package.path end +end + +-- Start DynASM. +parseargs{...} + +------------------------------------------------------------------------------ + diff --git a/etc/strict.lua b/etc/strict.lua new file mode 100644 index 0000000000..604619dd2e --- /dev/null +++ b/etc/strict.lua @@ -0,0 +1,41 @@ +-- +-- strict.lua +-- checks uses of undeclared global variables +-- All global variables must be 'declared' through a regular assignment +-- (even assigning nil will do) in a main chunk before being used +-- anywhere or assigned to inside a function. 
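+-- For example, "x = 1" in the main chunk declares x, while calling a
+-- function whose body assigns "y = 1" without a prior declaration raises
+-- "assign to undeclared variable 'y'".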
+--
+
+local getinfo, error, rawset, rawget = debug.getinfo, error, rawset, rawget
+
+local mt = getmetatable(_G)
+if mt == nil then
+  mt = {}
+  setmetatable(_G, mt)
+end
+
+mt.__declared = {}
+
+local function what ()
+  local d = getinfo(3, "S")
+  return d and d.what or "C"
+end
+
+mt.__newindex = function (t, n, v)
+  if not mt.__declared[n] then
+    local w = what()
+    if w ~= "main" and w ~= "C" then
+      error("assign to undeclared variable '"..n.."'", 2)
+    end
+    mt.__declared[n] = true
+  end
+  rawset(t, n, v)
+end
+
+mt.__index = function (t, n)
+  if not mt.__declared[n] and what() ~= "C" then
+    error("variable '"..n.."' is not declared", 2)
+  end
+  return rawget(t, n)
+end
+
diff --git a/lib/.gitignore b/lib/.gitignore
new file mode 100644
index 0000000000..500e2855af
--- /dev/null
+++ b/lib/.gitignore
@@ -0,0 +1 @@
+vmdef.lua
diff --git a/lib/bc.lua b/lib/bc.lua
new file mode 100644
index 0000000000..532f24933a
--- /dev/null
+++ b/lib/bc.lua
@@ -0,0 +1,182 @@
+----------------------------------------------------------------------------
+-- LuaJIT bytecode listing module.
+--
+-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
+-- Released under the MIT/X license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module lists the bytecode of a Lua function. If it's loaded by -jbc
+-- it hooks into the parser and lists all functions of a chunk as they
+-- are parsed.
+--
+-- Example usage:
+--
+--   luajit -jbc -e 'local x=0; for i=1,1e6 do x=x+i end; print(x)'
+--   luajit -jbc=- foo.lua
+--   luajit -jbc=foo.list foo.lua
+--
+-- Default output is to stderr. To redirect the output to a file, pass a
+-- filename as an argument (use '-' for stdout) or set the environment
+-- variable LUAJIT_LISTFILE. The file is overwritten every time the module
+-- is started.
+--
+-- This module can also be used programmatically:
+--
+--   local bc = require("jit.bc")
+--
+--   local function foo() print("hello") end
+--
+--   bc.dump(foo)           --> -- BYTECODE -- [...]
+--   print(bc.line(foo, 2)) --> 0002 KSTR 1 1 ; "hello"
+--
+--   local out = {
+--     -- Do something with each line:
+--     write = function(t, ...) io.write(...) end,
+--     close = function(t) end,
+--     flush = function(t) end,
+--   }
+--   bc.dump(foo, out)
+--
+------------------------------------------------------------------------------
+
+-- Cache some library functions and objects.
+local jit = require("jit")
+assert(jit.version_num == 20000, "LuaJIT core/library version mismatch")
+local jutil = require("jit.util")
+local vmdef = require("jit.vmdef")
+local bit = require("bit")
+local sub, gsub, format = string.sub, string.gsub, string.format
+local byte, band, shr = string.byte, bit.band, bit.rshift
+local funcinfo, funcbc, funck = jutil.funcinfo, jutil.funcbc, jutil.funck
+local funcuvname = jutil.funcuvname
+local bcnames = vmdef.bcnames
+local stdout, stderr = io.stdout, io.stderr
+
+------------------------------------------------------------------------------
+
+local function ctlsub(c)
+  if c == "\n" then return "\\n"
+  elseif c == "\r" then return "\\r"
+  elseif c == "\t" then return "\\t"
+  else return format("\\%03d", byte(c))
+  end
+end
+
+-- Return one bytecode line.
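+-- (The module header above shows the shape of such a line, e.g.
+--   0002 KSTR 1 1 ; "hello"
+-- i.e. the pc, the instruction name from vmdef.bcnames, its operands and
+-- an optional constant or comment.)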
+local function bcline(func, pc, prefix) + local ins, m = funcbc(func, pc) + if not ins then return end + local ma, mb, mc = band(m, 7), band(m, 15*8), band(m, 15*128) + local a = band(shr(ins, 8), 0xff) + local oidx = 6*band(ins, 0xff) + local s = format("%04d %s %-6s %3s ", + pc, prefix or " ", sub(bcnames, oidx+1, oidx+6), ma == 0 and "" or a) + local d = shr(ins, 16) + if mc == 13*128 then -- BCMjump + if ma == 0 then + return format("%s=> %04d\n", sub(s, 1, -3), pc+d-0x7fff) + end + return format("%s=> %04d\n", s, pc+d-0x7fff) + end + if mb ~= 0 then d = band(d, 0xff) end + local kc + if mc == 10*128 then -- BCMstr + kc = funck(func, -d-1) + kc = format(#kc > 40 and '"%.40s"~' or '"%s"', gsub(kc, "%c", ctlsub)) + elseif mc == 9*128 then -- BCMnum + kc = funck(func, d) + elseif mc == 12*128 then -- BCMfunc + local fi = funcinfo(funck(func, -d-1)) + if fi.ffid then + kc = vmdef.ffnames[fi.ffid] + else + kc = fi.loc + end + elseif mc == 5*128 then -- BCMuv + kc = funcuvname(func, d) + end + if ma == 5 then -- BCMuv + local ka = funcuvname(func, a) + if kc then kc = ka.." ; "..kc else kc = ka end + end + if mb ~= 0 then + local b = shr(ins, 24) + if kc then return format("%s%3d %3d ; %s\n", s, b, d, kc) end + return format("%s%3d %3d\n", s, b, d) + end + if kc then return format("%s%3d ; %s\n", s, d, kc) end + if mc == 7*128 and d > 32767 then d = d - 65536 end -- BCMlits + return format("%s%3d\n", s, d) +end + +-- Collect branch targets of a function. +local function bctargets(func) + local target = {} + for pc=1,1000000000 do + local ins, m = funcbc(func, pc) + if not ins then break end + if band(m, 15*128) == 13*128 then target[pc+shr(ins, 16)-0x7fff] = true end + end + return target +end + +-- Dump bytecode instructions of a function. +local function bcdump(func, out) + if not out then out = stdout end + local fi = funcinfo(func) + out:write(format("-- BYTECODE -- %s-%d\n", fi.loc, fi.lastlinedefined)) + local target = bctargets(func) + for pc=1,1000000000 do + local s = bcline(func, pc, target[pc] and "=>") + if not s then break end + out:write(s) + end + out:write("\n") + out:flush() +end + +------------------------------------------------------------------------------ + +-- Active flag and output file handle. +local active, out + +-- List handler. +local function h_list(func) + return bcdump(func, out) +end + +-- Detach list handler. +local function bclistoff() + if active then + active = false + jit.attach(h_list) + if out and out ~= stdout and out ~= stderr then out:close() end + out = nil + end +end + +-- Open the output file and attach list handler. +local function bcliston(outfile) + if active then bclistoff() end + if not outfile then outfile = os.getenv("LUAJIT_LISTFILE") end + if outfile then + out = outfile == "-" and stdout or assert(io.open(outfile, "w")) + else + out = stderr + end + jit.attach(h_list, "bc") + active = true +end + +-- Public module functions. +module(...) + +line = bcline +dump = bcdump +targets = bctargets + +on = bcliston +off = bclistoff +start = bcliston -- For -j command line option. + diff --git a/lib/dis_x64.lua b/lib/dis_x64.lua new file mode 100644 index 0000000000..da3d63f8ba --- /dev/null +++ b/lib/dis_x64.lua @@ -0,0 +1,19 @@ +---------------------------------------------------------------------------- +-- LuaJIT x64 disassembler wrapper module. +-- +-- Copyright (C) 2005-2009 Mike Pall. All rights reserved. +-- Released under the MIT/X license. 
See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This module just exports the 64 bit functions from the combined +-- x86/x64 disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +local require = require + +module(...) + +local dis_x86 = require(_PACKAGE.."dis_x86") + +create = dis_x86.create64 +disass = dis_x86.disass64 + diff --git a/lib/dis_x86.lua b/lib/dis_x86.lua new file mode 100644 index 0000000000..8f127bee92 --- /dev/null +++ b/lib/dis_x86.lua @@ -0,0 +1,824 @@ +---------------------------------------------------------------------------- +-- LuaJIT x86/x64 disassembler module. +-- +-- Copyright (C) 2005-2009 Mike Pall. All rights reserved. +-- Released under the MIT/X license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This is a helper module used by the LuaJIT machine code dumper module. +-- +-- Sending small code snippets to an external disassembler and mixing the +-- output with our own stuff was too fragile. So I had to bite the bullet +-- and write yet another x86 disassembler. Oh well ... +-- +-- The output format is very similar to what ndisasm generates. But it has +-- been developed independently by looking at the opcode tables from the +-- Intel and AMD manuals. The supported instruction set is quite extensive +-- and reflects what a current generation Intel or AMD CPU implements in +-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, +-- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM) +-- instructions. +-- +-- Notes: +-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. +-- * No attempt at optimization has been made -- it's fast enough for my needs. +-- * The public API may change when more architectures are added. +------------------------------------------------------------------------------ + +local type = type +local sub, byte, format = string.sub, string.byte, string.format +local match, gmatch, gsub = string.match, string.gmatch, string.gsub +local lower, rep = string.lower, string.rep + +-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. 
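+-- (Reading guide, inferred from putpat() further below: an entry such as
+-- "addBmr" is the mnemonic "add" plus pattern characters, where B selects
+-- byte operand size, m decodes the ModRM r/m operand and r the ModRM reg
+-- operand. Entries like "opc2*" chain to the other opcode tables.)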
+local map_opc1_32 = { +--0x +[0]="addBmr","addVmr","addBrm","addVrm","addBai","addVai","push es","pop es", +"orBmr","orVmr","orBrm","orVrm","orBai","orVai","push cs","opc2*", +--1x +"adcBmr","adcVmr","adcBrm","adcVrm","adcBai","adcVai","push ss","pop ss", +"sbbBmr","sbbVmr","sbbBrm","sbbVrm","sbbBai","sbbVai","push ds","pop ds", +--2x +"andBmr","andVmr","andBrm","andVrm","andBai","andVai","es:seg","daa", +"subBmr","subVmr","subBrm","subVrm","subBai","subVai","cs:seg","das", +--3x +"xorBmr","xorVmr","xorBrm","xorVrm","xorBai","xorVai","ss:seg","aaa", +"cmpBmr","cmpVmr","cmpBrm","cmpVrm","cmpBai","cmpVai","ds:seg","aas", +--4x +"incVR","incVR","incVR","incVR","incVR","incVR","incVR","incVR", +"decVR","decVR","decVR","decVR","decVR","decVR","decVR","decVR", +--5x +"pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR", +"popUR","popUR","popUR","popUR","popUR","popUR","popUR","popUR", +--6x +"sz*pushaw,pusha","sz*popaw,popa","boundVrm","arplWmr", +"fs:seg","gs:seg","o16:","a16", +"pushUi","imulVrmi","pushBs","imulVrms", +"insb","insVS","outsb","outsVS", +--7x +"joBj","jnoBj","jbBj","jnbBj","jzBj","jnzBj","jbeBj","jaBj", +"jsBj","jnsBj","jpeBj","jpoBj","jlBj","jgeBj","jleBj","jgBj", +--8x +"arith!Bmi","arith!Vmi","arith!Bmi","arith!Vms", +"testBmr","testVmr","xchgBrm","xchgVrm", +"movBmr","movVmr","movBrm","movVrm", +"movVmg","leaVrm","movWgm","popUm", +--9x +"nop*xchgVaR|pause|xchgWaR|repne nop","xchgVaR","xchgVaR","xchgVaR", +"xchgVaR","xchgVaR","xchgVaR","xchgVaR", +"sz*cbw,cwde,cdqe","sz*cwd,cdq,cqo","call farViw","wait", +"sz*pushfw,pushf","sz*popfw,popf","sahf","lahf", +--Ax +"movBao","movVao","movBoa","movVoa", +"movsb","movsVS","cmpsb","cmpsVS", +"testBai","testVai","stosb","stosVS", +"lodsb","lodsVS","scasb","scasVS", +--Bx +"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", +"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", +--Cx +"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi", +"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", +--Dx +"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", +"fp*0","fp*1","fp*2","fp*3","fp*4","fp*5","fp*6","fp*7", +--Ex +"loopneBj","loopeBj","loopBj","sz*jcxzBj,jecxzBj,jrcxzBj", +"inBau","inVau","outBua","outVua", +"callVj","jmpVj","jmp farViw","jmpBj","inBad","inVad","outBda","outVda", +--Fx +"lock:","int1","repne:rep","rep:","hlt","cmc","testb!Bm","testv!Vm", +"clc","stc","cli","sti","cld","std","incb!Bm","incd!Vm", +} +assert(#map_opc1_32 == 255) + +-- Map for 1st opcode byte in 64 bit mode (overrides only). +local map_opc1_64 = setmetatable({ + [0x06]=false, [0x07]=false, [0x0e]=false, + [0x16]=false, [0x17]=false, [0x1e]=false, [0x1f]=false, + [0x27]=false, [0x2f]=false, [0x37]=false, [0x3f]=false, + [0x60]=false, [0x61]=false, [0x62]=false, [0x63]="movsxdVrDmt", [0x67]="a32:", + [0x40]="rex*", [0x41]="rex*b", [0x42]="rex*x", [0x43]="rex*xb", + [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", + [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", + [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", + [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false, + [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, +}, { __index = map_opc1_32 }) + +-- Map for 2nd opcode byte (0F xx). True CISC hell. Hey, I told you. 
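+-- (In the prefix-dependent entries below, "|" separates the variants,
+-- e.g. "movupsXrm|movssXrm|movupdXrm|movsdXrm" decodes as movups with no
+-- prefix, movss with F3, movupd with 66 and movsd with F2.)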
+-- Prefix dependent MMX/SSE opcodes: (none)|rep|o16|repne, -|F3|66|F2 +local map_opc2 = { +--0x +[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", +"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", +--1x +"movupsXrm|movssXrm|movupdXrm|movsdXrm", +"movupsXmr|movssXmr|movupdXmr|movsdXmr", +"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", +"movlpsXmr||movlpdXmr", +"unpcklpsXrm||unpcklpdXrm", +"unpckhpsXrm||unpckhpdXrm", +"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", +"movhpsXmr||movhpdXmr", +"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", +"hintnopVm","hintnopVm","hintnopVm","hintnopVm", +--2x +"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, +"movapsXrm||movapdXrm", +"movapsXmr||movapdXmr", +"cvtpi2psXrMm|cvtsi2ssXrVm|cvtpi2pdXrMm|cvtsi2sdXrVm", +"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", +"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", +"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", +"ucomissXrm||ucomisdXrm", +"comissXrm||comisdXrm", +--3x +"wrmsr","rdtsc","rdmsr","rdpmc","sysenter","sysexit",nil,"getsec", +"opc3*38",nil,"opc3*3a",nil,nil,nil,nil,nil, +--4x +"cmovoVrm","cmovnoVrm","cmovbVrm","cmovnbVrm", +"cmovzVrm","cmovnzVrm","cmovbeVrm","cmovaVrm", +"cmovsVrm","cmovnsVrm","cmovpeVrm","cmovpoVrm", +"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", +--5x +"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", +"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm", +"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm", +"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm", +"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm", +"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm", +"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", +"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm", +"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm", +--6x +"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm", +"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm", +"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm", +"||punpcklqdqXrm","||punpckhqdqXrm", +"movPrVSm","movqMrm|movdquXrm|movdqaXrm", +--7x +"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu", +"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu", +"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|", +"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", +nil,nil, +"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm", +"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", +--8x +"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", +"jsVj","jnsVj","jpeVj","jpoVj","jlVj","jgeVj","jleVj","jgVj", +--9x +"setoBm","setnoBm","setbBm","setnbBm","setzBm","setnzBm","setbeBm","setaBm", +"setsBm","setnsBm","setpeBm","setpoBm","setlBm","setgeBm","setleBm","setgBm", +--Ax +"push fs","pop fs","cpuid","btVmr","shldVmru","shldVmrc",nil,nil, +"push gs","pop gs","rsm","btsVmr","shrdVmru","shrdVmrc","fxsave!Dmp","imulVrm", +--Bx +"cmpxchgBmr","cmpxchgVmr","$lssVrm","btrVmr", +"$lfsVrm","$lgsVrm","movzxVrBmt","movzxVrWmt", +"|popcntVrm","ud2Dp","bt!Vmu","btcVmr", +"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", +--Cx +"xaddBmr","xaddVmr", +"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|", +"pinsrwPrWmu","pextrwDrPmu", +"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp", +"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", +--Dx +"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm", +"paddqPrm","pmullwPrm", 
+"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", +"psubusbPrm","psubuswPrm","pminubPrm","pandPrm", +"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm", +--Ex +"pavgbPrm","psrawPrm","psradPrm","pavgwPrm", +"pmulhuwPrm","pmulhwPrm", +"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", +"psubsbPrm","psubswPrm","pminswPrm","porPrm", +"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm", +--Fx +"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm", +"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$", +"psubbPrm","psubwPrm","psubdPrm","psubqPrm", +"paddbPrm","paddwPrm","padddPrm","ud", +} +assert(map_opc2[255] == "ud") + +-- Map for three-byte opcodes. Can't wait for their next invention. +local map_opc3 = { +["38"] = { -- [66] 0f 38 xx +--0x +[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm", +"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm", +"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm", +nil,nil,nil,nil, +--1x +"||pblendvbXrma",nil,nil,nil, +"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm", +nil,nil,nil,nil, +"pabsbPrm","pabswPrm","pabsdPrm",nil, +--2x +"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", +"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, +"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm", +nil,nil,nil,nil, +--3x +"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", +"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm", +"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm", +"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm", +--4x +"||pmulddXrm","||phminposuwXrm", +--Fx +[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", +}, + +["3a"] = { -- [66] 0f 3a xx +--0x +[0x00]=nil,nil,nil,nil,nil,nil,nil,nil, +"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu", +"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu", +--1x +nil,nil,nil,nil, +"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", +nil,nil,nil,nil,nil,nil,nil,nil, +--2x +"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil, +--4x +[0x40] = "||dppsXrmu", +[0x41] = "||dppdXrmu", +[0x42] = "||mpsadbwXrmu", +--6x +[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", +[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", +}, +} + +-- Map for VMX/SVM opcodes 0F 01 C0-FF (sgdt group with register operands). +local map_opcvm = { +[0xc1]="vmcall",[0xc2]="vmlaunch",[0xc3]="vmresume",[0xc4]="vmxoff", +[0xc8]="monitor",[0xc9]="mwait", +[0xd8]="vmrun",[0xd9]="vmmcall",[0xda]="vmload",[0xdb]="vmsave", +[0xdc]="stgi",[0xdd]="clgi",[0xde]="skinit",[0xdf]="invlpga", +[0xf8]="swapgs",[0xf9]="rdtscp", +} + +-- Map for FP opcodes. And you thought stack machines are simple? +local map_opcfp = { +-- D8-DF 00-BF: opcodes with a memory operand. +-- D8 +[0]="faddFm","fmulFm","fcomFm","fcompFm","fsubFm","fsubrFm","fdivFm","fdivrFm", +"fldFm",nil,"fstFm","fstpFm","fldenvVm","fldcwWm","fnstenvVm","fnstcwWm", +-- DA +"fiaddDm","fimulDm","ficomDm","ficompDm", +"fisubDm","fisubrDm","fidivDm","fidivrDm", +-- DB +"fildDm","fisttpDm","fistDm","fistpDm",nil,"fld twordFmp",nil,"fstp twordFmp", +-- DC +"faddGm","fmulGm","fcomGm","fcompGm","fsubGm","fsubrGm","fdivGm","fdivrGm", +-- DD +"fldGm","fisttpQm","fstGm","fstpGm","frstorDmp",nil,"fnsaveDmp","fnstswWm", +-- DE +"fiaddWm","fimulWm","ficomWm","ficompWm", +"fisubWm","fisubrWm","fidivWm","fidivrWm", +-- DF +"fildWm","fisttpWm","fistWm","fistpWm", +"fbld twordFmp","fildQm","fbstp twordFmp","fistpQm", +-- xx C0-FF: opcodes with a pseudo-register operand. 
+-- D8 +"faddFf","fmulFf","fcomFf","fcompFf","fsubFf","fsubrFf","fdivFf","fdivrFf", +-- D9 +"fldFf","fxchFf",{"fnop"},nil, +{"fchs","fabs",nil,nil,"ftst","fxam"}, +{"fld1","fldl2t","fldl2e","fldpi","fldlg2","fldln2","fldz"}, +{"f2xm1","fyl2x","fptan","fpatan","fxtract","fprem1","fdecstp","fincstp"}, +{"fprem","fyl2xp1","fsqrt","fsincos","frndint","fscale","fsin","fcos"}, +-- DA +"fcmovbFf","fcmoveFf","fcmovbeFf","fcmovuFf",nil,{nil,"fucompp"},nil,nil, +-- DB +"fcmovnbFf","fcmovneFf","fcmovnbeFf","fcmovnuFf", +{nil,nil,"fnclex","fninit"},"fucomiFf","fcomiFf",nil, +-- DC +"fadd toFf","fmul toFf",nil,nil, +"fsub toFf","fsubr toFf","fdivr toFf","fdiv toFf", +-- DD +"ffreeFf",nil,"fstFf","fstpFf","fucomFf","fucompFf",nil,nil, +-- DE +"faddpFf","fmulpFf",nil,{nil,"fcompp"}, +"fsubrpFf","fsubpFf","fdivrpFf","fdivpFf", +-- DF +nil,nil,nil,nil,{"fnstsw ax"},"fucomipFf","fcomipFf",nil, +} +assert(map_opcfp[126] == "fcomipFf") + +-- Map for opcode groups. The subkey is sp from the ModRM byte. +local map_opcgroup = { + arith = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }, + shift = { "rol", "ror", "rcl", "rcr", "shl", "shr", "sal", "sar" }, + testb = { "testBmi", "testBmi", "not", "neg", "mul", "imul", "div", "idiv" }, + testv = { "testVmi", "testVmi", "not", "neg", "mul", "imul", "div", "idiv" }, + incb = { "inc", "dec" }, + incd = { "inc", "dec", "callDmp", "$call farDmp", + "jmpDmp", "$jmp farDmp", "pushUm" }, + sldt = { "sldt", "str", "lldt", "ltr", "verr", "verw" }, + sgdt = { "vm*$sgdt", "vm*$sidt", "$lgdt", "vm*$lidt", + "smsw", nil, "lmsw", "vm*$invlpg" }, + bt = { nil, nil, nil, nil, "bt", "bts", "btr", "btc" }, + cmpxchg = { nil, "sz*,cmpxchg8bQmp,cmpxchg16bXmp", nil, nil, + nil, nil, "vmptrld|vmxon|vmclear", "vmptrst" }, + pshiftw = { nil, nil, "psrlw", nil, "psraw", nil, "psllw" }, + pshiftd = { nil, nil, "psrld", nil, "psrad", nil, "pslld" }, + pshiftq = { nil, nil, "psrlq", nil, nil, nil, "psllq" }, + pshiftdq = { nil, nil, "psrlq", "psrldq", nil, nil, "psllq", "pslldq" }, + fxsave = { "$fxsave", "$fxrstor", "$ldmxcsr", "$stmxcsr", + nil, "lfenceDp$", "mfenceDp$", "sfenceDp$clflush" }, + prefetch = { "prefetch", "prefetchw" }, + prefetcht = { "prefetchnta", "prefetcht0", "prefetcht1", "prefetcht2" }, +} + +------------------------------------------------------------------------------ + +-- Maps for register names. +local map_regs = { + B = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", + "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" }, + B64 = { "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", + "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" }, + W = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", + "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" }, + D = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", + "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }, + Q = { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }, + M = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", + "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! + X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, +} +local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } + +-- Maps for size names. 
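+-- (A size character maps to its operand width in bytes and to the memory
+-- operand prefix, e.g. "X" means 16 bytes, printed as "xword".)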
+local map_sz2n = { + B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, +} +local map_sz2prefix = { + B = "byte", W = "word", D = "dword", + Q = "qword", + M = "qword", X = "xword", + F = "dword", G = "qword", -- No need for sizes/register names for these two. +} + +------------------------------------------------------------------------------ + +-- Output a nicely formatted line with an opcode and operands. +local function putop(ctx, text, operands) + local code, pos, hex = ctx.code, ctx.pos, "" + local hmax = ctx.hexdump + if hmax > 0 then + for i=ctx.start,pos-1 do + hex = hex..format("%02X", byte(code, i, i)) + end + if #hex > hmax then hex = sub(hex, 1, hmax)..". " + else hex = hex..rep(" ", hmax-#hex+2) end + end + if operands then text = text.." "..operands end + if ctx.o16 then text = "o16 "..text; ctx.o16 = false end + if ctx.a32 then text = "a32 "..text; ctx.a32 = false end + if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end + if ctx.rex then + local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. + (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "") + if t ~= "" then text = "rex."..t.." "..text end + ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false + ctx.rex = false + end + if ctx.seg then + local text2, n = gsub(text, "%[", "["..ctx.seg..":") + if n == 0 then text = ctx.seg.." "..text else text = text2 end + ctx.seg = false + end + if ctx.lock then text = "lock "..text; ctx.lock = false end + local imm = ctx.imm + if imm then + local sym = ctx.symtab[imm] + if sym then text = text.."\t->"..sym end + end + ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) + ctx.mrm = false + ctx.start = pos + ctx.imm = nil +end + +-- Clear all prefix flags. +local function clearprefixes(ctx) + ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false + ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false + ctx.rex = false; ctx.a32 = false +end + +-- Fallback for incomplete opcodes at the end. +local function incomplete(ctx) + ctx.pos = ctx.stop+1 + clearprefixes(ctx) + return putop(ctx, "(incomplete)") +end + +-- Fallback for unknown opcodes. +local function unknown(ctx) + clearprefixes(ctx) + return putop(ctx, "(unknown)") +end + +-- Return an immediate of the specified size. +local function getimm(ctx, pos, n) + if pos+n-1 > ctx.stop then return incomplete(ctx) end + local code = ctx.code + if n == 1 then + local b1 = byte(code, pos, pos) + return b1 + elseif n == 2 then + local b1, b2 = byte(code, pos, pos+1) + return b1+b2*256 + else + local b1, b2, b3, b4 = byte(code, pos, pos+3) + local imm = b1+b2*256+b3*65536+b4*16777216 + ctx.imm = imm + return imm + end +end + +-- Process pattern string and generate the operands. 
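+-- (Worked example for orientation: in 32 bit mode the bytes 89 C8 select
+-- the "movVmr" entry; V picks dword size, ModRM C8 is mode 3 with reg 1
+-- and r/m 0, so m yields "eax", r yields "ecx" and the listing reads
+-- "mov eax, ecx".)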
+local function putpat(ctx, name, pat) + local operands, regs, sz, mode, sp, rm, sc, rx, sdisp + local code, pos, stop = ctx.code, ctx.pos, ctx.stop + + -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz + for p in gmatch(pat, ".") do + local x = nil + if p == "V" or p == "U" then + if ctx.rexw then sz = "Q"; ctx.rexw = false + elseif ctx.o16 then sz = "W"; ctx.o16 = false + elseif p == "U" and ctx.x64 then sz = "Q" + else sz = "D" end + regs = map_regs[sz] + elseif p == "T" then + if ctx.rexw then sz = "Q"; ctx.rexw = false else sz = "D" end + regs = map_regs[sz] + elseif p == "B" then + sz = "B" + regs = ctx.rex and map_regs.B64 or map_regs.B + elseif match(p, "[WDQMXFG]") then + sz = p + regs = map_regs[sz] + elseif p == "P" then + sz = ctx.o16 and "X" or "M"; ctx.o16 = false + regs = map_regs[sz] + elseif p == "S" then + name = name..lower(sz) + elseif p == "s" then + local imm = getimm(ctx, pos, 1); if not imm then return end + x = imm <= 127 and format("+0x%02x", imm) + or format("-0x%02x", 256-imm) + pos = pos+1 + elseif p == "u" then + local imm = getimm(ctx, pos, 1); if not imm then return end + x = format("0x%02x", imm) + pos = pos+1 + elseif p == "w" then + local imm = getimm(ctx, pos, 2); if not imm then return end + x = format("0x%x", imm) + pos = pos+2 + elseif p == "o" then -- [offset] + if ctx.x64 then + local imm1 = getimm(ctx, pos, 4); if not imm1 then return end + local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end + x = format("[0x%08x%08x]", imm2, imm1) + pos = pos+8 + else + local imm = getimm(ctx, pos, 4); if not imm then return end + x = format("[0x%08x]", imm) + pos = pos+4 + end + elseif p == "i" or p == "I" then + local n = map_sz2n[sz] + if n == 8 and ctx.x64 and p == "I" then + local imm1 = getimm(ctx, pos, 4); if not imm1 then return end + local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end + x = format("0x%08x%08x", imm2, imm1) + else + if n == 8 then n = 4 end + local imm = getimm(ctx, pos, n); if not imm then return end + if sz == "Q" and (imm < 0 or imm > 0x7fffffff) then + imm = (0xffffffff+1)-imm + x = format(imm > 65535 and "-0x%08x" or "-0x%x", imm) + else + x = format(imm > 65535 and "0x%08x" or "0x%x", imm) + end + end + pos = pos+n + elseif p == "j" then + local n = map_sz2n[sz] + if n == 8 then n = 4 end + local imm = getimm(ctx, pos, n); if not imm then return end + if sz == "B" and imm > 127 then imm = imm-256 + elseif imm > 2147483647 then imm = imm-4294967296 end + pos = pos+n + imm = imm + pos + ctx.addr + if imm > 4294967295 and not ctx.x64 then imm = imm-4294967296 end + ctx.imm = imm + if sz == "W" then + x = format("word 0x%04x", imm%65536) + elseif ctx.x64 then + local lo = imm % 0x1000000 + x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) + else + x = format("0x%08x", imm) + end + elseif p == "R" then + local r = byte(code, pos-1, pos-1)%8 + if ctx.rexb then r = r + 8; ctx.rexb = false end + x = regs[r+1] + elseif p == "a" then x = regs[1] + elseif p == "c" then x = "cl" + elseif p == "d" then x = "dx" + elseif p == "1" then x = "1" + else + if not mode then + mode = ctx.mrm + if not mode then + if pos > stop then return incomplete(ctx) end + mode = byte(code, pos, pos) + pos = pos+1 + end + rm = mode%8; mode = (mode-rm)/8 + sp = mode%8; mode = (mode-sp)/8 + sdisp = "" + if mode < 3 then + if rm == 4 then + if pos > stop then return incomplete(ctx) end + sc = byte(code, pos, pos) + pos = pos+1 + rm = sc%8; sc = (sc-rm)/8 + rx = sc%8; sc = (sc-rx)/8 + if ctx.rexx then rx = rx + 8; ctx.rexx = false end + if rx == 4 
then rx = nil end + end + if mode > 0 or rm == 5 then + local dsz = mode + if dsz ~= 1 then dsz = 4 end + local disp = getimm(ctx, pos, dsz); if not disp then return end + if mode == 0 then rm = nil end + if rm or rx or (not sc and ctx.x64 and not ctx.a32) then + if dsz == 1 and disp > 127 then + sdisp = format("-0x%x", 256-disp) + elseif disp >= 0 and disp <= 0x7fffffff then + sdisp = format("+0x%x", disp) + else + sdisp = format("-0x%x", (0xffffffff+1)-disp) + end + else + sdisp = format(ctx.x64 and not ctx.a32 and + not (disp >= 0 and disp <= 0x7fffffff) + and "0xffffffff%08x" or "0x%08x", disp) + end + pos = pos+dsz + end + end + if rm and ctx.rexb then rm = rm + 8; ctx.rexb = false end + if ctx.rexr then sp = sp + 8; ctx.rexr = false end + end + if p == "m" then + if mode == 3 then x = regs[rm+1] + else + local aregs = ctx.a32 and map_regs.D or ctx.aregs + local srm, srx = "", "" + if rm then srm = aregs[rm+1] + elseif not sc and ctx.x64 and not ctx.a32 then srm = "rip" end + ctx.a32 = false + if rx then + if rm then srm = srm.."+" end + srx = aregs[rx+1] + if sc > 0 then srx = srx.."*"..(2^sc) end + end + x = format("[%s%s%s]", srm, srx, sdisp) + end + if mode < 3 and + (not match(pat, "[aRrgp]") or match(pat, "t")) then -- Yuck. + x = map_sz2prefix[sz].." "..x + end + elseif p == "r" then x = regs[sp+1] + elseif p == "g" then x = map_segregs[sp+1] + elseif p == "p" then -- Suppress prefix. + elseif p == "f" then x = "st"..rm + elseif p == "x" then + if sp == 0 and ctx.lock and not ctx.x64 then + x = "CR8"; ctx.lock = false + else + x = "CR"..sp + end + elseif p == "y" then x = "DR"..sp + elseif p == "z" then x = "TR"..sp + elseif p == "t" then + else + error("bad pattern `"..pat.."'") + end + end + if x then operands = operands and operands..", "..x or x end + end + ctx.pos = pos + return putop(ctx, name, operands) +end + +-- Forward declaration. +local map_act + +-- Fetch and cache MRM byte. +local function getmrm(ctx) + local mrm = ctx.mrm + if not mrm then + local pos = ctx.pos + if pos > ctx.stop then return nil end + mrm = byte(ctx.code, pos, pos) + ctx.pos = pos+1 + ctx.mrm = mrm + end + return mrm +end + +-- Dispatch to handler depending on pattern. +local function dispatch(ctx, opat, patgrp) + if not opat then return unknown(ctx) end + if match(opat, "%|") then -- MMX/SSE variants depending on prefix. + local p + if ctx.rep then + p = ctx.rep=="rep" and "%|([^%|]*)" or "%|[^%|]*%|[^%|]*%|([^%|]*)" + ctx.rep = false + elseif ctx.o16 then p = "%|[^%|]*%|([^%|]*)"; ctx.o16 = false + else p = "^[^%|]*" end + opat = match(opat, p) + if not opat then return unknown(ctx) end +-- ctx.rep = false; ctx.o16 = false + --XXX fails for 66 f2 0f 38 f1 06 crc32 eax,WORD PTR [esi] + --XXX remove in branches? + end + if match(opat, "%$") then -- reg$mem variants. + local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end + opat = match(opat, mrm >= 192 and "^[^%$]*" or "%$(.*)") + if opat == "" then return unknown(ctx) end + end + if opat == "" then return unknown(ctx) end + local name, pat = match(opat, "^([a-z0-9 ]*)(.*)") + if pat == "" and patgrp then pat = patgrp end + return map_act[sub(pat, 1, 1)](ctx, name, pat) +end + +-- Get a pattern from an opcode map and dispatch to handler. +local function dispatchmap(ctx, opcmap) + local pos = ctx.pos + local opat = opcmap[byte(ctx.code, pos, pos)] + pos = pos + 1 + ctx.pos = pos + return dispatch(ctx, opat) +end + +-- Map for action codes. The key is the first char after the name. +map_act = { + -- Simple opcodes without operands. 
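+  -- (Name-only entries such as "clc" or "hlt" end up here.)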
+ [""] = function(ctx, name, pat) + return putop(ctx, name) + end, + + -- Operand size chars fall right through. + B = putpat, W = putpat, D = putpat, Q = putpat, + V = putpat, U = putpat, T = putpat, + M = putpat, X = putpat, P = putpat, + F = putpat, G = putpat, + + -- Collect prefixes. + [":"] = function(ctx, name, pat) + ctx[pat == ":" and name or sub(pat, 2)] = name + if ctx.pos - ctx.start > 5 then return unknown(ctx) end -- Limit #prefixes. + end, + + -- Chain to special handler specified by name. + ["*"] = function(ctx, name, pat) + return map_act[name](ctx, name, sub(pat, 2)) + end, + + -- Use named subtable for opcode group. + ["!"] = function(ctx, name, pat) + local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end + return dispatch(ctx, map_opcgroup[name][((mrm-(mrm%8))/8)%8+1], sub(pat, 2)) + end, + + -- o16,o32[,o64] variants. + sz = function(ctx, name, pat) + if ctx.o16 then ctx.o16 = false + else + pat = match(pat, ",(.*)") + if ctx.rexw then + local p = match(pat, ",(.*)") + if p then pat = p; ctx.rexw = false end + end + end + pat = match(pat, "^[^,]*") + return dispatch(ctx, pat) + end, + + -- Two-byte opcode dispatch. + opc2 = function(ctx, name, pat) + return dispatchmap(ctx, map_opc2) + end, + + -- Three-byte opcode dispatch. + opc3 = function(ctx, name, pat) + return dispatchmap(ctx, map_opc3[pat]) + end, + + -- VMX/SVM dispatch. + vm = function(ctx, name, pat) + return dispatch(ctx, map_opcvm[ctx.mrm]) + end, + + -- Floating point opcode dispatch. + fp = function(ctx, name, pat) + local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end + local rm = mrm%8 + local idx = pat*8 + ((mrm-rm)/8)%8 + if mrm >= 192 then idx = idx + 64 end + local opat = map_opcfp[idx] + if type(opat) == "table" then opat = opat[rm+1] end + return dispatch(ctx, opat) + end, + + -- REX prefix. + rex = function(ctx, name, pat) + if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed. + for p in gmatch(pat, ".") do ctx["rex"..p] = true end + ctx.rex = true + end, + + -- Special case for nop with REX prefix. + nop = function(ctx, name, pat) + return dispatch(ctx, ctx.rex and pat or "nop") + end, +} + +------------------------------------------------------------------------------ + +-- Disassemble a block of code. +local function disass_block(ctx, ofs, len) + if not ofs then ofs = 0 end + local stop = len and ofs+len or #ctx.code + ofs = ofs + 1 + ctx.start = ofs + ctx.pos = ofs + ctx.stop = stop + ctx.imm = nil + ctx.mrm = false + clearprefixes(ctx) + while ctx.pos <= stop do dispatchmap(ctx, ctx.map1) end + if ctx.pos ~= ctx.start then incomplete(ctx) end +end + +-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). +local function create_(code, addr, out) + local ctx = {} + ctx.code = code + ctx.addr = (addr or 0) - 1 + ctx.out = out or io.write + ctx.symtab = {} + ctx.disass = disass_block + ctx.hexdump = 16 + ctx.x64 = false + ctx.map1 = map_opc1_32 + ctx.aregs = map_regs.D + return ctx +end + +local function create64_(code, addr, out) + local ctx = create_(code, addr, out) + ctx.x64 = true + ctx.map1 = map_opc1_64 + ctx.aregs = map_regs.Q + return ctx +end + +-- Simple API: disassemble code (a string) at address and output via out. +local function disass_(code, addr, out) + create_(code, addr, out):disass() +end + +local function disass64_(code, addr, out) + create64_(code, addr, out):disass() +end + + +-- Public module functions. +module(...) 
+ +create = create_ +create64 = create64_ +disass = disass_ +disass64 = disass64_ + diff --git a/lib/dump.lua b/lib/dump.lua new file mode 100644 index 0000000000..9fde87c1e9 --- /dev/null +++ b/lib/dump.lua @@ -0,0 +1,567 @@ +---------------------------------------------------------------------------- +-- LuaJIT compiler dump module. +-- +-- Copyright (C) 2005-2009 Mike Pall. All rights reserved. +-- Released under the MIT/X license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- +-- This module can be used to debug the JIT compiler itself. It dumps the +-- code representations and structures used in various compiler stages. +-- +-- Example usage: +-- +-- luajit -jdump -e "local x=0; for i=1,1e6 do x=x+i end; print(x)" +-- luajit -jdump=im -e "for i=1,1000 do for j=1,1000 do end end" | less -R +-- luajit -jdump=is myapp.lua | less -R +-- luajit -jdump=-b myapp.lua +-- luajit -jdump=+aH,myapp.html myapp.lua +-- luajit -jdump=ixT,myapp.dump myapp.lua +-- +-- The first argument specifies the dump mode. The second argument gives +-- the output file name. Default output is to stdout, unless the environment +-- variable LUAJIT_DUMPFILE is set. The file is overwritten every time the +-- module is started. +-- +-- Different features can be turned on or off with the dump mode. If the +-- mode starts with a '+', the following features are added to the default +-- set of features; a '-' removes them. Otherwise the features are replaced. +-- +-- The following dump features are available (* marks the default): +-- +-- * t Print a line for each started, ended or aborted trace (see also -jv). +-- * b Dump the traced bytecode. +-- * i Dump the IR (intermediate representation). +-- r Augment the IR with register/stack slots. +-- s Dump the snapshot map. +-- * m Dump the generated machine code. +-- x Print each taken trace exit. +-- X Print each taken trace exit and the contents of all registers. +-- +-- The output format can be set with the following characters: +-- +-- T Plain text output. +-- A ANSI-colored text output +-- H Colorized HTML + CSS output. +-- +-- The default output format is plain text. It's set to ANSI-colored text +-- if the COLORTERM variable is set. Note: this is independent of any output +-- redirection, which is actually considered a feature. +-- +-- You probably want to use less -R to enjoy viewing ANSI-colored text from +-- a pipe or a file. Add this to your ~/.bashrc: export LESS="-R" +-- +------------------------------------------------------------------------------ + +-- Cache some library functions and objects. +local jit = require("jit") +assert(jit.version_num == 20000, "LuaJIT core/library version mismatch") +local jutil = require("jit.util") +local vmdef = require("jit.vmdef") +local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc +local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek +local tracemc, traceexitstub = jutil.tracemc, jutil.traceexitstub +local tracesnap = jutil.tracesnap +local bit = require("bit") +local band, shl, shr = bit.band, bit.lshift, bit.rshift +local sub, gsub, format = string.sub, string.gsub, string.format +local byte, char, rep = string.byte, string.char, string.rep +local type, tostring = type, tostring +local stdout, stderr = io.stdout, io.stderr + +-- Load other modules on-demand. +local bcline, discreate + +-- Active flag, output file handle and dump mode. 
+local active, out, dumpmode
+
+------------------------------------------------------------------------------
+
+local symtab = {}
+local nexitsym = 0
+
+-- Fill symbol table with trace exit addresses.
+local function fillsymtab(nexit)
+  local t = symtab
+  if nexit > nexitsym then
+    for i=nexitsym,nexit-1 do t[traceexitstub(i)] = tostring(i) end
+    nexitsym = nexit
+  end
+  return t
+end
+
+local function dumpwrite(s)
+  out:write(s)
+end
+
+-- Disassemble machine code.
+local function dump_mcode(tr)
+  local info = traceinfo(tr)
+  if not info then return end
+  local mcode, addr, loop = tracemc(tr)
+  if not mcode then return end
+  if not discreate then
+    discreate = require("jit.dis_"..jit.arch).create
+  end
+  out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
+  local ctx = discreate(mcode, addr, dumpwrite)
+  ctx.hexdump = 0
+  ctx.symtab = fillsymtab(info.nexit)
+  if loop ~= 0 then
+    symtab[addr+loop] = "LOOP"
+    ctx:disass(0, loop)
+    out:write("->LOOP:\n")
+    ctx:disass(loop, #mcode-loop)
+    symtab[addr+loop] = nil
+  else
+    ctx:disass(0, #mcode)
+  end
+end
+
+------------------------------------------------------------------------------
+
+local irtype_text = {
+  [0] = "nil",
+  "fal",
+  "tru",
+  "lud",
+  "str",
+  "ptr",
+  "thr",
+  "pro",
+  "fun",
+  "t09",
+  "tab",
+  "udt",
+  "num",
+  "int",
+  "i8 ",
+  "u8 ",
+  "i16",
+  "u16",
+}
+
+local colortype_ansi = {
+  [0] = "%s",
+  "%s",
+  "%s",
+  "%s",
+  "\027[32m%s\027[m",
+  "%s",
+  "\027[1m%s\027[m",
+  "%s",
+  "\027[1m%s\027[m",
+  "%s",
+  "\027[31m%s\027[m",
+  "\027[36m%s\027[m",
+  "\027[34m%s\027[m",
+  "\027[35m%s\027[m",
+  "\027[35m%s\027[m",
+  "\027[35m%s\027[m",
+  "\027[35m%s\027[m",
+  "\027[35m%s\027[m",
+}
+
+local function colorize_text(s, t)
+  return s
+end
+
+local function colorize_ansi(s, t)
+  return format(colortype_ansi[t], s)
+end
+
+local irtype_ansi = setmetatable({},
+  { __index = function(tab, t)
+      local s = colorize_ansi(irtype_text[t], t); tab[t] = s; return s; end })
+
+local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", }
+
+local function colorize_html(s, t)
+  s = gsub(s, "[<>&]", html_escape)
+  return format('<span class="irt_%s">%s</span>', irtype_text[t], s)
+end
+
+local irtype_html = setmetatable({},
+  { __index = function(tab, t)
+      local s = colorize_html(irtype_text[t], t); tab[t] = s; return s; end })
+
+local header_html = [[
+<style type="text/css">
+background { background: #ffffff; color: #000000; }
+pre.ljdump {
+font-size: 10pt;
+background: #f0f4ff;
+color: #000000;
+border: 1px solid #bfcfff;
+padding: 0.5em;
+margin-left: 2em;
+margin-right: 2em;
+}
+span.irt_str { color: #00a000; }
+span.irt_thr, span.irt_fun { color: #404040; font-weight: bold; }
+span.irt_tab { color: #c00000; }
+span.irt_udt, span.irt_lud { color: #00c0c0; }
+span.irt_num { color: #4040c0; }
+span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; }
+</style>
+]]
+
+local colorize, irtype
+
+-- Lookup table to convert some literals into names.
+local litname = {
+  ["SLOAD "] = { [0] = "", "I", "R", "RI", "P", "PI", "PR", "PRI", },
+  ["XLOAD "] = { [0] = "", "unaligned", },
+  ["TOINT "] = { [0] = "check", "index", "", },
+  ["FLOAD "] = vmdef.irfield,
+  ["FREF  "] = vmdef.irfield,
+  ["FPMATH"] = vmdef.irfpm,
+}
+
+local function ctlsub(c)
+  if c == "\n" then return "\\n"
+  elseif c == "\r" then return "\\r"
+  elseif c == "\t" then return "\\t"
+  else return format("\\%03d", byte(c))
+  end
+end
+
+local function formatk(tr, idx)
+  local k, t, slot = tracek(tr, idx)
+  local tn = type(k)
+  local s
+  if tn == "number" then
+    if k == 2^52+2^51 then
+      s = "bias"
+    else
+      s = format("%+.14g", k)
+    end
+  elseif tn == "string" then
+    s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
+  elseif tn == "function" then
+    local fi = funcinfo(k)
+    if fi.ffid then
+      s = vmdef.ffnames[fi.ffid]
+    else
+      s = fi.loc
+    end
+  elseif tn == "table" then
+    s = format("{%p}", k)
+  elseif tn == "userdata" then
+    if t == 11 then
+      s = format("userdata:%p", k)
+    else
+      s = format("[%p]", k)
+      if s == "[0x00000000]" then s = "NULL" end
+    end
+  else
+    s = tostring(k) -- For primitives.
+  end
+  s = colorize(format("%-4s", s), t)
+  if slot then
+    s = format("%s @%d", s, slot)
+  end
+  return s
+end
+
+local function printsnap(tr, snap)
+  for i=1,#snap do
+    local ref = snap[i]
+    if not ref then
+      out:write("---- ")
+    elseif ref < 0 then
+      out:write(formatk(tr, ref), " ")
+    else
+      local m, ot, op1, op2 = traceir(tr, ref)
+      local t = band(ot, 15)
+      local sep = " "
+      if t == 8 then
+	local oidx = 6*shr(ot, 8)
+	local op = sub(vmdef.irnames, oidx+1, oidx+6)
+	if op == "FRAME " then
+	  sep = "|"
+	end
+      end
+      out:write(colorize(format("%04d", ref), t), sep)
+    end
+  end
+  out:write("]\n")
+end
+
+-- Dump snapshots (not interleaved with IR).
+local function dump_snap(tr)
+  out:write("---- TRACE ", tr, " snapshots\n")
+  for i=0,1000000000 do
+    local snap = tracesnap(tr, i)
+    if not snap then break end
+    out:write(format("#%-3d %04d [ ", i, snap[0]))
+    printsnap(tr, snap)
+  end
+end
+
+-- NYI: should really get the register map from the disassembler.
+local reg_map = {
+  [0] = "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
+  "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+}
+
+-- Return a register name or stack slot for a rid/sp location.
+local function ridsp_name(ridsp)
+  local rid = band(ridsp, 0xff)
+  if ridsp > 255 then return format("[%x]", shr(ridsp, 8)*4) end
+  if rid < 128 then return reg_map[rid] end
+  return ""
+end
+
+-- Dump IR and interleaved snapshots.
+local function dump_ir(tr, dumpsnap, dumpreg)
+  local info = traceinfo(tr)
+  if not info then return end
+  local nins = info.nins
+  out:write("---- TRACE ", tr, " IR\n")
+  local irnames = vmdef.irnames
+  local snapref = 65536
+  local snap, snapno
+  if dumpsnap then
+    snap = tracesnap(tr, 0)
+    snapref = snap[0]
+    snapno = 0
+  end
+  for ins=1,nins do
+    if ins >= snapref then
+      if dumpreg then
+	out:write(format("....       SNAP #%-3d [ ", snapno))
+      else
+	out:write(format(".... SNAP #%-3d [ ", snapno))
+      end
+      printsnap(tr, snap)
+      snapno = snapno + 1
+      snap = tracesnap(tr, snapno)
+      snapref = snap and snap[0] or 65536
+    end
+    local m, ot, op1, op2, ridsp = traceir(tr, ins)
+    local oidx, t = 6*shr(ot, 8), band(ot, 31)
+    local op = sub(irnames, oidx+1, oidx+6)
+    if op == "LOOP  " then
+      if dumpreg then
+	out:write(format("%04d ------------ LOOP ------------\n", ins))
+      else
+	out:write(format("%04d ------ LOOP ------------\n", ins))
+      end
+    elseif op ~= "NOP   " and (dumpreg or op ~= "RENAME") then
+      if dumpreg then
+	out:write(format("%04d %-5s ", ins, ridsp_name(ridsp)))
+      else
+	out:write(format("%04d ", ins))
+      end
+      out:write(format("%s%s %s %s ",
+		       band(ot, 64) == 0 and " " or ">",
+		       band(ot, 128) == 0 and " " or "+",
+		       irtype[t], op))
+      local m1 = band(m, 3)
+      if m1 ~= 3 then -- op1 != IRMnone
+	if op1 < 0 then
+	  out:write(formatk(tr, op1))
+	else
+	  out:write(format(m1 == 0 and "%04d" or "#%-3d", op1))
+	end
+	local m2 = band(m, 3*4)
+	if m2 ~= 3*4 then -- op2 != IRMnone
+	  if m2 == 1*4 then -- op2 == IRMlit
+	    local litn = litname[op]
+	    if litn and litn[op2] then
+	      out:write(" ", litn[op2])
+	    else
+	      out:write(format(" #%-3d", op2))
+	    end
+	  elseif op2 < 0 then
+	    out:write(" ", formatk(tr, op2))
+	  else
+	    out:write(format(" %04d", op2))
+	  end
+	end
+      end
+      out:write("\n")
+    end
+  end
+  if snap then
+    if dumpreg then
+      out:write(format("....       SNAP #%-3d [ ", snapno))
+    else
+      out:write(format(".... SNAP #%-3d [ ", snapno))
+    end
+    printsnap(tr, snap)
+  end
+end
+
+------------------------------------------------------------------------------
+
+local recprefix = ""
+local recdepth = 0
+
+-- Format trace error message.
+local function fmterr(err, info)
+  if type(err) == "number" then
+    if type(info) == "function" then
+      local fi = funcinfo(info)
+      if fi.ffid then
+	info = vmdef.ffnames[fi.ffid]
+      else
+	info = fi.loc
+      end
+    end
+    err = format(vmdef.traceerr[err], info)
+  end
+  return err
+end
+
+-- Dump trace states.
+local function dump_trace(what, tr, func, pc, otr, oex)
+  if what == "stop" or (what == "abort" and dumpmode.a) then
+    if dumpmode.i then dump_ir(tr, dumpmode.s, dumpmode.r and what == "stop")
+    elseif dumpmode.s then dump_snap(tr) end
+    if dumpmode.m then dump_mcode(tr) end
+  end
+  if what == "start" then
+    if dumpmode.H then out:write('<pre class="ljdump">\n') end
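+    -- One header line per trace event, e.g. "---- TRACE 1 start myapp.lua:1".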
+    out:write("---- TRACE ", tr, " ", what)
+    if otr then out:write(" ", otr, "/", oex) end
+    local fi = funcinfo(func, pc)
+    out:write(" ", fi.loc, "\n")
+    recprefix = ""
+    recdepth = 0
+  elseif what == "stop" or what == "abort" then
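+    -- Aborts report the location and reason; stops report the link target.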
+    out:write("---- TRACE ", tr, " ", what)
+    recprefix = nil
+    if what == "abort" then
+      local fi = funcinfo(func, pc)
+      out:write(" ", fi.loc, " -- ", fmterr(otr, oex), "\n")
+    else
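+      -- A trace linking to itself forms a loop; link 0 exits to the
+      -- interpreter; any other value links to that trace.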
+      local link = traceinfo(tr).link
+      if link == tr then
+	link = "loop"
+      elseif link == 0 then
+	link = "interpreter"
+      end
+      out:write(" -> ", link, "\n")
+    end
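+    -- In HTML mode, close the <pre> block opened when the trace started.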
+    if dumpmode.H then out:write("</pre>
\n\n") else out:write("\n") end + else + out:write("---- TRACE ", what, "\n\n") + end + out:flush() +end + +-- Dump recorded bytecode. +local function dump_record(tr, func, pc, depth, callee) + if depth ~= recdepth then + recdepth = depth + recprefix = rep(" .", depth) + end + local line = bcline(func, pc, recprefix) + if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end + if type(callee) == "function" then + local fi = funcinfo(callee) + if fi.ffid then + out:write(sub(line, 1, -2), " ; ", vmdef.ffnames[fi.ffid], "\n") + else + out:write(sub(line, 1, -2), " ; ", fi.loc, "\n") + end + else + out:write(line) + end + if band(funcbc(func, pc), 0xff) < 16 then -- Write JMP for cond. ORDER BC + out:write(bcline(func, pc+1, recprefix)) + end +end + +------------------------------------------------------------------------------ + +-- Dump taken trace exits. +local function dump_texit(tr, ex, ngpr, nfpr, ...) + out:write("---- TRACE ", tr, " exit ", ex, "\n") + if dumpmode.X then + local regs = {...} + for i=1,ngpr do + out:write(format(" %08x", regs[i])) + if i % 8 == 0 then out:write("\n") end + end + for i=1,nfpr do + out:write(format(" %+17.14g", regs[ngpr+i])) + if i % 4 == 0 then out:write("\n") end + end + end +end + +------------------------------------------------------------------------------ + +-- Detach dump handlers. +local function dumpoff() + if active then + active = false + jit.attach(dump_texit) + jit.attach(dump_record) + jit.attach(dump_trace) + if out and out ~= stdout and out ~= stderr then out:close() end + out = nil + end +end + +-- Open the output file and attach dump handlers. +local function dumpon(opt, outfile) + if active then dumpoff() end + + local colormode = os.getenv("COLORTERM") and "A" or "T" + if opt then + opt = gsub(opt, "[TAH]", function(mode) colormode = mode; return ""; end) + end + + local m = { t=true, b=true, i=true, m=true, } + if opt and opt ~= "" then + local o = sub(opt, 1, 1) + if o ~= "+" and o ~= "-" then m = {} end + for i=1,#opt do m[sub(opt, i, i)] = (o ~= "-") end + end + dumpmode = m + + if m.t or m.b or m.i or m.s or m.m then + jit.attach(dump_trace, "trace") + end + if m.b then + jit.attach(dump_record, "record") + if not bcline then bcline = require("jit.bc").line end + end + if m.x or m.X then + jit.attach(dump_texit, "texit") + end + + if not outfile then outfile = os.getenv("LUAJIT_DUMPFILE") end + if outfile then + out = outfile == "-" and stdout or assert(io.open(outfile, "w")) + else + out = stdout + end + + m[colormode] = true + if colormode == "A" then + colorize = colorize_ansi + irtype = irtype_ansi + elseif colormode == "H" then + colorize = colorize_html + irtype = irtype_html + out:write(header_html) + else + colorize = colorize_text + irtype = irtype_text + end + + active = true +end + +-- Public module functions. +module(...) + +on = dumpon +off = dumpoff +start = dumpon -- For -j command line option. + diff --git a/lib/v.lua b/lib/v.lua new file mode 100644 index 0000000000..39fb8ed543 --- /dev/null +++ b/lib/v.lua @@ -0,0 +1,156 @@ +---------------------------------------------------------------------------- +-- Verbose mode of the LuaJIT compiler. +-- +-- Copyright (C) 2005-2009 Mike Pall. All rights reserved. +-- Released under the MIT/X license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- +-- This module shows verbose information about the progress of the +-- JIT compiler. It prints one line for each generated trace. 
This module
+-- is useful to see which code has been compiled or where the compiler
+-- punts and falls back to the interpreter.
+--
+-- Example usage:
+--
+--   luajit -jv -e "for i=1,1000 do for j=1,1000 do end end"
+--   luajit -jv=myapp.out myapp.lua
+--
+-- Default output is to stderr. To redirect the output to a file, pass a
+-- filename as an argument (use '-' for stdout) or set the environment
+-- variable LUAJIT_VERBOSEFILE. The file is overwritten every time the
+-- module is started.
+--
+-- The output from the first example should look like this:
+--
+-- [TRACE   1 (command line):1]
+-- [TRACE   2 (1/3) (command line):1 -> 1]
+--
+-- The first number in each line is the internal trace number. Next are
+-- the file name ('(command line)') and the line number (':1') where the
+-- trace has started. Side traces also show, in parentheses ('(1/3)'), the
+-- parent trace number and the exit number where they are attached. An
+-- arrow at the end shows where the trace links to ('-> 1'), unless it
+-- loops to itself.
+--
+-- In this case the inner loop gets hot and is traced first, generating
+-- a root trace. Then the last exit from the 1st trace gets hot, too,
+-- and triggers generation of the 2nd trace. The side trace follows the
+-- path along the outer loop and *around* the inner loop, back to its
+-- start, and then links to the 1st trace. Yes, this may seem unusual,
+-- if you know how traditional compilers work. Trace compilers are full
+-- of surprises like this -- have fun! :-)
+--
+-- Aborted traces are shown like this:
+--
+-- [TRACE --- foo.lua:44 -- leaving loop in root trace at foo.lua:50]
+--
+-- Don't worry -- trace aborts are quite common, even in programs which
+-- can be fully compiled. The compiler may retry several times until it
+-- finds a suitable trace.
+--
+-- Of course this doesn't work for features that are not yet implemented
+-- (NYI error messages). The VM simply falls back to the interpreter. This
+-- may not matter at all if the particular trace is not very high up in
+-- the CPU usage profile. Oh, and the interpreter is quite fast, too.
+--
+-- Also check out the -jdump module, which prints all the gory details.
+--
+------------------------------------------------------------------------------
+
+-- Cache some library functions and objects.
+local jit = require("jit")
+assert(jit.version_num == 20000, "LuaJIT core/library version mismatch")
+local jutil = require("jit.util")
+local vmdef = require("jit.vmdef")
+local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
+local type, format = type, string.format
+local stdout, stderr = io.stdout, io.stderr
+
+-- Active flag and output file handle.
+local active, out
+
+------------------------------------------------------------------------------
+
+local startloc, startex
+
+-- Format trace error message.
+local function fmterr(err, info)
+  if type(err) == "number" then
+    if type(info) == "function" then
+      local fi = funcinfo(info)
+      if fi.ffid then
+	info = vmdef.ffnames[fi.ffid]
+      else
+	info = fi.loc
+      end
+    end
+    err = format(vmdef.traceerr[err], info)
+  end
+  return err
+end
+
+-- Dump trace states.
+local function dump_trace(what, tr, func, pc, otr, oex)
+  if what == "start" then
+    startloc = funcinfo(func, pc).loc
+    startex = otr and "("..otr.."/"..oex..") " or ""
+  else
+    if what == "abort" then
+      local loc = funcinfo(func, pc).loc
+      if loc ~= startloc then
+	out:write(format("[TRACE --- %s%s -- %s at %s]\n",
+			 startex, startloc, fmterr(otr, oex), loc))
+      else
+	out:write(format("[TRACE --- %s%s -- %s]\n",
+			 startex, startloc, fmterr(otr, oex)))
+      end
+    elseif what == "stop" then
+      local link = traceinfo(tr).link
+      if link == 0 then
+	out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
+			 tr, startex, startloc))
+      elseif link == tr then
+	out:write(format("[TRACE %3s %s%s]\n", tr, startex, startloc))
+      else
+	out:write(format("[TRACE %3s %s%s -> %d]\n",
+			 tr, startex, startloc, link))
+      end
+    else
+      out:write(format("[TRACE %s]\n", what))
+    end
+    out:flush()
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Detach dump handlers.
+local function dumpoff()
+  if active then
+    active = false
+    jit.attach(dump_trace)
+    if out and out ~= stdout and out ~= stderr then out:close() end
+    out = nil
+  end
+end
+
+-- Open the output file and attach dump handlers.
+local function dumpon(outfile)
+  if active then dumpoff() end
+  if not outfile then outfile = os.getenv("LUAJIT_VERBOSEFILE") end
+  if outfile then
+    out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
+  else
+    out = stderr
+  end
+  jit.attach(dump_trace, "trace")
+  active = true
+end
+
+-- Public module functions.
+module(...)
+
+on = dumpon
+off = dumpoff
+start = dumpon -- For -j command line option.
+
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 0000000000..e9f998cef8
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,8 @@
+luajit
+buildvm
+buildvm_*.h
+lj_ffdef.h
+lj_libdef.h
+lj_recdef.h
+lj_folddef.h
+lj_vm.s
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000000..bb1839d1da
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,326 @@
+##############################################################################
+# LuaJIT Makefile. Requires GNU Make.
+#
+# Suitable for POSIX platforms (Linux, *BSD, OSX etc.).
+# Also works with MinGW and Cygwin on Windows.
+# Please check msvcbuild.bat for building with MSVC on Windows.
+#
+# Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+##############################################################################
+
+##############################################################################
+# Compiler options: change them as needed. This mainly affects the speed of
+# the JIT compiler itself, not the speed of the JIT compiled code.
+# Turn any of the optional settings on by removing the '#' in front of them.
+#
+# Note: LuaJIT can only be compiled for x86, and not for x64 (yet)!
+# In the meantime, the x86 binary runs fine under an x64 OS.
+#
+# It's recommended to compile at least for i686. By default the assembler part
+# of the interpreter makes use of CMOV/FCOMI*/FUCOMI* instructions, anyway.
+CC= gcc -m32 -march=i686
+# Use this for GCC 4.2 or higher if you don't intend to distribute the
+# binaries to a different machine:
+#CC= gcc -m32 -march=native
+#
+# Since the assembler part does NOT maintain a frame pointer, it's pointless
+# to slow down the C part by not omitting it. Debugging and tracebacks are
+# not affected -- the assembler part has frame unwind information and GCC
+# emits it with -g (see CCDEBUG below).
+CCOPT= -O2 -fomit-frame-pointer +# Use this if you want to generate a smaller binary (but it's slower): +#CCOPT= -Os -fomit-frame-pointer +# Note: it's no longer recommended to use -O3 with GCC 4.x. +# The I-Cache bloat usually outweighs the benefits from aggressive inlining. +# +CCDEBUG= +# Uncomment the next line to generate debug information: +#CCDEBUG= -g +# +CCWARN= -Wall +# Uncomment the next line to enable more warnings: +#CCWARN+= -Wextra -Wdeclaration-after-statement -Wredundant-decls -Wshadow -Wpointer-arith +# +############################################################################## + +############################################################################## +# Compile time definitions: change them as needed, but make sure you force +# a full recompile with "make clean", followed by "make". +# Note that most of these are NOT suitable for benchmarking or release mode! +XCFLAGS= +# +# Disable the use of CMOV and FCOMI*/FUCOMI* instructions in the interpreter. +# This is only necessary if you intend to run the code on REALLY ANCIENT CPUs +# (before Pentium Pro, or on the VIA C3). This generally slows down the +# interpreter. Don't bother if your OS wouldn't run on them, anyway. +#XCFLAGS+= -DLUAJIT_CPU_NOCMOV +# +# Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter: +#XCFLAGS+= -DLUAJIT_DISABLE_JIT +# +# Use the system provided memory allocator (realloc) instead of the +# bundled memory allocator. This is slower, but sometimes helpful for +# debugging. It's mandatory for Valgrind's memcheck tool, too. +#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC +# +# This define is required to run LuaJIT under Valgrind. The Valgrind +# header files must be installed. You should enable debug information, too. +#XCFLAGS+= -DLUAJIT_USE_VALGRIND +# +# This is the client for the GDB JIT API. GDB 7.0 or higher is required +# to make use of it. See lj_gdbjit.c for details. Enabling this causes +# a non-negligible overhead, even when not running under GDB. +#XCFLAGS+= -DLUAJIT_USE_GDBJIT +# +# Turn on assertions for the Lua/C API to debug problems with lua_* calls. +# This is rather slow -- use only while developing C libraries/embeddings. +#XCFLAGS+= -DLUA_USE_APICHECK +# +# Turn on assertions for the whole LuaJIT VM. This significantly slows down +# everything. Use only if you suspect a problem with LuaJIT itself. +#XCFLAGS+= -DLUA_USE_ASSERT +# +############################################################################## +# You probably don't need to change anything below this line. 
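+#
+# Example: the options above can also be overridden from the make command
+# line. E.g. a debug build with VM assertions (force a full recompile first):
+#
+#   make clean
+#   make CCDEBUG=-g XCFLAGS=-DLUA_USE_ASSERT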
+############################################################################## + +CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(CFLAGS) $(XCFLAGS) +LDOPTIONS= $(CCDEBUG) $(LDFLAGS) + +HOST_CC= $(CC) +HOST_RM= rm -f +HOST_XCFLAGS= +HOST_XLDFLAGS= +HOST_XLIBS= + +TARGET_CC= $(CC) +TARGET_STRIP= strip +TARGET_XCFLAGS= -D_FILE_OFFSET_BITS=64 +TARGET_XLDFLAGS= +TARGET_XSHLDFLAGS= -shared +TARGET_XLIBS= +TARGET_ARCH= $(patsubst %,-DLUAJIT_TARGET=LUAJIT_ARCH_%,$(TARGET)) +TARGET_DISABLE= -U_FORTIFY_SOURCE +ifneq (,$(findstring stack-protector,$(shell $(CC) -dumpspecs))) + TARGET_DISABLE+= -fno-stack-protector +endif + +ifneq (,$(findstring Windows,$(OS))) + TARGET_SYS= Windows +else + TARGET_SYS:= $(shell uname -s) + ifneq (,$(findstring CYGWIN,$(TARGET_SYS))) + TARGET_SYS= Windows + endif +endif + +ifeq (Linux,$(TARGET_SYS)) + TARGET_XLIBS= -ldl + TARGET_XLDFLAGS= -Wl,-E +else +ifeq (Windows,$(TARGET_SYS)) + HOST_RM= del + TARGET_STRIP= strip --strip-unneeded +else +ifeq (Darwin,$(TARGET_SYS)) + TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup + TARGET_STRIP= strip -x + export MACOSX_DEPLOYMENT_TARGET=10.3 +else + TARGET_XLDFLAGS= -Wl,-E +endif +endif +endif + +# NOTE: The LuaJIT distribution comes with a pre-generated buildvm_*.h. +# You DO NOT NEED an installed copy of (plain) Lua 5.1 to run DynASM unless +# you want to MODIFY the corresponding *.dasc file. You can also use LuaJIT +# itself (bootstrapped from the pre-generated file) to run DynASM of course. +DASM_LUA= lua + +Q= @ +E= @echo +#Q= +#E= @: + +############################################################################## + +TARGET_CFLAGS= $(CCOPTIONS) $(TARGET_DISABLE) $(TARGET_XCFLAGS) +TARGET_LDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) +TARGET_SHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) +TARGET_LIBS= -lm $(TARGET_XLIBS) +ifneq (,$(CCDEBUG)) + TARGET_STRIP= @: +endif + +HOST_CFLAGS= $(CCOPTIONS) $(HOST_XCFLAGS) $(TARGET_ARCH) +HOST_LDFLAGS= $(LDOPTIONS) $(HOST_XLDFLAGS) +HOST_LIBS= $(HOST_XLIBS) + +DASM_DIR= ../dynasm +DASM= $(DASM_LUA) $(DASM_DIR)/dynasm.lua +DASM_FLAGS= +DASM_DISTFLAGS= -LN + +BUILDVM_O= buildvm.o buildvm_asm.o buildvm_peobj.o buildvm_lib.o buildvm_fold.o +BUILDVM_T= buildvm + +HOST_O= $(BUILDVM_O) +HOST_T= $(BUILDVM_T) + +LJVM_S= lj_vm.s +LJVM_O= lj_vm.o +LJVM_BOUT= $(LJVM_S) +LJVM_MODE= asm + +LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ + lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o +LJLIB_C= $(LJLIB_O:.o=.c) + +LJCORE_O= lj_gc.o lj_err.o lj_ctype.o lj_bc.o lj_obj.o \ + lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o \ + lj_state.o lj_dispatch.o lj_vmevent.o lj_api.o \ + lj_lex.o lj_parse.o \ + lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ + lj_opt_dce.o lj_opt_loop.o \ + lj_mcode.o lj_snap.o lj_record.o lj_asm.o lj_trace.o lj_gdbjit.o \ + lj_lib.o lj_alloc.o lib_aux.o \ + $(LJLIB_O) lib_init.o + +LJVMCORE_O= $(LJVM_O) $(LJCORE_O) + +# NYI: Need complete support for building as a shared library on POSIX. +# This is currently *only* suitable for MinGW and Cygwin, see below. 
+LUAJIT_O= luajit.o +LUAJIT_SO= luajit.so +LUAJIT_T= luajit + +LIB_VMDEF= ../lib/vmdef.lua + +TARGET_DEP= $(LIB_VMDEF) +TARGET_O= $(LJVMCORE_O) $(LUAJIT_O) +TARGET_T= $(LUAJIT_T) + +ALL_GEN= $(LJVM_S) lj_ffdef.h lj_libdef.h lj_recdef.h $(LIB_VMDEF) lj_folddef.h +ALL_DYNGEN= buildvm_x86.h +WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest +ALL_RM= $(LUAJIT_T) $(LUAJIT_SO) $(HOST_T) $(ALL_GEN) *.o $(WIN_RM) + +ifeq (Windows,$(TARGET_SYS)) + LJVM_BOUT= $(LJVM_O) + LJVM_MODE= peobj + LIB_VMDEF= ..\lib\vmdef.lua + # Imported symbols are bound to a specific DLL name under Windows. + LUAJIT_SO= lua51.dll + LUAJIT_T= luajit.exe + BUILDVM_T= buildvm.exe + # + # You can comment out the following two lines to build a static executable. + # But then you won't be able to dynamically load any C modules, because + # they bind to lua51.dll. + # + TARGET_XCFLAGS+= -DLUA_BUILD_AS_DLL + TARGET_O= $(LUAJIT_SO) $(LUAJIT_O) +endif + +############################################################################## + +default: $(TARGET_T) + +all: $(TARGET_T) + +amalg: + @grep "^[+|]" ljamalg.c + $(MAKE) all "LJCORE_O=ljamalg.o" + +MAKE_TARGETS= amalg + +############################################################################## + +buildvm_x86.h: buildvm_x86.dasc + $(E) "DYNASM $@" + $(Q)$(DASM) $(DASM_FLAGS) -o $@ buildvm_x86.dasc + +$(BUILDVM_T): $(BUILDVM_O) + $(E) "HOSTLINK $@" + $(Q)$(HOST_CC) $(HOST_LDFLAGS) -o $@ $(BUILDVM_O) $(HOST_LIBS) + +$(LJVM_BOUT): $(BUILDVM_T) + $(E) "BUILDVM $@" + $(Q)./$(BUILDVM_T) -m $(LJVM_MODE) -o $@ + +lj_ffdef.h: $(BUILDVM_T) $(LJLIB_C) + $(E) "BUILDVM $@" + $(Q)./$(BUILDVM_T) -m ffdef -o $@ $(LJLIB_C) + +lj_libdef.h: $(BUILDVM_T) $(LJLIB_C) + $(E) "BUILDVM $@" + $(Q)./$(BUILDVM_T) -m libdef -o $@ $(LJLIB_C) + +lj_recdef.h: $(BUILDVM_T) $(LJLIB_C) + $(E) "BUILDVM $@" + $(Q)./$(BUILDVM_T) -m recdef -o $@ $(LJLIB_C) + +$(LIB_VMDEF): $(BUILDVM_T) $(LJLIB_C) + $(E) "BUILDVM $@" + $(Q)./$(BUILDVM_T) -m vmdef -o $@ $(LJLIB_C) + +lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c + $(E) "BUILDVM $@" + $(Q)./$(BUILDVM_T) -m folddef -o $@ lj_opt_fold.c + +$(LUAJIT_SO): $(LJVMCORE_O) + $(E) "LINK $@" + $(Q)$(TARGET_CC) $(TARGET_SHLDFLAGS) -o $@ $(LJVMCORE_O) $(TARGET_LIBS) + $(Q)$(TARGET_STRIP) $@ + +$(LUAJIT_T): $(TARGET_O) $(TARGET_DEP) + $(E) "LINK $@" + $(Q)$(TARGET_CC) $(TARGET_LDFLAGS) -o $@ $(TARGET_O) $(TARGET_LIBS) + $(Q)$(TARGET_STRIP) $@ + $(E) "OK Successfully built LuaJIT" + +############################################################################## + +%.o: %.c + $(E) "CC $@" + $(Q)$(TARGET_CC) $(TARGET_CFLAGS) -c -o $@ $< + +%.o: %.s + $(E) "ASM $@" + $(Q)$(TARGET_CC) $(TARGET_CFLAGS) -c -o $@ $< + +$(HOST_O): %.o: %.c + $(E) "HOSTCC $@" + $(Q)$(HOST_CC) $(HOST_CFLAGS) -c -o $@ $< + +include Makefile.dep + +############################################################################## + +clean: + $(HOST_RM) $(ALL_RM) + +cleaner: clean + $(HOST_RM) $(ALL_DYNGEN) + +distclean: clean + $(E) "DYNASM $@" + $(Q)$(DASM) $(DASM_DISTFLAGS) -o buildvm_x86.h buildvm_x86.dasc + +depend: + @test -f lj_ffdef.h || touch lj_ffdef.h + @test -f lj_libdef.h || touch lj_libdef.h + @test -f lj_recdef.h || touch lj_recdef.h + @test -f lj_folddef.h || touch lj_folddef.h + @test -f buildvm_x86.h || touch buildvm_x86.h + @$(HOST_CC) $(HOST_CFLAGS) -MM *.c | sed "s|$(DASM_DIR)|\$$(DASM_DIR)|g" >Makefile.dep + @test -s lj_ffdef.h || $(HOST_RM) lj_ffdef.h + @test -s lj_libdef.h || $(HOST_RM) lj_libdef.h + @test -s lj_recdef.h || $(HOST_RM) lj_recdef.h + @test -s lj_folddef.h || $(HOST_RM) lj_folddef.h + 
@test -s buildvm_x86.h || $(HOST_RM) buildvm_x86.h + +.PHONY: default all $(MAKE_TARGETS) clean cleaner distclean depend + +############################################################################## diff --git a/src/Makefile.dep b/src/Makefile.dep new file mode 100644 index 0000000000..b1cdd93b52 --- /dev/null +++ b/src/Makefile.dep @@ -0,0 +1,139 @@ +buildvm.o: buildvm.c lua.h luaconf.h luajit.h lj_obj.h lj_def.h lj_arch.h \ + lj_gc.h lj_bc.h lj_ir.h lj_frame.h lj_dispatch.h lj_jit.h lj_target.h \ + lj_target_x86.h buildvm.h $(DASM_DIR)/dasm_proto.h $(DASM_DIR)/dasm_x86.h \ + buildvm_x86.h lj_traceerr.h +buildvm_asm.o: buildvm_asm.c buildvm.h lj_def.h lua.h luaconf.h lj_arch.h \ + lj_bc.h +buildvm_fold.o: buildvm_fold.c lj_obj.h lua.h luaconf.h lj_def.h \ + lj_arch.h lj_ir.h buildvm.h +buildvm_lib.o: buildvm_lib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_lib.h buildvm.h +buildvm_peobj.o: buildvm_peobj.c buildvm.h lj_def.h lua.h luaconf.h \ + lj_arch.h lj_bc.h +lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ + lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_alloc.h +lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ + lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ + lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h +lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ + lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h +lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ + lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h +lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h +lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ + lj_arch.h lj_err.h lj_errmsg.h lj_gc.h lj_ff.h lj_ffdef.h lj_lib.h \ + lj_libdef.h +lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ + lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ir.h \ + lj_jit.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_vmevent.h lj_lib.h \ + luajit.h lj_libdef.h +lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ + lj_def.h lj_arch.h lj_lib.h lj_libdef.h +lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ + lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h +lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ + lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h +lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ + lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_state.h \ + lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h +lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ + lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \ + lj_libdef.h +lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h +lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \ + lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ + lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h +lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ + lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h \ + lj_target.h lj_target_x86.h +lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h +lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h +lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h 
lj_arch.h \ + lj_err.h lj_errmsg.h lj_state.h lj_frame.h lj_bc.h lj_jit.h lj_ir.h \ + lj_trace.h lj_dispatch.h lj_traceerr.h lj_vm.h luajit.h +lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ + lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ + lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h +lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ + lj_traceerr.h lj_vm.h +lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \ + lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ + lj_traceerr.h lj_vm.h +lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_jit.h \ + lj_ir.h lj_dispatch.h +lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \ + lj_traceerr.h +lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h +lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ + lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_vm.h \ + lj_lib.h +lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_gc.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h lj_dispatch.h lj_bc.h \ + lj_traceerr.h +lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_bc.h lj_vm.h +lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h +lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_ir.h lj_jit.h lj_iropt.h +lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \ + lj_traceerr.h lj_vm.h lj_folddef.h +lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \ + lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h +lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_tab.h lj_ir.h lj_jit.h lj_iropt.h +lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ + lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ + lj_dispatch.h lj_traceerr.h +lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \ + lj_bc.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h +lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_state.h lj_frame.h \ + lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ + lj_dispatch.h lj_traceerr.h lj_record.h lj_snap.h lj_asm.h lj_vm.h \ + lj_recdef.h +lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ + lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h lj_target_x86.h +lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ + lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ + lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h +lj_str.o: lj_str.c lj_obj.h lua.h 
luaconf.h lj_def.h lj_arch.h lj_gc.h \
+  lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ctype.h
+lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+  lj_err.h lj_errmsg.h lj_tab.h
+lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_state.h \
+  lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
+  lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h lj_vm.h \
+  lj_vmevent.h lj_target.h lj_target_x86.h
+lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+  lj_gc.h lj_udata.h
+lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+  lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
+  lj_vm.h lj_vmevent.h
+ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
+  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \
+  lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h \
+  lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c lj_ctype.c \
+  lj_ctype.h lj_bc.c lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c \
+  lj_meta.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c luajit.h \
+  lj_vmevent.c lj_vmevent.h lj_api.c lj_parse.h lj_lex.c lj_parse.c \
+  lj_lib.c lj_lib.h lj_ir.c lj_iropt.h lj_opt_mem.c lj_opt_fold.c \
+  lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h \
+  lj_mcode.c lj_mcode.h lj_snap.c lj_target.h lj_target_x86.h lj_record.c \
+  lj_ff.h lj_ffdef.h lj_record.h lj_asm.h lj_recdef.h lj_asm.c lj_trace.c \
+  lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c lualib.h \
+  lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c \
+  lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_init.c
+luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h
diff --git a/src/buildvm.c b/src/buildvm.c
new file mode 100644
index 0000000000..b3738db42e
--- /dev/null
+++ b/src/buildvm.c
@@ -0,0 +1,438 @@
+/*
+** LuaJIT VM builder.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** This is a tool to build the hand-tuned assembler code required for
+** LuaJIT's bytecode interpreter. It supports a variety of output formats
+** to feed different toolchains (see usage() below).
+**
+** This tool is not particularly optimized because it's only used while
+** _building_ LuaJIT. There's no point in distributing or installing it.
+** Only the object code generated by this tool is linked into LuaJIT.
+**
+** Caveat: some memory is not free'd, error handling is lazy.
+** It's a one-shot tool -- any effort fixing this would be wasted.
+*/
+
+#include "lua.h"
+#include "luajit.h"
+
+#ifdef LUA_USE_WIN
+#include <fcntl.h>
+#include <io.h>
+#endif
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_bc.h"
+#include "lj_ir.h"
+#include "lj_frame.h"
+#include "lj_dispatch.h"
+#include "lj_target.h"
+
+#include "buildvm.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* DynASM glue definitions. */
+#define Dst ctx
+#define Dst_DECL BuildCtx *ctx
+#define Dst_REF (ctx->D)
+
+#include "../dynasm/dasm_proto.h"
+
+/* Glue macros for DynASM.
*/ +#define DASM_M_GROW(ctx, t, p, sz, need) \ + do { \ + size_t _sz = (sz), _need = (need); \ + if (_sz < _need) { \ + if (_sz < 16) _sz = 16; \ + while (_sz < _need) _sz += _sz; \ + (p) = (t *)realloc((p), _sz); \ + if ((p) == NULL) exit(1); \ + (sz) = _sz; \ + } \ + } while(0) + +#define DASM_M_FREE(ctx, p, sz) free(p) + +static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); + +#define DASM_EXTERN(ctx, addr, idx, type) \ + collect_reloc(ctx, addr, idx, type) + +/* ------------------------------------------------------------------------ */ + +/* Avoid trouble if cross-compiling for an x86 target. Speed doesn't matter. */ +#define DASM_ALIGNED_WRITES 1 + +/* Embed architecture-specific DynASM encoder and backend. */ +#if LJ_TARGET_X86 +#include "../dynasm/dasm_x86.h" +#include "buildvm_x86.h" +#else +#error "No support for this architecture (yet)" +#endif + +/* ------------------------------------------------------------------------ */ + +void owrite(BuildCtx *ctx, const void *ptr, size_t sz) +{ + if (fwrite(ptr, 1, sz, ctx->fp) != sz) { + fprintf(stderr, "Error: cannot write to output file: %s\n", + strerror(errno)); + exit(1); + } +} + +/* ------------------------------------------------------------------------ */ + +/* Emit code as raw bytes. Only used for DynASM debugging. */ +static void emit_raw(BuildCtx *ctx) +{ + owrite(ctx, ctx->code, ctx->codesz); +} + +/* -- Build machine code -------------------------------------------------- */ + +/* Collect external relocations. */ +static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type) +{ + if (ctx->nreloc >= BUILD_MAX_RELOC) { + fprintf(stderr, "Error: too many relocations, increase BUILD_MAX_RELOC.\n"); + exit(1); + } + ctx->reloc[ctx->nreloc].ofs = (int32_t)(addr - ctx->code); + ctx->reloc[ctx->nreloc].sym = idx; + ctx->reloc[ctx->nreloc].type = type; + ctx->nreloc++; + return 0; /* Encode symbol offset of 0. */ +} + +/* Naive insertion sort. Performance doesn't matter here. */ +static void perm_insert(int *perm, int32_t *ofs, int i) +{ + perm[i] = i; + while (i > 0) { + int a = perm[i-1]; + int b = perm[i]; + if (ofs[a] <= ofs[b]) break; + perm[i] = a; + perm[i-1] = b; + i--; + } +} + +/* Build the machine code. */ +static int build_code(BuildCtx *ctx) +{ + int status; + int i, j; + + /* Initialize DynASM structures. */ + ctx->nglob = GLOB__MAX; + ctx->glob = (void **)malloc(ctx->nglob*sizeof(void *)); + memset(ctx->glob, 0, ctx->nglob*sizeof(void *)); + ctx->nreloc = 0; + + ctx->extnames = extnames; + ctx->globnames = globnames; + + ctx->dasm_ident = DASM_IDENT; + ctx->dasm_arch = DASM_ARCH; + + dasm_init(Dst, DASM_MAXSECTION); + dasm_setupglobal(Dst, ctx->glob, ctx->nglob); + dasm_setup(Dst, build_actionlist); + + /* Call arch-specific backend to emit the code. */ + ctx->npc = build_backend(ctx); + + /* Finalize the code. */ + (void)dasm_checkstep(Dst, DASM_SECTION_CODE); + if ((status = dasm_link(Dst, &ctx->codesz))) return status; + ctx->code = (uint8_t *)malloc(ctx->codesz); + if ((status = dasm_encode(Dst, (void *)ctx->code))) return status; + + /* Allocate the symbol offset and permutation tables. */ + ctx->nsym = ctx->npc + ctx->nglob; + ctx->perm = (int *)malloc((ctx->nsym+1)*sizeof(int *)); + ctx->sym_ofs = (int32_t *)malloc((ctx->nsym+1)*sizeof(int32_t)); + + /* Collect the opcodes (PC labels). 
*/ + for (i = 0; i < ctx->npc; i++) { + int32_t n = dasm_getpclabel(Dst, i); + if (n < 0) return 0x22000000|i; + ctx->sym_ofs[i] = n; + perm_insert(ctx->perm, ctx->sym_ofs, i); + } + + /* Collect the globals (named labels). */ + for (j = 0; j < ctx->nglob; j++, i++) { + const char *gl = globnames[j]; + int len = (int)strlen(gl); + if (!ctx->glob[j]) { + fprintf(stderr, "Error: undefined global %s\n", gl); + exit(2); + } + if (len >= 2 && gl[len-2] == '_' && gl[len-1] == 'Z') + ctx->sym_ofs[i] = -1; /* Skip the _Z symbols. */ + else + ctx->sym_ofs[i] = (int32_t)((uint8_t *)(ctx->glob[j]) - ctx->code); + perm_insert(ctx->perm, ctx->sym_ofs, i); + } + + /* Close the address range. */ + ctx->sym_ofs[i] = (int32_t)ctx->codesz; + perm_insert(ctx->perm, ctx->sym_ofs, i); + + dasm_free(Dst); + + return 0; +} + +/* -- Generate VM enums --------------------------------------------------- */ + +const char *const bc_names[] = { +#define BCNAME(name, ma, mb, mc, mt) #name, +BCDEF(BCNAME) +#undef BCNAME + NULL +}; + +const char *const ir_names[] = { +#define IRNAME(name, m, m1, m2) #name, +IRDEF(IRNAME) +#undef IRNAME + NULL +}; + +const char *const irfpm_names[] = { +#define FPMNAME(name) #name, +IRFPMDEF(FPMNAME) +#undef FPMNAME + NULL +}; + +const char *const irfield_names[] = { +#define FLNAME(name, type, field) #name, +IRFLDEF(FLNAME) +#undef FLNAME + NULL +}; + +static const char *const trace_errors[] = { +#define TREDEF(name, msg) msg, +#include "lj_traceerr.h" + NULL +}; + +static const char *lower(char *buf, const char *s) +{ + char *p = buf; + while (*s) { + *p++ = (*s >= 'A' && *s <= 'Z') ? *s+0x20 : *s; + s++; + } + *p = '\0'; + return buf; +} + +/* Emit VM definitions as Lua code for debug modules. */ +static void emit_vmdef(BuildCtx *ctx) +{ + char buf[80]; + int i; + fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); + fprintf(ctx->fp, "module(...)\n\n"); + + fprintf(ctx->fp, "bcnames = \""); + for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); + fprintf(ctx->fp, "\"\n\n"); + + fprintf(ctx->fp, "irnames = \""); + for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); + fprintf(ctx->fp, "\"\n\n"); + + fprintf(ctx->fp, "irfpm = { [0]="); + for (i = 0; irfpm_names[i]; i++) + fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i])); + fprintf(ctx->fp, "}\n\n"); + + fprintf(ctx->fp, "irfield = { [0]="); + for (i = 0; irfield_names[i]; i++) { + char *p; + lower(buf, irfield_names[i]); + p = strchr(buf, '_'); + if (p) *p = '.'; + fprintf(ctx->fp, "\"%s\", ", buf); + } + fprintf(ctx->fp, "}\n\n"); + + fprintf(ctx->fp, "traceerr = {\n[0]="); + for (i = 0; trace_errors[i]; i++) + fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); + fprintf(ctx->fp, "}\n\n"); +} + +/* -- Argument parsing ---------------------------------------------------- */ + +/* Build mode names. */ +static const char *const modenames[] = { +#define BUILDNAME(name) #name, +BUILDDEF(BUILDNAME) +#undef BUILDNAME + NULL +}; + +/* Print usage information and exit. */ +static void usage(void) +{ + int i; + fprintf(stderr, LUAJIT_VERSION " VM builder.\n"); + fprintf(stderr, LUAJIT_COPYRIGHT ", " LUAJIT_URL "\n"); + fprintf(stderr, "Target architecture: " LJ_ARCH_NAME "\n\n"); + fprintf(stderr, "Usage: buildvm -m mode [-o outfile] [infiles...]\n\n"); + fprintf(stderr, "Available modes:\n"); + for (i = 0; i < BUILD__MAX; i++) + fprintf(stderr, " %s\n", modenames[i]); + exit(1); +} + +/* Parse the output mode name. 
*/
+static BuildMode parsemode(const char *mode)
+{
+  int i;
+  for (i = 0; modenames[i]; i++)
+    if (!strcmp(mode, modenames[i]))
+      return (BuildMode)i;
+  usage();
+  return (BuildMode)-1;
+}
+
+/* Parse arguments. */
+static void parseargs(BuildCtx *ctx, char **argv)
+{
+  const char *a;
+  int i;
+  ctx->mode = (BuildMode)-1;
+  ctx->outname = "-";
+  for (i = 1; (a = argv[i]) != NULL; i++) {
+    if (a[0] != '-')
+      break;
+    switch (a[1]) {
+    case '-':
+      if (a[2]) goto err;
+      i++;
+      goto ok;
+    case '\0':
+      goto ok;
+    case 'm':
+      i++;
+      if (a[2] || argv[i] == NULL) goto err;
+      ctx->mode = parsemode(argv[i]);
+      break;
+    case 'o':
+      i++;
+      if (a[2] || argv[i] == NULL) goto err;
+      ctx->outname = argv[i];
+      break;
+    default: err:
+      usage();
+      break;
+    }
+  }
+ok:
+  ctx->args = argv+i;
+  if (ctx->mode == (BuildMode)-1) goto err;
+}
+
+int main(int argc, char **argv)
+{
+  BuildCtx ctx_;
+  BuildCtx *ctx = &ctx_;
+  int status, binmode;
+
+  UNUSED(argc);
+  parseargs(ctx, argv);
+
+  if ((status = build_code(ctx))) {
+    fprintf(stderr,"Error: DASM error %08x\n", status);
+    return 1;
+  }
+
+  switch (ctx->mode) {
+#if LJ_TARGET_X86ORX64
+  case BUILD_peobj:
+#endif
+  case BUILD_raw:
+    binmode = 1;
+    break;
+  default:
+    binmode = 0;
+    break;
+  }
+
+  if (ctx->outname[0] == '-' && ctx->outname[1] == '\0') {
+    ctx->fp = stdout;
+#ifdef LUA_USE_WIN
+    if (binmode)
+      _setmode(_fileno(stdout), _O_BINARY);  /* Yuck. */
+#endif
+  } else if (!(ctx->fp = fopen(ctx->outname, binmode ? "wb" : "w"))) {
+    fprintf(stderr, "Error: cannot open output file '%s': %s\n",
+	    ctx->outname, strerror(errno));
+    exit(1);
+  }
+
+  switch (ctx->mode) {
+  case BUILD_asm:
+#if defined(__ELF__)
+    ctx->mode = BUILD_elfasm;
+#elif defined(__MACH__)
+    ctx->mode = BUILD_machasm;
+#else
+    fprintf(stderr,"Error: auto-guessing the system assembler failed\n");
+    return 1;
+#endif
+    /* fallthrough */
+  case BUILD_elfasm:
+  case BUILD_coffasm:
+  case BUILD_machasm:
+    emit_asm(ctx);
+    emit_asm_debug(ctx);
+    break;
+#if LJ_TARGET_X86ORX64
+  case BUILD_peobj:
+    emit_peobj(ctx);
+    break;
+#endif
+  case BUILD_raw:
+    emit_raw(ctx);
+    break;
+  case BUILD_vmdef:
+    emit_vmdef(ctx);
+    /* fallthrough */
+  case BUILD_ffdef:
+  case BUILD_libdef:
+  case BUILD_recdef:
+    emit_lib(ctx);
+    break;
+  case BUILD_folddef:
+    emit_fold(ctx);
+    break;
+  default:
+    break;
+  }
+
+  fflush(ctx->fp);
+  if (ferror(ctx->fp)) {
+    fprintf(stderr, "Error: cannot write to output file: %s\n",
+	    strerror(errno));
+    exit(1);
+  }
+  fclose(ctx->fp);
+
+  return 0;
+}
+
diff --git a/src/buildvm.h b/src/buildvm.h
new file mode 100644
index 0000000000..e55527fdce
--- /dev/null
+++ b/src/buildvm.h
@@ -0,0 +1,106 @@
+/*
+** LuaJIT VM builder.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _BUILDVM_H
+#define _BUILDVM_H
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "lj_def.h"
+#include "lj_arch.h"
+
+/* Hardcoded limits. Increase as needed. */
+#define BUILD_MAX_RELOC 100 /* Max. number of relocations. */
+#define BUILD_MAX_FOLD 4096 /* Max. number of fold rules. */
+
+/* Prefix for scanned library definitions. */
+#define LIBDEF_PREFIX "LJLIB_"
+
+/* Prefix for scanned fold definitions. */
+#define FOLDDEF_PREFIX "LJFOLD"
+
+/* Prefixes for generated labels.
*/ +#define LABEL_PREFIX "lj_" +#define LABEL_PREFIX_BC LABEL_PREFIX "BC_" +#define LABEL_PREFIX_FF LABEL_PREFIX "ff_" +#define LABEL_PREFIX_CF LABEL_PREFIX "cf_" +#define LABEL_PREFIX_FFH LABEL_PREFIX "ffh_" +#define LABEL_PREFIX_LIBCF LABEL_PREFIX "lib_cf_" +#define LABEL_PREFIX_LIBINIT LABEL_PREFIX "lib_init_" + +/* Extra labels. */ +#define LABEL_ASM_BEGIN LABEL_PREFIX "vm_asm_begin" +#define LABEL_OP_OFS LABEL_PREFIX "vm_op_ofs" + +/* Forward declaration. */ +struct dasm_State; + +/* Build modes. */ +#if LJ_TARGET_X86ORX64 +#define BUILDDEFX(_) _(peobj) +#else +#define BUILDDEFX(_) +#endif + +#define BUILDDEF(_) \ + _(asm) _(elfasm) _(coffasm) _(machasm) BUILDDEFX(_) _(raw) \ + _(ffdef) _(libdef) _(recdef) _(vmdef) \ + _(folddef) + +typedef enum { +#define BUILDENUM(name) BUILD_##name, +BUILDDEF(BUILDENUM) +#undef BUILDENUM + BUILD__MAX +} BuildMode; + +/* Code relocation. */ +typedef struct BuildReloc { + int32_t ofs; + int sym; + int type; +} BuildReloc; + +/* Build context structure. */ +typedef struct BuildCtx { + /* DynASM state pointer. Should be first member. */ + struct dasm_State *D; + /* Parsed command line. */ + BuildMode mode; + FILE *fp; + const char *outname; + char **args; + /* Code and symbols generated by DynASM. */ + uint8_t *code; + size_t codesz; + int npc, nglob, nsym, nreloc; + void **glob; + int *perm; + int32_t *sym_ofs; + /* Strings generated by DynASM. */ + const char *const *extnames; + const char *const *globnames; + const char *dasm_ident; + const char *dasm_arch; + /* Relocations. */ + BuildReloc reloc[BUILD_MAX_RELOC]; +} BuildCtx; + +extern void owrite(BuildCtx *ctx, const void *ptr, size_t sz); +extern void emit_asm(BuildCtx *ctx); +extern void emit_peobj(BuildCtx *ctx); +extern void emit_lib(BuildCtx *ctx); +extern void emit_fold(BuildCtx *ctx); + +extern const char *const bc_names[]; +extern const char *const ir_names[]; +extern const char *const irfpm_names[]; +extern const char *const irfield_names[]; + +#endif diff --git a/src/buildvm_asm.c b/src/buildvm_asm.c new file mode 100644 index 0000000000..e6972bd58f --- /dev/null +++ b/src/buildvm_asm.c @@ -0,0 +1,220 @@ +/* +** LuaJIT VM builder: Assembler source code emitter. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#include "buildvm.h" +#include "lj_bc.h" + +/* ------------------------------------------------------------------------ */ + +/* Emit bytes piecewise as assembler text. */ +static void emit_asm_bytes(BuildCtx *ctx, uint8_t *p, int n) +{ + int i; + for (i = 0; i < n; i++) { + if ((i & 15) == 0) + fprintf(ctx->fp, "\t.byte %d", p[i]); + else + fprintf(ctx->fp, ",%d", p[i]); + if ((i & 15) == 15) putc('\n', ctx->fp); + } + if ((n & 15) != 0) putc('\n', ctx->fp); +} + +/* Emit relocation */ +static void emit_asm_reloc(BuildCtx *ctx, BuildReloc *r) +{ + const char *sym = ctx->extnames[r->sym]; + switch (ctx->mode) { + case BUILD_elfasm: + if (r->type) + fprintf(ctx->fp, "\t.long %s-.-4\n", sym); + else + fprintf(ctx->fp, "\t.long %s\n", sym); + break; + case BUILD_coffasm: + fprintf(ctx->fp, "\t.def _%s; .scl 3; .type 32; .endef\n", sym); + if (r->type) + fprintf(ctx->fp, "\t.long _%s-.-4\n", sym); + else + fprintf(ctx->fp, "\t.long _%s\n", sym); + break; + default: /* BUILD_machasm for relative relocations handled below. 
*/ + fprintf(ctx->fp, "\t.long _%s\n", sym); + break; + } +} + +static const char *const jccnames[] = { + "jo", "jno", "jb", "jnb", "jz", "jnz", "jbe", "ja", + "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg" +}; + +/* Emit relocation for the incredibly stupid OSX assembler. */ +static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n, + const char *sym) +{ + const char *opname = NULL; + if (--n < 0) goto err; + if (cp[n] == 0xe8) { + opname = "call"; + } else if (cp[n] == 0xe9) { + opname = "jmp"; + } else if (cp[n] >= 0x80 && cp[n] <= 0x8f && n > 0 && cp[n-1] == 0x0f) { + opname = jccnames[cp[n]-0x80]; + n--; + } else { +err: + fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n", + sym); + exit(1); + } + emit_asm_bytes(ctx, cp, n); + if (!strncmp(sym, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) + fprintf(ctx->fp, "\t%s _%s\n", opname, sym); + else + fprintf(ctx->fp, "\t%s _" LABEL_PREFIX "wrapper_%s\n", opname, sym); +} + +/* Emit an assembler label. */ +static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc) +{ + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, + "\n\t.globl %s\n" + "\t.hidden %s\n" + "\t.type %s, @%s\n" + "\t.size %s, %d\n" + "%s:\n", + name, name, name, isfunc ? "function" : "object", name, size, name); + break; + case BUILD_coffasm: + fprintf(ctx->fp, "\n\t.globl _%s\n", name); + if (isfunc) + fprintf(ctx->fp, "\t.def _%s; .scl 3; .type 32; .endef\n", name); + fprintf(ctx->fp, "_%s:\n", name); + break; + case BUILD_machasm: + fprintf(ctx->fp, + "\n\t.private_extern _%s\n" + "_%s:\n", name, name); + break; + default: + break; + } +} + +/* Emit alignment. */ +static void emit_asm_align(BuildCtx *ctx, int bits) +{ + switch (ctx->mode) { + case BUILD_elfasm: + case BUILD_coffasm: + fprintf(ctx->fp, "\t.p2align %d\n", bits); + break; + case BUILD_machasm: + fprintf(ctx->fp, "\t.align %d\n", bits); + break; + default: + break; + } +} + +/* ------------------------------------------------------------------------ */ + +/* Emit assembler source code. */ +void emit_asm(BuildCtx *ctx) +{ + char name[80]; + int32_t prev; + int i, pi, rel; + + fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch); + fprintf(ctx->fp, "\t.text\n"); + emit_asm_align(ctx, 4); + + emit_asm_label(ctx, LABEL_ASM_BEGIN, 0, 1); + if (ctx->mode == BUILD_elfasm) + fprintf(ctx->fp, ".Lbegin:\n"); + + i = 0; + do { + pi = ctx->perm[i++]; + prev = ctx->sym_ofs[pi]; + } while (prev < 0); /* Skip the _Z symbols. 
*/ + + for (rel = 0; i <= ctx->nsym; i++) { + int ni = ctx->perm[i]; + int32_t next = ctx->sym_ofs[ni]; + int size = (int)(next - prev); + int32_t stop = next; + if (pi >= ctx->npc) { + sprintf(name, LABEL_PREFIX "%s", ctx->globnames[pi-ctx->npc]); + emit_asm_label(ctx, name, size, 1); +#if LJ_HASJIT + } else { +#else + } else if (!(pi == BC_JFORI || pi == BC_JFORL || pi == BC_JITERL || + pi == BC_JLOOP || pi == BC_IFORL || pi == BC_IITERL || + pi == BC_ILOOP)) { +#endif + sprintf(name, LABEL_PREFIX_BC "%s", bc_names[pi]); + emit_asm_label(ctx, name, size, 1); + } + while (rel < ctx->nreloc && ctx->reloc[rel].ofs < stop) { + int n = ctx->reloc[rel].ofs - prev; + if (ctx->mode == BUILD_machasm && ctx->reloc[rel].type != 0) { + emit_asm_reloc_mach(ctx, ctx->code+prev, n, + ctx->extnames[ctx->reloc[rel].sym]); + } else { + emit_asm_bytes(ctx, ctx->code+prev, n); + emit_asm_reloc(ctx, &ctx->reloc[rel]); + } + prev += n+4; + rel++; + } + emit_asm_bytes(ctx, ctx->code+prev, stop-prev); + prev = next; + pi = ni; + } + + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\n\t.section .rodata\n"); + break; + case BUILD_coffasm: + fprintf(ctx->fp, "\n\t.section .rdata,\"dr\"\n"); + break; + case BUILD_machasm: + fprintf(ctx->fp, "\n\t.const\n"); + break; + default: + break; + } + emit_asm_align(ctx, 5); + + emit_asm_label(ctx, LABEL_OP_OFS, 2*ctx->npc, 0); + for (i = 0; i < ctx->npc; i++) + fprintf(ctx->fp, "\t.short %d\n", ctx->sym_ofs[i]); + + fprintf(ctx->fp, "\n"); + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\",@progbits\n"); + /* fallthrough */ + case BUILD_coffasm: + fprintf(ctx->fp, "\t.ident \"%s\"\n", ctx->dasm_ident); + break; + case BUILD_machasm: + fprintf(ctx->fp, + "\t.cstring\n" + "\t.ascii \"%s\\0\"\n", ctx->dasm_ident); + break; + default: + break; + } + fprintf(ctx->fp, "\n"); +} + diff --git a/src/buildvm_fold.c b/src/buildvm_fold.c new file mode 100644 index 0000000000..5f065643af --- /dev/null +++ b/src/buildvm_fold.c @@ -0,0 +1,206 @@ +/* +** LuaJIT VM builder: IR folding hash table generator. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#include "lj_obj.h" +#include "lj_ir.h" + +#include "buildvm.h" + +/* Context for the folding hash table generator. */ +static int lineno; +static int funcidx; +static uint32_t foldkeys[BUILD_MAX_FOLD]; +static uint32_t nkeys; + +/* Try to fill the hash table with keys using the hash parameters. */ +static int tryhash(uint32_t *htab, uint32_t sz, uint32_t r, int dorol) +{ + uint32_t i; + if (dorol && ((r & 31) == 0 || (r>>5) == 0)) + return 0; /* Avoid zero rotates. */ + memset(htab, 0xff, (sz+1)*sizeof(uint32_t)); + for (i = 0; i < nkeys; i++) { + uint32_t key = foldkeys[i]; + uint32_t k = key & 0xffffff; + uint32_t h = (dorol ? lj_rol(lj_rol(k, r>>5) - k, r&31) : + (((k << (r>>5)) - k) << (r&31))) % sz; + if (htab[h] != 0xffffffff) { /* Collision on primary slot. */ + if (htab[h+1] != 0xffffffff) { /* Collision on secondary slot. */ + /* Try to move the colliding key, if possible. */ + if (h < sz-1 && htab[h+2] == 0xffffffff) { + uint32_t k2 = htab[h+1] & 0xffffff; + uint32_t h2 = (dorol ? lj_rol(lj_rol(k2, r>>5) - k2, r&31) : + (((k2 << (r>>5)) - k2) << (r&31))) % sz; + if (h2 != h+1) return 0; /* Cannot resolve collision. */ + htab[h+2] = htab[h+1]; /* Move colliding key to secondary slot. */ + } else { + return 0; /* Collision. */ + } + } + htab[h+1] = key; + } else { + htab[h] = key; + } + } + return 1; /* Success, all keys could be stored. 
*/ +} + +/* Print the generated hash table. */ +static void printhash(BuildCtx *ctx, uint32_t *htab, uint32_t sz) +{ + uint32_t i; + fprintf(ctx->fp, "static const uint32_t fold_hash[%d] = {\n0x%08x", + sz+1, htab[0]); + for (i = 1; i < sz+1; i++) + fprintf(ctx->fp, ",\n0x%08x", htab[i]); + fprintf(ctx->fp, "\n};\n\n"); +} + +/* Exhaustive search for the shortest semi-perfect hash table. */ +static void makehash(BuildCtx *ctx) +{ + uint32_t htab[BUILD_MAX_FOLD*2+1]; + uint32_t sz, r; + /* Search for the smallest hash table with an odd size. */ + for (sz = (nkeys|1); sz < BUILD_MAX_FOLD*2; sz += 2) { + /* First try all shift hash combinations. */ + for (r = 0; r < 32*32; r++) { + if (tryhash(htab, sz, r, 0)) { + printhash(ctx, htab, sz); + fprintf(ctx->fp, + "#define fold_hashkey(k)\t(((((k)<<%u)-(k))<<%u)%%%u)\n\n", + r>>5, r&31, sz); + return; + } + } + /* Then try all rotate hash combinations. */ + for (r = 0; r < 32*32; r++) { + if (tryhash(htab, sz, r, 1)) { + printhash(ctx, htab, sz); + fprintf(ctx->fp, + "#define fold_hashkey(k)\t(lj_rol(lj_rol((k),%u)-(k),%u)%%%u)\n\n", + r>>5, r&31, sz); + return; + } + } + } + fprintf(stderr, "Error: search for perfect hash failed\n"); + exit(1); +} + +/* Parse one token of a fold rule. */ +static uint32_t nexttoken(char **pp, int allowlit, int allowany) +{ + char *p = *pp; + if (p) { + uint32_t i; + char *q = strchr(p, ' '); + if (q) *q++ = '\0'; + *pp = q; + if (allowlit && !strncmp(p, "IRFPM_", 6)) { + for (i = 0; irfpm_names[i]; i++) + if (!strcmp(irfpm_names[i], p+6)) + return i; + } else if (allowlit && !strncmp(p, "IRFL_", 5)) { + for (i = 0; irfield_names[i]; i++) + if (!strcmp(irfield_names[i], p+5)) + return i; + } else if (allowany && !strcmp("any", p)) { + return 0xff; + } else { + for (i = 0; ir_names[i]; i++) + if (!strcmp(ir_names[i], p)) + return i; + } + fprintf(stderr, "Error: bad fold definition token \"%s\" at line %d\n", p, lineno); + exit(1); + } + return 0; +} + +/* Parse a fold rule. */ +static void foldrule(char *p) +{ + uint32_t op = nexttoken(&p, 0, 0); + uint32_t left = nexttoken(&p, 0, 1); + uint32_t right = nexttoken(&p, 1, 1); + uint32_t key = (funcidx << 24) | (op << 16) | (left << 8) | right; + uint32_t i; + if (nkeys >= BUILD_MAX_FOLD) { + fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n"); + exit(1); + } + /* Simple insertion sort to detect duplicates. */ + for (i = nkeys; i > 0; i--) { + if ((foldkeys[i-1]&0xffffff) < (key & 0xffffff)) + break; + if ((foldkeys[i-1]&0xffffff) == (key & 0xffffff)) { + fprintf(stderr, "Error: duplicate fold definition at line %d\n", lineno); + exit(1); + } + foldkeys[i] = foldkeys[i-1]; + } + foldkeys[i] = key; + nkeys++; +} + +/* Emit C source code for IR folding hash table. */ +void emit_fold(BuildCtx *ctx) +{ + char buf[256]; /* We don't care about analyzing lines longer than that. */ + const char *fname = ctx->args[0]; + FILE *fp; + + if (fname == NULL) { + fprintf(stderr, "Error: missing input filename\n"); + exit(1); + } + + if (fname[0] == '-' && fname[1] == '\0') { + fp = stdin; + } else { + fp = fopen(fname, "r"); + if (!fp) { + fprintf(stderr, "Error: cannot open input file '%s': %s\n", + fname, strerror(errno)); + exit(1); + } + } + + fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n"); + fprintf(ctx->fp, "static const FoldFunc fold_func[] = {\n"); + + lineno = 0; + funcidx = 0; + nkeys = 0; + while (fgets(buf, sizeof(buf), fp) != NULL) { + lineno++; + /* The prefix must be at the start of a line, otherwise it's ignored. 
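+**
+** The recognized shapes are (assuming FOLDDEF_PREFIX is the LJFOLD
+** marker used by the fold engine sources):
+**   LJFOLD(ADD KNUM KNUM)    parsed by foldrule(): op, left, right
+**   LJFOLDF(kfold_numarith)  fold function, appended to fold_func[]
+**   LJFOLDX(name)            like F, but references an extern function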
*/ + if (!strncmp(buf, FOLDDEF_PREFIX, sizeof(FOLDDEF_PREFIX)-1)) { + char *p = buf+sizeof(FOLDDEF_PREFIX)-1; + char *q = strchr(p, ')'); + if (p[0] == '(' && q) { + p++; + *q = '\0'; + foldrule(p); + } else if ((p[0] == 'F' || p[0] == 'X') && p[1] == '(' && q) { + p += 2; + *q = '\0'; + fprintf(ctx->fp, funcidx ? ",\n %s" : " %s", p); + funcidx++; + } else { + buf[strlen(buf)-1] = '\0'; + fprintf(stderr, "Error: unknown fold definition tag %s%s at line %d\n", + FOLDDEF_PREFIX, p, lineno); + exit(1); + } + } + } + fclose(fp); + fprintf(ctx->fp, "\n};\n\n"); + + makehash(ctx); +} + diff --git a/src/buildvm_lib.c b/src/buildvm_lib.c new file mode 100644 index 0000000000..cc572200a8 --- /dev/null +++ b/src/buildvm_lib.c @@ -0,0 +1,365 @@ +/* +** LuaJIT VM builder: library definition compiler. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#include "lj_obj.h" +#include "lj_lib.h" + +#include "buildvm.h" + +/* Context for library definitions. */ +static uint8_t obuf[8192]; +static uint8_t *optr; +static char modname[80]; +static size_t modnamelen; +static char funcname[80]; +static int modstate, regfunc; +static int ffid, recffid; + +enum { + REGFUNC_OK, + REGFUNC_NOREG, + REGFUNC_NOREGUV +}; + +static void libdef_name(char *p, int kind) +{ + size_t n = strlen(p); + if (kind != LIBINIT_STRING) { + if (n > modnamelen && p[modnamelen] == '_' && + !strncmp(p, modname, modnamelen)) { + p += modnamelen+1; + n -= modnamelen+1; + } + } + if (n > LIBINIT_MAXSTR) { + fprintf(stderr, "Error: string too long: '%s'\n", p); + exit(1); + } + if (optr+1+n+2 > obuf+sizeof(obuf)) { /* +2 for caller. */ + fprintf(stderr, "Error: output buffer overflow\n"); + exit(1); + } + *optr++ = (uint8_t)(n | kind); + memcpy(optr, p, n); + optr += n; +} + +static void libdef_endmodule(BuildCtx *ctx) +{ + if (modstate != 0) { + char line[80]; + const uint8_t *p; + int n; + if (modstate == 1) + fprintf(ctx->fp, " (lua_CFunction)0"); + fprintf(ctx->fp, "\n};\n"); + fprintf(ctx->fp, "static const uint8_t %s%s[] = {\n", + LABEL_PREFIX_LIBINIT, modname); + line[0] = '\0'; + for (n = 0, p = obuf; p < optr; p++) { + n += sprintf(line+n, "%d,", *p); + if (n >= 75) { + fprintf(ctx->fp, "%s\n", line); + n = 0; + line[0] = '\0'; + } + } + fprintf(ctx->fp, "%s%d\n};\n#endif\n\n", line, LIBINIT_END); + } +} + +static void libdef_module(BuildCtx *ctx, char *p, int arg) +{ + UNUSED(arg); + if (ctx->mode == BUILD_libdef) { + libdef_endmodule(ctx); + optr = obuf; + *optr++ = (uint8_t)ffid; + *optr++ = 0; + modstate = 1; + fprintf(ctx->fp, "#ifdef %sMODULE_%s\n", LIBDEF_PREFIX, p); + fprintf(ctx->fp, "#undef %sMODULE_%s\n", LIBDEF_PREFIX, p); + fprintf(ctx->fp, "static const lua_CFunction %s%s[] = {\n", + LABEL_PREFIX_LIBCF, p); + } + modnamelen = strlen(p); + if (modnamelen > sizeof(modname)-1) { + fprintf(stderr, "Error: module name too long: '%s'\n", p); + exit(1); + } + strcpy(modname, p); +} + +static int find_ffofs(BuildCtx *ctx, const char *name) +{ + int i; + for (i = 0; i < ctx->nglob; i++) { + const char *gl = ctx->globnames[i]; + if (gl[0] == 'f' && gl[1] == 'f' && gl[2] == '_' && !strcmp(gl+3, name)) { + return (int)((uint8_t *)ctx->glob[i] - ctx->code); + } + } + fprintf(stderr, "Error: undefined fast function %s%s\n", + LABEL_PREFIX_FF, name); + exit(1); +} + +static void libdef_func(BuildCtx *ctx, char *p, int arg) +{ + if (ctx->mode == BUILD_libdef) { + int ofs = arg != LIBINIT_CF ? 
find_ffofs(ctx, p) : 0;
+    if (modstate == 0) {
+      fprintf(stderr, "Error: no module for function definition %s\n", p);
+      exit(1);
+    }
+    if (regfunc == REGFUNC_NOREG) {
+      if (optr+1 > obuf+sizeof(obuf)) {
+        fprintf(stderr, "Error: output buffer overflow\n");
+        exit(1);
+      }
+      *optr++ = LIBINIT_FFID;
+    } else {
+      if (arg != LIBINIT_ASM_) {
+        if (modstate != 1) fprintf(ctx->fp, ",\n");
+        modstate = 2;
+        fprintf(ctx->fp, " %s%s", arg ? LABEL_PREFIX_FFH : LABEL_PREFIX_CF, p);
+      }
+      if (regfunc != REGFUNC_NOREGUV) obuf[1]++;  /* Bump hash table size. */
+      libdef_name(regfunc == REGFUNC_NOREGUV ? "" : p, arg);
+      if (arg) {
+        *optr++ = (uint8_t)ofs;
+        *optr++ = (uint8_t)(ofs >> 8);
+      }
+    }
+  } else if (ctx->mode == BUILD_ffdef) {
+    fprintf(ctx->fp, "FFDEF(%s)\n", p);
+  } else if (ctx->mode == BUILD_recdef) {
+    if (strlen(p) > sizeof(funcname)-1) {
+      fprintf(stderr, "Error: function name too long: '%s'\n", p);
+      exit(1);
+    }
+    strcpy(funcname, p);
+  } else if (ctx->mode == BUILD_vmdef) {
+    int i;
+    for (i = 1; p[i] && modname[i-1]; i++)
+      if (p[i] == '_') p[i] = '.';  /* Convert _ to . for the Lua name. */
+    fprintf(ctx->fp, "\"%s\",\n", p);
+  }
+  ffid++;
+  regfunc = REGFUNC_OK;
+}
+
+/* Find a function name in the recorder name list, adding it if missing. */
+static uint32_t find_rec(char *name)
+{
+  char *p = (char *)obuf;
+  uint32_t n;
+  for (n = 2; *p; n++) {
+    if (strcmp(p, name) == 0)
+      return n;
+    p += strlen(p)+1;
+  }
+  if (p+strlen(name)+1 >= (char *)obuf+sizeof(obuf)) {
+    fprintf(stderr, "Error: output buffer overflow\n");
+    exit(1);
+  }
+  strcpy(p, name);
+  return n;
+}
+
+static void libdef_rec(BuildCtx *ctx, char *p, int arg)
+{
+  UNUSED(arg);
+  if (ctx->mode == BUILD_recdef) {
+    char *q;
+    uint32_t n;
+    for (; recffid+1 < ffid; recffid++)
+      fprintf(ctx->fp, ",\n0");
+    recffid = ffid;
+    if (*p == '.') p = funcname;
+    q = strchr(p, ' ');
+    if (q) *q++ = '\0';
+    n = find_rec(p);
+    if (q)
+      fprintf(ctx->fp, ",\n0x%02x00+(%s)", n, q);
+    else
+      fprintf(ctx->fp, ",\n0x%02x00", n);
+  }
+}
+
+/* Copy a value to the output buffer, converting to target byte order. */
+static void memcpy_endian(void *dst, void *src, size_t n)
+{
+  union { uint8_t b; uint32_t u; } host_endian;
+  host_endian.u = 1;
+  if (host_endian.b == LJ_ENDIAN_SELECT(1, 0)) {
+    memcpy(dst, src, n);
+  } else {
+    size_t i;
+    for (i = 0; i < n; i++)  /* Mismatch: copy with reversed byte order. */
+      ((uint8_t *)dst)[i] = ((uint8_t *)src)[n-i-1];
+  }
+}
+
+static void libdef_push(BuildCtx *ctx, char *p, int arg)
+{
+  UNUSED(arg);
+  if (ctx->mode == BUILD_libdef) {
+    int len = (int)strlen(p);
+    if (*p == '"') {
+      if (len > 1 && p[len-1] == '"') {
+        p[len-1] = '\0';
+        libdef_name(p+1, LIBINIT_STRING);
+        return;
+      }
+    } else if (*p >= '0' && *p <= '9') {
+      char *ep;
+      double d = strtod(p, &ep);
+      if (*ep == '\0') {
+        if (optr+1+sizeof(double) > obuf+sizeof(obuf)) {
+          fprintf(stderr, "Error: output buffer overflow\n");
+          exit(1);
+        }
+        *optr++ = LIBINIT_NUMBER;
+        memcpy_endian(optr, &d, sizeof(double));
+        optr += sizeof(double);
+        return;
+      }
+    } else if (!strcmp(p, "lastcl")) {
+      if (optr+1 > obuf+sizeof(obuf)) {
+        fprintf(stderr, "Error: output buffer overflow\n");
+        exit(1);
+      }
+      *optr++ = LIBINIT_LASTCL;
+      return;
+    } else if (len > 4 && !strncmp(p, "top-", 4)) {
+      if (optr+2 > obuf+sizeof(obuf)) {
+        fprintf(stderr, "Error: output buffer overflow\n");
+        exit(1);
+      }
+      *optr++ = LIBINIT_COPY;
+      *optr++ = (uint8_t)atoi(p+4);
+      return;
+    }
+    fprintf(stderr, "Error: bad value for %sPUSH(%s)\n", LIBDEF_PREFIX, p);
+    exit(1);
+  }
+}
+
+static void libdef_set(BuildCtx *ctx, char *p, int arg)
+{
+  UNUSED(arg);
+  if (ctx->mode == BUILD_libdef) {
+    if (p[0] == '!' && p[1] == '\0') p[0] = '\0';  /* Set env. 
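+** An empty name gives the runtime loop that replays this init stream a
+** way to distinguish "set environment" from a normal named field.
+** Worked example of the stream encoding (the LJLIB_* tag spellings are
+** assumed here): LJLIB_PUSH(3.14) followed by LJLIB_SET(pi) appends
+**   LIBINIT_NUMBER, <8 bytes of 3.14 in target byte order>,
+**   LIBINIT_STRING|2, 'p', 'i', LIBINIT_SET
+** to obuf, i.e. push the constant, then pop it into field "pi".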
*/ + libdef_name(p, LIBINIT_STRING); + *optr++ = LIBINIT_SET; + obuf[1]++; /* Bump hash table size. */ + } +} + +static void libdef_regfunc(BuildCtx *ctx, char *p, int arg) +{ + UNUSED(ctx); UNUSED(p); + regfunc = arg; +} + +typedef void (*LibDefFunc)(BuildCtx *ctx, char *p, int arg); + +typedef struct LibDefHandler { + const char *suffix; + const char *stop; + const LibDefFunc func; + const int arg; +} LibDefHandler; + +static const LibDefHandler libdef_handlers[] = { + { "MODULE_", " \t\r\n", libdef_module, 0 }, + { "CF(", ")", libdef_func, LIBINIT_CF }, + { "ASM(", ")", libdef_func, LIBINIT_ASM }, + { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, + { "REC(", ")", libdef_rec, 0 }, + { "PUSH(", ")", libdef_push, 0 }, + { "SET(", ")", libdef_set, 0 }, + { "NOREGUV", NULL, libdef_regfunc, REGFUNC_NOREGUV }, + { "NOREG", NULL, libdef_regfunc, REGFUNC_NOREG }, + { NULL, NULL, (LibDefFunc)0, 0 } +}; + +/* Emit C source code for library function definitions. */ +void emit_lib(BuildCtx *ctx) +{ + const char *fname; + + if (ctx->mode == BUILD_ffdef || ctx->mode == BUILD_libdef || + ctx->mode == BUILD_recdef) + fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n"); + else if (ctx->mode == BUILD_vmdef) + fprintf(ctx->fp, "ffnames = {\n[0]=\"Lua\",\n\"C\",\n"); + if (ctx->mode == BUILD_recdef) + fprintf(ctx->fp, "static const uint16_t recff_idmap[] = {\n0,\n0x0100"); + recffid = ffid = FF_C+1; + + while ((fname = *ctx->args++)) { + char buf[256]; /* We don't care about analyzing lines longer than that. */ + FILE *fp; + if (fname[0] == '-' && fname[1] == '\0') { + fp = stdin; + } else { + fp = fopen(fname, "r"); + if (!fp) { + fprintf(stderr, "Error: cannot open input file '%s': %s\n", + fname, strerror(errno)); + exit(1); + } + } + modstate = 0; + regfunc = REGFUNC_OK; + while (fgets(buf, sizeof(buf), fp) != NULL) { + char *p; + for (p = buf; (p = strstr(p, LIBDEF_PREFIX)) != NULL; ) { + const LibDefHandler *ldh; + p += sizeof(LIBDEF_PREFIX)-1; + for (ldh = libdef_handlers; ldh->suffix != NULL; ldh++) { + size_t n, len = strlen(ldh->suffix); + if (!strncmp(p, ldh->suffix, len)) { + p += len; + n = ldh->stop ? strcspn(p, ldh->stop) : 0; + if (!p[n]) break; + p[n] = '\0'; + ldh->func(ctx, p, ldh->arg); + p += n+1; + break; + } + } + if (ldh->suffix == NULL) { + buf[strlen(buf)-1] = '\0'; + fprintf(stderr, "Error: unknown library definition tag %s%s\n", + LIBDEF_PREFIX, p); + exit(1); + } + } + } + fclose(fp); + if (ctx->mode == BUILD_libdef) { + libdef_endmodule(ctx); + } + } + + if (ctx->mode == BUILD_ffdef) { + fprintf(ctx->fp, "\n#undef FFDEF\n\n"); + } else if (ctx->mode == BUILD_vmdef) { + fprintf(ctx->fp, "}\n\n"); + } else if (ctx->mode == BUILD_recdef) { + char *p = (char *)obuf; + fprintf(ctx->fp, "\n};\n\n"); + fprintf(ctx->fp, "static const RecordFunc recff_func[] = {\n" + "recff_nyi,\n" + "recff_c"); + while (*p) { + fprintf(ctx->fp, ",\nrecff_%s", p); + p += strlen(p)+1; + } + fprintf(ctx->fp, "\n};\n\n"); + } +} + diff --git a/src/buildvm_peobj.c b/src/buildvm_peobj.c new file mode 100644 index 0000000000..9acf6b7603 --- /dev/null +++ b/src/buildvm_peobj.c @@ -0,0 +1,303 @@ +/* +** LuaJIT VM builder: PE object emitter. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Only used for building on Windows, since we cannot assume the presence +** of a suitable assembler. The host and target byte order must match. +*/ + +#include "buildvm.h" +#include "lj_bc.h" + +#if LJ_TARGET_X86ORX64 + +/* Context for PE object emitter. 
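+**
+** File layout of the emitted object, in write order (mirroring the
+** running offset sofs computed in emit_peobj() below):
+**   PEheader | 3 x PEsection | .text code | .text relocs |
+**   .rdata (bytecode offsets) | .rdata$Z (ident string) |
+**   symbol table | string table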
*/ +static char *strtab; +static size_t strtabofs; + +/* -- PE object definitions ----------------------------------------------- */ + +/* PE header. */ +typedef struct PEheader { + uint16_t arch; + uint16_t nsects; + uint32_t time; + uint32_t symtabofs; + uint32_t nsyms; + uint16_t opthdrsz; + uint16_t flags; +} PEheader; + +/* PE section. */ +typedef struct PEsection { + char name[8]; + uint32_t vsize; + uint32_t vaddr; + uint32_t size; + uint32_t ofs; + uint32_t relocofs; + uint32_t lineofs; + uint16_t nreloc; + uint16_t nline; + uint32_t flags; +} PEsection; + +/* PE relocation. */ +typedef struct PEreloc { + uint32_t vaddr; + uint32_t symidx; + uint16_t type; +} PEreloc; + +/* Cannot use sizeof, because it pads up to the max. alignment. */ +#define PEOBJ_RELOC_SIZE (4+4+2) + +/* PE symbol table entry. */ +typedef struct PEsym { + union { + char name[8]; + uint32_t nameref[2]; + } n; + uint32_t value; + int16_t sect; + uint16_t type; + uint8_t scl; + uint8_t naux; +} PEsym; + +/* PE symbol table auxiliary entry for a section. */ +typedef struct PEsymaux { + uint32_t size; + uint16_t nreloc; + uint16_t nline; + uint32_t cksum; + uint16_t assoc; + uint8_t comdatsel; + uint8_t unused[3]; +} PEsymaux; + +/* Cannot use sizeof, because it pads up to the max. alignment. */ +#define PEOBJ_SYM_SIZE (8+4+2+2+1+1) + +/* PE object CPU specific defines. */ +#if LJ_TARGET_X86 +#define PEOBJ_ARCH_TARGET 0x014c +#define PEOBJ_RELOC_REL32 0x14 /* MS: REL32, GNU: DISP32. */ +#define PEOBJ_RELOC_DIR32 0x06 +#define PEOBJ_SYM_PREFIX "_" +#elif LJ_TARGET_X64 +#define PEOBJ_ARCH_TARGET 0x8664 +#define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */ +#define PEOBJ_RELOC_DIR32 0x02 +#define PEOBJ_SYM_PREFIX "" +#endif + +/* Section numbers (0-based). */ +enum { + PEOBJ_SECT_ABS = -2, + PEOBJ_SECT_UNDEF = -1, + PEOBJ_SECT_TEXT, + /* TODO: add .pdata/.xdata for x64. */ + PEOBJ_SECT_RDATA, + PEOBJ_SECT_RDATA_Z, + PEOBJ_NSECTIONS +}; + +/* Symbol types. */ +#define PEOBJ_TYPE_NULL 0 +#define PEOBJ_TYPE_FUNC 0x20 + +/* Symbol storage class. */ +#define PEOBJ_SCL_EXTERN 2 +#define PEOBJ_SCL_STATIC 3 + +/* -- PE object emitter --------------------------------------------------- */ + +/* Emit PE object symbol. */ +static void emit_peobj_sym(BuildCtx *ctx, const char *name, uint32_t value, + int sect, int type, int scl) +{ + PEsym sym; + size_t len = strlen(name); + if (!strtab) { /* Pass 1: only calculate string table length. */ + if (len > 8) strtabofs += len+1; + return; + } + if (len <= 8) { + memcpy(sym.n.name, name, len); + memset(sym.n.name+len, 0, 8-len); + } else { + sym.n.nameref[0] = 0; + sym.n.nameref[1] = strtabofs; + memcpy(strtab + strtabofs, name, len); + strtab[strtabofs+len] = 0; + strtabofs += len+1; + } + sym.value = value; + sym.sect = (int16_t)(sect+1); /* 1-based section number. */ + sym.type = (uint16_t)type; + sym.scl = (uint8_t)scl; + sym.naux = 0; + owrite(ctx, &sym, PEOBJ_SYM_SIZE); +} + +/* Emit PE object section symbol. */ +static void emit_peobj_sym_sect(BuildCtx *ctx, PEsection *pesect, int sect) +{ + PEsym sym; + PEsymaux aux; + if (!strtab) return; /* Pass 1: no output. */ + memcpy(sym.n.name, pesect[sect].name, 8); + sym.value = 0; + sym.sect = (int16_t)(sect+1); /* 1-based section number. 
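+** Note: a COFF section symbol carries exactly one auxiliary record
+** holding the section size and relocation count, and auxiliary entries
+** use the same 18-byte layout as symbol records. That is why naux is 1
+** and the aux record below is also written with PEOBJ_SYM_SIZE.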
*/ + sym.type = PEOBJ_TYPE_NULL; + sym.scl = PEOBJ_SCL_STATIC; + sym.naux = 1; + owrite(ctx, &sym, PEOBJ_SYM_SIZE); + memset(&aux, 0, sizeof(PEsymaux)); + aux.size = pesect[sect].size; + aux.nreloc = pesect[sect].nreloc; + owrite(ctx, &aux, PEOBJ_SYM_SIZE); +} + +#define emit_peobj_sym_func(ctx, name, ofs) \ + emit_peobj_sym(ctx, name, (uint32_t)(ofs), \ + PEOBJ_SECT_TEXT, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN) +#define emit_peobj_sym_rdata(ctx, name, ofs) \ + emit_peobj_sym(ctx, name, (uint32_t)(ofs), \ + PEOBJ_SECT_RDATA, PEOBJ_TYPE_NULL, PEOBJ_SCL_EXTERN) + +/* Emit Windows PE object file. */ +void emit_peobj(BuildCtx *ctx) +{ + PEheader pehdr; + PEsection pesect[PEOBJ_NSECTIONS]; + int nzsym, relocsyms; + uint32_t sofs; + int i; + union { uint8_t b; uint32_t u; } host_endian; + + host_endian.u = 1; + if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) { + fprintf(stderr, "Error: different byte order for host and target\n"); + exit(1); + } + + sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection); + + /* Fill in PE sections. */ + memset(&pesect, 0, PEOBJ_NSECTIONS*sizeof(PEsection)); + memcpy(pesect[PEOBJ_SECT_TEXT].name, ".text", sizeof(".text")-1); + pesect[PEOBJ_SECT_TEXT].ofs = sofs; + sofs += (pesect[PEOBJ_SECT_TEXT].size = (uint32_t)ctx->codesz); + pesect[PEOBJ_SECT_TEXT].relocofs = sofs; + sofs += (pesect[PEOBJ_SECT_TEXT].nreloc = (uint16_t)ctx->nreloc) * PEOBJ_RELOC_SIZE; + /* Flags: 60 = read+execute, 50 = align16, 20 = code. */ + pesect[PEOBJ_SECT_TEXT].flags = 0x60500020; + + memcpy(pesect[PEOBJ_SECT_RDATA].name, ".rdata", sizeof(".rdata")-1); + pesect[PEOBJ_SECT_RDATA].ofs = sofs; + sofs += (pesect[PEOBJ_SECT_RDATA].size = ctx->npc*sizeof(uint16_t)); + /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ + pesect[PEOBJ_SECT_RDATA].flags = 0x40300040; + + memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1); + pesect[PEOBJ_SECT_RDATA_Z].ofs = sofs; + sofs += (pesect[PEOBJ_SECT_RDATA_Z].size = (uint32_t)strlen(ctx->dasm_ident)+1); + /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ + pesect[PEOBJ_SECT_RDATA_Z].flags = 0x40300040; + + /* Fill in PE header. */ + pehdr.arch = PEOBJ_ARCH_TARGET; + pehdr.nsects = PEOBJ_NSECTIONS; + pehdr.time = 0; /* Timestamp is optional. */ + pehdr.symtabofs = sofs; + pehdr.opthdrsz = 0; + pehdr.flags = 0; + + /* Compute the size of the symbol table: + ** @feat.00 + nsections*2 + ** + asm_start + (nsyms-nzsym) + op_ofs + ** + relocsyms + */ + /* Skip _Z syms. */ + for (nzsym = 0; ctx->sym_ofs[ctx->perm[nzsym]] < 0; nzsym++) ; + for (relocsyms = 0; ctx->extnames[relocsyms]; relocsyms++) ; + pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+(ctx->nsym-nzsym)+1 + relocsyms; + + /* Write PE object header and all sections. */ + owrite(ctx, &pehdr, sizeof(PEheader)); + owrite(ctx, &pesect, sizeof(PEsection)*PEOBJ_NSECTIONS); + + /* Write .text section. */ + owrite(ctx, ctx->code, ctx->codesz); + for (i = 0; i < ctx->nreloc; i++) { + PEreloc reloc; + reloc.vaddr = (uint32_t)ctx->reloc[i].ofs; + reloc.symidx = 1+2+ctx->reloc[i].sym; /* Reloc syms are after .text sym. */ + reloc.type = ctx->reloc[i].type ? PEOBJ_RELOC_REL32 : PEOBJ_RELOC_DIR32; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + } + + /* Write .rdata section. */ + for (i = 0; i < ctx->npc; i++) { + uint16_t pcofs = (uint16_t)ctx->sym_ofs[i]; + owrite(ctx, &pcofs, 2); + } + + /* Write .rdata$Z section. */ + owrite(ctx, ctx->dasm_ident, strlen(ctx->dasm_ident)+1); + + /* Write symbol table. */ + strtab = NULL; /* 1st pass: collect string sizes. 
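+** Note: a COFF string table reserves its first four bytes for its own
+** total length. Hence strtabofs starts at 4 on every pass, and the
+** measured total is stored at offset 0 before the 2nd pass refills
+** the table.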
*/ + for (;;) { + char name[80]; + + strtabofs = 4; + /* Mark as SafeSEH compliant. */ + emit_peobj_sym(ctx, "@feat.00", 1, + PEOBJ_SECT_ABS, PEOBJ_TYPE_NULL, PEOBJ_SCL_STATIC); + + emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT); + for (i = 0; ctx->extnames[i]; i++) { + sprintf(name, PEOBJ_SYM_PREFIX "%s", ctx->extnames[i]); + emit_peobj_sym(ctx, name, 0, + PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); + } + emit_peobj_sym_func(ctx, PEOBJ_SYM_PREFIX LABEL_ASM_BEGIN, 0); + for (i = nzsym; i < ctx->nsym; i++) { + int pi = ctx->perm[i]; + if (pi >= ctx->npc) { + sprintf(name, PEOBJ_SYM_PREFIX LABEL_PREFIX "%s", + ctx->globnames[pi-ctx->npc]); + emit_peobj_sym_func(ctx, name, ctx->sym_ofs[pi]); +#if LJ_HASJIT + } else { +#else + } else if (!(pi == BC_JFORI || pi == BC_JFORL || pi == BC_JITERL || + pi == BC_JLOOP || pi == BC_IFORL || pi == BC_IITERL || + pi == BC_ILOOP)) { +#endif + sprintf(name, PEOBJ_SYM_PREFIX LABEL_PREFIX_BC "%s", + bc_names[pi]); + emit_peobj_sym_func(ctx, name, ctx->sym_ofs[pi]); + } + } + + emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_RDATA); + emit_peobj_sym_rdata(ctx, PEOBJ_SYM_PREFIX LABEL_OP_OFS, 0); + + emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_RDATA_Z); + + if (strtab) + break; + /* 2nd pass: alloc strtab, write syms and copy strings. */ + strtab = (char *)malloc(strtabofs); + *(uint32_t *)strtab = strtabofs; + } + + /* Write string table. */ + owrite(ctx, strtab, strtabofs); +} + +#endif diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc new file mode 100644 index 0000000000..add00c9d26 --- /dev/null +++ b/src/buildvm_x86.dasc @@ -0,0 +1,3592 @@ +|// Low-level VM code for x86 CPUs. +|// Bytecode interpreter, fast functions and helper functions. +|// Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +| +|.arch x86 +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +| +|//----------------------------------------------------------------------- +| +|// Fixed register assignments for the interpreter. +|// This is very fragile and has many dependencies. Caveat emptor. +|.define BASE, edx // Not C callee-save, refetched anyway. +|.define KBASE, edi // Must be C callee-save. +|.define PC, esi // Must be C callee-save. +|.define DISPATCH, ebx // Must be C callee-save. +| +|.define RA, ecx +|.define RAL, cl +|.define RB, ebp // Must be ebp (C callee-save). +|.define RC, eax // Must be eax (fcomparepp and others). +|.define RCW, ax +|.define RCH, ah +|.define RCL, al +|.define OP, RB +|.define RD, RC +|.define RDL, RCL +| +|// Type definitions. Some of these are only used for documentation. +|.type L, lua_State +|.type GL, global_State +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS, int +|.type TRACE, Trace +|.type EXITINFO, ExitInfo +| +|// Stack layout while in interpreter. Must match with lj_frame.h. +|.macro saveregs +| push ebp; push edi; push esi; push ebx +|.endmacro +|.macro restoreregs +| pop ebx; pop esi; pop edi; pop ebp +|.endmacro +|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). +| +|.define INARG_4, aword [esp+aword*15] +|.define INARG_3, aword [esp+aword*14] +|.define INARG_2, aword [esp+aword*13] +|.define INARG_1, aword [esp+aword*12] +|//----- 16 byte aligned, ^^^ arguments from C caller +|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter. 
+|.define SAVE_R4, aword [esp+aword*10] +|.define SAVE_R3, aword [esp+aword*9] +|.define SAVE_R2, aword [esp+aword*8] +|//----- 16 byte aligned +|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves. +|.define SAVE_PC, aword [esp+aword*6] +|.define ARG6, aword [esp+aword*5] +|.define ARG5, aword [esp+aword*4] +|//----- 16 byte aligned +|.define ARG4, aword [esp+aword*3] +|.define ARG3, aword [esp+aword*2] +|.define ARG2, aword [esp+aword*1] +|.define ARG1, aword [esp] //<-- esp while in interpreter. +|//----- 16 byte aligned, ^^^ arguments for C callee +| +|// FPARGx overlaps ARGx and ARG(x+1) on x86. +|.define FPARG5, qword [esp+qword*2] +|.define FPARG3, qword [esp+qword*1] +|.define FPARG1, qword [esp] +|// NRESULTS overlaps ARG6 (and FPARG5) +|.define NRESULTS, ARG6 +| +|// Arguments for vm_call and vm_pcall. +|.define INARG_P_ERRF, INARG_4 // vm_pcall only. +|.define INARG_NRES, INARG_3 +|.define INARG_BASE, INARG_2 +|.define SAVE_L, INARG_1 +| +|.define SAVE_CFRAME, INARG_BASE // Overwrites INARG_BASE! +| +|// Arguments for vm_cpcall. +|.define INARG_CP_UD, INARG_4 +|.define INARG_CP_FUNC, INARG_3 +|.define INARG_CP_CALL, INARG_2 +| +|//----------------------------------------------------------------------- +| +|// Instruction headers. +|.macro ins_A; .endmacro +|.macro ins_AD; .endmacro +|.macro ins_AJ; .endmacro +|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro +|.macro ins_AB_; movzx RB, RCH; .endmacro +|.macro ins_A_C; movzx RC, RCL; .endmacro +|.macro ins_AND; not RD; .endmacro +| +|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). +|.macro ins_NEXT +| mov RC, [PC] +| movzx RA, RCH +| movzx OP, RCL +| add PC, 4 +| shr RC, 16 +| jmp aword [DISPATCH+OP*4] +|.endmacro +| +|// Instruction footer. +|.if 1 +| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. +| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| // Affects only certain kinds of benchmarks (and only with -j off). +| // Around 10%-30% slower on Core2, a lot more slower on P4. +| .macro ins_next +| jmp ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|//----------------------------------------------------------------------- +| +|// Macros to test operand types. +|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro +|.macro checknum, reg, target; checktp reg, LJ_TISNUM; ja target; .endmacro +|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro +|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro +| +|// These operands must be used with movzx. +|.define PC_OP, byte [PC-4] +|.define PC_RA, byte [PC-3] +|.define PC_RB, byte [PC-1] +|.define PC_RC, byte [PC-2] +|.define PC_RD, word [PC-2] +| +|.macro branchPC, reg +| lea PC, [PC+reg*4-BCBIAS_J*4] +|.endmacro +| +|// Assumes DISPATCH is relative to GL. +#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +| +|// Decrement hashed hotcount and trigger trace recorder if zero. +|.macro hotloop, reg +| mov reg, PC +| shr reg, 1 +| and reg, HOTCOUNT_PCMASK +| sub word [DISPATCH+reg+GG_DISP2HOT], 1 +| jz ->vm_hotloop +|.endmacro +| +|.macro hotcall, reg +| mov reg, PC +| shr reg, 1 +| and reg, HOTCOUNT_PCMASK +| sub word [DISPATCH+reg+GG_DISP2HOT], 1 +| jz ->vm_hotcall +|.endmacro +| +|// Set current VM state. 
+|.macro set_vmstate, st +| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st +|.endmacro +| +|// Annoying x87 stuff: support for two compare variants. +|.macro fcomparepp // Compare and pop st0 >< st1. +||if (cmov) { +| fucomip st1 +| fpop +||} else { +| fucompp +| fnstsw ax // eax modified! +| sahf +||} +|.endmacro +| +|.macro fdup; fld st0; .endmacro +|.macro fpop1; fstp st1; .endmacro +| +|// Move table write barrier back. Overwrites reg. +|.macro barrierback, tab, reg +| and byte tab->marked, cast_byte(~LJ_GC_BLACK) // black2gray(tab) +| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] +| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab +| mov tab->gclist, reg +|.endmacro +| +|//----------------------------------------------------------------------- + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx, int cmov) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Call and return handling ------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Reminder: A call gate may be called with func/args above L->maxstack, + |// i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, + |// too. This means all call gates (L*, C and fast functions) must check + |// for stack overflow _before_ adding more slots! + | + |//-- Call gates --------------------------------------------------------- + | + |->gate_lf: // Call gate for fixarg Lua functions. + | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return + | // DISPATCH initialized + | mov BASE, RA + | mov PROTO:RB, LFUNC:RB->pt + | mov [BASE-4], PC // Store caller PC. + | movzx RA, byte PROTO:RB->framesize + | mov PC, PROTO:RB->bc + | mov KBASE, PROTO:RB->k + | mov L:RB, SAVE_L + | lea RA, [BASE+RA*8] // Top of frame. + | lea RC, [BASE+NARGS:RC*8-4] // Points to tag of 1st free slot. + | cmp RA, L:RB->maxstack + | ja ->gate_lf_growstack + |9: // Entry point from vararg setup below. + | mov RB, LJ_TNIL + |1: // Clear free slots until top of frame. + | mov [RC], RB + | mov [RC+8], RB + | add RC, 16 + | cmp RC, RA + | jb <1 +#if LJ_HASJIT + | // NYI: Disabled, until the tracer supports recursion/upcalls/leaves. + | // hotcall RB +#endif + | ins_next + | + |->gate_lv: // Call gate for vararg Lua functions. + | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return + | // DISPATCH initialized + | mov [RA-4], PC // Store caller PC. + | lea PC, [NARGS:RC*8+FRAME_VARG] + | lea BASE, [RA+PC-FRAME_VARG] + | mov [BASE-8], LFUNC:RB // Store copy of LFUNC. + | mov PROTO:RB, LFUNC:RB->pt + | mov [BASE-4], PC // Store delta + FRAME_VARG. + | movzx PC, byte PROTO:RB->framesize + | lea KBASE, [BASE+PC*8] + | mov L:PC, SAVE_L + | lea RC, [BASE+4] + | cmp KBASE, L:PC->maxstack + | ja ->gate_lv_growstack // Need to grow stack. + | movzx PC, byte PROTO:RB->numparams + | test PC, PC + | jz >2 + |1: // Copy fixarg slots up. + | add RA, 8 + | cmp RA, BASE + | jnb >2 + | mov KBASE, [RA-8] + | mov [RC-4], KBASE + | mov KBASE, [RA-4] + | mov [RC], KBASE + | add RC, 8 + | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC). + | sub PC, 1 + | jnz <1 + |2: + | movzx RA, byte PROTO:RB->framesize + | mov PC, PROTO:RB->bc + | mov KBASE, PROTO:RB->k + | lea RA, [BASE+RA*8] + | jmp <9 + | + |->gate_c: // Call gate for C functions. 
+ | // RA = new base, RB = CFUNC, RC = nargs+1, (BASE = old base), PC = return + | mov [RA-4], PC + | mov KBASE, CFUNC:RB->f + | mov L:RB, SAVE_L + | lea RC, [RA+NARGS:RC*8-8] + | mov L:RB->base, RA + | lea RA, [RC+8*LUA_MINSTACK] + | mov ARG1, L:RB + | mov L:RB->top, RC + | cmp RA, L:RB->maxstack + | ja ->gate_c_growstack // Need to grow stack. + | set_vmstate C + | call KBASE // (lua_State *L) + | set_vmstate INTERP + | // nresults returned in eax (RD). + | mov BASE, L:RB->base + | lea RA, [BASE+RD*8] + | neg RA + | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 + |->vm_returnc: + | add RD, 1 // RD = nresults+1 + | mov NRESULTS, RD + | test PC, FRAME_TYPE + | jz ->BC_RET_Z // Handle regular return to Lua. + | + |//-- Return handling (non-inline) --------------------------------------- + | + |->vm_return: + | // BASE = base, RA = resultofs, RD = nresults+1 (= NRESULTS), PC = return + | test PC, FRAME_C + | jz ->vm_returnp + | + | // Return to C. + | set_vmstate C + | and PC, -8 + | sub PC, BASE + | neg PC // Previous base = BASE - delta. + | + | sub RD, 1 + | jz >2 + |1: + | mov RB, [BASE+RA] // Move results down. + | mov [BASE-8], RB + | mov RB, [BASE+RA+4] + | mov [BASE-4], RB + | add BASE, 8 + | sub RD, 1 + | jnz <1 + |2: + | mov L:RB, SAVE_L + | mov L:RB->base, PC + |3: + | mov RD, NRESULTS + | mov RA, INARG_NRES // RA = wanted nresults+1 + |4: + | cmp RA, RD + | jne >6 // More/less results wanted? + |5: + | sub BASE, 8 + | mov L:RB->top, BASE + | + |->vm_leave_cp: + | mov RA, SAVE_CFRAME // Restore previous C frame. + | mov L:RB->cframe, RA + | xor eax, eax // Ok return status for vm_pcall. + | + |->vm_leave_unw: + | add esp, CFRAME_SPACE + | restoreregs + | ret + | + |6: + | jb >7 // Less results wanted? + | // More results wanted. Check stack size and fill up results with nil. + | cmp BASE, L:RB->maxstack + | ja >8 + | mov dword [BASE-4], LJ_TNIL + | add BASE, 8 + | add RD, 1 + | jmp <4 + | + |7: // Less results wanted. + | test RA, RA + | jz <5 // But check for LUA_MULTRET+1. + | sub RA, RD // Negative result! + | lea BASE, [BASE+RA*8] // Correct top. + | jmp <5 + | + |8: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | mov L:RB->top, BASE // Save current top held in BASE (yes). + | mov NRESULTS, RD // Need to fill only remainder with nil. + | mov ARG2, RA // Grow by wanted nresults+1. + | mov ARG1, L:RB + | call extern lj_state_growstack // (lua_State *L, int n) + | mov BASE, L:RB->top // Need the (realloced) L->top in BASE. + | jmp <3 + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | mov ecx, [esp+4] + | mov eax, [esp+8] // Error return status for vm_pcall. + | and ecx, CFRAME_RAWMASK + | mov esp, ecx + | mov L:RB, SAVE_L + | mov GL:RB, L:RB->glref + | mov dword GL:RB->vmstate, ~LJ_VMST_C + | jmp ->vm_leave_unw + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | mov ecx, [esp+4] + | and ecx, CFRAME_RAWMASK + | mov esp, ecx + | mov L:RB, SAVE_L + | mov RA, -8 // Results start at BASE+RA = BASE-8. + | mov RD, 1+1 // Really 1+2 results, incr. later. + | mov BASE, L:RB->base + | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | add DISPATCH, GG_G2DISP + | mov PC, [BASE-4] // Fetch PC of previous frame. + | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message. 
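+  | // Unwound pcall frames deliver the standard pcall result pair:
+  | // false plus the error object. RD was preset to 1+1 above and
+  | // vm_returnc adds the final increment for the extra slot.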
+ | set_vmstate INTERP + | jmp ->vm_returnc // Increments RD/NRESULTS and returns. + | + |->vm_returnp: + | test PC, FRAME_P + | jz ->cont_dispatch + | + | // Return from pcall or xpcall fast func. + | and PC, -8 + | sub BASE, PC // Restore caller base. + | lea RA, [RA+PC-8] // Rebase RA and prepend one result. + | mov PC, [BASE-4] // Fetch PC of previous frame. + | // Prepending may overwrite the pcall frame, so do it at the end. + | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results. + | jmp ->vm_returnc // Increments RD/NRESULTS and returns. + | + |//-- Grow stack on-demand ----------------------------------------------- + | + |->gate_c_growstack: // Grow stack for C function. + | mov ARG2, LUA_MINSTACK + | jmp >1 + | + |->gate_lv_growstack: // Grow stack for vararg Lua function. + | sub RC, 8 + | mov BASE, RA + | mov RA, KBASE + | mov PC, PROTO:RB->bc + | mov L:RB, SAVE_L + | + |->gate_lf_growstack: // Grow stack for fixarg Lua function. + | // BASE = new base, RA = requested top, RC = top (offset +4 bytes) + | // RB = L, PC = first PC of called function (or anything if C function) + | sub RC, 4 // Adjust top. + | sub RA, BASE + | shr RA, 3 // n = pt->framesize - L->top + | add PC, 4 // Must point after first instruction. + | mov L:RB->base, BASE + | mov L:RB->top, RC + | mov SAVE_PC, PC + | mov ARG2, RA + | mov ARG1, L:RB + |1: + | // L:RB = L, L->base = new base, L->top = top + | // SAVE_PC = initial PC+1 (undefined for C functions) + | call extern lj_state_growstack // (lua_State *L, int n) + | mov RA, L:RB->base + | mov RC, L:RB->top + | mov LFUNC:RB, [RA-8] + | mov PC, [RA-4] + | sub RC, RA + | shr RC, 3 + | add NARGS:RC, 1 + | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = invalid), PC restored. + | jmp aword LFUNC:RB->gate // Just retry call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, StkId base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | mov PC, FRAME_C + | sub esp, CFRAME_SPACE + | xor RD, RD + | mov L:RB, SAVE_L + | lea KBASE, [esp+CFRAME_RESUME] + | mov RA, INARG_BASE + | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | add DISPATCH, GG_G2DISP + | mov L:RB->cframe, KBASE + | mov SAVE_CFRAME, RD // Caveat: overlaps INARG_BASE! + | mov SAVE_PC, RD // Any value outside of bytecode is ok. + | cmp byte L:RB->status, RDL + | je >3 // Initial resume (like a call). + | + | // Resume after yield (like a return). + | set_vmstate INTERP + | mov byte L:RB->status, RDL + | mov BASE, L:RB->base + | mov RD, L:RB->top + | sub RD, RA + | shr RD, 3 + | add RD, 1 // RD = nresults+1 + | sub RA, BASE // RA = resultofs + | mov PC, [BASE-4] + | mov NRESULTS, RD + | test PC, FRAME_TYPE + | jz ->BC_RET_Z + | jmp ->vm_return + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, StkId base, int nres1, ptrdiff_t ef) + | saveregs + | mov PC, FRAME_CP + | jmp >1 + | + |->vm_call: // Setup C frame and enter VM. + | // (lua_State *L, StkId base, int nres1) + | saveregs + | mov PC, FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | sub esp, CFRAME_SPACE + | mov L:RB, SAVE_L + | mov RA, INARG_BASE + | + |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). + | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. 
+ | mov SAVE_CFRAME, KBASE // Caveat: overlaps INARG_BASE! + | mov SAVE_PC, esp // Any value outside of bytecode is ok. + | mov L:RB->cframe, esp + | + | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | add DISPATCH, GG_G2DISP + | + |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype). + | set_vmstate INTERP + | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). + | add PC, RA + | sub PC, BASE // PC = frame delta + frame type + | + | mov RC, L:RB->top + | sub RC, RA + | shr NARGS:RC, 3 + | add NARGS:RC, 1 // RC = nargs+1 + | + | mov LFUNC:RB, [RA-8] + | cmp dword [RA-4], LJ_TFUNC + | jne ->vmeta_call // Ensure KBASE defined and != BASE. + | jmp aword LFUNC:RB->gate + | // RA = new base, RB = LFUNC/CFUNC, RC = nargs+1. + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CPFunction cp, lua_CFunction func, void *ud) + | saveregs + | sub esp, CFRAME_SPACE + | + | mov L:RB, SAVE_L + | mov RC, INARG_CP_UD + | mov RA, INARG_CP_FUNC + | mov BASE, INARG_CP_CALL + | mov SAVE_PC, esp // Any value outside of bytecode is ok. + | + | // Caveat: INARG_P_* and INARG_CP_* overlap! + | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). + | sub KBASE, L:RB->top + | mov INARG_P_ERRF, 0 // No error function. + | mov INARG_NRES, KBASE // Neg. delta means cframe w/o frame. + | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). + | + | mov ARG3, RC + | mov ARG2, RA + | mov ARG1, L:RB + | + | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. + | mov SAVE_CFRAME, KBASE // Caveat: overlaps INARG_CP_CALL! + | mov L:RB->cframe, esp + | + | call BASE // (lua_State *L, lua_CFunction func, void *ud) + | // StkId (new base) or NULL returned in eax (RC). + | test RC, RC + | jz ->vm_leave_cp // No base? Just remove C frame. + | mov RA, RC + | mov PC, FRAME_CP + | jmp <2 // Else continue with the call. + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in NRESULTS) + | add RA, BASE + | and PC, -8 + | mov RB, BASE + | sub BASE, PC // Restore caller BASE. + | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg. + | mov RC, RA // ... in [RC] + | mov PC, [RB-12] // Restore PC from [cont|PC]. + | mov LFUNC:KBASE, [BASE-8] + | mov PROTO:KBASE, LFUNC:KBASE->pt + | mov KBASE, PROTO:KBASE->k + | // BASE = base, RC = result, RB = meta base + | jmp dword [RB-16] // Jump to continuation. + | + |->cont_cat: // BASE = base, RC = result, RB = mbase + | movzx RA, PC_RB + | sub RB, 16 + | lea RA, [BASE+RA*8] + | sub RA, RB + | je ->cont_ra + | neg RA + | shr RA, 3 + | mov ARG3, RA + | mov RA, [RC+4] + | mov RC, [RC] + | mov [RB+4], RA + | mov [RB], RC + | mov ARG2, RB + | jmp ->BC_CAT_Z + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets: + | mov ARG5, RC // RC = GCstr * + | mov ARG6, LJ_TSTR + | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. + | cmp PC_OP, BC_GGET + | jne >1 + | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. + | mov [RA], TAB:RB // RB = GCtab * + | mov dword [RA+4], LJ_TTAB + | mov RB, RA + | jmp >2 + | + |->vmeta_tgetb: + | movzx RC, PC_RC // Ugly, cannot fild from a byte. 
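+  | // (x87 fild only takes 16/32/64 bit memory operands, so the byte
+  | // operand is zero-extended into the ARG4 slot first; the number is
+  | // then stored to FPARG5, i.e. the temp TValue in ARG5/ARG6.)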
+ | mov ARG4, RC + | fild ARG4 + | fstp FPARG5 + | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. + | jmp >1 + | + |->vmeta_tgetv: + | movzx RC, PC_RC // Reload TValue *k from RC. + | lea RC, [BASE+RC*8] + |1: + | movzx RB, PC_RB // Reload TValue *t from RB. + | lea RB, [BASE+RB*8] + |2: + | mov ARG2, RB + | mov L:RB, SAVE_L + | mov ARG3, RC + | mov ARG1, L:RB + | mov SAVE_PC, PC + | mov L:RB->base, BASE + | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) + | // TValue * (finished) or NULL (metamethod) returned in eax (RC). + | mov BASE, L:RB->base + | test RC, RC + | jz >3 + |->cont_ra: // BASE = base, RC = result + | movzx RA, PC_RA + | mov RB, [RC+4] + | mov RC, [RC] + | mov [BASE+RA*8+4], RB + | mov [BASE+RA*8], RC + | ins_next + | + |3: // Call __index metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k + | mov RA, L:RB->top + | mov [RA-12], PC // [cont|PC] + | lea PC, [RA+FRAME_CONT] + | sub PC, BASE + | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. + | mov NARGS:RC, 3 // 2+1 args for func(t, k). + | jmp aword LFUNC:RB->gate + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets: + | mov ARG5, RC // RC = GCstr * + | mov ARG6, LJ_TSTR + | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. + | cmp PC_OP, BC_GSET + | jne >1 + | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. + | mov [RA], TAB:RB // RB = GCtab * + | mov dword [RA+4], LJ_TTAB + | mov RB, RA + | jmp >2 + | + |->vmeta_tsetb: + | movzx RC, PC_RC // Ugly, cannot fild from a byte. + | mov ARG4, RC + | fild ARG4 + | fstp FPARG5 + | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6. + | jmp >1 + | + |->vmeta_tsetv: + | movzx RC, PC_RC // Reload TValue *k from RC. + | lea RC, [BASE+RC*8] + |1: + | movzx RB, PC_RB // Reload TValue *t from RB. + | lea RB, [BASE+RB*8] + |2: + | mov ARG2, RB + | mov L:RB, SAVE_L + | mov ARG3, RC + | mov ARG1, L:RB + | mov SAVE_PC, PC + | mov L:RB->base, BASE + | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + | // TValue * (finished) or NULL (metamethod) returned in eax (RC). + | mov BASE, L:RB->base + | test RC, RC + | jz >3 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | movzx RA, PC_RA + | mov RB, [BASE+RA*8+4] + | mov RA, [BASE+RA*8] + | mov [RC+4], RB + | mov [RC], RA + |->cont_nop: // BASE = base, (RC = result) + | ins_next + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | mov RA, L:RB->top + | mov [RA-12], PC // [cont|PC] + | movzx RC, PC_RA + | mov RB, [BASE+RC*8+4] // Copy value to third argument. + | mov RC, [BASE+RC*8] + | mov [RA+20], RB + | mov [RA+16], RC + | lea PC, [RA+FRAME_CONT] + | sub PC, BASE + | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. + | mov NARGS:RC, 4 // 3+1 args for func(t, k, v). + | jmp aword LFUNC:RB->gate + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | movzx RB, PC_OP + | lea RD, [BASE+RD*8] + | lea RA, [BASE+RA*8] + | mov ARG4, RB + | mov L:RB, SAVE_L + | mov ARG3, RD + | mov ARG2, RA + | mov ARG1, L:RB + | mov SAVE_PC, PC + | mov L:RB->base, BASE + | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) + | // 0/1 or TValue * (metamethod) returned in eax (RC). 
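+  | // The return value is overloaded: 0 or 1 is an immediate
+  | // comparison result, while any value above 1 must be a TValue
+  | // pointer to the pushed metamethod frame. The cmp/ja below relies
+  | // on real pointers never being 0 or 1.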
+ |3: + | mov BASE, L:RB->base + | cmp RC, 1 + | ja ->vmeta_binop + |4: + | lea PC, [PC+4] + | jb >6 + |5: + | movzx RD, PC_RD + | branchPC RD + |6: + | ins_next + | + |->cont_condt: // BASE = base, RC = result + | add PC, 4 + | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true. + | jb <5 + | jmp <6 + | + |->cont_condf: // BASE = base, RC = result + | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false. + | jmp <4 + | + |->vmeta_equal: + | mov ARG4, RB + | mov L:RB, SAVE_L + | sub PC, 4 + | mov ARG3, RD + | mov ARG2, RA + | mov ARG1, L:RB + | mov SAVE_PC, PC + | mov L:RB->base, BASE + | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) + | // 0/1 or TValue * (metamethod) returned in eax (RC). + | jmp <3 + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_arith_vn: + | lea RC, [KBASE+RC*8] + | jmp >1 + | + |->vmeta_arith_nv: + | lea RC, [KBASE+RC*8] + | lea RB, [BASE+RB*8] + | xchg RB, RC + | jmp >2 + | + |->vmeta_unm: + | lea RC, [BASE+RD*8] + | mov RB, RC + | jmp >2 + | + |->vmeta_arith_vv: + | lea RC, [BASE+RC*8] + |1: + | lea RB, [BASE+RB*8] + |2: + | lea RA, [BASE+RA*8] + | mov ARG3, RB + | mov L:RB, SAVE_L + | mov ARG4, RC + | movzx RC, PC_OP + | mov ARG2, RA + | mov ARG5, RC + | mov ARG1, L:RB + | mov SAVE_PC, PC + | mov L:RB->base, BASE + | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + | // NULL (finished) or TValue * (metamethod) returned in eax (RC). + | mov BASE, L:RB->base + | test RC, RC + | jz ->cont_nop + | + | // Call metamethod for binary op. + |->vmeta_binop: + | // BASE = base, RC = new base, stack = cont/func/o1/o2 + | mov RA, RC + | sub RC, BASE + | mov [RA-12], PC // [cont|PC] + | lea PC, [RC+FRAME_CONT] + | mov LFUNC:RB, [RA-8] + | mov NARGS:RC, 3 // 2+1 args for func(o1, o2). + | cmp dword [RA-4], LJ_TFUNC + | jne ->vmeta_call + | jmp aword LFUNC:RB->gate + | + |->vmeta_len: + | lea RD, [BASE+RD*8] + | mov L:RB, SAVE_L + | mov ARG2, RD + | mov ARG1, L:RB + | mov SAVE_PC, PC + | mov L:RB->base, BASE + | call extern lj_meta_len // (lua_State *L, TValue *o) + | // TValue * (metamethod) returned in eax (RC). + | mov BASE, L:RB->base + | jmp ->vmeta_binop // Binop call for compatibility. + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call: // Resolve and call __call metamethod. + | // RA = new base, RC = nargs+1, BASE = old base, PC = return + | mov ARG4, RA // Save RA, RC for us. + | mov ARG5, NARGS:RC + | sub RA, 8 + | lea RC, [RA+NARGS:RC*8] + | mov L:RB, SAVE_L + | mov ARG2, RA + | mov ARG3, RC + | mov ARG1, L:RB + | mov SAVE_PC, PC + | mov L:RB->base, BASE // This is the callers base! + | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + | mov BASE, L:RB->base + | mov RA, ARG4 + | mov NARGS:RC, ARG5 + | mov LFUNC:RB, [RA-8] + | add NARGS:RC, 1 + | // This is fragile. L->base must not move, KBASE must always be defined. + | cmp KBASE, BASE // Continue with CALLT if flag set. + | je ->BC_CALLT_Z + | jmp aword LFUNC:RB->gate // Otherwise call resolved metamethod. + | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | mov L:RB, SAVE_L + | mov ARG2, RA + | mov ARG1, L:RB + | mov SAVE_PC, PC + | mov L:RB->base, BASE + | call extern lj_meta_for // (lua_State *L, StkId base) + | mov BASE, L:RB->base + | mov RC, [PC-4] + | movzx RA, RCH + | movzx OP, RCL + | shr RC, 16 + | jmp aword [DISPATCH+OP*4+GG_DISP_STATIC*4] // Retry FORI or JFORI. 
+ | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | cmp NARGS:RC, 1+1; jb ->fff_fallback + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | cmp NARGS:RC, 2+1; jb ->fff_fallback + |.endmacro + | + |.macro .ffunc_n, name + | .ffunc_1 name + | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback + | fld qword [RA] + |.endmacro + | + |.macro .ffunc_n, name, op + | .ffunc_1 name + | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback + | op + | fld qword [RA] + |.endmacro + | + |.macro .ffunc_nn, name + | .ffunc_2 name + | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback + | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback + | fld qword [RA] + | fld qword [RA+8] + |.endmacro + | + |.macro .ffunc_nnr, name + | .ffunc_2 name + | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback + | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback + | fld qword [RA+8] + | fld qword [RA] + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses label 1. + |.macro ffgccheck + | mov RB, [DISPATCH+DISPATCH_GL(gc.total)] + | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)] + | jb >1 + | call ->fff_gcstep + |1: + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + | + |.ffunc_1 assert + | mov RB, [RA+4] + | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback + | mov NRESULTS, RD + | mov [RA-4], RB + | mov RB, [RA] + | mov [RA-8], RB + | sub RD, 2 + | jz >2 + | mov ARG1, RA + |1: + | add RA, 8 + | mov RB, [RA+4] + | mov [RA-4], RB + | mov RB, [RA] + | mov [RA-8], RB + | sub RD, 1 + | jnz <1 + | mov RA, ARG1 + |2: + | mov RD, NRESULTS + | jmp ->fff_res_ + | + |.ffunc_1 type + | mov RB, [RA+4] + | mov RC, ~LJ_TNUMX + | not RB + | cmp RC, RB + ||if (cmov) { + | cmova RC, RB + ||} else { + | jbe >1; mov RC, RB; 1: + ||} + | mov CFUNC:RB, [RA-8] + | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] + | mov dword [RA-4], LJ_TSTR + | mov [RA-8], STR:RC + | jmp ->fff_res1 + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | mov RB, [RA+4] + | cmp RB, LJ_TTAB; jne >6 + |1: // Field metatable must be at same offset for GCtab and GCudata! + | mov TAB:RB, [RA] + | mov TAB:RB, TAB:RB->metatable + |2: + | test TAB:RB, TAB:RB + | mov dword [RA-4], LJ_TNIL + | jz ->fff_res1 + | mov CFUNC:RC, [RA-8] + | mov STR:RC, [DISPATCH+DISPATCH_GL(mmname)+4*MM_metatable] + | mov dword [RA-4], LJ_TTAB // Store metatable as default result. + | mov [RA-8], TAB:RB + | mov ARG1, RA // Save result pointer. + | mov RA, TAB:RB->hmask + | and RA, STR:RC->hash + | imul RA, #NODE + | add NODE:RA, TAB:RB->node + |3: // Rearranged logic, because we expect _not_ to find the key. + | cmp dword NODE:RA->key.it, LJ_TSTR + | jne >4 + | cmp dword NODE:RA->key.gcr, STR:RC + | je >5 + |4: + | mov NODE:RA, NODE:RA->next + | test NODE:RA, NODE:RA + | jnz <3 + | jmp ->fff_res1 // Not found, keep default result. + |5: + | mov RB, [RA+4] + | cmp RB, LJ_TNIL; je ->fff_res1 // Dito for nil value. + | mov RC, [RA] + | mov RA, ARG1 // Restore result pointer. + | mov [RA-4], RB // Return value of mt.__metatable. 
+ | mov [RA-8], RC + | jmp ->fff_res1 + | + |6: + | cmp RB, LJ_TUDATA; je <1 + | cmp RB, LJ_TISNUM; ja >7 + | mov RB, LJ_TNUMX + |7: + | not RB + | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(basemt)] + | jmp <2 + | + |.ffunc_2 setmetatable + | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback + | // Fast path: no mt for table yet and not clearing the mt. + | mov TAB:RB, [RA] + | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback + | cmp dword [RA+12], LJ_TTAB; jne ->fff_fallback + | mov TAB:RC, [RA+8] + | mov TAB:RB->metatable, TAB:RC + | mov dword [RA-4], LJ_TTAB // Return original table. + | mov [RA-8], TAB:RB + | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jz >1 + | // Possible write barrier. Table is black, but skip iswhite(mt) check. + | barrierback TAB:RB, RC + |1: + | jmp ->fff_res1 + | + |.ffunc_2 rawget + | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback + | mov TAB:RC, [RA] + | mov L:RB, SAVE_L + | mov ARG2, TAB:RC + | mov ARG1, L:RB + | mov RB, RA + | mov ARG4, BASE // Save BASE and RA. + | add RA, 8 + | mov ARG3, RA + | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // cTValue * returned in eax (RC). + | mov RA, RB + | mov BASE, ARG4 + | mov RB, [RC] // Copy table slot. + | mov RC, [RC+4] + | mov [RA-8], RB + | mov [RA-4], RC + | jmp ->fff_res1 + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | cmp NARGS:RC, 1+1; jne ->fff_fallback // Exactly one argument. + | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback + | fld qword [RA] + | jmp ->fff_resn + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. + | cmp dword [RA+4], LJ_TSTR; jne >3 + | // A __tostring method in the string base metatable is ignored. + | mov STR:RC, [RA] + |2: + | mov dword [RA-4], LJ_TSTR + | mov [RA-8], STR:RC + | jmp ->fff_res1 + |3: // Handle numbers inline, unless a number base metatable is present. + | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback + | cmp dword [DISPATCH+DISPATCH_GL(basemt)+4*(~LJ_TNUMX)], 0 + | jne ->fff_fallback + | ffgccheck // Caveat: uses label 1. + | mov L:RB, SAVE_L + | mov ARG1, L:RB + | mov ARG2, RA + | mov L:RB->base, RA // Add frame since C call can throw. + | mov [RA-4], PC + | mov SAVE_PC, PC // Redundant (but a defined value). + | mov ARG3, BASE // Save BASE. + | call extern lj_str_fromnum // (lua_State *L, lua_Number *np) + | // GCstr returned in eax (RC). + | mov RA, L:RB->base + | mov BASE, ARG3 + | jmp <2 + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | je >2 // Missing 2nd arg? + |1: + | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback + | mov TAB:RB, [RA] + | mov ARG2, TAB:RB + | mov L:RB, SAVE_L + | mov ARG1, L:RB + | mov L:RB->base, RA // Add frame since C call can throw. + | mov [RA-4], PC + | mov SAVE_PC, PC // Redundant (but a defined value). + | mov ARG4, BASE // Save BASE. + | add RA, 8 + | mov ARG3, RA + | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) + | // Flag returned in eax (RC). + | mov RA, L:RB->base + | mov BASE, ARG4 + | test RC, RC; jz >3 // End of traversal? + | mov RB, [RA+8] // Copy key and value to results. + | mov RC, [RA+12] + | mov [RA-8], RB + | mov [RA-4], RC + | mov RB, [RA+16] + | mov RC, [RA+20] + | mov [RA], RB + | mov [RA+4], RC + |->fff_res2: + | mov RD, 1+2 + | jmp ->fff_res + |2: // Set missing 2nd arg to nil. 
+ | mov dword [RA+12], LJ_TNIL + | jmp <1 + |3: // End of traversal: return nil. + | mov dword [RA-4], LJ_TNIL + | jmp ->fff_res1 + | + |.ffunc_1 pairs + | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback + | mov CFUNC:RC, CFUNC:RB->upvalue[0] + | mov dword [RA-4], LJ_TFUNC + | mov [RA-8], CFUNC:RC + | mov dword [RA+12], LJ_TNIL + | mov RD, 1+3 + | jmp ->fff_res + | + |.ffunc_1 ipairs_aux + | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback + | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback + | fld qword [RA+8] + | fld1 + | faddp st1 + | fist ARG2 + | fstp qword [RA-8] + | mov TAB:RB, [RA] + | mov RC, ARG2 + | cmp RC, TAB:RB->asize; jae >2 // Not in array part? + | shl RC, 3 + | add RC, TAB:RB->array + |1: + | cmp dword [RC+4], LJ_TNIL; je ->fff_res0 + | mov RB, [RC] // Copy array slot. + | mov RC, [RC+4] + | mov [RA], RB + | mov [RA+4], RC + | jmp ->fff_res2 + |2: // Check for empty hash part first. Otherwise call C function. + | cmp dword TAB:RB->hmask, 0; je ->fff_res0 + | mov ARG1, TAB:RB + | mov ARG3, BASE // Save BASE and RA. + | mov RB, RA + | call extern lj_tab_getinth // (GCtab *t, int32_t key) + | // cTValue * or NULL returned in eax (RC). + | mov RA, RB + | mov BASE, ARG3 + | test RC, RC + | jnz <1 + |->fff_res0: + | mov RD, 1+0 + | jmp ->fff_res + | + |.ffunc_1 ipairs + | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback + | mov CFUNC:RC, CFUNC:RB->upvalue[0] + | mov dword [RA-4], LJ_TFUNC + | mov [RA-8], CFUNC:RC + | fldz + | fstp qword [RA+8] + | mov RD, 1+3 + | jmp ->fff_res + | + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc_1 pcall + | mov [RA-4], PC + | mov PC, 8+FRAME_PCALL + | mov BASE, RA + | add RA, 8 + | sub NARGS:RC, 1 + | mov LFUNC:RB, [RA-8] + |1: + | test byte [DISPATCH+DISPATCH_GL(hookmask)], HOOK_ACTIVE + | jnz >3 // Hook active before pcall? + |2: + | cmp dword [RA-4], LJ_TFUNC + | jne ->vmeta_call // Ensure KBASE defined and != BASE. + | jmp aword LFUNC:RB->gate + |3: + | add PC, 1 // Use FRAME_PCALLH if hook was active. + | jmp <2 + | + |.ffunc_2 xpcall + | cmp dword [RA+12], LJ_TFUNC; jne ->fff_fallback + | mov [RA-4], PC + | mov RB, [RA+4] // Swap function and traceback. + | mov [RA+12], RB + | mov dword [RA+4], LJ_TFUNC + | mov LFUNC:RB, [RA] + | mov PC, [RA+8] + | mov [RA+8], LFUNC:RB + | mov [RA], PC + | mov PC, 2*8+FRAME_PCALL + | mov BASE, RA + | add RA, 2*8 + | sub NARGS:RC, 2 + | jmp <1 + | + |//-- Coroutine library -------------------------------------------------- + | + |.macro coroutine_resume_wrap, resume + |9: // Need to restore PC for fallback handler. + | mov PC, SAVE_PC + | jmp ->fff_fallback + | + |.if resume + |.ffunc_1 coroutine_resume + | mov L:RB, [RA] + |.else + |.ffunc coroutine_wrap_aux + | mov L:RB, CFUNC:RB->upvalue[0].gcr + |.endif + | mov [RA-4], PC + | mov SAVE_PC, PC + | mov ARG1, L:RB + |.if resume + | cmp dword [RA+4], LJ_TTHREAD; jne <9 + |.endif + | cmp aword L:RB->cframe, 0; jne <9 + | cmp byte L:RB->status, LUA_YIELD; ja <9 + | mov PC, L:RB->top + | mov ARG2, PC + | je >1 // Status != LUA_YIELD (i.e. 0)? + | cmp PC, L:RB->base; je <9 // Check for presence of initial func. + |1: + |.if resume + | lea PC, [PC+NARGS:RC*8-16] // Check stack space (-1-thread). + |.else + | lea PC, [PC+NARGS:RC*8-8] // Check stack space (-1). + |.endif + | cmp PC, L:RB->maxstack; ja <9 + | mov L:RB->top, PC + | + | mov L:RB, SAVE_L + | mov L:RB->base, RA + |.if resume + | add RA, 8 // Keep resumed thread in stack for GC. 
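+  | // (Keeping the thread in an active stack slot of the caller
+  | // anchors it for the GC while the coroutine runs.)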
+ |.endif + | mov L:RB->top, RA + | mov RB, ARG2 + |.if resume + | lea RA, [RA+NARGS:RC*8-24] // RA = end of source for stack move. + |.else + | lea RA, [RA+NARGS:RC*8-16] // RA = end of source for stack move. + |.endif + | sub RA, PC // Relative to PC. + | + | cmp PC, RB + | je >3 + |2: // Move args to coroutine. + | mov RC, [PC+RA+4] + | mov [PC-4], RC + | mov RC, [PC+RA] + | mov [PC-8], RC + | sub PC, 8 + | cmp PC, RB + | jne <2 + |3: + | xor RA, RA + | mov ARG4, RA + | mov ARG3, RA + | call ->vm_resume // (lua_State *L, StkId base, 0, 0) + | set_vmstate INTERP + | + | mov L:RB, SAVE_L + | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. + | mov BASE, L:RB->base + | cmp eax, LUA_YIELD + | ja >8 + |4: + | mov RA, L:PC->base + | mov KBASE, L:PC->top + | mov L:PC->top, RA // Clear coroutine stack. + | mov PC, KBASE + | sub PC, RA + | je >6 // No results? + | lea RD, [BASE+PC] + | shr PC, 3 + | cmp RD, L:RB->maxstack + | ja >9 // Need to grow stack? + | + | mov RB, BASE + | sub RB, RA + |5: // Move results from coroutine. + | mov RD, [RA] + | mov [RA+RB], RD + | mov RD, [RA+4] + | mov [RA+RB+4], RD + | add RA, 8 + | cmp RA, KBASE + | jne <5 + |6: + |.if resume + | lea RD, [PC+2] // nresults+1 = 1 + true + results. + | mov dword [BASE-4], LJ_TTRUE // Prepend true to results. + |.else + | lea RD, [PC+1] // nresults+1 = 1 + results. + |.endif + |7: + | mov PC, SAVE_PC + | mov NRESULTS, RD + |.if resume + | mov RA, -8 + |.else + | xor RA, RA + |.endif + | test PC, FRAME_TYPE + | jz ->BC_RET_Z + | jmp ->vm_return + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | mov dword [BASE-4], LJ_TFALSE // Prepend false to results. + | mov RA, L:PC->top + | sub RA, 8 + | mov L:PC->top, RA // Clear error from coroutine stack. + | mov RD, [RA] // Copy error message. + | mov [BASE], RD + | mov RD, [RA+4] + | mov [BASE+4], RD + | mov RD, 1+2 // nresults+1 = 1 + false + error. + | jmp <7 + |.else + | mov ARG2, L:PC + | mov ARG1, L:RB + | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) + | // Error function does not return. + |.endif + | + |9: // Handle stack expansion on return from yield. + | mov L:RA, ARG1 // The callee doesn't modify SAVE_L. + | mov L:RA->top, KBASE // Undo coroutine stack clearing. + | mov ARG2, PC + | mov ARG1, L:RB + | call extern lj_state_growstack // (lua_State *L, int n) + | mov BASE, L:RB->base + | jmp <4 // Retry the stack move. + |.endmacro + | + | coroutine_resume_wrap 1 // coroutine.resume + | coroutine_resume_wrap 0 // coroutine.wrap + | + |.ffunc coroutine_yield + | mov L:RB, SAVE_L + | mov [RA-4], PC + | test aword L:RB->cframe, CFRAME_CANYIELD + | jz ->fff_fallback + | mov L:RB->base, RA + | lea RC, [RA+NARGS:RC*8-8] + | mov L:RB->top, RC + | xor eax, eax + | mov aword L:RB->cframe, eax + | mov al, LUA_YIELD + | mov byte L:RB->status, al + | jmp ->vm_leave_unw + | + |//-- Math library ------------------------------------------------------- + | + |.ffunc_n math_abs + | fabs + | // fallthrough + |->fff_resn: + | fstp qword [RA-8] + |->fff_res1: + | mov RD, 1+1 + |->fff_res: + | mov NRESULTS, RD + |->fff_res_: + | test PC, FRAME_TYPE + | jnz >7 + |5: + | cmp PC_RB, RDL // More results expected? + | ja >6 + | // BASE and KBASE are assumed to be set for the calling frame. + | ins_next + | + |6: // Fill up results with nil. + | mov dword [RA+RD*8-12], LJ_TNIL + | add RD, 1 + | jmp <5 + | + |7: // Non-standard return case. + | mov BASE, RA + | mov RA, -8 // Results start at BASE+RA = BASE-8. 
+ | jmp ->vm_return + | + |.ffunc_n math_floor; call ->vm_floor; jmp ->fff_resn + |.ffunc_n math_ceil; call ->vm_ceil; jmp ->fff_resn + | + |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn + | + |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn + |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn + |.ffunc_n math_exp; call ->vm_exp; jmp ->fff_resn + | + |.ffunc_n math_sin; fsin; jmp ->fff_resn + |.ffunc_n math_cos; fcos; jmp ->fff_resn + |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn + | + |.ffunc_n math_asin + | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan + | jmp ->fff_resn + |.ffunc_n math_acos + | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan + | jmp ->fff_resn + |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn + | + |.macro math_extern, func + |.ffunc_n math_ .. func + | mov ARG5, RA + | fstp FPARG1 + | mov RB, BASE + | call extern func + | mov RA, ARG5 + | mov BASE, RB + | jmp ->fff_resn + |.endmacro + | + | math_extern sinh + | math_extern cosh + | math_extern tanh + | + |->ff_math_deg: + |.ffunc_n math_rad; fmul qword CFUNC:RB->upvalue[0]; jmp ->fff_resn + | + |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn + |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn + | + |.ffunc_1 math_frexp + | mov RB, [RA+4] + | cmp RB, LJ_TISNUM; ja ->fff_fallback + | mov RC, [RA] + | mov [RA-4], RB; mov [RA-8], RC + | shl RB, 1; cmp RB, 0xffe00000; jae >3 + | or RC, RB; jz >3 + | mov RC, 1022 + | cmp RB, 0x00200000; jb >4 + |1: + | shr RB, 21; sub RB, RC // Extract and unbias exponent. + | mov ARG1, RB; fild ARG1 + | mov RB, [RA-4] + | and RB, 0x800fffff // Mask off exponent. + | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. + | mov [RA-4], RB + |2: + | fstp qword [RA] + | mov RD, 1+2 + | jmp ->fff_res + |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. + | fldz; jmp <2 + |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. + | fld qword [RA] + | mov ARG1, 0x5a800000; fmul ARG1 // x = x*2^54 + | fstp qword [RA-8] + | mov RB, [RA-4]; mov RC, 1076; shl RB, 1; jmp <1 + | + |.ffunc_n math_modf + | mov RB, [RA+4] + | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? + | fdup + | call ->vm_trunc + | fsub st1, st0 + |1: + | fstp qword [RA-8]; fstp qword [RA] + | mov RC, [RA-4]; mov RB, [RA+4] + | xor RC, RB; js >3 // Need to adjust sign? + |2: + | mov RD, 1+2 + | jmp ->fff_res + |3: + | xor RB, 0x80000000; mov [RA+4], RB; jmp <2 // Flip sign of fraction. + |4: + | fldz; fxch; jmp <1 // Return +-Inf and +-0. + | + |.ffunc_nnr math_fmod + |1: ; fprem; fnstsw ax; sahf; jp <1 + | fpop1 + | jmp ->fff_resn + | + |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn + | + |.macro math_minmax, name, cmovop, nocmovop + |.ffunc_n name + | mov RB, 2 + |1: + | cmp RB, RD; jae ->fff_resn + | cmp dword [RA+RB*8-4], LJ_TISNUM; ja >5 + | fld qword [RA+RB*8-8] + ||if (cmov) { + | fucomi st1; cmovop st1; fpop1 + ||} else { + | push eax + | fucom st1; fnstsw ax; test ah, 1; nocmovop >2; fxch; 2: ; fpop + | pop eax + ||} + | add RB, 1 + | jmp <1 + |.endmacro + | + | math_minmax math_min, fcmovnbe, jz + | math_minmax math_max, fcmovbe, jnz + |5: + | fpop; jmp ->fff_fallback + | + |//-- String library ----------------------------------------------------- + | + |.ffunc_1 string_len + | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback + | mov STR:RB, [RA] + | fild dword STR:RB->len + | jmp ->fff_resn + | + |.ffunc string_byte // Only handle the 1-arg case here. 
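The math_frexp fast path above works directly on the IEEE-754 bit pattern: shift out the sign, treat an all-ones exponent (Inf/NaN) and zero specially, rescale denormals by 2^54 with a bias of 1076 instead of 1022, then overwrite the stored exponent with 0x3fe so the mantissa lands in [0.5,1). A standalone C sketch of the same decomposition, assuming IEEE-754 doubles (my_frexp is an illustrative name):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Decompose x into f*2^e with f in [0.5,1). +-0, +-Inf and NaN come back
** unmodified with an exponent of 0; denormals are rescaled by 2^54 first,
** which is why the assembly switches the bias from 1022 to 1076. */
static double my_frexp(double x, int *e)
{
  uint64_t u;
  uint32_t hi;
  int bias = 1022;
  memcpy(&u, &x, 8);
  hi = (uint32_t)(u >> 32);
  if ((hi << 1) >= 0xffe00000u || x == 0.0) { *e = 0; return x; }
  if ((hi << 1) < 0x00200000u) {        /* Denormal: rescale by 2^54. */
    x *= 18014398509481984.0;           /* 2^54 */
    memcpy(&u, &x, 8);
    hi = (uint32_t)(u >> 32);
    bias = 1076;                        /* 1022 + 54, as in the assembly. */
  }
  *e = (int)((hi << 1) >> 21) - bias;   /* Extract and unbias exponent. */
  u = (u & 0x800fffffffffffffULL) | 0x3fe0000000000000ULL; /* f in [0.5,1). */
  memcpy(&x, &u, 8);
  return x;
}

int main(void)
{
  int e;
  double f = my_frexp(48.0, &e);
  printf("48 = %g * 2^%d\n", f, e);     /* 48 = 0.75 * 2^6 */
  return 0;
}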
+ | cmp NARGS:RC, 1+1; jne ->fff_fallback + | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback + | mov STR:RB, [RA] + | cmp dword STR:RB->len, 1 + | jb ->fff_res0 // Return no results for empty string. + | movzx RB, byte STR:RB[1] + | mov ARG1, RB + | fild ARG1 + | jmp ->fff_resn + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + | cmp NARGS:RC, 1+1; jne ->fff_fallback // *Exactly* 1 arg. + | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback + | fld qword [RA] + | fistp ARG4 + | cmp ARG4, 255; ja ->fff_fallback + | lea RC, ARG4 // Little-endian. + | mov ARG5, RA // Save RA. + | mov ARG3, 1 + | mov ARG2, RC + |->fff_newstr: + | mov L:RB, SAVE_L + | mov ARG1, L:RB + | mov SAVE_PC, PC + | mov L:RB->base, BASE + | call extern lj_str_new // (lua_State *L, char *str, size_t l) + | // GCstr * returned in eax (RC). + | mov RA, ARG5 + | mov BASE, L:RB->base + | mov dword [RA-4], LJ_TSTR + | mov [RA-8], STR:RC + | jmp ->fff_res1 + | + |.ffunc string_sub + | ffgccheck + | mov ARG5, RA // Save RA. + | mov ARG4, -1 + | cmp NARGS:RC, 1+2; jb ->fff_fallback + | jna >1 + | cmp dword [RA+20], LJ_TISNUM; ja ->fff_fallback + | fld qword [RA+16] + | fistp ARG4 + |1: + | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback + | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback + | mov STR:RB, [RA] + | mov ARG2, STR:RB + | mov RB, STR:RB->len + | fld qword [RA+8] + | fistp ARG3 + | mov RC, ARG4 + | cmp RB, RC // len < end? (unsigned compare) + | jb >5 + |2: + | mov RA, ARG3 + | test RA, RA // start <= 0? + | jle >7 + |3: + | mov STR:RB, ARG2 + | sub RC, RA // start > end? + | jl ->fff_emptystr + | lea RB, [STR:RB+RA+#STR-1] + | add RC, 1 + |4: + | mov ARG2, RB + | mov ARG3, RC + | jmp ->fff_newstr + | + |5: // Negative end or overflow. + | jl >6 + | lea RC, [RC+RB+1] // end = end+(len+1) + | jmp <2 + |6: // Overflow. + | mov RC, RB // end = len + | jmp <2 + | + |7: // Negative start or underflow. + | je >8 + | add RA, RB // start = start+(len+1) + | add RA, 1 + | jg <3 // start > 0? + |8: // Underflow. + | mov RA, 1 // start = 1 + | jmp <3 + | + |->fff_emptystr: // Range underflow. + | xor RC, RC // Zero length. Any ptr in RB is ok. + | jmp <4 + | + |.ffunc_2 string_rep // Only handle the 1-char case inline. + | ffgccheck + | mov ARG5, RA // Save RA. + | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback + | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback + | mov STR:RB, [RA] + | fld qword [RA+8] + | fistp ARG4 + | mov RC, ARG4 + | test RC, RC + | jle ->fff_emptystr // Count <= 0? (or non-int) + | cmp dword STR:RB->len, 1 + | jb ->fff_emptystr // Zero length string? + | jne ->fff_fallback_2 // Fallback for > 1-char strings. + | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2 + | movzx RA, byte STR:RB[1] + | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] + | mov ARG3, RC + | mov ARG2, RB + |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). + | mov [RB], RAL + | add RB, 1 + | sub RC, 1 + | jnz <1 + | jmp ->fff_newstr + | + |.ffunc_1 string_reverse + | ffgccheck + | mov ARG5, RA // Save RA. + | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback + | mov STR:RB, [RA] + | mov RC, STR:RB->len + | test RC, RC + | jz ->fff_emptystr // Zero length string? + | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 + | add RB, #STR + | mov ARG4, PC // Need another temp register. 
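string_sub above normalizes the 1-based indices before slicing: a negative index has len+1 added, a too-large end is clamped to len, a zero or underflowing start becomes 1, and start > end yields the empty string. A C sketch of just that normalization, under the same conventions (sub_range is a made-up helper):

#include <stdio.h>
#include <string.h>

/* Mirror of the index normalization for string.sub(s, i, j). */
static void sub_range(long len, long i, long j, long *start, long *n)
{
  if (j < 0) j += len + 1;        /* Negative end. */
  else if (j > len) j = len;      /* Overflow: end = len. */
  if (i < 0) { i += len + 1; if (i < 1) i = 1; }  /* Negative start. */
  else if (i == 0) i = 1;         /* Underflow: start = 1. */
  *start = i;
  *n = (j >= i) ? j - i + 1 : 0;  /* Range underflow: zero length. */
}

int main(void)
{
  const char *s = "LuaJIT";
  long start, n;
  sub_range((long)strlen(s), -3, -1, &start, &n);
  printf("%.*s\n", (int)n, s + start - 1);  /* JIT */
  return 0;
}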
+ | mov ARG3, RC + | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] + | mov ARG2, PC + |1: + | movzx RA, byte [RB] + | add RB, 1 + | sub RC, 1 + | mov [PC+RC], RAL + | jnz <1 + | mov PC, ARG4 + | jmp ->fff_newstr + | + |.macro ffstring_case, name, lo, hi + | .ffunc_1 name + | ffgccheck + | mov ARG5, RA // Save RA. + | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback + | mov STR:RB, [RA] + | mov RC, STR:RB->len + | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 + | add RB, #STR + | mov ARG4, PC // Need another temp register. + | mov ARG3, RC + | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] + | mov ARG2, PC + | jmp >3 + |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?). + | movzx RA, byte [RB+RC] + | cmp RA, lo + | jb >2 + | cmp RA, hi + | ja >2 + | xor RA, 0x20 + |2: + | mov [PC+RC], RAL + |3: + | sub RC, 1 + | jns <1 + | mov PC, ARG4 + | jmp ->fff_newstr + |.endmacro + | + |ffstring_case string_lower, 0x41, 0x5a + |ffstring_case string_upper, 0x61, 0x7a + | + |//-- Table library ------------------------------------------------------ + | + |.ffunc_1 table_getn + | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback + | mov TAB:RB, [RA] + | mov ARG1, TAB:RB + | mov RB, RA // Save RA and BASE. + | mov ARG2, BASE + | call extern lj_tab_len // (GCtab *t) + | // Length of table returned in eax (RC). + | mov ARG1, RC + | mov RA, RB // Restore RA and BASE. + | mov BASE, ARG2 + | fild ARG1 + | jmp ->fff_resn + | + |//-- Bit library -------------------------------------------------------- + | + |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). + | + |.ffunc_n bit_tobit + | mov ARG5, TOBIT_BIAS + | fadd ARG5 + | fstp FPARG1 // 64 bit FP store. + | fild ARG1 // 32 bit integer load (s2lfwd ok). + | jmp ->fff_resn + | + |.macro .ffunc_bit, name + | .ffunc_n name + | mov ARG5, TOBIT_BIAS + | fadd ARG5 + | fstp FPARG1 + | mov RB, ARG1 + |.endmacro + | + |.macro .ffunc_bit_op, name, ins + | .ffunc_bit name + | mov NRESULTS, NARGS:RC // Save for fallback. + | lea RC, [RA+NARGS:RC*8-16] + |1: + | cmp RC, RA + | jbe ->fff_resbit + | cmp dword [RC+4], LJ_TISNUM; ja ->fff_fallback_bit_op + | fld qword [RC] + | fadd ARG5 + | fstp FPARG1 + | ins RB, ARG1 + | sub RC, 8 + | jmp <1 + |.endmacro + | + |.ffunc_bit_op bit_band, and + |.ffunc_bit_op bit_bor, or + |.ffunc_bit_op bit_bxor, xor + | + |.ffunc_bit bit_bswap + | bswap RB + | jmp ->fff_resbit + | + |.ffunc_bit bit_bnot + | not RB + |->fff_resbit: + | mov ARG1, RB + | fild ARG1 + | jmp ->fff_resn + | + |->fff_fallback_bit_op: + | mov NARGS:RC, NRESULTS // Restore for fallback + | jmp ->fff_fallback + | + |.macro .ffunc_bit_sh, name, ins + | .ffunc_nn name + | mov ARG5, TOBIT_BIAS + | fadd ARG5 + | fstp FPARG3 + | fadd ARG5 + | fstp FPARG1 + | mov RC, RA // Assumes RA is ecx. + | mov RA, ARG3 + | mov RB, ARG1 + | ins RB, cl + | mov RA, RC + | jmp ->fff_resbit + |.endmacro + | + |.ffunc_bit_sh bit_lshift, shl + |.ffunc_bit_sh bit_rshift, shr + |.ffunc_bit_sh bit_arshift, sar + |.ffunc_bit_sh bit_rol, rol + |.ffunc_bit_sh bit_ror, ror + | + |//----------------------------------------------------------------------- + | + |->fff_fallback_2: + | mov NARGS:RC, 1+2 // Other args are ignored, anyway. + | jmp ->fff_fallback + |->fff_fallback_1: + | mov NARGS:RC, 1+1 // Other args are ignored, anyway. + |->fff_fallback: // Call fast function fallback handler. + | // RA = new base, RC = nargs+1 + | mov L:RB, SAVE_L + | sub BASE, RA + | mov [RA-4], PC + | mov SAVE_PC, PC // Redundant (but a defined value). 
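The TOBIT_BIAS constant above encodes the classic double-to-int32 trick: after adding 2^52+2^51, the double's integer part sits in the low mantissa bits, so the low 32 bits of the stored bit pattern are the operand reduced modulo 2^32. A C sketch of the same conversion, assuming IEEE-754 doubles and the default round-to-nearest mode:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int32_t tobit(double x)
{
  uint64_t u;
  double biased = x + 6755399441055744.0;  /* 2^52 + 2^51 */
  memcpy(&u, &biased, 8);
  return (int32_t)(uint32_t)u;  /* Low 32 bits of the mantissa. */
}

int main(void)
{
  printf("%d\n", tobit(1.0));           /* 1 */
  printf("%d\n", tobit(-1.0));          /* -1 */
  printf("%d\n", tobit(4294967296.0));  /* 2^32 wraps to 0 */
  return 0;
}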
+ | mov ARG3, BASE // Save old BASE (relative). + | mov L:RB->base, RA + | lea RC, [RA+NARGS:RC*8-8] + | mov ARG1, L:RB + | lea BASE, [RC+8*LUA_MINSTACK] // Ensure enough space for handler. + | mov L:RB->top, RC + | mov CFUNC:RA, [RA-8] + | cmp BASE, L:RB->maxstack + | ja >5 // Need to grow stack. + | call aword CFUNC:RA->f // (lua_State *L) + | // Either throws an error or recovers and returns 0 or NRESULTS (+1). + | test RC, RC; jnz >3 + |1: // Returned 0: retry fast path. + | mov RA, L:RB->base + | mov RC, L:RB->top + | sub RC, RA + | shr RC, 3 + | add NARGS:RC, 1 + | mov LFUNC:RB, [RA-8] + | mov BASE, ARG3 // Restore old BASE. + | add BASE, RA + | cmp [RA-4], PC; jne >2 // Callable modified by handler? + | jmp aword LFUNC:RB->gate // Retry the call. + | + |2: // Run modified callable. + | cmp dword [RA-4], LJ_TFUNC + | jne ->vmeta_call + | jmp aword LFUNC:RB->gate // Retry the call. + | + |3: // Returned NRESULTS (already in RC/RD). + | mov RA, L:RB->base + | mov BASE, ARG3 // Restore old BASE. + | add BASE, RA + | jmp ->fff_res + | + |5: // Grow stack for fallback handler. + | mov ARG2, LUA_MINSTACK + | call extern lj_state_growstack // (lua_State *L, int n) + | jmp <1 // Dumb retry (goes through ff first). + | + |->fff_gcstep: // Call GC step function. + | // RA = new base, RC = nargs+1 + | pop RB // Must keep stack at same level. + | mov ARG3, RB // Save return address + | mov L:RB, SAVE_L + | sub BASE, RA + | mov ARG2, BASE // Save old BASE (relative). + | mov [RA-4], PC + | mov SAVE_PC, PC // Redundant (but a defined value). + | mov L:RB->base, RA + | lea RC, [RA+NARGS:RC*8-8] + | mov ARG1, L:RB + | mov L:RB->top, RC + | call extern lj_gc_step // (lua_State *L) + | mov RA, L:RB->base + | mov RC, L:RB->top + | sub RC, RA + | shr RC, 3 + | add NARGS:RC, 1 + | mov PC, [RA-4] + | mov BASE, ARG2 // Restore old BASE. + | add BASE, RA + | mov RB, ARG3 + | push RB // Restore return address. + | mov LFUNC:RB, [RA-8] + | ret + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. +#if LJ_HASJIT + | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] + | test RDL, HOOK_VMEVENT // No recording while in vmevent. + | jnz >5 + | // Decrement the hookcount for consistency, but always do the call. + | test RDL, HOOK_ACTIVE + | jnz >1 + | test RDL, LUA_MASKLINE|LUA_MASKCOUNT + | jz >1 + | dec dword [DISPATCH+DISPATCH_GL(hookcount)] + | jmp >1 +#endif + | + |->vm_hook: // Dispatch target with enabled hooks. + | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] + | test RDL, HOOK_ACTIVE // Hook already active? + | jnz >5 + | + | test RDL, LUA_MASKLINE|LUA_MASKCOUNT + | jz >5 + | dec dword [DISPATCH+DISPATCH_GL(hookcount)] + | jz >1 + | test RDL, LUA_MASKLINE + | jz >5 + |1: + | mov L:RB, SAVE_L + | mov RD, NRESULTS // Dynamic top for *M instructions. + | mov ARG3, RD + | mov L:RB->base, BASE + | mov ARG2, PC + | mov ARG1, L:RB + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. + | call extern lj_dispatch_ins // (lua_State *L, BCIns *pc, int nres) + |4: + | mov BASE, L:RB->base + | movzx RA, PC_RA + |5: + | movzx OP, PC_OP + | movzx RD, PC_RD + | jmp aword [DISPATCH+OP*4+GG_DISP_STATIC*4] // Re-dispatch to static ins. + | + |->vm_hotloop: // Hot loop counter underflow. 
+#if LJ_HASJIT + | mov L:RB, SAVE_L + | lea RA, [DISPATCH+GG_DISP2J] + | mov ARG2, PC + | mov ARG1, RA + | mov [DISPATCH+DISPATCH_J(L)], L:RB + | mov SAVE_PC, PC + | mov L:RB->base, BASE + | call extern lj_trace_hot // (jit_State *J, const BCIns *pc) + | jmp <4 +#endif + | + |->vm_hotcall: // Hot call counter underflow. +#if LJ_HASJIT + | mov L:RB, SAVE_L + | lea RA, [DISPATCH+GG_DISP2J] + | mov ARG2, PC + | mov ARG1, RA + | mov [DISPATCH+DISPATCH_J(L)], L:RB + | mov SAVE_PC, PC + | mov L:RB->base, BASE + | call extern lj_trace_hot // (jit_State *J, const BCIns *pc) + | mov BASE, L:RB->base + | // Dispatch the first instruction and optionally record it. + | ins_next +#endif + | + |//----------------------------------------------------------------------- + |//-- Trace exit handler ------------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Called from an exit stub with the exit number on the stack. + |// The 16 bit exit number is stored with two (sign-extended) push imm8. + |->vm_exit_handler: +#if LJ_HASJIT + | push ebp; lea ebp, [esp+12]; push ebp + | push ebx; push edx; push ecx; push eax + | movzx RC, byte [ebp-4] // Reconstruct exit number. + | mov RCH, byte [ebp-8] + | mov [ebp-4], edi; mov [ebp-8], esi + | // Caveat: DISPATCH is ebx. + | mov DISPATCH, [ebp] + | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. + | set_vmstate EXIT + | mov [DISPATCH+DISPATCH_J(exitno)], RC + | mov [DISPATCH+DISPATCH_J(parent)], RA + | sub esp, 8*8+16 // Room for SSE regs + args. + | + | // Must not access SSE regs if SSE2 is not present. + | test dword [DISPATCH+DISPATCH_J(flags)], JIT_F_SSE2 + | jz >1 + | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6 + | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4 + | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2 + | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 + |1: + | // Caveat: RB is ebp. + | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] + | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] + | mov [DISPATCH+DISPATCH_J(L)], L:RB + | lea RC, [esp+16] + | mov L:RB->base, BASE + | lea RA, [DISPATCH+GG_DISP2J] + | mov ARG2, RC + | mov ARG1, RA + | call extern lj_trace_exit // (jit_State *J, ExitState *ex) + | // Interpreter C frame returned in eax. + | mov esp, eax // Reposition stack to C frame. + | mov BASE, L:RB->base + | mov PC, SAVE_PC + | mov SAVE_L, L:RB // Needed for on-trace resume/yield. +#endif + |->vm_exit_interp: +#if LJ_HASJIT + | mov LFUNC:KBASE, [BASE-8] + | mov PROTO:KBASE, LFUNC:KBASE->pt + | mov KBASE, PROTO:KBASE->k + | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 + | set_vmstate INTERP + | ins_next +#endif + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + |// FP value rounding. Called by math.floor/math.ceil fast functions + |// and from JIT code. Arg/ret on x87 stack. No int/xmm registers modified. + |.macro vm_round, mode1, mode2 + | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. 
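vm_round above selects the x87 rounding mode by ORing 0x0400 (down), 0x0800 (up) or 0x0c00 (truncate) into bits 10-11 of the control word before frndint, then restores the old word. A portable C sketch of the three entry points using C99 fenv.h (round_mode is an illustrative name):

#include <fenv.h>
#include <math.h>
#include <stdio.h>

/* Set the FP rounding mode, round to integral, restore the mode. */
static double round_mode(double x, int mode)
{
  int old = fegetround();
  fesetround(mode);
  x = rint(x);             /* Honors the current mode, like frndint. */
  fesetround(old);
  return x;
}

int main(void)
{
  printf("floor(-1.5) = %g\n", round_mode(-1.5, FE_DOWNWARD));   /* -2 */
  printf("ceil(-1.5)  = %g\n", round_mode(-1.5, FE_UPWARD));     /* -1 */
  printf("trunc(-1.5) = %g\n", round_mode(-1.5, FE_TOWARDZERO)); /* -1 */
  return 0;
}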
+ | mov [esp+8], eax + | mov ax, mode1 + | or ax, [esp+4] + |.if mode2 ~= 0xffff + | and ax, mode2 + |.endif + | mov [esp+6], ax + | fldcw word [esp+6] + | frndint + | fldcw word [esp+4] + | mov eax, [esp+8] + | ret + |.endmacro + | + |->vm_floor: + | vm_round 0x0400, 0xf7ff + | + |->vm_ceil: + | vm_round 0x0800, 0xfbff + | + |->vm_trunc: + | vm_round 0x0c00, 0xffff + | + |// FP modulo x%y. Called by BC_MOD* and vm_arith. + |// Args/ret on x87 stack (y on top). No xmm registers modified. + |// Caveat: needs 3 slots on x87 stack! RC (eax) modified! + |->vm_mod: + | fld st1 + | fdiv st1 + | fnstcw word [esp+4] + | mov ax, 0x0400 + | or ax, [esp+4] + | and ax, 0xf7ff + | mov [esp+6], ax + | fldcw word [esp+6] + | frndint + | fldcw word [esp+4] + | fmulp st1 + | fsubp st1 + | ret + | + |// FP exponentiation e^x and 2^x. Called by math.exp fast function and + |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified. + |// Caveat: needs 3 slots on x87 stack! + |->vm_exp: + | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e)) + |->vm_exp2: + | fst dword [esp+4] // Caveat: overwrites ARG1. + | cmp dword [esp+4], 0x7f800000; je >1 // Special case: e^+Inf = +Inf + | cmp dword [esp+4], 0xff800000; je >2 // Special case: e^-Inf = 0 + |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check. + | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. + | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int + |1: + | ret + |2: + | fpop; fldz; ret + | + |// Generic power function x^y. Called by BC_POW, math.pow fast function + |// and vm_arith. Args/ret on x87 stack (y on top). No int/xmm regs modified. + |// Caveat: needs 3 slots on x87 stack! + |->vm_pow: + | fist dword [esp+4] // Store/reload int before comparison. + | fild dword [esp+4] // Integral exponent used in vm_powi. + ||if (cmov) { + | fucomip st1 + ||} else { + | push eax; fucomp st1; fnstsw ax; sahf; pop eax + ||} + | jnz >8 // Branch for FP exponents. + | jp >9 // Branch for NaN exponent. + | fpop // Pop y and fallthrough to vm_powi. + | + |// FP/int power function x^i. Called from JIT code. Arg1/ret on x87 stack. + |// Arg2 (int) on C stack. No int/xmm regs modified. + |// Caveat: needs 2 slots on x87 stack! + |->vm_powi: + | push eax + | mov eax, [esp+8] + | cmp eax, 1; jle >6 // i<=1? + | // Now 1 < (unsigned)i <= 0x80000000. + |1: // Handle leading zeros. + | test eax, 1; jnz >2 + | fmul st0 + | shr eax, 1 + | jmp <1 + |2: + | shr eax, 1; jz >5 + | fdup + |3: // Handle trailing bits. + | fmul st0 + | shr eax, 1; jz >4 + | jnc <3 + | fmul st1, st0 + | jmp <3 + |4: + | fmulp st1 + |5: + | pop eax + | ret + |6: + | je <5 // x^1 ==> x + | jb >7 + | fld1; fdivrp st1 + | neg eax + | cmp eax, 1; je <5 // x^-1 ==> 1/x + | jmp <1 // x^-i ==> (1/x)^i + |7: + | fpop; fld1 // x^0 ==> 1 + | pop eax + | ret + | + |8: // FP/FP power function x^y. + | push eax + | fst dword [esp+8] + | fxch + | fst dword [esp+12] + | mov eax, [esp+8]; shl eax, 1 + | cmp eax, 0xff000000; je >2 // x^+-Inf? + | mov eax, [esp+12]; shl eax, 1; je >4 // +-0^y? + | cmp eax, 0xff000000; je >4 // +-Inf^y? + | pop eax + | fyl2x + | jmp ->vm_exp2raw + | + |9: // Handle x^NaN. + | fld1 + ||if (cmov) { + | fucomip st2 + ||} else { + | push eax; fucomp st2; fnstsw ax; sahf; pop eax + ||} + | je >1 // 1^NaN ==> 1 + | fxch // x^NaN ==> NaN + |1: + | fpop + | ret + | + |2: // Handle x^+-Inf. 
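vm_powi above is binary exponentiation on the x87 stack: square for every exponent bit, multiply the result in on the one bits, and fold x^-i into (1/x)^i. The same square-and-multiply idea in plain C (powi here is a sketch, not the VM routine):

#include <stdio.h>

static double powi(double x, int i)
{
  double r = 1.0;
  unsigned u;
  if (i < 0) { x = 1.0 / x; u = (unsigned)-(i + 1) + 1u; }  /* x^-i = (1/x)^i */
  else u = (unsigned)i;
  while (u) {
    if (u & 1) r *= x;  /* Handle a trailing one bit. */
    x *= x;             /* Square for the next bit. */
    u >>= 1;
  }
  return r;
}

int main(void)
{
  printf("%g %g %g\n", powi(2.0, 10), powi(2.0, -2), powi(3.0, 0));
  /* 1024 0.25 1 */
  return 0;
}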
+ | fabs + | fld1 + ||if (cmov) { + | fucomip st1 + ||} else { + | fucomp st1; fnstsw ax; sahf + ||} + | je >3 // +-1^+-Inf ==> 1 + | fpop; fabs; fldz; mov eax, 0; setc al + | ror eax, 1; xor eax, [esp+8]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 + | fxch + |3: + | fpop1; fabs; pop eax + | ret + | + |4: // Handle +-0^y or +-Inf^y. + | cmp dword [esp+8], 0; jge <3 // y >= 0, x^y ==> |x| + | fpop; fpop + | test eax, eax; pop eax; jz >5 // y < 0, +-0^y ==> +Inf + | fldz // y < 0, +-Inf^y ==> 0 + | ret + |5: + | mov dword [esp+8], 0x7f800000 // Return +Inf. + | fld dword [esp+8] + | ret + | + |// Callable from C: double lj_vm_foldfpm(double x, int fpm) + |// Computes fpm(x) for extended math functions. ORDER FPM. + |->vm_foldfpm: + | mov eax, [esp+12] + | fld qword [esp+4] + | cmp eax, 1; jb ->vm_floor; je ->vm_ceil + | cmp eax, 3; jb ->vm_trunc; ja >1 + | fsqrt; ret + |1: ; cmp eax, 5; jb ->vm_exp; je ->vm_exp2 + | cmp eax, 7; je >1; ja >2 + | fldln2; fxch; fyl2x; ret + |1: ; fld1; fxch; fyl2x; ret + |2: ; cmp eax, 9; je >1; ja >2 + | fldlg2; fxch; fyl2x; ret + |1: ; fsin; ret + |2: ; cmp eax, 11; je >1; ja >9 + | fcos; ret + |1: ; fptan; fpop; ret + |9: ; int3 // Bad fpm. + | + |// Callable from C: double lj_vm_foldarith(double x, double y, int op) + |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) + |// and basic math functions. ORDER ARITH + |->vm_foldarith: + | mov eax, [esp+20] + | fld qword [esp+4] + | fld qword [esp+12] + | cmp eax, 1; je >1; ja >2 + | faddp st1; ret + |1: ; fsubp st1; ret + |2: ; cmp eax, 3; je >1; ja >2 + | fmulp st1; ret + |1: ; fdivp st1; ret + |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow + | cmp eax, 7; je >1; ja >2 + | fpop; fchs; ret + |1: ; fpop; fabs; ret + |2: ; cmp eax, 9; je >1; ja >2 + | fpatan; ret + |1: ; fxch; fscale; fpop1; ret + |2: ; cmp eax, 11; je >1; ja >9 + ||if (cmov) { + | fucomi st1; fcmovnbe st1; fpop1; ret + |1: ; fucomi st1; fcmovbe st1; fpop1; ret + ||} else { + | fucom st1; fnstsw ax; test ah, 1; jz >2; fxch; 2: ; fpop; ret + |1: ; fucom st1; fnstsw ax; test ah, 1; jnz >2; fxch; 2: ; fpop; ret + ||} + |9: ; int3 // Bad op. + | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | + |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]) + |->vm_cpuid: + | pushfd + | pop edx + | mov ecx, edx + | xor edx, 0x00200000 // Toggle ID bit in flags. + | push edx + | popfd + | pushfd + | pop edx + | xor eax, eax // Zero means no features supported. + | cmp ecx, edx + | jz >1 // No ID toggle means no CPUID support. + | mov eax, [esp+4] // Argument 1 is function number. + | push edi + | push ebx + | cpuid + | mov edi, [esp+16] // Argument 2 is result area. + | mov [edi], eax + | mov [edi+4], ebx + | mov [edi+8], ecx + | mov [edi+12], edx + | pop ebx + | pop edi + |1: + | ret + | + |//----------------------------------------------------------------------- +} + +/* Generate the code for a single instruction. */ +static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) +{ + int vk = 0; + |// Note: aligning all instructions does not pay off. + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. 
*/
+
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ | // RA = src1, RD = src2, JMP with RD = target
+ | ins_AD
+ | checknum RA, ->vmeta_comp
+ | checknum RD, ->vmeta_comp
+ | fld qword [BASE+RA*8] // Reverse order, i.e. like cmp D, A.
+ | fld qword [BASE+RD*8]
+ | add PC, 4
+ | fcomparepp // eax (RD) modified!
+ | // Unordered: all of ZF CF PF set, ordered: PF clear.
+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+ switch (op) {
+ case BC_ISLT:
+ | jbe >2
+ break;
+ case BC_ISGE:
+ | ja >2
+ break;
+ case BC_ISLE:
+ | jb >2
+ break;
+ case BC_ISGT:
+ | jae >2
+ break;
+ default: break; /* Shut up GCC. */
+ }
+ |1:
+ | movzx RD, PC_RD
+ | branchPC RD
+ |2:
+ | ins_next
+ break;
+
+ case BC_ISEQV: case BC_ISNEV:
+ vk = op == BC_ISEQV;
+ | ins_AD // RA = src1, RD = src2, JMP with RD = target
+ | mov RB, [BASE+RD*8+4]
+ | add PC, 4
+ | cmp RB, LJ_TISNUM; ja >5
+ | checknum RA, >5
+ | fld qword [BASE+RA*8]
+ | fld qword [BASE+RD*8]
+ | fcomparepp // eax (RD) modified!
+ iseqne_fp:
+ if (vk) {
+ | jp >2 // Unordered means not equal.
+ | jne >2
+ } else {
+ | jp >2 // Unordered means not equal.
+ | je >1
+ }
+ iseqne_end:
+ if (vk) {
+ |1: // EQ: Branch to the target.
+ | movzx RD, PC_RD
+ | branchPC RD
+ |2: // NE: Fallthrough to next instruction.
+ } else {
+ |2: // NE: Branch to the target.
+ | movzx RD, PC_RD
+ | branchPC RD
+ |1: // EQ: Fallthrough to next instruction.
+ }
+ | ins_next
+ |
+ if (op == BC_ISEQV || op == BC_ISNEV) {
+ |5: // Either or both types are not numbers.
+ | checktp RA, RB // Compare types.
+ | jne <2 // Not the same type?
+ | cmp RB, LJ_TISPRI
+ | jae <1 // Same type and primitive type?
+ |
+ | // Same types and not a primitive type. Compare GCobj or pvalue.
+ | mov RA, [BASE+RA*8]
+ | mov RD, [BASE+RD*8]
+ | cmp RA, RD
+ | je <1 // Same GCobjs or pvalues?
+ | cmp RB, LJ_TISTABUD
+ | ja <2 // Different objects and not table/ud?
+ |
+ | // Different tables or userdatas. Need to check __eq metamethod.
+ | // Field metatable must be at same offset for GCtab and GCudata!
+ | mov TAB:RB, TAB:RA->metatable
+ | test TAB:RB, TAB:RB
+ | jz <2 // No metatable?
+ | test byte TAB:RB->nomm, 1<<MM_eq
+ | jnz <2 // Or 'no __eq' flag set?
+ if (vk) {
+ | xor RB, RB // ne = 0
+ } else {
+ | mov RB, 1 // ne = 1
+ }
+ | jmp ->vmeta_equal // Handle __eq metamethod.
+ }
+ break;
+ case BC_ISEQS: case BC_ISNES:
+ vk = op == BC_ISEQS;
+ | ins_AND // RA = src, RD = str const, JMP with RD = target
+ | add PC, 4
+ | checkstr RA, >2
+ | mov RA, [BASE+RA*8]
+ | cmp RA, [KBASE+RD*4]
+ iseqne_test:
+ if (vk) {
+ | jne >2
+ } else {
+ | je >1
+ }
+ goto iseqne_end;
+ case BC_ISEQN: case BC_ISNEN:
+ vk = op == BC_ISEQN;
+ | ins_AD // RA = src, RD = num const, JMP with RD = target
+ | add PC, 4
+ | checknum RA, >2
+ | fld qword [BASE+RA*8]
+ | fld qword [KBASE+RD*8]
+ | fcomparepp // eax (RD) modified!
+ goto iseqne_fp;
+ case BC_ISEQP: case BC_ISNEP:
+ vk = op == BC_ISEQP;
+ | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
+ | add PC, 4
+ | checktp RA, RD
+ goto iseqne_test;
+
+ /* -- Unary test and copy ops ------------------------------------------- */
+
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
+ | mov RB, [BASE+RD*8+4]
+ | add PC, 4
+ | cmp RB, LJ_TISTRUECOND
+ if (op == BC_IST || op == BC_ISTC) {
+ | jae >1
+ } else {
+ | jb >1
+ }
+ if (op == BC_ISTC || op == BC_ISFC) {
+ | mov [BASE+RA*8+4], RB
+ | mov RB, [BASE+RD*8]
+ | mov [BASE+RA*8], RB
+ }
+ | movzx RD, PC_RD
+ | branchPC RD
+ |1: // Fallthrough to the next instruction.
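The branch polarity chosen above is forced by IEEE NaN semantics: every ordered comparison involving a NaN is false, so !(x < y) must not be treated as x >= y, which is why ISGE/ISGT branch on the unordered outcome while ISLT/ISLE fall through. A small C demonstration:

#include <math.h>
#include <stdio.h>

int main(void)
{
  double x = NAN, y = 0.0;
  printf("x <  y  : %d\n", x < y);     /* 0 */
  printf("x >= y  : %d\n", x >= y);    /* 0, NOT the negation of x < y */
  printf("!(x < y): %d\n", !(x < y));  /* 1 */
  return 0;
}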
+ | ins_next + break; + + /* -- Unary ops --------------------------------------------------------- */ + + case BC_MOV: + | ins_AD // RA = dst, RD = src + | mov RB, [BASE+RD*8+4] + | mov RD, [BASE+RD*8] // Overwrites RD. + | mov [BASE+RA*8+4], RB + | mov [BASE+RA*8], RD + | ins_next_ + break; + case BC_NOT: + | ins_AD // RA = dst, RD = src + | xor RB, RB + | checktp RD, LJ_TISTRUECOND + | adc RB, LJ_TTRUE + | mov [BASE+RA*8+4], RB + | ins_next + break; + case BC_UNM: + | ins_AD // RA = dst, RD = src + | checknum RD, ->vmeta_unm + | fld qword [BASE+RD*8] + | fchs + | fstp qword [BASE+RA*8] + | ins_next + break; + case BC_LEN: + | ins_AD // RA = dst, RD = src + | checkstr RD, >2 + | mov STR:RD, [BASE+RD*8] + | fild dword STR:RD->len + |1: + | fstp qword [BASE+RA*8] + | ins_next + |2: + | checktab RD, ->vmeta_len + | mov TAB:RD, [BASE+RD*8] + | mov ARG1, TAB:RD + | mov RB, BASE // Save BASE. + | call extern lj_tab_len // (GCtab *t) + | // Length of table returned in eax (RC). + | mov ARG1, RC + | mov BASE, RB // Restore BASE. + | fild ARG1 + | movzx RA, PC_RA + | jmp <1 + break; + + /* -- Binary ops -------------------------------------------------------- */ + + |.macro ins_arithpre, ins + | ins_ABC + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: + | checknum RB, ->vmeta_arith_vn + | fld qword [BASE+RB*8] + | ins qword [KBASE+RC*8] + || break; + ||case 1: + | checknum RB, ->vmeta_arith_nv + | fld qword [KBASE+RC*8] + | ins qword [BASE+RB*8] + || break; + ||default: + | checknum RB, ->vmeta_arith_vv + | checknum RC, ->vmeta_arith_vv + | fld qword [BASE+RB*8] + | ins qword [BASE+RC*8] + || break; + ||} + |.endmacro + | + |.macro ins_arith, ins + | ins_arithpre ins + | fstp qword [BASE+RA*8] + | ins_next + |.endmacro + + | // RA = dst, RB = src1 or num const, RC = src2 or num const + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arith fadd + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: + | ins_arith fsub + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arith fmul + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arith fdiv + break; + case BC_MODVN: + | ins_arithpre fld + |->BC_MODVN_Z: + | call ->vm_mod + | fstp qword [BASE+RA*8] + | ins_next + break; + case BC_MODNV: case BC_MODVV: + | ins_arithpre fld + | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. + break; + case BC_POW: + | ins_arithpre fld + | call ->vm_pow + | fstp qword [BASE+RA*8] + | ins_next + break; + + case BC_CAT: + | ins_ABC // RA = dst, RB = src_start, RC = src_end + | lea RA, [BASE+RC*8] + | sub RC, RB + | mov ARG2, RA + | mov ARG3, RC + |->BC_CAT_Z: + | mov L:RB, SAVE_L + | mov ARG1, L:RB + | mov SAVE_PC, PC + | mov L:RB->base, BASE + | call extern lj_meta_cat // (lua_State *L, TValue *top, int left) + | // NULL (finished) or TValue * (metamethod) returned in eax (RC). + | mov BASE, L:RB->base + | test RC, RC + | jnz ->vmeta_binop + | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB]. + | movzx RA, PC_RA + | mov RC, [BASE+RB*8+4] + | mov RB, [BASE+RB*8] + | mov [BASE+RA*8+4], RC + | mov [BASE+RA*8], RB + | ins_next + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | ins_AND // RA = dst, RD = str const (~) + | mov RD, [KBASE+RD*4] + | mov dword [BASE+RA*8+4], LJ_TSTR + | mov [BASE+RA*8], RD + | ins_next + break; + case BC_KSHORT: + | ins_AD // RA = dst, RD = signed int16 literal + | fild PC_RD // Refetch signed RD from instruction. 
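BC_NOT above is branchless: tags are descending unsigned values (~0 nil, ~1 false, ~2 true, per lj_obj.h), the cmp against LJ_TISTRUECOND sets the carry flag exactly for truthy tags, and adc then turns LJ_TTRUE into LJ_TFALSE. The same computation in C (bc_not is an illustrative name):

#include <stdint.h>
#include <stdio.h>

/* Tag values as in lj_obj.h: descending unsigned numbers, nil highest. */
#define LJ_TNIL (~0u)
#define LJ_TFALSE (~1u)
#define LJ_TTRUE (~2u)
#define LJ_TISTRUECOND LJ_TFALSE

/* The carry from the unsigned compare is the adc's +1. */
static uint32_t bc_not(uint32_t tag)
{
  return LJ_TTRUE + (tag < LJ_TISTRUECOND);
}

int main(void)
{
  printf("not nil   -> %s\n", bc_not(LJ_TNIL) == LJ_TTRUE ? "true" : "false");
  printf("not false -> %s\n", bc_not(LJ_TFALSE) == LJ_TTRUE ? "true" : "false");
  printf("not true  -> %s\n", bc_not(LJ_TTRUE) == LJ_TTRUE ? "true" : "false");
  return 0;
}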
+ | fstp qword [BASE+RA*8] + | ins_next + break; + case BC_KNUM: + | ins_AD // RA = dst, RD = num const + | fld qword [KBASE+RD*8] + | fstp qword [BASE+RA*8] + | ins_next + break; + case BC_KPRI: + | ins_AND // RA = dst, RD = primitive type (~) + | mov [BASE+RA*8+4], RD + | ins_next + break; + case BC_KNIL: + | ins_AD // RA = dst_start, RD = dst_end + | lea RA, [BASE+RA*8+12] + | lea RD, [BASE+RD*8+4] + | mov RB, LJ_TNIL + | mov [RA-8], RB // Sets minimum 2 slots. + |1: + | mov [RA], RB + | add RA, 8 + | cmp RA, RD + | jbe <1 + | ins_next + break; + + /* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | ins_AD // RA = dst, RD = upvalue # + | mov LFUNC:RB, [BASE-8] + | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)] + | mov RB, UPVAL:RB->v + | mov RD, [RB+4] + | mov RB, [RB] + | mov [BASE+RA*8+4], RD + | mov [BASE+RA*8], RB + | ins_next + break; + case BC_USETV: + | ins_AD // RA = upvalue #, RD = src + | // Really ugly code due to the lack of a 4th free register. + | mov LFUNC:RB, [BASE-8] + | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] + | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) + | jnz >4 + |1: + | mov RA, [BASE+RD*8] + |2: + | mov RB, UPVAL:RB->v + | mov RD, [BASE+RD*8+4] + | mov [RB], RA + | mov [RB+4], RD + |3: + | ins_next + | + |4: // Upvalue is black. Check if new value is collectable and white. + | mov RA, [BASE+RD*8+4] + | sub RA, LJ_TISGCV + | cmp RA, LJ_TISNUM - LJ_TISGCV // tvisgcv(v) + | jbe <1 + | mov GCOBJ:RA, [BASE+RD*8] + | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) + | jz <2 + | // Crossed a write barrier. So move the barrier forward. + | mov ARG2, UPVAL:RB + | mov ARG3, GCOBJ:RA + | mov RB, UPVAL:RB->v + | mov RD, [BASE+RD*8+4] + | mov [RB], GCOBJ:RA + | mov [RB+4], RD + |->BC_USETV_Z: + | mov L:RB, SAVE_L + | lea GL:RA, [DISPATCH+GG_DISP2G] + | mov L:RB->base, BASE + | mov ARG1, GL:RA + | call extern lj_gc_barrieruv // (global_State *g, GCobj *o, GCobj *v) + | mov BASE, L:RB->base + | jmp <3 + break; + case BC_USETS: + | ins_AND // RA = upvalue #, RD = str const (~) + | mov LFUNC:RB, [BASE-8] + | mov GCOBJ:RD, [KBASE+RD*4] + | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] + | mov RA, UPVAL:RB->v + | mov dword [RA+4], LJ_TSTR + | mov [RA], GCOBJ:RD + | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) + | jnz >2 + |1: + | ins_next + | + |2: // Upvalue is black. Check if string is white. + | test byte GCOBJ:RD->gch.marked, LJ_GC_WHITES // iswhite(str) + | jz <1 + | // Crossed a write barrier. So move the barrier forward. + | mov ARG3, GCOBJ:RD + | mov ARG2, UPVAL:RB + | jmp ->BC_USETV_Z + break; + case BC_USETN: + | ins_AD // RA = upvalue #, RD = num const + | mov LFUNC:RB, [BASE-8] + | fld qword [KBASE+RD*8] + | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] + | mov RA, UPVAL:RB->v + | fstp qword [RA] + | ins_next + break; + case BC_USETP: + | ins_AND // RA = upvalue #, RD = primitive type (~) + | mov LFUNC:RB, [BASE-8] + | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] + | mov RA, UPVAL:RB->v + | mov [RA+4], RD + | ins_next + break; + case BC_UCLO: + | ins_AD // RA = level, RD = target + | branchPC RD // Do this first to free RD. 
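The marked-byte tests in BC_USETV/BC_USETS above enforce the incremental GC invariant: a black object may never point at a white one, so storing a white collectable value into a black upvalue has to move the barrier forward. A compilable sketch of just the predicate, with a stand-in object layout (the mark bits match lj_gc.h; everything else is illustrative):

#include <stdio.h>

#define LJ_GC_WHITE0 0x01
#define LJ_GC_WHITE1 0x02
#define LJ_GC_BLACK 0x04
#define LJ_GC_WHITES (LJ_GC_WHITE0|LJ_GC_WHITE1)

struct obj { unsigned char marked; };  /* Stand-in, not the GCobj layout. */

/* Only a store that creates a black->white edge violates the invariant
** and must be intercepted. */
static int needs_barrier(const struct obj *holder, const struct obj *v)
{
  return (holder->marked & LJ_GC_BLACK) && (v->marked & LJ_GC_WHITES);
}

int main(void)
{
  struct obj uv = { LJ_GC_BLACK }, val = { LJ_GC_WHITE0 };
  printf("barrier: %d\n", needs_barrier(&uv, &val));  /* 1 */
  val.marked = LJ_GC_BLACK;
  printf("barrier: %d\n", needs_barrier(&uv, &val));  /* 0 */
  return 0;
}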
+ | mov L:RB, SAVE_L + | cmp dword L:RB->openupval, 0 + | je >1 + | lea RA, [BASE+RA*8] + | mov ARG2, RA + | mov ARG1, L:RB + | mov L:RB->base, BASE + | call extern lj_func_closeuv // (lua_State *L, StkId level) + | mov BASE, L:RB->base + |1: + | ins_next + break; + + case BC_FNEW: + | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) + | mov LFUNC:RA, [BASE-8] + | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *. + | mov L:RB, SAVE_L + | mov ARG3, LFUNC:RA + | mov ARG2, PROTO:RD + | mov SAVE_PC, PC + | mov ARG1, L:RB + | mov L:RB->base, BASE + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | call extern lj_func_newL_gc + | // GCfuncL * returned in eax (RC). + | mov BASE, L:RB->base + | movzx RA, PC_RA + | mov [BASE+RA*8], LFUNC:RC + | mov dword [BASE+RA*8+4], LJ_TFUNC + | ins_next + break; + + /* -- Table ops --------------------------------------------------------- */ + + case BC_TNEW: + | ins_AD // RA = dst, RD = hbits|asize + | mov RB, RD + | and RD, 0x7ff + | shr RB, 11 + | cmp RD, 0x7ff // Turn 0x7ff into 0x801. + | sete RAL + | mov ARG3, RB + | add RD, RA + | mov L:RB, SAVE_L + | add RD, RA + | mov ARG2, RD + | mov SAVE_PC, PC + | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] + | mov ARG1, L:RB + | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] + | mov L:RB->base, BASE + | jae >2 + |1: + | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) + | // Table * returned in eax (RC). + | mov BASE, L:RB->base + | movzx RA, PC_RA + | mov [BASE+RA*8], TAB:RC + | mov dword [BASE+RA*8+4], LJ_TTAB + | ins_next + |2: + | call extern lj_gc_step_fixtop // (lua_State *L) + | mov ARG1, L:RB // Args owned by callee. Set it again. + | jmp <1 + break; + case BC_TDUP: + | ins_AND // RA = dst, RD = table const (~) (holding template table) + | mov TAB:RD, [KBASE+RD*4] + | mov L:RB, SAVE_L + | mov ARG2, TAB:RD + | mov ARG1, L:RB + | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] + | mov SAVE_PC, PC + | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] + | mov L:RB->base, BASE + | jae >3 + |2: + | call extern lj_tab_dup // (lua_State *L, Table *kt) + | // Table * returned in eax (RC). + | mov BASE, L:RB->base + | movzx RA, PC_RA + | mov [BASE+RA*8], TAB:RC + | mov dword [BASE+RA*8+4], LJ_TTAB + | ins_next + |3: + | call extern lj_gc_step_fixtop // (lua_State *L) + | mov ARG1, L:RB // Args owned by callee. Set it again. + | jmp <2 + break; + + case BC_GGET: + | ins_AND // RA = dst, RD = str const (~) + | mov LFUNC:RB, [BASE-8] + | mov TAB:RB, LFUNC:RB->env + | mov STR:RC, [KBASE+RD*4] + | jmp ->BC_TGETS_Z + break; + case BC_GSET: + | ins_AND // RA = src, RD = str const (~) + | mov LFUNC:RB, [BASE-8] + | mov TAB:RB, LFUNC:RB->env + | mov STR:RC, [KBASE+RD*4] + | jmp ->BC_TSETS_Z + break; + + case BC_TGETV: + | ins_ABC // RA = dst, RB = table, RC = key + | checktab RB, ->vmeta_tgetv + | mov TAB:RB, [BASE+RB*8] + | + | // Integer key? Convert number to int and back and compare. + | checknum RC, >5 + | fld qword [BASE+RC*8] + | fist ARG1 + | fild ARG1 + | fcomparepp // eax (RC) modified! + | mov RC, ARG1 + | jne ->vmeta_tgetv // Generic numeric key? Use fallback. + | cmp RC, TAB:RB->asize // Takes care of unordered, too. + | jae ->vmeta_tgetv // Not in array part? Use fallback. + | shl RC, 3 + | add RC, TAB:RB->array + | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. + | je >2 + |1: + | mov RB, [RC] // Get array slot. + | mov RC, [RC+4] + | mov [BASE+RA*8], RB + | mov [BASE+RA*8+4], RC + | ins_next + | + |2: // Check for __index if table value is nil. 
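The fist/fild round trip in BC_TGETV above detects integer keys: convert the double key to int32 and back, and only keys that survive unchanged may index the array part; fractions, NaN and out-of-range values compare unequal and take the fallback. In C terms (int_key is a made-up helper):

#include <stdio.h>

/* The VM's fist writes 0x80000000 for out-of-range values, which then
** fails the compare; the explicit range guard here plays that role. */
static int int_key(double d, int *k)
{
  if (!(d >= -2147483648.0 && d < 2147483648.0))
    return 0;                 /* Out of int32 range (or NaN). */
  *k = (int)d;                /* fist */
  return (double)*k == d;     /* fild + compare: integral keys only. */
}

int main(void)
{
  int k;
  printf("%d\n", int_key(7.0, &k));  /* 1, k = 7 */
  printf("%d\n", int_key(7.5, &k));  /* 0 */
  return 0;
}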
+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+ | jz <1
+ | mov TAB:RA, TAB:RB->metatable
+ | test byte TAB:RA->nomm, 1<<MM_index
+ | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <1
+ |
+ |5: // String key?
+ | checkstr RC, ->vmeta_tgetv
+ | mov STR:RC, [BASE+RC*8]
+ | jmp ->BC_TGETS_Z
+ break;
+ case BC_TGETS:
+ | ins_ABC // RA = dst, RB = table, RC = str const (~)
+ | not RC
+ | mov STR:RC, [KBASE+RC*4]
+ | checktab RB, ->vmeta_tgets
+ | mov TAB:RB, [BASE+RB*8]
+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
+ | mov RA, TAB:RB->hmask
+ | and RA, STR:RC->hash
+ | imul RA, #NODE
+ | add NODE:RA, TAB:RB->node
+ |1:
+ | cmp dword NODE:RA->key.it, LJ_TSTR
+ | jne >4
+ | cmp dword NODE:RA->key.gcr, STR:RC
+ | jne >4
+ | // Ok, key found. Assumes: offsetof(Node, val) == 0
+ | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath.
+ | je >5 // Key found, but nil value?
+ | movzx RC, PC_RA
+ | mov RB, [RA] // Get node value.
+ | mov RA, [RA+4]
+ | mov [BASE+RC*8], RB
+ |2:
+ | mov [BASE+RC*8+4], RA
+ | ins_next
+ |
+ |3:
+ | movzx RC, PC_RA
+ | mov RA, LJ_TNIL
+ | jmp <2
+ |
+ |4: // Follow hash chain.
+ | mov NODE:RA, NODE:RA->next
+ | test NODE:RA, NODE:RA
+ | jnz <1
+ | // End of hash chain: key not found, nil result.
+ |
+ |5: // Check for __index if table value is nil.
+ | mov TAB:RA, TAB:RB->metatable
+ | test TAB:RA, TAB:RA
+ | jz <3 // No metatable: done.
+ | test byte TAB:RA->nomm, 1<<MM_index
+ | jnz <3 // 'no __index' flag set: done.
+ | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
+ break;
+ case BC_TGETB:
+ | ins_ABC // RA = dst, RB = table, RC = byte literal
+ | checktab RB, ->vmeta_tgetb
+ | mov TAB:RB, [BASE+RB*8]
+ | cmp RC, TAB:RB->asize
+ | jae ->vmeta_tgetb
+ | shl RC, 3
+ | add RC, TAB:RB->array
+ | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
+ | je >2
+ |1:
+ | mov RB, [RC] // Get array slot.
+ | mov RC, [RC+4]
+ | mov [BASE+RA*8], RB
+ | mov [BASE+RA*8+4], RC
+ | ins_next
+ |
+ |2: // Check for __index if table value is nil.
+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+ | jz <1
+ | mov TAB:RA, TAB:RB->metatable
+ | test byte TAB:RA->nomm, 1<<MM_index
+ | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <1
+ break;
+
+ case BC_TSETV:
+ | ins_ABC // RA = src, RB = table, RC = key
+ | checktab RB, ->vmeta_tsetv
+ | mov TAB:RB, [BASE+RB*8]
+ |
+ | // Integer key? Convert number to int and back and compare.
+ | checknum RC, >5
+ | fld qword [BASE+RC*8]
+ | fist ARG1
+ | fild ARG1
+ | fcomparepp // eax (RC) modified!
+ | mov RC, ARG1
+ | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
+ | cmp RC, TAB:RB->asize // Takes care of unordered, too.
+ | jae ->vmeta_tsetv
+ | shl RC, 3
+ | add RC, TAB:RB->array
+ | cmp dword [RC+4], LJ_TNIL
+ | je >3 // Previous value is nil?
+ |1:
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |2:
+ | mov RB, [BASE+RA*8+4] // Set array slot.
+ | mov RA, [BASE+RA*8]
+ | mov [RC+4], RB
+ | mov [RC], RA
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+ | jz <1
+ | mov TAB:RA, TAB:RB->metatable
+ | test byte TAB:RA->nomm, 1<<MM_newindex
+ | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <1
+ |
+ |5: // String key?
+ | checkstr RC, ->vmeta_tsetv
+ | mov STR:RC, [BASE+RC*8]
+ | jmp ->BC_TSETS_Z
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, RA
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <2
+ break;
+ case BC_TSETS:
+ | ins_ABC // RA = src, RB = table, RC = str const (~)
+ | not RC
+ | mov STR:RC, [KBASE+RC*4]
+ | checktab RB, ->vmeta_tsets
+ | mov TAB:RB, [BASE+RB*8]
+ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
+ | mov RA, TAB:RB->hmask
+ | and RA, STR:RC->hash
+ | imul RA, #NODE
+ | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
+ | add NODE:RA, TAB:RB->node
+ |1:
+ | cmp dword NODE:RA->key.it, LJ_TSTR
+ | jne >5
+ | cmp dword NODE:RA->key.gcr, STR:RC
+ | jne >5
+ | // Ok, key found. Assumes: offsetof(Node, val) == 0
+ | cmp dword [RA+4], LJ_TNIL
+ | je >4 // Previous value is nil?
+ |2:
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |3:
+ | movzx RC, PC_RA
+ | mov RB, [BASE+RC*8+4] // Set node value.
+ | mov RC, [BASE+RC*8]
+ | mov [RA+4], RB
+ | mov [RA], RC
+ | ins_next
+ |
+ |4: // Check for __newindex if previous value is nil.
+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+ | jz <2
+ | mov ARG1, RA // Save RA.
+ | mov TAB:RA, TAB:RB->metatable
+ | test byte TAB:RA->nomm, 1<<MM_newindex
+ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ | mov RA, ARG1 // Restore RA.
+ | jmp <2
+ |
+ |5: // Follow hash chain.
+ | mov NODE:RA, NODE:RA->next
+ | test NODE:RA, NODE:RA
+ | jnz <1
+ | // End of hash chain: key not found, add a new one.
+ |
+ | // But check for __newindex first.
+ | mov TAB:RA, TAB:RB->metatable
+ | test TAB:RA, TAB:RA
+ | jz >6 // No metatable: continue.
+ | test byte TAB:RA->nomm, 1<<MM_newindex
+ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ |6:
+ | mov ARG5, STR:RC
+ | mov ARG6, LJ_TSTR
+ | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
+ | mov ARG4, TAB:RB // Save TAB:RB for us.
+ | mov ARG2, TAB:RB
+ | mov L:RB, SAVE_L
+ | mov ARG3, RC
+ | mov ARG1, L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+ | // Handles write barrier for the new key. TValue * returned in eax (RC).
+ | mov BASE, L:RB->base
+ | mov TAB:RB, ARG4 // Need TAB:RB for barrier.
+ | mov RA, eax
+ | jmp <2 // Must check write barrier for value.
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, RC // Destroys STR:RC.
+ | jmp <3
+ break;
+ case BC_TSETB:
+ | ins_ABC // RA = src, RB = table, RC = byte literal
+ | checktab RB, ->vmeta_tsetb
+ | mov TAB:RB, [BASE+RB*8]
+ | cmp RC, TAB:RB->asize
+ | jae ->vmeta_tsetb
+ | shl RC, 3
+ | add RC, TAB:RB->array
+ | cmp dword [RC+4], LJ_TNIL
+ | je >3 // Previous value is nil?
+ |1:
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |2:
+ | mov RB, [BASE+RA*8+4] // Set array slot.
+ | mov RA, [BASE+RA*8]
+ | mov [RC+4], RB
+ | mov [RC], RA
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+ | jz <1
+ | mov TAB:RA, TAB:RB->metatable
+ | test byte TAB:RA->nomm, 1<<MM_newindex
+ | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <1
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, RA
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <2
+ break;
+
+ case BC_TSETM:
+ | ins_AD // RA = base (table at base-1), RD = num const (start index)
+ | mov ARG5, KBASE // Need one more free register.
+ | fld qword [KBASE+RD*8]
+ | fistp ARG4 // Const is guaranteed to be an int.
+ |1:
+ | lea RA, [BASE+RA*8]
+ | mov TAB:RB, [RA-8] // Guaranteed to be a table.
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |2:
+ | mov RD, NRESULTS
+ | mov KBASE, ARG4
+ | sub RD, 1
+ | jz >4 // Nothing to copy?
+ | add RD, KBASE // Compute needed size.
+ | cmp RD, TAB:RB->asize
+ | jae >5 // Does not fit into array part?
+ | sub RD, KBASE
+ | shl KBASE, 3
+ | add KBASE, TAB:RB->array
+ |3: // Copy result slots to table.
+ | mov RB, [RA]
+ | mov [KBASE], RB
+ | mov RB, [RA+4]
+ | add RA, 8
+ | mov [KBASE+4], RB
+ | add KBASE, 8
+ | sub RD, 1
+ | jnz <3
+ |4:
+ | mov KBASE, ARG5
+ | ins_next
+ |
+ |5: // Need to resize array part.
+ | mov ARG2, TAB:RB
+ | mov L:RB, SAVE_L
+ | mov ARG3, RD
+ | mov ARG1, L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
+ | mov BASE, L:RB->base
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <1 // Retry.
+ |
+ |7: // Possible table write barrier for any value. Skip valiswhite check.
+ | barrierback TAB:RB, RD
+ | jmp <2
+ break;
+
+ /* -- Calls and vararg handling ----------------------------------------- */
+
+ case BC_CALL: case BC_CALLM:
+ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+ if (op == BC_CALLM) {
+ | add NARGS:RC, NRESULTS
+ }
+ | lea RA, [BASE+RA*8+8]
+ | mov LFUNC:RB, [RA-8]
+ | cmp dword [RA-4], LJ_TFUNC
+ | jne ->vmeta_call
+ | jmp aword LFUNC:RB->gate
+ break;
+
+ case BC_CALLMT:
+ | ins_AD // RA = base, RD = extra_nargs
+ | add NARGS:RD, NRESULTS
+ | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
+ break;
+ case BC_CALLT:
+ | ins_AD // RA = base, RD = nargs+1
+ | lea RA, [BASE+RA*8+8]
+ | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
+ | mov LFUNC:RB, [RA-8]
+ | cmp dword [RA-4], LJ_TFUNC
+ | jne ->vmeta_call
+ |->BC_CALLT_Z:
+ | mov PC, [BASE-4]
+ | test PC, FRAME_TYPE
+ | jnz >7
+ |1:
+ | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below.
+ | mov NRESULTS, NARGS:RD
+ | sub NARGS:RD, 1
+ | jz >3
+ |2:
+ | mov RB, [RA] // Move args down.
+ | mov [KBASE], RB
+ | mov RB, [RA+4]
+ | mov [KBASE+4], RB
+ | add KBASE, 8
+ | add RA, 8
+ | sub NARGS:RD, 1
+ | jnz <2
+ |
+ | mov LFUNC:RB, [BASE-8]
+ |3:
+ | mov RA, BASE // BASE is ignored, except when ...
+ | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
+ | ja >5
+ |4:
+ | mov NARGS:RD, NRESULTS
+ | jmp aword LFUNC:RB->gate
+ |
+ |5: // Tailcall to a fast function.
+ | test PC, FRAME_TYPE // Lua frame below?
+ | jnz <4
+ | movzx RD, PC_RA // Need to prepare BASE/KBASE.
+ | not RD
+ | lea BASE, [BASE+RD*8]
+ | mov LFUNC:KBASE, [BASE-8]
+ | mov PROTO:KBASE, LFUNC:KBASE->pt
+ | mov KBASE, PROTO:KBASE->k
+ | jmp <4
+ |
+ |7: // Tailcall from a vararg function.
+ | jnp <1 // Vararg frame below?
+ | and PC, -8
+ | sub BASE, PC // Need to relocate BASE/KBASE down.
+ | mov KBASE, BASE
+ | mov PC, [BASE-4]
+ | jmp <1
+ break;
+
+ case BC_ITERC:
+ | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
+ | lea RA, [BASE+RA*8+8] // fb = base+1
+ | mov RB, [RA-24] // Copy state. fb[0] = fb[-3].
+ | mov RC, [RA-20]
+ | mov [RA], RB
+ | mov [RA+4], RC
+ | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2].
+ | mov RC, [RA-12]
+ | mov [RA+8], RB
+ | mov [RA+12], RC
+ | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4]
+ | mov RC, [RA-28]
+ | mov [RA-8], LFUNC:RB
+ | mov [RA-4], RC
+ | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call.
+ | mov NARGS:RC, 3 + | jne ->vmeta_call + | jmp aword LFUNC:RB->gate + break; + + case BC_VARG: + | ins_AB_ // RA = base, RB = nresults+1, (RC = 1) + | mov LFUNC:RC, [BASE-8] + | lea RA, [BASE+RA*8] + | mov PROTO:RC, LFUNC:RC->pt + | movzx RC, byte PROTO:RC->numparams + | mov ARG3, KBASE // Need one more free register. + | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)] + | sub KBASE, [BASE-4] + | // Note: KBASE may now be even _above_ BASE if nargs was < numparams. + | test RB, RB + | jz >5 // Copy all varargs? + | lea RB, [RA+RB*8-8] + | cmp KBASE, BASE // No vararg slots? + | jnb >2 + |1: // Copy vararg slots to destination slots. + | mov RC, [KBASE-8] + | mov [RA], RC + | mov RC, [KBASE-4] + | add KBASE, 8 + | mov [RA+4], RC + | add RA, 8 + | cmp RA, RB // All destination slots filled? + | jnb >3 + | cmp KBASE, BASE // No more vararg slots? + | jb <1 + |2: // Fill up remainder with nil. + | mov dword [RA+4], LJ_TNIL + | add RA, 8 + | cmp RA, RB + | jb <2 + |3: + | mov KBASE, ARG3 + | ins_next + | + |5: // Copy all varargs. + | mov NRESULTS, 1 // NRESULTS = 0+1 + | mov RC, BASE + | sub RC, KBASE + | jbe <3 // No vararg slots? + | mov RB, RC + | shr RB, 3 + | mov ARG2, RB // Store this for stack growth below. + | add RB, 1 + | mov NRESULTS, RB // NRESULTS = #varargs+1 + | mov L:RB, SAVE_L + | add RC, RA + | cmp RC, L:RB->maxstack + | ja >7 // Need to grow stack? + |6: // Copy all vararg slots. + | mov RC, [KBASE-8] + | mov [RA], RC + | mov RC, [KBASE-4] + | add KBASE, 8 + | mov [RA+4], RC + | add RA, 8 + | cmp KBASE, BASE // No more vararg slots? + | jb <6 + | jmp <3 + | + |7: // Grow stack for varargs. + | mov L:RB->base, BASE + | mov L:RB->top, RA + | mov SAVE_PC, PC + | sub KBASE, BASE // Need delta, because BASE may change. + | mov ARG1, L:RB + | call extern lj_state_growstack // (lua_State *L, int n) + | mov BASE, L:RB->base + | mov RA, L:RB->top + | add KBASE, BASE + | jmp <6 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | ins_AD // RA = results, RD = extra_nresults + | add RD, NRESULTS // NRESULTS >=1, so RD >=1. + | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. + break; + + case BC_RET: case BC_RET0: case BC_RET1: + | ins_AD // RA = results, RD = nresults+1 + if (op != BC_RET0) { + | shl RA, 3 + } + |1: + | mov PC, [BASE-4] + | mov NRESULTS, RD // Save nresults+1. + | test PC, FRAME_TYPE // Check frame type marker. + | jnz >7 // Not returning to a fixarg Lua func? + switch (op) { + case BC_RET: + |->BC_RET_Z: + | mov KBASE, BASE // Use KBASE for result move. + | sub RD, 1 + | jz >3 + |2: + | mov RB, [KBASE+RA] // Move results down. + | mov [KBASE-8], RB + | mov RB, [KBASE+RA+4] + | mov [KBASE-4], RB + | add KBASE, 8 + | sub RD, 1 + | jnz <2 + |3: + | mov RD, NRESULTS // Note: NRESULTS may be >255. + | movzx RB, PC_RB // So cannot compare with RDL! + |5: + | cmp RB, RD // More results expected? + | ja >6 + break; + case BC_RET1: + | mov RB, [BASE+RA+4] + | mov [BASE-4], RB + | mov RB, [BASE+RA] + | mov [BASE-8], RB + /* fallthrough */ + case BC_RET0: + |5: + | cmp PC_RB, RDL // More results expected? + | ja >6 + default: + break; + } + | movzx RA, PC_RA + | not RA // Note: ~RA = -(RA+1) + | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 + | mov LFUNC:KBASE, [BASE-8] + | mov PROTO:KBASE, LFUNC:KBASE->pt + | mov KBASE, PROTO:KBASE->k + | ins_next + | + |6: // Fill up results with nil. + if (op == BC_RET) { + | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base. 
+ | add KBASE, 8
+ } else {
+ | mov dword [BASE+RD*8-12], LJ_TNIL
+ }
+ | add RD, 1
+ | jmp <5
+ |
+ |7: // Non-standard return case.
+ | jnp ->vm_return
+ | // Return from vararg function: relocate BASE down and RA up.
+ | and PC, -8
+ | sub BASE, PC
+ if (op != BC_RET0) {
+ | add RA, PC
+ }
+ | jmp <1
+ break;
+
+ /* -- Loops and branches ------------------------------------------------ */
+
+ |.define FOR_IDX, qword [RA]; .define FOR_TIDX, dword [RA+4]
+ |.define FOR_STOP, qword [RA+8]; .define FOR_TSTOP, dword [RA+12]
+ |.define FOR_STEP, qword [RA+16]; .define FOR_TSTEP, dword [RA+20]
+ |.define FOR_EXT, qword [RA+24]; .define FOR_TEXT, dword [RA+28]
+
+ case BC_FORL:
+#if LJ_HASJIT
+ | hotloop RB
+#endif
+ | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
+ break;
+
+ case BC_JFORI:
+ case BC_JFORL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_FORI:
+ case BC_IFORL:
+ vk = (op == BC_IFORL || op == BC_JFORL);
+ | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
+ | lea RA, [BASE+RA*8]
+ if (!vk) {
+ | cmp FOR_TIDX, LJ_TISNUM; ja ->vmeta_for // Type checks
+ | cmp FOR_TSTOP, LJ_TISNUM; ja ->vmeta_for
+ }
+ | mov RB, FOR_TSTEP // Load type/hiword of for step.
+ if (!vk) {
+ | cmp RB, LJ_TISNUM; ja ->vmeta_for
+ }
+ | fld FOR_STOP
+ | fld FOR_IDX
+ if (vk) {
+ | fadd FOR_STEP // nidx = idx + step
+ | fst FOR_IDX
+ }
+ | fst FOR_EXT
+ | test RB, RB // Swap lim/(n)idx if step non-negative.
+ | js >1
+ | fxch
+ |1:
+ | fcomparepp // eax (RD) modified if !cmov.
+ if (!cmov) {
+ | movzx RD, PC_RD // Need to reload RD.
+ }
+ if (op == BC_FORI) {
+ | jnb >2
+ | branchPC RD
+ } else if (op == BC_JFORI) {
+ | branchPC RD
+ | movzx RD, PC_RD
+ | jnb =>BC_JLOOP
+ } else if (op == BC_IFORL) {
+ | jb >2
+ | branchPC RD
+ } else {
+ | jnb =>BC_JLOOP
+ }
+ |2:
+ | ins_next
+ break;
+
+ case BC_ITERL:
+#if LJ_HASJIT
+ | hotloop RB
+#endif
+ | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
+ break;
+
+ case BC_JITERL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IITERL:
+ | ins_AJ // RA = base, RD = target
+ | lea RA, [BASE+RA*8]
+ | mov RB, [RA+4]
+ | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
+ if (op == BC_JITERL) {
+ | mov [RA-4], RB
+ | mov RB, [RA]
+ | mov [RA-8], RB
+ | jmp =>BC_JLOOP
+ } else {
+ | branchPC RD // Otherwise save control var + branch.
+ | mov RD, [RA]
+ | mov [RA-4], RB
+ | mov [RA-8], RD
+ }
+ |1:
+ | ins_next
+ break;
+
+ case BC_LOOP:
+ | ins_A // RA = base, RD = target (loop extent)
+ | // Note: RA/RD is only used by trace recorder to determine scope/extent
+ | // This opcode does NOT jump, its only purpose is to detect a hot loop.
+#if LJ_HASJIT
+ | hotloop RB
+#endif
+ | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
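The swap-on-sign trick in FORI/IFORL above lets one comparison serve both loop directions: for a non-negative step the index and stop operands are exchanged, so the loop always terminates once the index passes the stop value in the step's direction. Equivalent C semantics for the numeric for loop:

#include <stdio.h>

static void numeric_for(double start, double stop, double step)
{
  double idx;
  /* One condition covers both directions, as the fxch does above. */
  for (idx = start; step >= 0 ? idx <= stop : idx >= stop; idx += step)
    printf("%g ", idx);
  printf("\n");
}

int main(void)
{
  numeric_for(1, 3, 1);   /* 1 2 3 */
  numeric_for(3, 1, -1);  /* 3 2 1 */
  return 0;
}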
+ break; + + case BC_ILOOP: + | ins_A // RA = base, RD = target (loop extent) + | ins_next + break; + + case BC_JLOOP: +#if LJ_HASJIT + | ins_AD // RA = base (ignored), RD = traceno + | mov RA, [DISPATCH+DISPATCH_J(trace)] + | mov TRACE:RD, [RA+RD*4] + | mov RD, TRACE:RD->mcode + | mov L:RB, SAVE_L + | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE + | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB + | jmp RD +#endif + break; + + case BC_JMP: + | ins_AJ // RA = unused, RD = target + | branchPC RD + | ins_next + break; + + /* ---------------------------------------------------------------------- */ + + default: + fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); + exit(2); + break; + } +} + +static int build_backend(BuildCtx *ctx) +{ + int op; + int cmov = 1; +#ifdef LUAJIT_CPU_NOCMOV + cmov = 0; +#endif + + dasm_growpc(Dst, BC__MAX); + + build_subroutines(ctx, cmov); + + |.code_op + for (op = 0; op < BC__MAX; op++) + build_ins(ctx, (BCOp)op, op, cmov); + + return BC__MAX; +} + +/* Emit pseudo frame-info for all assembler functions. */ +static void emit_asm_debug(BuildCtx *ctx) +{ + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); + fprintf(ctx->fp, + ".Lframe0:\n" + "\t.long .LECIE0-.LSCIE0\n" + ".LSCIE0:\n" + "\t.long 0xffffffff\n" + "\t.byte 0x1\n" + "\t.string \"\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -4\n" + "\t.byte 0x8\n" + "\t.byte 0xc\n\t.uleb128 0x4\n\t.uleb128 0x4\n" + "\t.byte 0x88\n\t.uleb128 0x1\n" + "\t.align 4\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp, + ".LSFDE0:\n" + "\t.long .LEFDE0-.LASFDE0\n" + ".LASFDE0:\n" + "\t.long .Lframe0\n" + "\t.long .Lbegin\n" + "\t.long %d\n" + "\t.byte 0xe\n\t.uleb128 0x30\n" /* def_cfa_offset */ + "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ + "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */ + "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */ + "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */ + "\t.align 4\n" + ".LEFDE0:\n\n", (int)ctx->codesz); + break; + default: /* Difficult for other modes. */ + break; + } +} + diff --git a/src/lauxlib.h b/src/lauxlib.h new file mode 100644 index 0000000000..505a9f5228 --- /dev/null +++ b/src/lauxlib.h @@ -0,0 +1,159 @@ +/* +** $Id: lauxlib.h,v 1.88.1.1 2007/12/27 13:02:25 roberto Exp $ +** Auxiliary functions for building Lua libraries +** See Copyright Notice in lua.h +*/ + + +#ifndef lauxlib_h +#define lauxlib_h + + +#include +#include + +#include "lua.h" + + +#define luaL_getn(L,i) ((int)lua_objlen(L, i)) +#define luaL_setn(L,i,j) ((void)0) /* no op! 
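For reference, the hand-assembled .debug_frame section emitted above decodes as follows (standard DWARF call-frame opcodes with x86 DWARF register numbering and the CIE's data alignment factor of -4; this annotation is editorial, not from the source):

  /* CIE: 0x0c = DW_CFA_def_cfa        reg 4 (esp), offset 4
  **      0x88 = DW_CFA_offset reg 8   (return address) at cfa-4
  ** FDE: 0x0e = DW_CFA_def_cfa_offset 0x30  (48-byte interpreter frame)
  **      0x85 = DW_CFA_offset reg 5   (ebp) at cfa-8
  **      0x87 = DW_CFA_offset reg 7   (edi) at cfa-12
  **      0x86 = DW_CFA_offset reg 6   (esi) at cfa-16
  **      0x83 = DW_CFA_offset reg 3   (ebx) at cfa-20 */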
*/ + +/* extra error code for `luaL_load' */ +#define LUA_ERRFILE (LUA_ERRERR+1) + +typedef struct luaL_Reg { + const char *name; + lua_CFunction func; +} luaL_Reg; + +LUALIB_API void (luaL_openlib) (lua_State *L, const char *libname, + const luaL_Reg *l, int nup); +LUALIB_API void (luaL_register) (lua_State *L, const char *libname, + const luaL_Reg *l); +LUALIB_API int (luaL_getmetafield) (lua_State *L, int obj, const char *e); +LUALIB_API int (luaL_callmeta) (lua_State *L, int obj, const char *e); +LUALIB_API int (luaL_typerror) (lua_State *L, int narg, const char *tname); +LUALIB_API int (luaL_argerror) (lua_State *L, int numarg, const char *extramsg); +LUALIB_API const char *(luaL_checklstring) (lua_State *L, int numArg, + size_t *l); +LUALIB_API const char *(luaL_optlstring) (lua_State *L, int numArg, + const char *def, size_t *l); +LUALIB_API lua_Number (luaL_checknumber) (lua_State *L, int numArg); +LUALIB_API lua_Number (luaL_optnumber) (lua_State *L, int nArg, lua_Number def); + +LUALIB_API lua_Integer (luaL_checkinteger) (lua_State *L, int numArg); +LUALIB_API lua_Integer (luaL_optinteger) (lua_State *L, int nArg, + lua_Integer def); + +LUALIB_API void (luaL_checkstack) (lua_State *L, int sz, const char *msg); +LUALIB_API void (luaL_checktype) (lua_State *L, int narg, int t); +LUALIB_API void (luaL_checkany) (lua_State *L, int narg); + +LUALIB_API int (luaL_newmetatable) (lua_State *L, const char *tname); +LUALIB_API void *(luaL_checkudata) (lua_State *L, int ud, const char *tname); + +LUALIB_API void (luaL_where) (lua_State *L, int lvl); +LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...); + +LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def, + const char *const lst[]); + +LUALIB_API int (luaL_ref) (lua_State *L, int t); +LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref); + +LUALIB_API int (luaL_loadfile) (lua_State *L, const char *filename); +LUALIB_API int (luaL_loadbuffer) (lua_State *L, const char *buff, size_t sz, + const char *name); +LUALIB_API int (luaL_loadstring) (lua_State *L, const char *s); + +LUALIB_API lua_State *(luaL_newstate) (void); + + +LUALIB_API const char *(luaL_gsub) (lua_State *L, const char *s, const char *p, + const char *r); + +LUALIB_API const char *(luaL_findtable) (lua_State *L, int idx, + const char *fname, int szhint); + + + + +/* +** =============================================================== +** some useful macros +** =============================================================== +*/ + +#define luaL_argcheck(L, cond,numarg,extramsg) \ + ((void)((cond) || luaL_argerror(L, (numarg), (extramsg)))) +#define luaL_checkstring(L,n) (luaL_checklstring(L, (n), NULL)) +#define luaL_optstring(L,n,d) (luaL_optlstring(L, (n), (d), NULL)) +#define luaL_checkint(L,n) ((int)luaL_checkinteger(L, (n))) +#define luaL_optint(L,n,d) ((int)luaL_optinteger(L, (n), (d))) +#define luaL_checklong(L,n) ((long)luaL_checkinteger(L, (n))) +#define luaL_optlong(L,n,d) ((long)luaL_optinteger(L, (n), (d))) + +#define luaL_typename(L,i) lua_typename(L, lua_type(L,(i))) + +#define luaL_dofile(L, fn) \ + (luaL_loadfile(L, fn) || lua_pcall(L, 0, LUA_MULTRET, 0)) + +#define luaL_dostring(L, s) \ + (luaL_loadstring(L, s) || lua_pcall(L, 0, LUA_MULTRET, 0)) + +#define luaL_getmetatable(L,n) (lua_getfield(L, LUA_REGISTRYINDEX, (n))) + +#define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? 
(d) : f(L,(n))) + +/* +** {====================================================== +** Generic Buffer manipulation +** ======================================================= +*/ + + + +typedef struct luaL_Buffer { + char *p; /* current position in buffer */ + int lvl; /* number of strings in the stack (level) */ + lua_State *L; + char buffer[LUAL_BUFFERSIZE]; +} luaL_Buffer; + +#define luaL_addchar(B,c) \ + ((void)((B)->p < ((B)->buffer+LUAL_BUFFERSIZE) || luaL_prepbuffer(B)), \ + (*(B)->p++ = (char)(c))) + +/* compatibility only */ +#define luaL_putchar(B,c) luaL_addchar(B,c) + +#define luaL_addsize(B,n) ((B)->p += (n)) + +LUALIB_API void (luaL_buffinit) (lua_State *L, luaL_Buffer *B); +LUALIB_API char *(luaL_prepbuffer) (luaL_Buffer *B); +LUALIB_API void (luaL_addlstring) (luaL_Buffer *B, const char *s, size_t l); +LUALIB_API void (luaL_addstring) (luaL_Buffer *B, const char *s); +LUALIB_API void (luaL_addvalue) (luaL_Buffer *B); +LUALIB_API void (luaL_pushresult) (luaL_Buffer *B); + + +/* }====================================================== */ + + +/* compatibility with ref system */ + +/* pre-defined references */ +#define LUA_NOREF (-2) +#define LUA_REFNIL (-1) + +#define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \ + (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0)) + +#define lua_unref(L,ref) luaL_unref(L, LUA_REGISTRYINDEX, (ref)) + +#define lua_getref(L,ref) lua_rawgeti(L, LUA_REGISTRYINDEX, (ref)) + + +#define luaL_reg luaL_Reg + +#endif diff --git a/src/lib_aux.c b/src/lib_aux.c new file mode 100644 index 0000000000..1ae32dbc7a --- /dev/null +++ b/src/lib_aux.c @@ -0,0 +1,438 @@ +/* +** Auxiliary library for the Lua/C API. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Major parts taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +*/ + +#include +#include +#include + +#define lib_aux_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" + +#include "lj_obj.h" +#include "lj_err.h" +#include "lj_lib.h" + +/* convert a stack index to positive */ +#define abs_index(L, i) \ + ((i) > 0 || (i) <= LUA_REGISTRYINDEX ? (i) : lua_gettop(L) + (i) + 1) + +/* -- Type checks --------------------------------------------------------- */ + +LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg) +{ + if (!lua_checkstack(L, size)) + lj_err_callerv(L, LJ_ERR_STKOVM, msg); +} + +LUALIB_API void luaL_checktype(lua_State *L, int narg, int tt) +{ + if (lua_type(L, narg) != tt) + lj_err_argt(L, narg, tt); +} + +LUALIB_API void luaL_checkany(lua_State *L, int narg) +{ + lj_lib_checkany(L, narg); +} + +LUALIB_API const char *luaL_checklstring(lua_State *L, int narg, size_t *len) +{ + GCstr *s = lj_lib_checkstr(L, narg); + if (len != NULL) *len = s->len; + return strdata(s); +} + +LUALIB_API const char *luaL_optlstring(lua_State *L, int narg, + const char *def, size_t *len) +{ + GCstr *s = lj_lib_optstr(L, narg); + if (s) { + if (len != NULL) *len = s->len; + return strdata(s); + } + if (len != NULL) *len = def ? 
strlen(def) : 0; + return def; +} + +LUALIB_API lua_Number luaL_checknumber(lua_State *L, int narg) +{ + return lj_lib_checknum(L, narg); +} + +LUALIB_API lua_Number luaL_optnumber(lua_State *L, int narg, lua_Number def) +{ + lj_lib_opt(L, narg, + return lj_lib_checknum(L, narg); + , + return def; + ) +} + +LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int narg) +{ +#if LJ_64 + return (lua_Integer)lj_lib_checknum(L, narg); +#else + return lj_lib_checkint(L, narg); +#endif +} + +LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int narg, lua_Integer def) +{ +#if LJ_64 + lj_lib_opt(L, narg, + return (lua_Integer)lj_lib_checknum(L, narg); + , + return def; + ) +#else + return lj_lib_optint(L, narg, def); +#endif +} + +LUALIB_API int luaL_checkoption(lua_State *L, int narg, const char *def, + const char *const lst[]) +{ + GCstr *s = lj_lib_optstr(L, narg); + const char *opt = s ? strdata(s) : def; + uint32_t i; + if (!opt) lj_err_argt(L, narg, LUA_TSTRING); + for (i = 0; lst[i]; i++) + if (strcmp(lst[i], opt) == 0) + return (int)i; + lj_err_argv(L, narg, LJ_ERR_INVOPTM, opt); +} + +/* -- Module registration ------------------------------------------------- */ + +LUALIB_API const char *luaL_findtable(lua_State *L, int idx, + const char *fname, int szhint) +{ + const char *e; + lua_pushvalue(L, idx); + do { + e = strchr(fname, '.'); + if (e == NULL) e = fname + strlen(fname); + lua_pushlstring(L, fname, (size_t)(e - fname)); + lua_rawget(L, -2); + if (lua_isnil(L, -1)) { /* no such field? */ + lua_pop(L, 1); /* remove this nil */ + lua_createtable(L, 0, (*e == '.' ? 1 : szhint)); /* new table for field */ + lua_pushlstring(L, fname, (size_t)(e - fname)); + lua_pushvalue(L, -2); + lua_settable(L, -4); /* set new table into field */ + } else if (!lua_istable(L, -1)) { /* field has a non-table value? */ + lua_pop(L, 2); /* remove table and value */ + return fname; /* return problematic part of the name */ + } + lua_remove(L, -2); /* remove previous table */ + fname = e + 1; + } while (*e == '.'); + return NULL; +} + +static int libsize(const luaL_Reg *l) +{ + int size = 0; + for (; l->name; l++) size++; + return size; +} + +LUALIB_API void luaL_openlib(lua_State *L, const char *libname, + const luaL_Reg *l, int nup) +{ + if (libname) { + int size = libsize(l); + /* check whether lib already exists */ + luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); + lua_getfield(L, -1, libname); /* get _LOADED[libname] */ + if (!lua_istable(L, -1)) { /* not found? 
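luaL_findtable and luaL_openlib together implement module registration: the library table is found or created, then every luaL_Reg entry becomes a C closure sharing the given upvalues. A hypothetical module using this path (the "demo" names are invented for illustration):

  #include "lua.h"
  #include "lauxlib.h"

  static int demo_add(lua_State *L)
  {
    lua_pushnumber(L, luaL_checknumber(L, 1) + luaL_checknumber(L, 2));
    return 1;
  }

  static const luaL_Reg demo_funcs[] = {
    { "add", demo_add },
    { NULL, NULL }
  };

  int luaopen_demo(lua_State *L)
  {
    luaL_register(L, "demo", demo_funcs);  /* Finds or creates table 'demo'. */
    return 1;
  }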
*/ + lua_pop(L, 1); /* remove previous result */ + /* try global variable (and create one if it does not exist) */ + if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL) + lj_err_callerv(L, LJ_ERR_BADMODN, libname); + lua_pushvalue(L, -1); + lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */ + } + lua_remove(L, -2); /* remove _LOADED table */ + lua_insert(L, -(nup+1)); /* move library table to below upvalues */ + } + for (; l->name; l++) { + int i; + for (i = 0; i < nup; i++) /* copy upvalues to the top */ + lua_pushvalue(L, -nup); + lua_pushcclosure(L, l->func, nup); + lua_setfield(L, -(nup+2), l->name); + } + lua_pop(L, nup); /* remove upvalues */ +} + +LUALIB_API void luaL_register(lua_State *L, const char *libname, + const luaL_Reg *l) +{ + luaL_openlib(L, libname, l, 0); +} + +LUALIB_API const char *luaL_gsub(lua_State *L, const char *s, + const char *p, const char *r) +{ + const char *wild; + size_t l = strlen(p); + luaL_Buffer b; + luaL_buffinit(L, &b); + while ((wild = strstr(s, p)) != NULL) { + luaL_addlstring(&b, s, (size_t)(wild - s)); /* push prefix */ + luaL_addstring(&b, r); /* push replacement in place of pattern */ + s = wild + l; /* continue after `p' */ + } + luaL_addstring(&b, s); /* push last suffix */ + luaL_pushresult(&b); + return lua_tostring(L, -1); +} + +/* -- Buffer handling ----------------------------------------------------- */ + +#define bufflen(B) ((size_t)((B)->p - (B)->buffer)) +#define bufffree(B) ((size_t)(LUAL_BUFFERSIZE - bufflen(B))) + +static int emptybuffer(luaL_Buffer *B) +{ + size_t l = bufflen(B); + if (l == 0) + return 0; /* put nothing on stack */ + lua_pushlstring(B->L, B->buffer, l); + B->p = B->buffer; + B->lvl++; + return 1; +} + +static void adjuststack(luaL_Buffer *B) +{ + if (B->lvl > 1) { + lua_State *L = B->L; + int toget = 1; /* number of levels to concat */ + size_t toplen = lua_strlen(L, -1); + do { + size_t l = lua_strlen(L, -(toget+1)); + if (!(B->lvl - toget + 1 >= LUA_MINSTACK/2 || toplen > l)) + break; + toplen += l; + toget++; + } while (toget < B->lvl); + lua_concat(L, toget); + B->lvl = B->lvl - toget + 1; + } +} + +LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B) +{ + if (emptybuffer(B)) + adjuststack(B); + return B->buffer; +} + +LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l) +{ + while (l--) + luaL_addchar(B, *s++); +} + +LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s) +{ + luaL_addlstring(B, s, strlen(s)); +} + +LUALIB_API void luaL_pushresult(luaL_Buffer *B) +{ + emptybuffer(B); + lua_concat(B->L, B->lvl); + B->lvl = 1; +} + +LUALIB_API void luaL_addvalue(luaL_Buffer *B) +{ + lua_State *L = B->L; + size_t vl; + const char *s = lua_tolstring(L, -1, &vl); + if (vl <= bufffree(B)) { /* fit into buffer? 
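The buffer machinery above accumulates short pieces in the embedded char array and spills longer runs to the Lua stack as strings; adjuststack() then merges stack levels to keep growth bounded. Typical client code looks like this (a hypothetical string.rep-style function, not from the source):

  static int demo_rep(lua_State *L)
  {
    size_t len;
    const char *s = luaL_checklstring(L, 1, &len);
    int i, n = (int)luaL_checkinteger(L, 2);
    luaL_Buffer b;
    luaL_buffinit(L, &b);
    for (i = 0; i < n; i++)
      luaL_addlstring(&b, s, len);
    luaL_pushresult(&b);  /* Concatenate all pieces into one string. */
    return 1;
  }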
*/ + memcpy(B->p, s, vl); /* put it there */ + B->p += vl; + lua_pop(L, 1); /* remove from stack */ + } else { + if (emptybuffer(B)) + lua_insert(L, -2); /* put buffer before new value */ + B->lvl++; /* add new value into B stack */ + adjuststack(B); + } +} + +LUALIB_API void luaL_buffinit(lua_State *L, luaL_Buffer *B) +{ + B->L = L; + B->p = B->buffer; + B->lvl = 0; +} + +/* -- Reference management ------------------------------------------------ */ + +#define FREELIST_REF 0 + +LUALIB_API int luaL_ref(lua_State *L, int t) +{ + int ref; + t = abs_index(L, t); + if (lua_isnil(L, -1)) { + lua_pop(L, 1); /* remove from stack */ + return LUA_REFNIL; /* `nil' has a unique fixed reference */ + } + lua_rawgeti(L, t, FREELIST_REF); /* get first free element */ + ref = (int)lua_tointeger(L, -1); /* ref = t[FREELIST_REF] */ + lua_pop(L, 1); /* remove it from stack */ + if (ref != 0) { /* any free element? */ + lua_rawgeti(L, t, ref); /* remove it from list */ + lua_rawseti(L, t, FREELIST_REF); /* (t[FREELIST_REF] = t[ref]) */ + } else { /* no free elements */ + ref = (int)lua_objlen(L, t); + ref++; /* create new reference */ + } + lua_rawseti(L, t, ref); + return ref; +} + +LUALIB_API void luaL_unref(lua_State *L, int t, int ref) +{ + if (ref >= 0) { + t = abs_index(L, t); + lua_rawgeti(L, t, FREELIST_REF); + lua_rawseti(L, t, ref); /* t[ref] = t[FREELIST_REF] */ + lua_pushinteger(L, ref); + lua_rawseti(L, t, FREELIST_REF); /* t[FREELIST_REF] = ref */ + } +} + +/* -- Load Lua code ------------------------------------------------------- */ + +typedef struct FileReaderCtx { + FILE *fp; + char buf[LUAL_BUFFERSIZE]; +} FileReaderCtx; + +static const char *reader_file(lua_State *L, void *ud, size_t *size) +{ + FileReaderCtx *ctx = (FileReaderCtx *)ud; + UNUSED(L); + if (feof(ctx->fp)) return NULL; + *size = fread(ctx->buf, 1, sizeof(ctx->buf), ctx->fp); + return *size > 0 ? ctx->buf : NULL; +} + +LUALIB_API int luaL_loadfile(lua_State *L, const char *filename) +{ + FileReaderCtx ctx; + int status; + const char *chunkname; + if (filename) { + ctx.fp = fopen(filename, "r"); + if (ctx.fp == NULL) { + lua_pushfstring(L, "cannot open %s: %s", filename, strerror(errno)); + return LUA_ERRFILE; + } + chunkname = lua_pushfstring(L, "@%s", filename); + } else { + ctx.fp = stdin; + chunkname = "=stdin"; + } + status = lua_load(L, reader_file, &ctx, chunkname); + if (ferror(ctx.fp)) { + L->top -= filename ? 
2 : 1; + lua_pushfstring(L, "cannot read %s: %s", chunkname+1, strerror(errno)); + if (filename) + fclose(ctx.fp); + return LUA_ERRFILE; + } + if (filename) { + L->top--; + copyTV(L, L->top-1, L->top); + fclose(ctx.fp); + } + return status; +} + +typedef struct StringReaderCtx { + const char *str; + size_t size; +} StringReaderCtx; + +static const char *reader_string(lua_State *L, void *ud, size_t *size) +{ + StringReaderCtx *ctx = (StringReaderCtx *)ud; + UNUSED(L); + if (ctx->size == 0) return NULL; + *size = ctx->size; + ctx->size = 0; + return ctx->str; +} + +LUALIB_API int luaL_loadbuffer(lua_State *L, const char *buf, size_t size, + const char *name) +{ + StringReaderCtx ctx; + ctx.str = buf; + ctx.size = size; + return lua_load(L, reader_string, &ctx, name); +} + +LUALIB_API int luaL_loadstring(lua_State *L, const char *s) +{ + return luaL_loadbuffer(L, s, strlen(s), s); +} + +/* -- Default allocator and panic function -------------------------------- */ + +#ifdef LUAJIT_USE_SYSMALLOC + +static void *mem_alloc(void *ud, void *ptr, size_t osize, size_t nsize) +{ + (void)ud; + (void)osize; + if (nsize == 0) { + free(ptr); + return NULL; + } else { + return realloc(ptr, nsize); + } +} + +#define mem_create() NULL + +#else + +#include "lj_alloc.h" + +#define mem_alloc lj_alloc_f +#define mem_create lj_alloc_create + +#endif + +static int panic(lua_State *L) +{ + fprintf(stderr, "PANIC: unprotected error in call to Lua API (%s)\n", + lua_tostring(L, -1)); + return 0; +} + +LUALIB_API lua_State *luaL_newstate(void) +{ + lua_State *L = lua_newstate(mem_alloc, mem_create()); + if (L) G(L)->panic = panic; + return L; +} + diff --git a/src/lib_base.c b/src/lib_base.c new file mode 100644 index 0000000000..6b9e8eef9f --- /dev/null +++ b/src/lib_base.c @@ -0,0 +1,560 @@ +/* +** Base and coroutine library. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Major portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +*/ + +#include + +#define lib_base_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_meta.h" +#include "lj_state.h" +#include "lj_ff.h" +#include "lj_ctype.h" +#include "lj_lib.h" + +/* -- Base library: checks ------------------------------------------------ */ + +#define LJLIB_MODULE_base + +LJLIB_ASM(assert) LJLIB_REC(.) +{ + GCstr *s; + lj_lib_checkany(L, 1); + s = lj_lib_optstr(L, 2); + if (s) + lj_err_callermsg(L, strdata(s)); + else + lj_err_caller(L, LJ_ERR_ASSERT); + return FFH_UNREACHABLE; +} + +/* ORDER LJ_T */ +LJLIB_PUSH("nil") +LJLIB_PUSH("boolean") +LJLIB_PUSH(top-1) /* boolean */ +LJLIB_PUSH("userdata") +LJLIB_PUSH("string") +LJLIB_PUSH("upval") +LJLIB_PUSH("thread") +LJLIB_PUSH("proto") +LJLIB_PUSH("function") +LJLIB_PUSH("deadkey") +LJLIB_PUSH("table") +LJLIB_PUSH(top-8) /* userdata */ +LJLIB_PUSH("number") +LJLIB_ASM_(type) LJLIB_REC(.) +/* Recycle the lj_lib_checkany(L, 1) from assert. */ + +/* -- Base library: getters and setters ----------------------------------- */ + +LJLIB_ASM_(getmetatable) LJLIB_REC(.) +/* Recycle the lj_lib_checkany(L, 1) from assert. */ + +LJLIB_ASM(setmetatable) LJLIB_REC(.) 
+{ + GCtab *t = lj_lib_checktab(L, 1); + GCtab *mt = lj_lib_checktabornil(L, 2); + if (!tvisnil(lj_meta_lookup(L, L->base, MM_metatable))) + lj_err_caller(L, LJ_ERR_PROTMT); + setgcref(t->metatable, obj2gco(mt)); + if (mt) { lj_gc_objbarriert(L, t, mt); } + settabV(L, L->base-1, t); + return FFH_RES(1); +} + +LJLIB_CF(getfenv) +{ + GCfunc *fn; + cTValue *o = L->base; + if (!(o < L->top && tvisfunc(o))) { + int level = lj_lib_optint(L, 1, 1); + o = lj_err_getframe(L, level, &level); + if (o == NULL) + lj_err_arg(L, 1, LJ_ERR_INVLVL); + } + fn = &gcval(o)->fn; + settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env)); + return 1; +} + +LJLIB_CF(setfenv) +{ + GCfunc *fn; + GCtab *t = lj_lib_checktab(L, 2); + cTValue *o = L->base; + if (!(o < L->top && tvisfunc(o))) { + int level = lj_lib_checkint(L, 1); + if (level == 0) { + /* NOBARRIER: A thread (i.e. L) is never black. */ + setgcref(L->env, obj2gco(t)); + return 0; + } + o = lj_err_getframe(L, level, &level); + if (o == NULL) + lj_err_arg(L, 1, LJ_ERR_INVLVL); + } + fn = &gcval(o)->fn; + if (!isluafunc(fn)) + lj_err_caller(L, LJ_ERR_SETFENV); + setgcref(fn->l.env, obj2gco(t)); + lj_gc_objbarrier(L, obj2gco(fn), t); + setfuncV(L, L->top++, fn); + return 1; +} + +LJLIB_ASM(rawget) LJLIB_REC(.) +{ + lj_lib_checktab(L, 1); + lj_lib_checkany(L, 2); + return FFH_UNREACHABLE; +} + +LJLIB_CF(rawset) LJLIB_REC(.) +{ + lj_lib_checktab(L, 1); + lj_lib_checkany(L, 2); + L->top = 1+lj_lib_checkany(L, 3); + lua_rawset(L, 1); + return 1; +} + +LJLIB_CF(rawequal) LJLIB_REC(.) +{ + cTValue *o1 = lj_lib_checkany(L, 1); + cTValue *o2 = lj_lib_checkany(L, 2); + setboolV(L->top-1, lj_obj_equal(o1, o2)); + return 1; +} + +LJLIB_CF(unpack) +{ + GCtab *t = lj_lib_checktab(L, 1); + int32_t n, i = lj_lib_optint(L, 2, 1); + int32_t e = (L->base+3-1 < L->top && !tvisnil(L->base+3-1)) ? + lj_lib_checkint(L, 3) : (int32_t)lj_tab_len(t); + if (i > e) return 0; + n = e - i + 1; + if (n <= 0 || !lua_checkstack(L, n)) + lj_err_caller(L, LJ_ERR_UNPACK); + do { + cTValue *tv = lj_tab_getint(t, i); + if (tv) { + copyTV(L, L->top++, tv); + } else { + setnilV(L->top++); + } + } while (i++ < e); + return n; +} + +LJLIB_CF(select) +{ + int32_t n = (int32_t)(L->top - L->base); + if (n >= 1 && tvisstr(L->base) && *strVdata(L->base) == '#') { + setintV(L->top-1, n-1); + return 1; + } else { + int32_t i = lj_lib_checkint(L, 1); + if (i < 0) i = n + i; else if (i > n) i = n; + if (i < 1) + lj_err_arg(L, 1, LJ_ERR_IDXRNG); + return n - i; + } +} + +/* -- Base library: conversions ------------------------------------------- */ + +LJLIB_ASM(tonumber) LJLIB_REC(.) +{ + int32_t base = lj_lib_optint(L, 2, 10); + if (base == 10) { + TValue *o = lj_lib_checkany(L, 1); + if (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))) { + setnumV(L->base-1, numV(o)); + return FFH_RES(1); + } + } else { + const char *p = strdata(lj_lib_checkstr(L, 1)); + char *ep; + unsigned long ul; + if (base < 2 || base > 36) + lj_err_arg(L, 2, LJ_ERR_BASERNG); + ul = strtoul(p, &ep, base); + if (p != ep) { + while (lj_ctype_isspace((unsigned char)(*ep))) ep++; + if (*ep == '\0') { + setnumV(L->base-1, cast_num(ul)); + return FFH_RES(1); + } + } + } + setnilV(L->base-1); + return FFH_RES(1); +} + +LJLIB_ASM(tostring) LJLIB_REC(.) +{ + TValue *o = lj_lib_checkany(L, 1); + cTValue *mo; + L->top = o+1; /* Only keep one argument. */ + if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { + copyTV(L, L->base-1, mo); /* Replace callable. 
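tostring() first consults the __tostring metamethod and only then falls back to type-specific formatting. Restated with the public auxiliary API (a simplification that skips the number/nil/boolean fast paths shown below):

  static const char *tostring_semantics(lua_State *L, int idx)
  {
    if (luaL_callmeta(L, idx, "__tostring"))  /* Pushes the result if called. */
      return lua_tostring(L, -1);
    lua_pushfstring(L, "%s: %p", luaL_typename(L, idx), lua_topointer(L, idx));
    return lua_tostring(L, -1);
  }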
*/ + return FFH_RETRY; + } else { + GCstr *s; + if (tvisnum(o)) { + s = lj_str_fromnum(L, &o->n); + } else if (tvisnil(o)) { + s = lj_str_newlit(L, "nil"); + } else if (tvisfalse(o)) { + s = lj_str_newlit(L, "false"); + } else if (tvistrue(o)) { + s = lj_str_newlit(L, "true"); + } else { + if (tvisfunc(o) && isffunc(funcV(o))) + lua_pushfstring(L, "function: fast#%d", funcV(o)->c.ffid); + else + lua_pushfstring(L, "%s: %p", typename(o), lua_topointer(L, 1)); + /* Note: lua_pushfstring calls the GC which may invalidate o. */ + s = strV(L->top-1); + } + setstrV(L, L->base-1, s); + return FFH_RES(1); + } +} + +/* -- Base library: iterators --------------------------------------------- */ + +LJLIB_ASM(next) +{ + lj_lib_checktab(L, 1); + lj_lib_checknum(L, 2); /* For ipairs_aux. */ + return FFH_UNREACHABLE; +} + +LJLIB_PUSH(lastcl) +LJLIB_ASM_(pairs) + +LJLIB_NOREGUV LJLIB_ASM_(ipairs_aux) LJLIB_REC(.) + +LJLIB_PUSH(lastcl) +LJLIB_ASM_(ipairs) LJLIB_REC(.) + +/* -- Base library: throw and catch errors -------------------------------- */ + +LJLIB_CF(error) +{ + int32_t level = lj_lib_optint(L, 2, 1); + lua_settop(L, 1); + if (lua_isstring(L, 1) && level > 0) { + luaL_where(L, level); + lua_pushvalue(L, 1); + lua_concat(L, 2); + } + return lua_error(L); +} + +LJLIB_ASM(pcall) LJLIB_REC(.) +{ + lj_lib_checkany(L, 1); + lj_lib_checkfunc(L, 2); /* For xpcall only. */ + return FFH_UNREACHABLE; +} +LJLIB_ASM_(xpcall) LJLIB_REC(.) + +/* -- Base library: load Lua code ----------------------------------------- */ + +static int load_aux(lua_State *L, int status) +{ + if (status == 0) + return 1; + copyTV(L, L->top, L->top-1); + setnilV(L->top-1); + L->top++; + return 2; +} + +LJLIB_CF(loadstring) +{ + GCstr *s = lj_lib_checkstr(L, 1); + GCstr *name = lj_lib_optstr(L, 2); + return load_aux(L, + luaL_loadbuffer(L, strdata(s), s->len, strdata(name ? name : s))); +} + +LJLIB_CF(loadfile) +{ + GCstr *fname = lj_lib_optstr(L, 1); + return load_aux(L, luaL_loadfile(L, fname ? strdata(fname) : NULL)); +} + +static const char *reader_func(lua_State *L, void *ud, size_t *size) +{ + UNUSED(ud); + luaL_checkstack(L, 2, "too many nested functions"); + copyTV(L, L->top++, L->base); + lua_call(L, 0, 1); /* Call user-supplied function. */ + L->top--; + if (tvisnil(L->top)) { + *size = 0; + return NULL; + } else if (tvisstr(L->top) || tvisnum(L->top)) { + copyTV(L, L->base+2, L->top); /* Anchor string in reserved stack slot. */ + return lua_tolstring(L, 3, size); + } else { + lj_err_caller(L, LJ_ERR_RDRSTR); + return NULL; + } +} + +LJLIB_CF(load) +{ + GCstr *name = lj_lib_optstr(L, 2); + lj_lib_checkfunc(L, 1); + lua_settop(L, 3); /* Reserve a slot for the string from the reader. */ + return load_aux(L, + lua_load(L, reader_func, NULL, name ? strdata(name) : "=(load)")); +} + +LJLIB_CF(dofile) +{ + GCstr *fname = lj_lib_optstr(L, 1); + setnilV(L->top); + L->top = L->base+1; + if (luaL_loadfile(L, fname ? 
strdata(fname) : NULL) != 0) + lua_error(L); + lua_call(L, 0, LUA_MULTRET); + return (L->top - L->base) - 1; +} + +/* -- Base library: GC control -------------------------------------------- */ + +LJLIB_CF(gcinfo) +{ + setintV(L->top++, (G(L)->gc.total >> 10)); + return 1; +} + +LJLIB_CF(collectgarbage) +{ + int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */ + "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul"); + int32_t data = lj_lib_optint(L, 2, 0); + if (opt == LUA_GCCOUNT) { + setnumV(L->top-1, cast_num((int32_t)G(L)->gc.total)/1024.0); + } else { + int res = lua_gc(L, opt, data); + if (opt == LUA_GCSTEP) + setboolV(L->top-1, res); + else + setintV(L->top-1, res); + } + return 1; +} + +/* -- Base library: miscellaneous functions ------------------------------- */ + +LJLIB_PUSH(top-2) /* Upvalue holds weak table. */ +LJLIB_CF(newproxy) +{ + lua_settop(L, 1); + lua_newuserdata(L, 0); + if (lua_toboolean(L, 1) == 0) { /* newproxy(): without metatable. */ + return 1; + } else if (lua_isboolean(L, 1)) { /* newproxy(true): with metatable. */ + lua_newtable(L); + lua_pushvalue(L, -1); + lua_pushboolean(L, 1); + lua_rawset(L, lua_upvalueindex(1)); /* Remember mt in weak table. */ + } else { /* newproxy(proxy): inherit metatable. */ + int validproxy = 0; + if (lua_getmetatable(L, 1)) { + lua_rawget(L, lua_upvalueindex(1)); + validproxy = lua_toboolean(L, -1); + lua_pop(L, 1); + } + if (!validproxy) + lj_err_arg(L, 1, LJ_ERR_NOPROXY); + lua_getmetatable(L, 1); + } + lua_setmetatable(L, 2); + return 1; +} + +LJLIB_PUSH("tostring") +LJLIB_CF(print) +{ + ptrdiff_t i, nargs = L->top - L->base; + cTValue *tv = lj_tab_getstr(tabref(L->env), strV(lj_lib_upvalue(L, 1))); + int shortcut = (tv && tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring); + copyTV(L, L->top++, tv ? 
tv : niltv(L)); + for (i = 0; i < nargs; i++) { + const char *str; + size_t size; + cTValue *o = &L->base[i]; + if (shortcut && tvisstr(o)) { + str = strVdata(o); + size = strV(o)->len; + } else if (shortcut && tvisnum(o)) { + char buf[LUAI_MAXNUMBER2STR]; + lua_Number n = numV(o); + size = (size_t)lua_number2str(buf, n); + str = buf; + } else { + copyTV(L, L->top+1, o); + copyTV(L, L->top, L->top-1); + L->top += 2; + lua_call(L, 1, 1); + str = lua_tolstring(L, -1, &size); + if (!str) + lj_err_caller(L, LJ_ERR_PRTOSTR); + L->top--; + } + if (i) + putchar('\t'); + fwrite(str, 1, size, stdout); + } + putchar('\n'); + return 0; +} + +LJLIB_PUSH(top-3) +LJLIB_SET(_VERSION) + +#include "lj_libdef.h" + +/* -- Coroutine library --------------------------------------------------- */ + +#define LJLIB_MODULE_coroutine + +LJLIB_CF(coroutine_status) +{ + const char *s; + lua_State *co; + if (!(L->top > L->base && tvisthread(L->base))) + lj_err_arg(L, 1, LJ_ERR_NOCORO); + co = threadV(L->base); + if (co == L) s = "running"; + else if (co->status == LUA_YIELD) s = "suspended"; + else if (co->status != 0) s = "dead"; + else if (co->base > co->stack+1) s = "normal"; + else if (co->top == co->base) s = "dead"; + else s = "suspended"; + lua_pushstring(L, s); + return 1; +} + +LJLIB_CF(coroutine_running) +{ + if (lua_pushthread(L)) + setnilV(L->top++); + return 1; +} + +LJLIB_CF(coroutine_create) +{ + lua_State *L1 = lua_newthread(L); + if (!(L->top > L->base && tvisfunc(L->base) && isluafunc(funcV(L->base)))) + lj_err_arg(L, 1, LJ_ERR_NOLFUNC); + setfuncV(L, L1->top++, funcV(L->base)); + return 1; +} + +LJLIB_ASM(coroutine_yield) +{ + lj_err_caller(L, LJ_ERR_CYIELD); + return FFH_UNREACHABLE; +} + +static int ffh_resume(lua_State *L, lua_State *co, int wrap) +{ + if (co->cframe != NULL || co->status > LUA_YIELD || + (co->status == 0 && co->top == co->base)) { + ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD; + if (wrap) lj_err_caller(L, em); + setboolV(L->base-1, 0); + setstrV(L, L->base, lj_err_str(L, em)); + return FFH_RES(2); + } + lj_state_growstack(co, (MSize)(L->top - L->base - 1)); + return FFH_RETRY; +} + +LJLIB_ASM(coroutine_resume) +{ + if (!(L->top > L->base && tvisthread(L->base))) + lj_err_arg(L, 1, LJ_ERR_NOCORO); + return ffh_resume(L, threadV(L->base), 0); +} + +LJLIB_NOREG LJLIB_ASM(coroutine_wrap_aux) +{ + return ffh_resume(L, threadV(lj_lib_upvalue(L, 1)), 1); +} + +/* Inline declarations. */ +LJ_ASMF void lj_ff_coroutine_wrap_aux(void); +LJ_FUNCA_NORET void lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co); + +/* Error handler, called from assembler VM. */ +void lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co) +{ + co->top--; copyTV(L, L->top, co->top); L->top++; + if (tvisstr(L->top-1)) + lj_err_callermsg(L, strVdata(L->top-1)); + else + lj_err_run(L); +} + +LJLIB_CF(coroutine_wrap) +{ + GCfunc *fn; + lj_cf_coroutine_create(L); + lua_pushcclosure(L, lj_ffh_coroutine_wrap_aux, 1); + fn = funcV(L->top-1); + fn->c.gate = lj_ff_coroutine_wrap_aux; + fn->c.ffid = FF_coroutine_wrap_aux; + return 1; +} + +#include "lj_libdef.h" + +/* ------------------------------------------------------------------------ */ + +static void newproxy_weaktable(lua_State *L) +{ + /* NOBARRIER: The table is new (marked white). 
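coroutine.wrap above builds a C closure whose upvalue 1 is the coroutine; the resume itself runs in an assembler fast path, and lj_ffh_coroutine_wrap_err rethrows errors in the caller. Written against the public 5.1 API, the equivalent logic would look roughly like this (a sketch under that assumption, not the actual fast function; it omits stack-space checks):

  static int wrap_aux_sketch(lua_State *L)
  {
    lua_State *co = lua_tothread(L, lua_upvalueindex(1));
    int nargs = lua_gettop(L), status, nres;
    lua_xmove(L, co, nargs);              /* Hand the arguments to the coro. */
    status = lua_resume(co, nargs);
    if (status == 0 || status == LUA_YIELD) {
      nres = lua_gettop(co);
      lua_xmove(co, L, nres);             /* Return its results directly. */
      return nres;
    }
    lua_xmove(co, L, 1);                  /* Propagate the error object. */
    return lua_error(L);
  }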
*/ + GCtab *t = lj_tab_new(L, 0, 1); + settabV(L, L->top++, t); + setgcref(t->metatable, obj2gco(t)); + setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")), + lj_str_newlit(L, "kv")); + t->nomm = cast_byte(~(1u<<MM_mode)); +} + +LUALIB_API int luaopen_base(lua_State *L) +{ + GCtab *env = tabref(L->env); + settabV(L, lj_tab_setstr(L, env, lj_str_newlit(L, "_G")), env); + lua_pushliteral(L, LUA_VERSION); /* top-3. */ + newproxy_weaktable(L); /* top-2. */ + LJ_LIB_REG_(L, "_G", base); + LJ_LIB_REG(L, coroutine); + return 2; +} + diff --git a/src/lib_bit.c b/src/lib_bit.c new file mode 100644 index 0000000000..2f727e68e7 --- /dev/null +++ b/src/lib_bit.c @@ -0,0 +1,74 @@ +/* +** Bit manipulation library. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lib_bit_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" + +#include "lj_obj.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_lib.h" + +/* ------------------------------------------------------------------------ */ + +#define LJLIB_MODULE_bit + +LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT) +{ + lj_lib_checknum(L, 1); + return FFH_RETRY; +} +LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT) +LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP) + +LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) +{ + lj_lib_checknum(L, 1); + lj_lib_checknum(L, 2); + return FFH_RETRY; +} +LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) +LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) +LJLIB_ASM_(bit_rol) LJLIB_REC(bit_shift IR_BROL) +LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR) + +LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) +{ + int i = 0; + do { lj_lib_checknum(L, ++i); } while (L->base+i < L->top); + return FFH_RETRY; +} +LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) +LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) + +/* ------------------------------------------------------------------------ */ + +LJLIB_CF(bit_tohex) +{ + uint32_t b = (uint32_t)lj_num2bit(lj_lib_checknum(L, 1)); + int32_t i, n = L->base+1 >= L->top ? 8 : lj_num2bit(lj_lib_checknum(L, 2)); + const char *hexdigits = "0123456789abcdef"; + char buf[8]; + if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; } + if (n > 8) n = 8; + for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; } + lua_pushlstring(L, buf, (size_t)n); + return 1; +} + +/* ------------------------------------------------------------------------ */ + +#include "lj_libdef.h" + +LUALIB_API int luaopen_bit(lua_State *L) +{ + LJ_LIB_REG(L, bit); + return 1; +} + diff --git a/src/lib_debug.c b/src/lib_debug.c new file mode 100644 index 0000000000..0e6c35e525 --- /dev/null +++ b/src/lib_debug.c @@ -0,0 +1,366 @@ +/* +** Debug library. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Major portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio.
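All bit.* operations above first coerce their number arguments to 32-bit integers via lj_num2bit (declared elsewhere in the tree). The effect is reduction modulo 2^32 with wrap-around into the signed range; a plain-C approximation, assuming a finite input that fits in an int64_t (the VM itself uses a floating-point rounding trick rather than casts):

  #include <stdint.h>

  static int32_t tobit_approx(double d)
  {
    return (int32_t)(uint32_t)(int64_t)d;  /* Truncate, then wrap mod 2^32. */
  }

So bit.tobit(2^32 + 1) yields 1, and any value with bit 31 set comes back negative.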
See Copyright Notice in lua.h +*/ + +#define lib_debug_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" + +#include "lj_obj.h" +#include "lj_err.h" +#include "lj_lib.h" + +/* ------------------------------------------------------------------------ */ + +#define LJLIB_MODULE_debug + +LJLIB_CF(debug_getregistry) +{ + copyTV(L, L->top++, registry(L)); + return 1; +} + +LJLIB_CF(debug_getmetatable) +{ + lj_lib_checkany(L, 1); + if (!lua_getmetatable(L, 1)) { + setnilV(L->top-1); + } + return 1; +} + +LJLIB_CF(debug_setmetatable) +{ + lj_lib_checktabornil(L, 2); + L->top = L->base+2; + lua_setmetatable(L, 1); + setboolV(L->top-1, 1); + return 1; +} + +LJLIB_CF(debug_getfenv) +{ + lj_lib_checkany(L, 1); + lua_getfenv(L, 1); + return 1; +} + +LJLIB_CF(debug_setfenv) +{ + lj_lib_checktab(L, 2); + L->top = L->base+2; + if (!lua_setfenv(L, 1)) + lj_err_caller(L, LJ_ERR_SETFENV); + return 1; +} + +/* ------------------------------------------------------------------------ */ + +static void settabss(lua_State *L, const char *i, const char *v) +{ + lua_pushstring(L, v); + lua_setfield(L, -2, i); +} + +static void settabsi(lua_State *L, const char *i, int v) +{ + lua_pushinteger(L, v); + lua_setfield(L, -2, i); +} + +static lua_State *getthread(lua_State *L, int *arg) +{ + if (L->base < L->top && tvisthread(L->base)) { + *arg = 1; + return threadV(L->base); + } else { + *arg = 0; + return L; + } +} + +static void treatstackoption(lua_State *L, lua_State *L1, const char *fname) +{ + if (L == L1) { + lua_pushvalue(L, -2); + lua_remove(L, -3); + } + else + lua_xmove(L1, L, 1); + lua_setfield(L, -2, fname); +} + +LJLIB_CF(debug_getinfo) +{ + lua_Debug ar; + int arg; + lua_State *L1 = getthread(L, &arg); + const char *options = luaL_optstring(L, arg+2, "flnSu"); + if (lua_isnumber(L, arg+1)) { + if (!lua_getstack(L1, (int)lua_tointeger(L, arg+1), &ar)) { + setnilV(L->top-1); + return 1; + } + } else if (L->base+arg < L->top && tvisfunc(L->base+arg)) { + options = lua_pushfstring(L, ">%s", options); + setfuncV(L1, L1->top++, funcV(L->base+arg)); + } else { + lj_err_arg(L, arg+1, LJ_ERR_NOFUNCL); + } + if (!lua_getinfo(L1, options, &ar)) + lj_err_arg(L, arg+2, LJ_ERR_INVOPT); + lua_createtable(L, 0, 16); + if (strchr(options, 'S')) { + settabss(L, "source", ar.source); + settabss(L, "short_src", ar.short_src); + settabsi(L, "linedefined", ar.linedefined); + settabsi(L, "lastlinedefined", ar.lastlinedefined); + settabss(L, "what", ar.what); + } + if (strchr(options, 'l')) + settabsi(L, "currentline", ar.currentline); + if (strchr(options, 'u')) + settabsi(L, "nups", ar.nups); + if (strchr(options, 'n')) { + settabss(L, "name", ar.name); + settabss(L, "namewhat", ar.namewhat); + } + if (strchr(options, 'L')) + treatstackoption(L, L1, "activelines"); + if (strchr(options, 'f')) + treatstackoption(L, L1, "func"); + return 1; /* return table */ +} + +LJLIB_CF(debug_getlocal) +{ + int arg; + lua_State *L1 = getthread(L, &arg); + lua_Debug ar; + const char *name; + if (!lua_getstack(L1, lj_lib_checkint(L, arg+1), &ar)) + lj_err_arg(L, arg+1, LJ_ERR_LVLRNG); + name = lua_getlocal(L1, &ar, lj_lib_checkint(L, arg+2)); + if (name) { + lua_xmove(L1, L, 1); + lua_pushstring(L, name); + lua_pushvalue(L, -2); + return 2; + } else { + setnilV(L->top-1); + return 1; + } +} + +LJLIB_CF(debug_setlocal) +{ + int arg; + lua_State *L1 = getthread(L, &arg); + lua_Debug ar; + TValue *tv; + if (!lua_getstack(L1, lj_lib_checkint(L, arg+1), &ar)) + lj_err_arg(L, arg+1, LJ_ERR_LVLRNG); + tv = 
lj_lib_checkany(L, arg+3); + copyTV(L1, L1->top++, tv); + lua_pushstring(L, lua_setlocal(L1, &ar, lj_lib_checkint(L, arg+2))); + return 1; +} + +static int debug_getupvalue(lua_State *L, int get) +{ + int32_t n = lj_lib_checkint(L, 2); + if (isluafunc(lj_lib_checkfunc(L, 1))) { + const char *name = get ? lua_getupvalue(L, 1, n) : lua_setupvalue(L, 1, n); + if (name) { + lua_pushstring(L, name); + if (!get) return 1; + copyTV(L, L->top, L->top-2); + L->top++; + return 2; + } + } + return 0; +} + +LJLIB_CF(debug_getupvalue) +{ + return debug_getupvalue(L, 1); +} + +LJLIB_CF(debug_setupvalue) +{ + lj_lib_checkany(L, 3); + return debug_getupvalue(L, 0); +} + +/* ------------------------------------------------------------------------ */ + +static const char KEY_HOOK = 'h'; + +static void hookf(lua_State *L, lua_Debug *ar) +{ + static const char *const hooknames[] = + {"call", "return", "line", "count", "tail return"}; + lua_pushlightuserdata(L, (void *)&KEY_HOOK); + lua_rawget(L, LUA_REGISTRYINDEX); + if (lua_isfunction(L, -1)) { + lua_pushstring(L, hooknames[(int)ar->event]); + if (ar->currentline >= 0) + lua_pushinteger(L, ar->currentline); + else lua_pushnil(L); + lua_call(L, 2, 0); + } +} + +static int makemask(const char *smask, int count) +{ + int mask = 0; + if (strchr(smask, 'c')) mask |= LUA_MASKCALL; + if (strchr(smask, 'r')) mask |= LUA_MASKRET; + if (strchr(smask, 'l')) mask |= LUA_MASKLINE; + if (count > 0) mask |= LUA_MASKCOUNT; + return mask; +} + +static char *unmakemask(int mask, char *smask) +{ + int i = 0; + if (mask & LUA_MASKCALL) smask[i++] = 'c'; + if (mask & LUA_MASKRET) smask[i++] = 'r'; + if (mask & LUA_MASKLINE) smask[i++] = 'l'; + smask[i] = '\0'; + return smask; +} + +LJLIB_CF(debug_sethook) +{ + int arg, mask, count; + lua_Hook func; + (void)getthread(L, &arg); + if (lua_isnoneornil(L, arg+1)) { + lua_settop(L, arg+1); + func = NULL; mask = 0; count = 0; /* turn off hooks */ + } else { + const char *smask = luaL_checkstring(L, arg+2); + luaL_checktype(L, arg+1, LUA_TFUNCTION); + count = luaL_optint(L, arg+3, 0); + func = hookf; mask = makemask(smask, count); + } + lua_pushlightuserdata(L, (void *)&KEY_HOOK); + lua_pushvalue(L, arg+1); + lua_rawset(L, LUA_REGISTRYINDEX); + lua_sethook(L, func, mask, count); + return 0; +} + +LJLIB_CF(debug_gethook) +{ + char buff[5]; + int mask = lua_gethookmask(L); + lua_Hook hook = lua_gethook(L); + if (hook != NULL && hook != hookf) { /* external hook? */ + lua_pushliteral(L, "external hook"); + } else { + lua_pushlightuserdata(L, (void *)&KEY_HOOK); + lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */ + } + lua_pushstring(L, unmakemask(mask, buff)); + lua_pushinteger(L, lua_gethookcount(L)); + return 3; +} + +/* ------------------------------------------------------------------------ */ + +LJLIB_CF(debug_debug) +{ + for (;;) { + char buffer[250]; + fputs("lua_debug> ", stderr); + if (fgets(buffer, sizeof(buffer), stdin) == 0 || + strcmp(buffer, "cont\n") == 0) + return 0; + if (luaL_loadbuffer(L, buffer, strlen(buffer), "=(debug command)") || + lua_pcall(L, 0, 0, 0)) { + fputs(lua_tostring(L, -1), stderr); + fputs("\n", stderr); + } + lua_settop(L, 0); /* remove eventual returns */ + } +} + +/* ------------------------------------------------------------------------ */ + +#define LEVELS1 12 /* size of the first part of the stack */ +#define LEVELS2 10 /* size of the second part of the stack */ + +LJLIB_CF(debug_traceback) +{ + int level; + int firstpart = 1; /* still before eventual `...' 
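debug.sethook above encodes the hook condition as a mask ('c', 'r', 'l' plus a nonzero count), mirroring the C-level lua_sethook API. A hypothetical host-side count hook using the same encoding:

  #include <stdio.h>
  #include "lua.h"

  static void count_hook(lua_State *L, lua_Debug *ar)
  {
    (void)L;
    if (ar->event == LUA_HOOKCOUNT)
      fputs("another 1000 bytecode instructions executed\n", stderr);
  }

  /* Installed with: lua_sethook(L, count_hook, LUA_MASKCOUNT, 1000); */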
*/ + int arg; + lua_State *L1 = getthread(L, &arg); + lua_Debug ar; + if (lua_isnumber(L, arg+2)) { + level = (int)lua_tointeger(L, arg+2); + lua_pop(L, 1); + } + else + level = (L == L1) ? 1 : 0; /* level 0 may be this own function */ + if (lua_gettop(L) == arg) + lua_pushliteral(L, ""); + else if (!lua_isstring(L, arg+1)) return 1; /* message is not a string */ + else lua_pushliteral(L, "\n"); + lua_pushliteral(L, "stack traceback:"); + while (lua_getstack(L1, level++, &ar)) { + if (level > LEVELS1 && firstpart) { + /* no more than `LEVELS2' more levels? */ + if (!lua_getstack(L1, level+LEVELS2, &ar)) { + level--; /* keep going */ + } else { + lua_pushliteral(L, "\n\t..."); /* too many levels */ + /* This only works with LuaJIT 2.x. Avoids O(n^2) behaviour. */ + lua_getstack(L1, -10, &ar); + level = ar.i_ci - LEVELS2; + } + firstpart = 0; + continue; + } + lua_pushliteral(L, "\n\t"); + lua_getinfo(L1, "Snl", &ar); + lua_pushfstring(L, "%s:", ar.short_src); + if (ar.currentline > 0) + lua_pushfstring(L, "%d:", ar.currentline); + if (*ar.namewhat != '\0') { /* is there a name? */ + lua_pushfstring(L, " in function " LUA_QS, ar.name); + } else { + if (*ar.what == 'm') /* main? */ + lua_pushfstring(L, " in main chunk"); + else if (*ar.what == 'C' || *ar.what == 't') + lua_pushliteral(L, " ?"); /* C function or tail call */ + else + lua_pushfstring(L, " in function <%s:%d>", + ar.short_src, ar.linedefined); + } + lua_concat(L, lua_gettop(L) - arg); + } + lua_concat(L, lua_gettop(L) - arg); + return 1; +} + +/* ------------------------------------------------------------------------ */ + +#include "lj_libdef.h" + +LUALIB_API int luaopen_debug(lua_State *L) +{ + LJ_LIB_REG(L, debug); + return 1; +} + diff --git a/src/lib_init.c b/src/lib_init.c new file mode 100644 index 0000000000..04ca60d988 --- /dev/null +++ b/src/lib_init.c @@ -0,0 +1,37 @@ +/* +** Library initialization. +** Major parts taken verbatim from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +*/ + +#define lib_init_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" + +static const luaL_Reg lualibs[] = { + { "", luaopen_base }, + { LUA_LOADLIBNAME, luaopen_package }, + { LUA_TABLIBNAME, luaopen_table }, + { LUA_IOLIBNAME, luaopen_io }, + { LUA_OSLIBNAME, luaopen_os }, + { LUA_STRLIBNAME, luaopen_string }, + { LUA_MATHLIBNAME, luaopen_math }, + { LUA_DBLIBNAME, luaopen_debug }, + { LUA_BITLIBNAME, luaopen_bit }, + { LUA_JITLIBNAME, luaopen_jit }, + { NULL, NULL } +}; + +LUALIB_API void luaL_openlibs(lua_State *L) +{ + const luaL_Reg *lib = lualibs; + for (; lib->func; lib++) { + lua_pushcfunction(L, lib->func); + lua_pushstring(L, lib->name); + lua_call(L, 1, 0); + } +} + diff --git a/src/lib_io.c b/src/lib_io.c new file mode 100644 index 0000000000..01623258bb --- /dev/null +++ b/src/lib_io.c @@ -0,0 +1,538 @@ +/* +** I/O library. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Major portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +*/ + +#include +#include + +#define lib_io_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" + +#include "lj_obj.h" +#include "lj_err.h" +#include "lj_gc.h" +#include "lj_ff.h" +#include "lj_lib.h" + +/* Index of standard handles in function environment. 
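luaL_openlibs above simply walks the lualibs table and calls each opener with the library name as its argument. A minimal embedding that relies on it (standard API usage; the script string is only an example):

  #include <stdio.h>
  #include "lua.h"
  #include "lauxlib.h"
  #include "lualib.h"

  int main(void)
  {
    lua_State *L = luaL_newstate();     /* LuaJIT's default allocator. */
    if (L == NULL) return 1;
    luaL_openlibs(L);                   /* Registers all libraries above. */
    if (luaL_dostring(L, "print('hello from LuaJIT')"))
      fprintf(stderr, "%s\n", lua_tostring(L, -1));
    lua_close(L);
    return 0;
  }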
*/ +#define IO_INPUT 1 +#define IO_OUTPUT 2 + +/* -- Error handling ------------------------------------------------------ */ + +static int io_pushresult(lua_State *L, int ok, const char *fname) +{ + if (ok) { + setboolV(L->top++, 1); + return 1; + } else { + int en = errno; /* Lua API calls may change this value. */ + lua_pushnil(L); + if (fname) + lua_pushfstring(L, "%s: %s", fname, strerror(en)); + else + lua_pushfstring(L, "%s", strerror(en)); + lua_pushinteger(L, en); + return 3; + } +} + +static void io_file_error(lua_State *L, int arg, const char *fname) +{ + lua_pushfstring(L, "%s: %s", fname, strerror(errno)); + luaL_argerror(L, arg, lua_tostring(L, -1)); +} + +/* -- Open helpers -------------------------------------------------------- */ + +#define io_tofilep(L) ((FILE **)luaL_checkudata(L, 1, LUA_FILEHANDLE)) + +static FILE *io_tofile(lua_State *L) +{ + FILE **f = io_tofilep(L); + if (*f == NULL) + lj_err_caller(L, LJ_ERR_IOCLFL); + return *f; +} + +static FILE **io_file_new(lua_State *L) +{ + FILE **pf = (FILE **)lua_newuserdata(L, sizeof(FILE *)); + *pf = NULL; + luaL_getmetatable(L, LUA_FILEHANDLE); + lua_setmetatable(L, -2); + return pf; +} + +/* -- Close helpers ------------------------------------------------------- */ + +static int lj_cf_io_std_close(lua_State *L) +{ + lua_pushnil(L); + lua_pushliteral(L, "cannot close standard file"); + return 2; +} + +static int lj_cf_io_pipe_close(lua_State *L) +{ + FILE **p = io_tofilep(L); +#if defined(LUA_USE_POSIX) + int ok = (pclose(*p) != -1); +#elif defined(LUA_USE_WIN) + int ok = (_pclose(*p) != -1); +#else + int ok = 0; +#endif + *p = NULL; + return io_pushresult(L, ok, NULL); +} + +static int lj_cf_io_file_close(lua_State *L) +{ + FILE **p = io_tofilep(L); + int ok = (fclose(*p) == 0); + *p = NULL; + return io_pushresult(L, ok, NULL); +} + +static int io_file_close(lua_State *L) +{ + lua_getfenv(L, 1); + lua_getfield(L, -1, "__close"); + return (lua_tocfunction(L, -1))(L); +} + +/* -- Read/write helpers -------------------------------------------------- */ + +static int io_file_readnum(lua_State *L, FILE *fp) +{ + lua_Number d; + if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { + lua_pushnumber(L, d); + return 1; + } else { + return 0; /* read fails */ + } +} + +static int test_eof(lua_State *L, FILE *fp) +{ + int c = getc(fp); + ungetc(c, fp); + lua_pushlstring(L, NULL, 0); + return (c != EOF); +} + +static int io_file_readline(lua_State *L, FILE *fp) +{ + luaL_Buffer b; + luaL_buffinit(L, &b); + for (;;) { + size_t len; + char *p = luaL_prepbuffer(&b); + if (fgets(p, LUAL_BUFFERSIZE, fp) == NULL) { /* EOF? */ + luaL_pushresult(&b); + return (strV(L->top-1)->len > 0); /* Anything read? */ + } + len = strlen(p); + if (len == 0 || p[len-1] != '\n') { /* Partial line? */ + luaL_addsize(&b, len); + } else { + luaL_addsize(&b, len - 1); /* Don't include EOL. */ + luaL_pushresult(&b); + return 1; /* Got at least an EOL. 
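File handles here are boxed: the userdata stores a FILE* that is set to NULL once closed, so stale handles are caught instead of double-closed. Client code checking a handle with the public API follows the same pattern as io_tofile above (a restatement, not a new helper in the source):

  static FILE *checkfile(lua_State *L, int idx)
  {
    FILE **pf = (FILE **)luaL_checkudata(L, idx, LUA_FILEHANDLE);
    if (*pf == NULL)
      luaL_error(L, "attempt to use a closed file");
    return *pf;
  }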
*/ + } + } +} + +static int io_file_readchars(lua_State *L, FILE *fp, size_t n) +{ + size_t rlen; /* how much to read */ + size_t nr; /* number of chars actually read */ + luaL_Buffer b; + luaL_buffinit(L, &b); + rlen = LUAL_BUFFERSIZE; /* try to read that much each time */ + do { + char *p = luaL_prepbuffer(&b); + if (rlen > n) rlen = n; /* cannot read more than asked */ + nr = fread(p, 1, rlen, fp); + luaL_addsize(&b, nr); + n -= nr; /* still have to read `n' chars */ + } while (n > 0 && nr == rlen); /* until end of count or eof */ + luaL_pushresult(&b); /* close buffer */ + return (n == 0 || lua_objlen(L, -1) > 0); +} + +static int io_file_read(lua_State *L, FILE *fp, int start) +{ + int ok, n, nargs = (L->top - L->base) - start; + clearerr(fp); + if (nargs == 0) { + ok = io_file_readline(L, fp); + n = start+1; /* Return 1 result. */ + } else { + /* The results plus the buffers go on top of the args. */ + luaL_checkstack(L, nargs+LUA_MINSTACK, "too many arguments"); + ok = 1; + for (n = start; nargs-- && ok; n++) { + if (tvisstr(L->base+n)) { + const char *p = strVdata(L->base+n); + if (p[0] != '*') + lj_err_arg(L, n+1, LJ_ERR_INVOPT); + if (p[1] == 'n') + ok = io_file_readnum(L, fp); + else if (p[1] == 'l') + ok = io_file_readline(L, fp); + else if (p[1] == 'a') + io_file_readchars(L, fp, ~((size_t)0)); + else + lj_err_arg(L, n+1, LJ_ERR_INVFMT); + } else if (tvisnum(L->base+n)) { + size_t len = (size_t)lj_lib_checkint(L, n+1); + ok = len ? io_file_readchars(L, fp, len) : test_eof(L, fp); + } else { + lj_err_arg(L, n+1, LJ_ERR_INVOPT); + } + } + } + if (ferror(fp)) + return io_pushresult(L, 0, NULL); + if (!ok) + setnilV(L->top-1); /* Replace last result with nil. */ + return n - start; +} + +static int io_file_write(lua_State *L, FILE *fp, int start) +{ + cTValue *tv; + int status = 1; + for (tv = L->base+start; tv < L->top; tv++) { + if (tvisstr(tv)) { + MSize len = strV(tv)->len; + status = status && (fwrite(strVdata(tv), 1, len, fp) == len); + } else if (tvisnum(tv)) { + status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0); + } else { + lj_lib_checkstr(L, tv-L->base+1); + } + } + return io_pushresult(L, status, NULL); +} + +/* -- I/O file methods ---------------------------------------------------- */ + +#define LJLIB_MODULE_io_method + +LJLIB_CF(io_method_close) +{ + if (lua_isnone(L, 1)) + lua_rawgeti(L, LUA_ENVIRONINDEX, IO_OUTPUT); + io_tofile(L); + return io_file_close(L); +} + +LJLIB_CF(io_method_read) +{ + return io_file_read(L, io_tofile(L), 1); +} + +LJLIB_CF(io_method_write) +{ + return io_file_write(L, io_tofile(L), 1); +} + +LJLIB_CF(io_method_flush) +{ + return io_pushresult(L, fflush(io_tofile(L)) == 0, NULL); +} + +LJLIB_CF(io_method_seek) +{ + FILE *fp = io_tofile(L); + int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end"); + lua_Number ofs; + int res; + if (opt == 0) opt = SEEK_SET; + else if (opt == 1) opt = SEEK_CUR; + else if (opt == 2) opt = SEEK_END; + lj_lib_opt(L, 3, + ofs = lj_lib_checknum(L, 3); + , + ofs = 0; + ) +#if defined(LUA_USE_POSIX) + res = fseeko(fp, (int64_t)ofs, opt); +#elif _MSC_VER >= 1400 + res = _fseeki64(fp, (int64_t)ofs, opt); +#elif defined(__MINGW32__) + res = fseeko64(fp, (int64_t)ofs, opt); +#else + res = fseek(fp, (long)ofs, opt); +#endif + if (res) + return io_pushresult(L, 0, NULL); +#if defined(LUA_USE_POSIX) + ofs = cast_num(ftello(fp)); +#elif _MSC_VER >= 1400 + ofs = cast_num(_ftelli64(fp)); +#elif defined(__MINGW32__) + ofs = cast_num(ftello64(fp)); +#else + ofs = cast_num(ftell(fp)); +#endif + setnumV(L->top-1, 
ofs); + return 1; +} + +LJLIB_CF(io_method_setvbuf) +{ + FILE *fp = io_tofile(L); + int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no"); + size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE); + if (opt == 0) opt = _IOFBF; + else if (opt == 1) opt = _IOLBF; + else if (opt == 2) opt = _IONBF; + return io_pushresult(L, (setvbuf(fp, NULL, opt, sz) == 0), NULL); +} + +/* Forward declaration. */ +static void io_file_lines(lua_State *L, int idx, int toclose); + +LJLIB_CF(io_method_lines) +{ + io_tofile(L); + io_file_lines(L, 1, 0); + return 1; +} + +LJLIB_CF(io_method___gc) +{ + FILE *fp = *io_tofilep(L); + if (fp != NULL) io_file_close(L); + return 0; +} + +LJLIB_CF(io_method___tostring) +{ + FILE *fp = *io_tofilep(L); + if (fp == NULL) + lua_pushliteral(L, "file (closed)"); + else + lua_pushfstring(L, "file (%p)", fp); + return 1; +} + +LJLIB_PUSH(top-1) LJLIB_SET(__index) + +#include "lj_libdef.h" + +/* -- I/O library functions ----------------------------------------------- */ + +#define LJLIB_MODULE_io + +LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */ + +static FILE *io_file_get(lua_State *L, int findex) +{ + GCtab *fenv = tabref(curr_func(L)->c.env); + GCudata *ud = udataV(&tvref(fenv->array)[findex]); + FILE *fp = *(FILE **)uddata(ud); + if (fp == NULL) + lj_err_caller(L, LJ_ERR_IOSTDCL); + return fp; +} + +LJLIB_CF(io_open) +{ + const char *fname = luaL_checkstring(L, 1); + const char *mode = luaL_optstring(L, 2, "r"); + FILE **pf = io_file_new(L); + *pf = fopen(fname, mode); + return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1; +} + +LJLIB_CF(io_tmpfile) +{ + FILE **pf = io_file_new(L); + *pf = tmpfile(); + return (*pf == NULL) ? io_pushresult(L, 0, NULL) : 1; +} + +LJLIB_CF(io_close) +{ + return lj_cf_io_method_close(L); +} + +LJLIB_CF(io_read) +{ + return io_file_read(L, io_file_get(L, IO_INPUT), 0); +} + +LJLIB_CF(io_write) +{ + return io_file_write(L, io_file_get(L, IO_OUTPUT), 0); +} + +LJLIB_CF(io_flush) +{ + return io_pushresult(L, fflush(io_file_get(L, IO_OUTPUT)) == 0, NULL); +} + +LJLIB_NOREG LJLIB_CF(io_lines_iter) +{ + FILE *fp = *(FILE **)uddata(udataV(lj_lib_upvalue(L, 1))); + int ok; + if (fp == NULL) + lj_err_caller(L, LJ_ERR_IOCLFL); + ok = io_file_readline(L, fp); + if (ferror(fp)) + return luaL_error(L, "%s", strerror(errno)); + if (ok) + return 1; + if (tvistrue(lj_lib_upvalue(L, 2))) { /* Need to close file? */ + L->top = L->base+1; + setudataV(L, L->base, udataV(lj_lib_upvalue(L, 1))); + io_file_close(L); + } + return 0; +} + +static void io_file_lines(lua_State *L, int idx, int toclose) +{ + lua_pushvalue(L, idx); + lua_pushboolean(L, toclose); + lua_pushcclosure(L, lj_cf_io_lines_iter, 2); + funcV(L->top-1)->c.ffid = FF_io_lines_iter; +} + +LJLIB_CF(io_lines) +{ + if (lua_isnoneornil(L, 1)) { /* no arguments? 
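lj_lib_checkopt, as used by :seek and :setvbuf above, matches against a length-prefixed option list such as "\3set\3cur\3end": each entry is one length byte followed by that many name characters, and the match result is the entry's position. A decoder sketch for that format (the helper name is invented):

  #include <stdint.h>
  #include <string.h>

  static int opt_lookup(const char *lst, const char *name)
  {
    size_t len = strlen(name);
    int i;
    for (i = 0; *lst; i++, lst += 1 + (uint8_t)*lst)
      if (len == (uint8_t)*lst && memcmp(lst+1, name, len) == 0)
        return i;
    return -1;  /* Not found. */
  }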
*/ + /* will iterate over default input */ + lua_rawgeti(L, LUA_ENVIRONINDEX, IO_INPUT); + return lj_cf_io_method_lines(L); + } else { + const char *fname = luaL_checkstring(L, 1); + FILE **pf = io_file_new(L); + *pf = fopen(fname, "r"); + if (*pf == NULL) + io_file_error(L, 1, fname); + io_file_lines(L, lua_gettop(L), 1); + return 1; + } +} + +static int io_std_get(lua_State *L, int fp, const char *mode) +{ + if (!lua_isnoneornil(L, 1)) { + const char *fname = lua_tostring(L, 1); + if (fname) { + FILE **pf = io_file_new(L); + *pf = fopen(fname, mode); + if (*pf == NULL) + io_file_error(L, 1, fname); + } else { + io_tofile(L); /* check that it's a valid file handle */ + lua_pushvalue(L, 1); + } + lua_rawseti(L, LUA_ENVIRONINDEX, fp); + } + /* return current value */ + lua_rawgeti(L, LUA_ENVIRONINDEX, fp); + return 1; +} + +LJLIB_CF(io_input) +{ + return io_std_get(L, IO_INPUT, "r"); +} + +LJLIB_CF(io_output) +{ + return io_std_get(L, IO_OUTPUT, "w"); +} + +LJLIB_CF(io_type) +{ + void *ud; + luaL_checkany(L, 1); + ud = lua_touserdata(L, 1); + lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); + if (ud == NULL || !lua_getmetatable(L, 1) || !lua_rawequal(L, -2, -1)) + lua_pushnil(L); /* not a file */ + else if (*((FILE **)ud) == NULL) + lua_pushliteral(L, "closed file"); + else + lua_pushliteral(L, "file"); + return 1; +} + +LJLIB_PUSH(top-3) LJLIB_SET(!) /* Set environment. */ + +LJLIB_CF(io_popen) +{ +#if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN) + const char *fname = luaL_checkstring(L, 1); + const char *mode = luaL_optstring(L, 2, "r"); + FILE **pf = io_file_new(L); +#ifdef LUA_USE_POSIX + fflush(NULL); + *pf = popen(fname, mode); +#else + *pf = _popen(fname, mode); +#endif + return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1; +#else + luaL_error(L, LUA_QL("popen") " not supported"); +#endif +} + +#include "lj_libdef.h" + +/* ------------------------------------------------------------------------ */ + +static void io_std_new(lua_State *L, FILE *fp, int k, const char *fname) +{ + FILE **pf = io_file_new(L); + GCudata *ud = udataV(L->top-1); + GCtab *envt = tabV(L->top-2); + *pf = fp; + setgcref(ud->env, obj2gco(envt)); + lj_gc_objbarrier(L, obj2gco(ud), envt); + if (k > 0) { + lua_pushvalue(L, -1); + lua_rawseti(L, -5, k); + } + lua_setfield(L, -3, fname); +} + +static void io_fenv_new(lua_State *L, int narr, lua_CFunction cls) +{ + lua_createtable(L, narr, 1); + lua_pushcfunction(L, cls); + lua_setfield(L, -2, "__close"); +} + +LUALIB_API int luaopen_io(lua_State *L) +{ + LJ_LIB_REG_(L, NULL, io_method); + lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); + io_fenv_new(L, 0, lj_cf_io_pipe_close); /* top-3 */ + io_fenv_new(L, 2, lj_cf_io_file_close); /* top-2 */ + LJ_LIB_REG(L, io); + io_fenv_new(L, 0, lj_cf_io_std_close); + io_std_new(L, stdin, IO_INPUT, "stdin"); + io_std_new(L, stdout, IO_OUTPUT, "stdout"); + io_std_new(L, stderr, 0, "stderr"); + lua_pop(L, 1); + return 1; +} + diff --git a/src/lib_jit.c b/src/lib_jit.c new file mode 100644 index 0000000000..4a57f3b40b --- /dev/null +++ b/src/lib_jit.c @@ -0,0 +1,589 @@ +/* +** JIT library. +** Copyright (C) 2005-2009 Mike Pall. 
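io.lines above follows the standard iterator-factory pattern: the iteration state lives in upvalues of a C closure, and returning no values ends the generic for loop. The same pattern in miniature (a hypothetical numeric range iterator, not from the source):

  static int range_iter(lua_State *L)
  {
    lua_Integer i = lua_tointeger(L, lua_upvalueindex(1));
    lua_Integer n = lua_tointeger(L, lua_upvalueindex(2));
    if (i > n) return 0;                  /* No results: ends the for loop. */
    lua_pushinteger(L, i+1);
    lua_replace(L, lua_upvalueindex(1));  /* Advance the stored counter. */
    lua_pushinteger(L, i);
    return 1;
  }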
See Copyright Notice in luajit.h +*/ + +#define lib_jit_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" + +#include "lj_arch.h" +#include "lj_obj.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_tab.h" +#if LJ_HASJIT +#include "lj_ir.h" +#include "lj_jit.h" +#include "lj_iropt.h" +#endif +#include "lj_dispatch.h" +#include "lj_vm.h" +#include "lj_vmevent.h" +#include "lj_lib.h" + +#include "luajit.h" + +/* -- jit.* functions ----------------------------------------------------- */ + +#define LJLIB_MODULE_jit + +static int setjitmode(lua_State *L, int mode) +{ + int idx = 0; + if (L->base == L->top || tvisnil(L->base)) { /* jit.on/off/flush([nil]) */ + mode |= LUAJIT_MODE_ENGINE; + } else { + /* jit.on/off/flush(func|proto, nil|true|false) */ + if (tvisfunc(L->base) || tvisproto(L->base)) + idx = 1; + else if (!tvistrue(L->base)) /* jit.on/off/flush(true, nil|true|false) */ + goto err; + if (L->base+1 < L->top && tvisbool(L->base+1)) + mode |= boolV(L->base+1) ? LUAJIT_MODE_ALLFUNC : LUAJIT_MODE_ALLSUBFUNC; + else + mode |= LUAJIT_MODE_FUNC; + } + if (luaJIT_setmode(L, idx, mode) != 1) { + err: +#if LJ_HASJIT + lj_err_arg(L, 1, LJ_ERR_NOLFUNC); +#else + lj_err_caller(L, LJ_ERR_NOJIT); +#endif + } + return 0; +} + +LJLIB_CF(jit_on) +{ + return setjitmode(L, LUAJIT_MODE_ON); +} + +LJLIB_CF(jit_off) +{ + return setjitmode(L, LUAJIT_MODE_OFF); +} + +LJLIB_CF(jit_flush) +{ +#if LJ_HASJIT + if (L->base < L->top && (tvisnum(L->base) || tvisstr(L->base))) { + int traceno = lj_lib_checkint(L, 1); + luaJIT_setmode(L, traceno, LUAJIT_MODE_FLUSH|LUAJIT_MODE_TRACE); + return 0; + } +#endif + return setjitmode(L, LUAJIT_MODE_FLUSH); +} + +#if LJ_HASJIT +/* Push a string for every flag bit that is set. */ +static void flagbits_to_strings(lua_State *L, uint32_t flags, uint32_t base, + const char *str) +{ + for (; *str; base <<= 1, str += 1+*str) + if (flags & base) + setstrV(L, L->top++, lj_str_new(L, str+1, *(uint8_t *)str)); +} +#endif + +LJLIB_CF(jit_status) +{ +#if LJ_HASJIT + jit_State *J = L2J(L); + L->top = L->base; + setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); + flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); + flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); + return L->top - L->base; +#else + setboolV(L->top++, 0); + return 1; +#endif +} + +LJLIB_CF(jit_attach) +{ +#ifdef LUAJIT_DISABLE_VMEVENT + luaL_error(L, "vmevent API disabled"); +#else + GCfunc *fn = lj_lib_checkfunc(L, 1); + GCstr *s = lj_lib_optstr(L, 2); + luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE); + if (s) { /* Attach to given event. */ + lua_pushvalue(L, 1); + lua_rawseti(L, -2, VMEVENT_HASHIDX(s->hash)); + G(L)->vmevmask = VMEVENT_NOCACHE; /* Invalidate cache. */ + } else { /* Detach if no event given. 
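E.g. jit.attach(cb) after a previous jit.attach(cb, "trace") detaches cb again: the loop below clears every registered callback equal to cb.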
*/ + setnilV(L->top++); + while (lua_next(L, -2)) { + L->top--; + if (tvisfunc(L->top) && funcV(L->top) == fn) { + setnilV(lj_tab_set(L, tabV(L->top-2), L->top-1)); + } + } + } +#endif + return 0; +} + +LJLIB_PUSH(top-4) LJLIB_SET(arch) +LJLIB_PUSH(top-3) LJLIB_SET(version_num) +LJLIB_PUSH(top-2) LJLIB_SET(version) + +#include "lj_libdef.h" + +/* -- jit.util.* functions ------------------------------------------------ */ + +#define LJLIB_MODULE_jit_util + +/* -- Reflection API for Lua functions ------------------------------------ */ + +/* Return prototype of first argument (Lua function or prototype object) */ +static GCproto *check_Lproto(lua_State *L, int nolua) +{ + TValue *o = L->base; + if (L->top > o) { + if (tvisproto(o)) { + return protoV(o); + } else if (tvisfunc(o)) { + if (isluafunc(funcV(o))) + return funcproto(funcV(o)); + else if (nolua) + return NULL; + } + } + lj_err_argt(L, 1, LUA_TFUNCTION); + return NULL; /* unreachable */ +} + +static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val) +{ + setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val); +} + +/* local info = jit.util.funcinfo(func [,pc]) */ +LJLIB_CF(jit_util_funcinfo) +{ + GCproto *pt = check_Lproto(L, 1); + if (pt) { + BCPos pc = (BCPos)lj_lib_optint(L, 2, 0); + GCtab *t; + lua_createtable(L, 0, 16); /* Increment hash size if fields are added. */ + t = tabV(L->top-1); + setintfield(L, t, "linedefined", pt->linedefined); + setintfield(L, t, "lastlinedefined", pt->lastlinedefined); + setintfield(L, t, "stackslots", pt->framesize); + setintfield(L, t, "params", pt->numparams); + setintfield(L, t, "bytecodes", (int32_t)pt->sizebc); + setintfield(L, t, "gcconsts", (int32_t)pt->sizekgc); + setintfield(L, t, "nconsts", (int32_t)pt->sizekn); + setintfield(L, t, "upvalues", (int32_t)pt->sizeuv); + if (pc > 0) + setintfield(L, t, "currentline", pt->lineinfo ? pt->lineinfo[pc-1] : 0); + lua_pushboolean(L, (pt->flags & PROTO_IS_VARARG)); + lua_setfield(L, -2, "isvararg"); + setstrV(L, L->top++, pt->chunkname); + lua_setfield(L, -2, "source"); + lj_err_pushloc(L, pt, pc); + lua_setfield(L, -2, "loc"); + } else { + GCfunc *fn = funcV(L->base); + GCtab *t; + lua_createtable(L, 0, 2); /* Increment hash size if fields are added. 
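Non-Lua functions, e.g. jit.util.funcinfo(print), report only ffid and upvalues.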
*/ + t = tabV(L->top-1); + setintfield(L, t, "ffid", fn->c.ffid); + setintfield(L, t, "upvalues", fn->c.nupvalues); + } + return 1; +} + +/* local ins, m = jit.util.funcbc(func, pc) */ +LJLIB_CF(jit_util_funcbc) +{ + GCproto *pt = check_Lproto(L, 0); + BCPos pc = (BCPos)lj_lib_checkint(L, 2) - 1; + if (pc < pt->sizebc) { + BCIns ins = pt->bc[pc]; + BCOp op = bc_op(ins); + lua_assert(op < BC__MAX); + setintV(L->top, ins); + setintV(L->top+1, lj_bc_mode[op]); + L->top += 2; + return 2; + } + return 0; +} + +/* local k = jit.util.funck(func, idx) */ +LJLIB_CF(jit_util_funck) +{ + GCproto *pt = check_Lproto(L, 0); + MSize idx = (MSize)lj_lib_checkint(L, 2); + if ((int32_t)idx >= 0) { + if (idx < pt->sizekn) { + setnumV(L->top-1, pt->k.n[idx]); + return 1; + } + } else { + if (~idx < pt->sizekgc) { + GCobj *gc = gcref(pt->k.gc[idx]); + setgcV(L, L->top-1, &gc->gch, ~gc->gch.gct); + return 1; + } + } + return 0; +} + +/* local name = jit.util.funcuvname(func, idx) */ +LJLIB_CF(jit_util_funcuvname) +{ + GCproto *pt = check_Lproto(L, 0); + uint32_t idx = (uint32_t)lj_lib_checkint(L, 2); + if (idx < pt->sizeuvname) { + setstrV(L, L->top-1, pt->uvname[idx]); + return 1; + } + return 0; +} + +/* -- Reflection API for traces ------------------------------------------- */ + +#if LJ_HASJIT + +/* Check trace argument. Must not throw for non-existent trace numbers. */ +static Trace *jit_checktrace(lua_State *L) +{ + TraceNo tr = (TraceNo)lj_lib_checkint(L, 1); + jit_State *J = L2J(L); + if (tr > 0 && tr < J->sizetrace) + return J->trace[tr]; + return NULL; +} + +/* local info = jit.util.traceinfo(tr) */ +LJLIB_CF(jit_util_traceinfo) +{ + Trace *T = jit_checktrace(L); + if (T) { + GCtab *t; + lua_createtable(L, 0, 4); /* Increment hash size if fields are added. */ + t = tabV(L->top-1); + setintfield(L, t, "nins", (int32_t)T->nins - REF_BIAS - 1); + setintfield(L, t, "nk", REF_BIAS - (int32_t)T->nk); + setintfield(L, t, "link", T->link); + setintfield(L, t, "nexit", T->nsnap); + /* There are many more fields. Add them only when needed. */ + return 1; + } + return 0; +} + +/* local m, ot, op1, op2, prev = jit.util.traceir(tr, idx) */ +LJLIB_CF(jit_util_traceir) +{ + Trace *T = jit_checktrace(L); + IRRef ref = (IRRef)lj_lib_checkint(L, 2) + REF_BIAS; + if (T && ref >= REF_BIAS && ref < T->nins) { + IRIns *ir = &T->ir[ref]; + int32_t m = lj_ir_mode[ir->o]; + setintV(L->top-2, m); + setintV(L->top-1, ir->ot); + setintV(L->top++, (int32_t)ir->op1 - (irm_op1(m)==IRMref ? REF_BIAS : 0)); + setintV(L->top++, (int32_t)ir->op2 - (irm_op2(m)==IRMref ? REF_BIAS : 0)); + setintV(L->top++, ir->prev); + return 5; + } + return 0; +} + +/* local k, t [, slot] = jit.util.tracek(tr, idx) */ +LJLIB_CF(jit_util_tracek) +{ + Trace *T = jit_checktrace(L); + IRRef ref = (IRRef)lj_lib_checkint(L, 2) + REF_BIAS; + if (T && ref >= T->nk && ref < REF_BIAS) { + IRIns *ir = &T->ir[ref]; + int32_t slot = -1; + if (ir->o == IR_KSLOT) { + slot = ir->op2; + ir = &T->ir[ir->op1]; + } + lj_ir_kvalue(L, L->top-2, ir); + setintV(L->top-1, (int32_t)irt_type(ir->t)); + if (slot == -1) + return 2; + setintV(L->top++, slot); + return 3; + } + return 0; +} + +/* local snap = jit.util.tracesnap(tr, sn) */ +LJLIB_CF(jit_util_tracesnap) +{ + Trace *T = jit_checktrace(L); + SnapNo sn = (SnapNo)lj_lib_checkint(L, 2); + if (T && sn < T->nsnap) { + SnapShot *snap = &T->snap[sn]; + IRRef2 *map = &T->snapmap[snap->mapofs]; + BCReg s, nslots = snap->nslots; + GCtab *t; + lua_createtable(L, nslots ? 
(int)nslots : 1, 0); + t = tabV(L->top-1); + setintV(lj_tab_setint(L, t, 0), (int32_t)snap->ref - REF_BIAS); + for (s = 0; s < nslots; s++) { + TValue *o = lj_tab_setint(L, t, (int32_t)(s+1)); + IRRef ref = snap_ref(map[s]); + if (ref) + setintV(o, (int32_t)ref - REF_BIAS); + else + setboolV(o, 0); + } + return 1; + } + return 0; +} + +/* local mcode, addr, loop = jit.util.tracemc(tr) */ +LJLIB_CF(jit_util_tracemc) +{ + Trace *T = jit_checktrace(L); + if (T && T->mcode != NULL) { + setstrV(L, L->top-1, lj_str_new(L, (const char *)T->mcode, T->szmcode)); + setnumV(L->top++, cast_num((intptr_t)T->mcode)); + setintV(L->top++, T->mcloop); + return 3; + } + return 0; +} + +/* local addr = jit.util.traceexitstub(idx) */ +LJLIB_CF(jit_util_traceexitstub) +{ + ExitNo exitno = (ExitNo)lj_lib_checkint(L, 1); + jit_State *J = L2J(L); + if (exitno < EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) { + setnumV(L->top-1, cast_num((intptr_t)exitstub_addr(J, exitno))); + return 1; + } + return 0; +} + +#else + +static int trace_nojit(lua_State *L) +{ + UNUSED(L); + return 0; +} +#define lj_cf_jit_util_traceinfo trace_nojit +#define lj_cf_jit_util_traceir trace_nojit +#define lj_cf_jit_util_tracek trace_nojit +#define lj_cf_jit_util_tracesnap trace_nojit +#define lj_cf_jit_util_tracemc trace_nojit +#define lj_cf_jit_util_traceexitstub trace_nojit + +#endif + +#include "lj_libdef.h" + +/* -- jit.opt module ------------------------------------------------------ */ + +#define LJLIB_MODULE_jit_opt + +#if LJ_HASJIT +/* Parse optimization level. */ +static int jitopt_level(jit_State *J, const char *str) +{ + if (str[0] >= '0' && str[0] <= '9' && str[1] == '\0') { + uint32_t flags; + if (str[0] == '0') flags = JIT_F_OPT_0; + else if (str[0] == '1') flags = JIT_F_OPT_1; + else if (str[0] == '2') flags = JIT_F_OPT_2; + else flags = JIT_F_OPT_3; + J->flags = (J->flags & ~JIT_F_OPT_MASK) | flags; + return 1; /* Ok. */ + } + return 0; /* No match. */ +} + +/* Parse optimization flag. */ +static int jitopt_flag(jit_State *J, const char *str) +{ + const char *lst = JIT_F_OPTSTRING; + uint32_t opt; + int set = 1; + if (str[0] == '+') { + str++; + } else if (str[0] == '-') { + str++; + set = 0; + } else if (str[0] == 'n' && str[1] == 'o') { + str += str[2] == '-' ? 3 : 2; + set = 0; + } + for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) { + size_t len = *(const uint8_t *)lst; + if (len == 0) + break; + if (strncmp(str, lst+1, len) == 0 && str[len] == '\0') { + if (set) J->flags |= opt; else J->flags &= ~opt; + return 1; /* Ok. */ + } + lst += 1+len; + } + return 0; /* No match. */ +} + +/* Forward declaration. */ +static void jit_init_hotcount(jit_State *J); + +/* Parse optimization parameter. */ +static int jitopt_param(jit_State *J, const char *str) +{ + const char *lst = JIT_P_STRING; + int i; + for (i = 0; i < JIT_P__MAX; i++) { + size_t len = *(const uint8_t *)lst; + TValue tv; + lua_assert(len != 0); + if (strncmp(str, lst+1, len) == 0 && str[len] == '=' && + lj_str_numconv(&str[len+1], &tv)) { + J->param[i] = lj_num2int(tv.n); + if (i == JIT_P_hotloop) + jit_init_hotcount(J); + return 1; /* Ok. */ + } + lst += 1+len; + } + return 0; /* No match. */ +} +#endif + +/* jit.opt.start(flags...) 
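Illustrative usage: jit.opt.start("2", "-fold", "hotloop=10") shows the three accepted forms: optimization level, flag toggle and param=value (names as listed in JIT_F_OPTSTRING and JIT_P_STRING).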
*/ +LJLIB_CF(jit_opt_start) +{ +#if LJ_HASJIT + jit_State *J = L2J(L); + int nargs = (int)(L->top - L->base); + if (nargs == 0) { + J->flags = (J->flags & ~JIT_F_OPT_MASK) | JIT_F_OPT_DEFAULT; + } else { + int i; + for (i = 1; i <= nargs; i++) { + const char *str = strdata(lj_lib_checkstr(L, i)); + if (!jitopt_level(J, str) && + !jitopt_flag(J, str) && + !jitopt_param(J, str)) + lj_err_callerv(L, LJ_ERR_JITOPT, str); + } + } +#else + lj_err_caller(L, LJ_ERR_NOJIT); +#endif + return 0; +} + +#include "lj_libdef.h" + +/* -- JIT compiler initialization ----------------------------------------- */ + +#if LJ_HASJIT +/* Default values for JIT parameters. */ +static const int32_t jit_param_default[JIT_P__MAX+1] = { +#define JIT_PARAMINIT(len, name, value) (value), +JIT_PARAMDEF(JIT_PARAMINIT) +#undef JIT_PARAMINIT + 0 +}; + +/* Initialize hotcount table. */ +static void jit_init_hotcount(jit_State *J) +{ + HotCount start = (HotCount)J->param[JIT_P_hotloop]; + HotCount *hotcount = J2GG(J)->hotcount; + uint32_t i; + for (i = 0; i < HOTCOUNT_SIZE; i++) + hotcount[i] = start; +} +#endif + +/* Arch-dependent CPU detection. */ +static uint32_t jit_cpudetect(lua_State *L) +{ + uint32_t flags = 0; +#if LJ_TARGET_X86ORX64 + uint32_t vendor[4]; + uint32_t features[4]; + if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { +#if !LJ_HASJIT +#define JIT_F_CMOV 1 +#endif + flags |= ((features[3] >> 15)&1) * JIT_F_CMOV; +#if LJ_HASJIT + flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; + flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; + if (vendor[2] == 0x6c65746e) { /* Intel. */ + if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ + flags |= JIT_F_P4; /* Currently unused. */ + else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ + flags |= JIT_F_LEA_AGU; + } else if (vendor[2] == 0x444d4163) { /* AMD. */ + uint32_t fam = (features[0] & 0x0ff00f00); + if (fam == 0x00000f00) /* K8. */ + flags |= JIT_F_SPLIT_XMM; + if (fam >= 0x00000f00) /* K8, K10. */ + flags |= JIT_F_PREFER_IMUL; + } +#endif + } +#ifndef LUAJIT_CPU_NOCMOV + if (!(flags & JIT_F_CMOV)) + luaL_error(L, "Ancient CPU lacks CMOV support (recompile with -DLUAJIT_CPU_NOCMOV)"); +#endif +#if LJ_HASJIT + if (!(flags & JIT_F_SSE2)) + luaL_error(L, "Sorry, SSE2 CPU support required for this beta release"); +#endif + UNUSED(L); +#else +#error "Missing CPU detection for this architecture" +#endif + return flags; +} + +/* Initialize JIT compiler. */ +static void jit_init(lua_State *L) +{ + uint32_t flags = jit_cpudetect(L); +#if LJ_HASJIT + jit_State *J = L2J(L); + J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; + memcpy(J->param, jit_param_default, sizeof(J->param)); + jit_init_hotcount(J); + lj_dispatch_update(G(L)); +#else + UNUSED(flags); +#endif +} + +LUALIB_API int luaopen_jit(lua_State *L) +{ + lua_pushliteral(L, LJ_ARCH_NAME); + lua_pushinteger(L, LUAJIT_VERSION_NUM); + lua_pushliteral(L, LUAJIT_VERSION); + LJ_LIB_REG(L, jit); +#ifndef LUAJIT_DISABLE_JITUTIL + LJ_LIB_REG_(L, "jit.util", jit_util); +#endif + LJ_LIB_REG_(L, "jit.opt", jit_opt); + L->top -= 2; + jit_init(L); + return 1; +} + diff --git a/src/lib_math.c b/src/lib_math.c new file mode 100644 index 0000000000..ec8b0c2b4c --- /dev/null +++ b/src/lib_math.c @@ -0,0 +1,188 @@ +/* +** Math library. +** Copyright (C) 2005-2009 Mike Pall. 
See Copyright Notice in luajit.h +*/ + +#include <math.h> + +#define lib_math_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" + +#include "lj_obj.h" +#include "lj_lib.h" + +/* ------------------------------------------------------------------------ */ + +#define LJLIB_MODULE_math + +LJLIB_ASM(math_abs) LJLIB_REC(.) +{ + lj_lib_checknum(L, 1); + return FFH_RETRY; +} +LJLIB_ASM_(math_floor) LJLIB_REC(math_round IRFPM_FLOOR) +LJLIB_ASM_(math_ceil) LJLIB_REC(math_round IRFPM_CEIL) +LJLIB_ASM_(math_sqrt) LJLIB_REC(math_unary IRFPM_SQRT) +LJLIB_ASM_(math_log) LJLIB_REC(math_unary IRFPM_LOG) +LJLIB_ASM_(math_log10) LJLIB_REC(math_unary IRFPM_LOG10) +LJLIB_ASM_(math_exp) LJLIB_REC(math_unary IRFPM_EXP) +LJLIB_ASM_(math_sin) LJLIB_REC(math_unary IRFPM_SIN) +LJLIB_ASM_(math_cos) LJLIB_REC(math_unary IRFPM_COS) +LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN) +LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) +LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) +LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) +LJLIB_ASM_(math_sinh) +LJLIB_ASM_(math_cosh) +LJLIB_ASM_(math_tanh) +LJLIB_ASM_(math_frexp) +LJLIB_ASM_(math_modf) LJLIB_REC(.) + +LJLIB_PUSH(57.29577951308232) +LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad) + +LJLIB_PUSH(0.017453292519943295) +LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad) + +LJLIB_ASM(math_atan2) LJLIB_REC(math_binary IR_ATAN2) +{ + lj_lib_checknum(L, 1); + lj_lib_checknum(L, 2); + return FFH_RETRY; +} +LJLIB_ASM_(math_ldexp) LJLIB_REC(math_binary IR_LDEXP) +LJLIB_ASM_(math_pow) LJLIB_REC(.) +LJLIB_ASM_(math_fmod) + +LJLIB_ASM(math_min) LJLIB_REC(math_minmax IR_MIN) +{ + int i = 0; + do { lj_lib_checknum(L, ++i); } while (L->base+i < L->top); + return FFH_RETRY; +} +LJLIB_ASM_(math_max) LJLIB_REC(math_minmax IR_MAX) + +LJLIB_PUSH(3.14159265358979323846) LJLIB_SET(pi) +LJLIB_PUSH(1e310) LJLIB_SET(huge) + +#ifdef __MACH__ +LJ_FUNCA double lj_wrapper_sinh(double x) { return sinh(x); } +LJ_FUNCA double lj_wrapper_cosh(double x) { return cosh(x); } +LJ_FUNCA double lj_wrapper_tanh(double x) { return tanh(x); } +#endif + +/* ------------------------------------------------------------------------ */ + +/* This implements a Tausworthe PRNG with period 2^223. Based on: +** Tables of maximally-equidistributed combined LFSR generators, +** Pierre L'Ecuyer, 1999, table 3, 1st entry. +** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. +*/ + +/* PRNG state. */ +typedef struct TW223State { + uint64_t gen[4]; /* State of the 4 LFSR generators. */ + int valid; /* State is valid. */ +} TW223State; + +/* Union needed for bit-pattern conversion between uint64_t and double. */ +typedef union { uint64_t u64; double d; } U64double; + +/* Update generator i and compute a running xor of all states. */ +#define TW223_GEN(i, k, q, s) \ + z = tw->gen[i]; \ + z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ + r ^= z; tw->gen[i] = z; + +/* PRNG step function. Returns a double in the range 0.0 <= d < 1.0. */ +static double tw223_step(TW223State *tw) +{ + uint64_t z, r = 0; + U64double u; + TW223_GEN(0, 63, 31, 18) + TW223_GEN(1, 58, 19, 28) + TW223_GEN(2, 55, 24, 7) + TW223_GEN(3, 47, 21, 8) + u.u64 = (r & (((uint64_t)1 << 52)-1)) | ((uint64_t)0x3ff << 52); +#if defined(__GNUC__) && LJ_TARGET_X86 && __pic__ + /* Compensate for unbelievable GCC pessimization. */ + { + volatile U64double u1; + u1.u64 = (uint64_t)0x3ff << 52; + return u.d - u1.d; + } +#else + return u.d - 1.0; +#endif +} + +/* PRNG initialization function. 
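Scatters the seed over all four generator states via d = d*pi + e, forces the required MSBs non-zero, then discards ten steps to mix the state.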
*/ +static void tw223_init(TW223State *tw, double d) +{ + uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ + int i; + for (i = 0; i < 4; i++) { + U64double u; + uint32_t m = 1u << (r&255); + r >>= 8; + u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; + if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ + tw->gen[i] = u.u64; + } + tw->valid = 1; + for (i = 0; i < 10; i++) + tw223_step(tw); +} + +/* PRNG extract function. */ +LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */ +LJLIB_CF(math_random) +{ + int n = cast_int(L->top - L->base); + TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1)))); + double d; + if (LJ_UNLIKELY(!tw->valid)) tw223_init(tw, 0.0); + d = tw223_step(tw); + if (n > 0) { + double r1 = lj_lib_checknum(L, 1); + if (n == 1) { + d = floor(d*r1) + 1.0; /* d is an int in range [1, r1] */ + } else { + double r2 = lj_lib_checknum(L, 2); + d = floor(d*(r2-r1+1.0)) + r1; /* d is an int in range [r1, r2] */ + } + } /* else: d is a double in range [0, 1] */ + setnumV(L->top++, d); + return 1; +} + +/* PRNG seed function. */ +LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */ +LJLIB_CF(math_randomseed) +{ + TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1)))); + tw223_init(tw, lj_lib_checknum(L, 1)); + return 0; +} + +/* ------------------------------------------------------------------------ */ + +#include "lj_libdef.h" + +LUALIB_API int luaopen_math(lua_State *L) +{ + TW223State *tw; + tw = (TW223State *)lua_newuserdata(L, sizeof(TW223State)); + tw->valid = 0; /* Use lazy initialization to save some time on startup. */ + LJ_LIB_REG(L, math); +#if defined(LUA_COMPAT_MOD) + lua_getfield(L, -1, "fmod"); + lua_setfield(L, -2, "mod"); +#endif + return 1; +} + diff --git a/src/lib_os.c b/src/lib_os.c new file mode 100644 index 0000000000..bee7216a08 --- /dev/null +++ b/src/lib_os.c @@ -0,0 +1,249 @@ +/* +** OS library. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Major portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. 
See Copyright Notice in lua.h +*/ + +#include <errno.h> +#include <locale.h> +#include <time.h> + +#define lib_os_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" + +#ifdef LUA_USE_POSIX +#include <unistd.h> +#else +#include <stdio.h> +#endif + +#include "lj_obj.h" +#include "lj_err.h" +#include "lj_lib.h" + +/* ------------------------------------------------------------------------ */ + +#define LJLIB_MODULE_os + +static int os_pushresult(lua_State *L, int i, const char *filename) +{ + int en = errno; /* calls to Lua API may change this value */ + if (i) { + setboolV(L->top-1, 1); + return 1; + } else { + setnilV(L->top-1); + lua_pushfstring(L, "%s: %s", filename, strerror(en)); + lua_pushinteger(L, en); + return 3; + } +} + +LJLIB_CF(os_execute) +{ + lua_pushinteger(L, system(luaL_optstring(L, 1, NULL))); + return 1; +} + +LJLIB_CF(os_remove) +{ + const char *filename = luaL_checkstring(L, 1); + return os_pushresult(L, remove(filename) == 0, filename); +} + +LJLIB_CF(os_rename) +{ + const char *fromname = luaL_checkstring(L, 1); + const char *toname = luaL_checkstring(L, 2); + return os_pushresult(L, rename(fromname, toname) == 0, fromname); +} + +LJLIB_CF(os_tmpname) +{ +#ifdef LUA_USE_POSIX + char buf[15+1]; + int fp; + strcpy(buf, "/tmp/lua_XXXXXX"); + fp = mkstemp(buf); + if (fp != -1) + close(fp); + else + lj_err_caller(L, LJ_ERR_OSUNIQF); +#else + char buf[L_tmpnam]; + if (tmpnam(buf) == NULL) + lj_err_caller(L, LJ_ERR_OSUNIQF); +#endif + lua_pushstring(L, buf); + return 1; +} + +LJLIB_CF(os_getenv) +{ + lua_pushstring(L, getenv(luaL_checkstring(L, 1))); /* if NULL push nil */ + return 1; +} + +LJLIB_CF(os_exit) +{ + exit(lj_lib_optint(L, 1, EXIT_SUCCESS)); + return 0; /* to avoid warnings */ +} + +LJLIB_CF(os_clock) +{ + setnumV(L->top++, ((lua_Number)clock())*(1.0/(lua_Number)CLOCKS_PER_SEC)); + return 1; +} + +/* ------------------------------------------------------------------------ */ + +static void setfield(lua_State *L, const char *key, int value) +{ + lua_pushinteger(L, value); + lua_setfield(L, -2, key); +} + +static void setboolfield(lua_State *L, const char *key, int value) +{ + if (value < 0) /* undefined? */ + return; /* does not set field */ + lua_pushboolean(L, value); + lua_setfield(L, -2, key); +} + +static int getboolfield(lua_State *L, const char *key) +{ + int res; + lua_getfield(L, -1, key); + res = lua_isnil(L, -1) ? -1 : lua_toboolean(L, -1); + lua_pop(L, 1); + return res; +} + +static int getfield(lua_State *L, const char *key, int d) +{ + int res; + lua_getfield(L, -1, key); + if (lua_isnumber(L, -1)) { + res = (int)lua_tointeger(L, -1); + } else { + if (d < 0) + lj_err_callerv(L, LJ_ERR_OSDATEF, key); + res = d; + } + lua_pop(L, 1); + return res; +} + +LJLIB_CF(os_date) +{ + const char *s = luaL_optstring(L, 1, "%c"); + time_t t = luaL_opt(L, (time_t)luaL_checknumber, 2, time(NULL)); + struct tm *stm; + if (*s == '!') { /* UTC? */ + stm = gmtime(&t); + s++; /* skip `!' */ + } else { + stm = localtime(&t); + } + if (stm == NULL) { /* invalid date? 
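gmtime/localtime return NULL for unrepresentable times. Otherwise e.g. os.date("*t") returns a table with the sec/min/hour/day/month/year/wday/yday/isdst fields built below.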
*/ + setnilV(L->top-1); + } else if (strcmp(s, "*t") == 0) { + lua_createtable(L, 0, 9); /* 9 = number of fields */ + setfield(L, "sec", stm->tm_sec); + setfield(L, "min", stm->tm_min); + setfield(L, "hour", stm->tm_hour); + setfield(L, "day", stm->tm_mday); + setfield(L, "month", stm->tm_mon+1); + setfield(L, "year", stm->tm_year+1900); + setfield(L, "wday", stm->tm_wday+1); + setfield(L, "yday", stm->tm_yday+1); + setboolfield(L, "isdst", stm->tm_isdst); + } else { + char cc[3]; + luaL_Buffer b; + cc[0] = '%'; cc[2] = '\0'; + luaL_buffinit(L, &b); + for (; *s; s++) { + if (*s != '%' || *(s + 1) == '\0') { /* no conversion specifier? */ + luaL_addchar(&b, *s); + } else { + size_t reslen; + char buff[200]; /* should be big enough for any conversion result */ + cc[1] = *(++s); + reslen = strftime(buff, sizeof(buff), cc, stm); + luaL_addlstring(&b, buff, reslen); + } + } + luaL_pushresult(&b); + } + return 1; +} + +LJLIB_CF(os_time) +{ + time_t t; + if (lua_isnoneornil(L, 1)) { /* called without args? */ + t = time(NULL); /* get current time */ + } else { + struct tm ts; + luaL_checktype(L, 1, LUA_TTABLE); + lua_settop(L, 1); /* make sure table is at the top */ + ts.tm_sec = getfield(L, "sec", 0); + ts.tm_min = getfield(L, "min", 0); + ts.tm_hour = getfield(L, "hour", 12); + ts.tm_mday = getfield(L, "day", -1); + ts.tm_mon = getfield(L, "month", -1) - 1; + ts.tm_year = getfield(L, "year", -1) - 1900; + ts.tm_isdst = getboolfield(L, "isdst"); + t = mktime(&ts); + } + if (t == (time_t)(-1)) + lua_pushnil(L); + else + lua_pushnumber(L, (lua_Number)t); + return 1; +} + +LJLIB_CF(os_difftime) +{ + lua_pushnumber(L, difftime((time_t)(luaL_checknumber(L, 1)), + (time_t)(luaL_optnumber(L, 2, (lua_Number)0)))); + return 1; +} + +/* ------------------------------------------------------------------------ */ + +LJLIB_CF(os_setlocale) +{ + GCstr *s = lj_lib_optstr(L, 1); + const char *str = s ? strdata(s) : NULL; + int opt = lj_lib_checkopt(L, 2, 6, + "\5ctype\7numeric\4time\7collate\10monetary\1\377\3all"); + if (opt == 0) opt = LC_CTYPE; + else if (opt == 1) opt = LC_NUMERIC; + else if (opt == 2) opt = LC_TIME; + else if (opt == 3) opt = LC_COLLATE; + else if (opt == 4) opt = LC_MONETARY; + else if (opt == 6) opt = LC_ALL; + lua_pushstring(L, setlocale(opt, str)); + return 1; +} + +/* ------------------------------------------------------------------------ */ + +#include "lj_libdef.h" + +LUALIB_API int luaopen_os(lua_State *L) +{ + LJ_LIB_REG(L, os); + return 1; +} + diff --git a/src/lib_package.c b/src/lib_package.c new file mode 100644 index 0000000000..69fa1db9f5 --- /dev/null +++ b/src/lib_package.c @@ -0,0 +1,508 @@ +/* +** Package library. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Major portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +*/ + +#define lib_package_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" + +#include "lj_obj.h" +#include "lj_err.h" +#include "lj_lib.h" + +/* ------------------------------------------------------------------------ */ + +/* Error codes for ll_loadfunc. */ +#define PACKAGE_ERR_LIB 1 +#define PACKAGE_ERR_FUNC 2 + +/* Redefined in platform specific part. 
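The dynamic-loader sections below override these defaults where needed.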
*/ +#define PACKAGE_LIB_FAIL "open" +#define setprogdir(L) ((void)0) + +#if defined(LUA_DL_DLOPEN) + +#include <dlfcn.h> + +static void ll_unloadlib(void *lib) +{ + dlclose(lib); +} + +static void *ll_load(lua_State *L, const char *path) +{ + void *lib = dlopen(path, RTLD_NOW); + if (lib == NULL) lua_pushstring(L, dlerror()); + return lib; +} + +static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) +{ + lua_CFunction f = (lua_CFunction)dlsym(lib, sym); + if (f == NULL) lua_pushstring(L, dlerror()); + return f; +} + +#elif defined(LUA_DL_DLL) + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +#undef setprogdir + +static void setprogdir(lua_State *L) +{ + char buff[MAX_PATH + 1]; + char *lb; + DWORD nsize = sizeof(buff); + DWORD n = GetModuleFileNameA(NULL, buff, nsize); + if (n == 0 || n == nsize || (lb = strrchr(buff, '\\')) == NULL) { + luaL_error(L, "unable to get ModuleFileName"); + } else { + *lb = '\0'; + luaL_gsub(L, lua_tostring(L, -1), LUA_EXECDIR, buff); + lua_remove(L, -2); /* remove original string */ + } +} + +static void pusherror(lua_State *L) +{ + DWORD error = GetLastError(); + char buffer[128]; + if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, + NULL, error, 0, buffer, sizeof(buffer), NULL)) + lua_pushstring(L, buffer); + else + lua_pushfstring(L, "system error %d\n", error); +} + +static void ll_unloadlib(void *lib) +{ + FreeLibrary((HINSTANCE)lib); +} + +static void *ll_load(lua_State *L, const char *path) +{ + HINSTANCE lib = LoadLibraryA(path); + if (lib == NULL) pusherror(L); + return lib; +} + +static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) +{ + lua_CFunction f = (lua_CFunction)GetProcAddress((HINSTANCE)lib, sym); + if (f == NULL) pusherror(L); + return f; +} + +#else + +#undef PACKAGE_LIB_FAIL +#define PACKAGE_LIB_FAIL "absent" + +#define DLMSG "dynamic libraries not enabled; check your Lua installation" + +static void ll_unloadlib(void *lib) +{ + (void)lib; +} + +static void *ll_load(lua_State *L, const char *path) +{ + (void)path; + lua_pushliteral(L, DLMSG); + return NULL; +} + +static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) +{ + (void)lib; (void)sym; + lua_pushliteral(L, DLMSG); + return NULL; +} +#endif + +/* ------------------------------------------------------------------------ */ + +static void **ll_register(lua_State *L, const char *path) +{ + void **plib; + lua_pushfstring(L, "LOADLIB: %s", path); + lua_gettable(L, LUA_REGISTRYINDEX); /* check library in registry? */ + if (!lua_isnil(L, -1)) { /* is there an entry? 
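Then reuse the cached handle, so each library path is loaded at most once.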
*/ + plib = (void **)lua_touserdata(L, -1); + } else { /* no entry yet; create one */ + lua_pop(L, 1); + plib = (void **)lua_newuserdata(L, sizeof(void *)); + *plib = NULL; + luaL_getmetatable(L, "_LOADLIB"); + lua_setmetatable(L, -2); + lua_pushfstring(L, "LOADLIB: %s", path); + lua_pushvalue(L, -2); + lua_settable(L, LUA_REGISTRYINDEX); + } + return plib; +} + +static int ll_loadfunc(lua_State *L, const char *path, const char *sym) +{ + void **reg = ll_register(L, path); + if (*reg == NULL) *reg = ll_load(L, path); + if (*reg == NULL) { + return PACKAGE_ERR_LIB; /* unable to load library */ + } else { + lua_CFunction f = ll_sym(L, *reg, sym); + if (f == NULL) + return PACKAGE_ERR_FUNC; /* unable to find function */ + lua_pushcfunction(L, f); + return 0; /* return function */ + } +} + +static int lj_cf_package_loadlib(lua_State *L) +{ + const char *path = luaL_checkstring(L, 1); + const char *init = luaL_checkstring(L, 2); + int stat = ll_loadfunc(L, path, init); + if (stat == 0) { /* no errors? */ + return 1; /* return the loaded function */ + } else { /* error; error message is on stack top */ + lua_pushnil(L); + lua_insert(L, -2); + lua_pushstring(L, (stat == PACKAGE_ERR_LIB) ? PACKAGE_LIB_FAIL : "init"); + return 3; /* return nil, error message, and where */ + } +} + +static int lj_cf_package_unloadlib(lua_State *L) +{ + void **lib = (void **)luaL_checkudata(L, 1, "_LOADLIB"); + if (*lib) ll_unloadlib(*lib); + *lib = NULL; /* mark library as closed */ + return 0; +} + +/* ------------------------------------------------------------------------ */ + +static int readable(const char *filename) +{ + FILE *f = fopen(filename, "r"); /* try to open file */ + if (f == NULL) return 0; /* open failed */ + fclose(f); + return 1; +} + +static const char *pushnexttemplate(lua_State *L, const char *path) +{ + const char *l; + while (*path == *LUA_PATHSEP) path++; /* skip separators */ + if (*path == '\0') return NULL; /* no more templates */ + l = strchr(path, *LUA_PATHSEP); /* find next separator */ + if (l == NULL) l = path + strlen(path); + lua_pushlstring(L, path, (size_t)(l - path)); /* template */ + return l; +} + +static const char *findfile(lua_State *L, const char *name, + const char *pname) +{ + const char *path; + name = luaL_gsub(L, name, ".", LUA_DIRSEP); + lua_getfield(L, LUA_ENVIRONINDEX, pname); + path = lua_tostring(L, -1); + if (path == NULL) + luaL_error(L, LUA_QL("package.%s") " must be a string", pname); + lua_pushliteral(L, ""); /* error accumulator */ + while ((path = pushnexttemplate(L, path)) != NULL) { + const char *filename; + filename = luaL_gsub(L, lua_tostring(L, -1), LUA_PATH_MARK, name); + lua_remove(L, -2); /* remove path template */ + if (readable(filename)) /* does file exist and is readable? 
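E.g. for require("foo.bar") each template from the path string is tried with foo/bar (dots replaced by the directory separator) substituted for LUA_PATH_MARK.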
*/ + return filename; /* return that file name */ + lua_pushfstring(L, "\n\tno file " LUA_QS, filename); + lua_remove(L, -2); /* remove file name */ + lua_concat(L, 2); /* add entry to possible error message */ + } + return NULL; /* not found */ +} + +static void loaderror(lua_State *L, const char *filename) +{ + luaL_error(L, "error loading module " LUA_QS " from file " LUA_QS ":\n\t%s", + lua_tostring(L, 1), filename, lua_tostring(L, -1)); +} + +static int lj_cf_package_loader_lua(lua_State *L) +{ + const char *filename; + const char *name = luaL_checkstring(L, 1); + filename = findfile(L, name, "path"); + if (filename == NULL) return 1; /* library not found in this path */ + if (luaL_loadfile(L, filename) != 0) + loaderror(L, filename); + return 1; /* library loaded successfully */ +} + +static const char *mkfuncname(lua_State *L, const char *modname) +{ + const char *funcname; + const char *mark = strchr(modname, *LUA_IGMARK); + if (mark) modname = mark + 1; + funcname = luaL_gsub(L, modname, ".", "_"); + funcname = lua_pushfstring(L, "luaopen_%s", funcname); + lua_remove(L, -2); /* remove 'gsub' result */ + return funcname; +} + +static int lj_cf_package_loader_c(lua_State *L) +{ + const char *funcname; + const char *name = luaL_checkstring(L, 1); + const char *filename = findfile(L, name, "cpath"); + if (filename == NULL) return 1; /* library not found in this path */ + funcname = mkfuncname(L, name); + if (ll_loadfunc(L, filename, funcname) != 0) + loaderror(L, filename); + return 1; /* library loaded successfully */ +} + +static int lj_cf_package_loader_croot(lua_State *L) +{ + const char *funcname; + const char *filename; + const char *name = luaL_checkstring(L, 1); + const char *p = strchr(name, '.'); + int stat; + if (p == NULL) return 0; /* is root */ + lua_pushlstring(L, name, (size_t)(p - name)); + filename = findfile(L, lua_tostring(L, -1), "cpath"); + if (filename == NULL) return 1; /* root not found */ + funcname = mkfuncname(L, name); + if ((stat = ll_loadfunc(L, filename, funcname)) != 0) { + if (stat != PACKAGE_ERR_FUNC) loaderror(L, filename); /* real error */ + lua_pushfstring(L, "\n\tno module " LUA_QS " in file " LUA_QS, + name, filename); + return 1; /* function not found */ + } + return 1; +} + +static int lj_cf_package_loader_preload(lua_State *L) +{ + const char *name = luaL_checkstring(L, 1); + lua_getfield(L, LUA_ENVIRONINDEX, "preload"); + if (!lua_istable(L, -1)) + luaL_error(L, LUA_QL("package.preload") " must be a table"); + lua_getfield(L, -1, name); + if (lua_isnil(L, -1)) /* not found? */ + lua_pushfstring(L, "\n\tno field package.preload['%s']", name); + return 1; +} + +/* ------------------------------------------------------------------------ */ + +static const int sentinel_ = 0; +#define sentinel ((void *)&sentinel_) + +static int lj_cf_package_require(lua_State *L) +{ + const char *name = luaL_checkstring(L, 1); + int i; + lua_settop(L, 1); /* _LOADED table will be at index 2 */ + lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); + lua_getfield(L, 2, name); + if (lua_toboolean(L, -1)) { /* is it there? 
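Either the module's value or the sentinel below; the sentinel marks a load in progress, so a recursive require of the same module errors out instead of looping.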
*/ + if (lua_touserdata(L, -1) == sentinel) /* check loops */ + luaL_error(L, "loop or previous error loading module " LUA_QS, name); + return 1; /* package is already loaded */ + } + /* else must load it; iterate over available loaders */ + lua_getfield(L, LUA_ENVIRONINDEX, "loaders"); + if (!lua_istable(L, -1)) + luaL_error(L, LUA_QL("package.loaders") " must be a table"); + lua_pushliteral(L, ""); /* error message accumulator */ + for (i = 1; ; i++) { + lua_rawgeti(L, -2, i); /* get a loader */ + if (lua_isnil(L, -1)) + luaL_error(L, "module " LUA_QS " not found:%s", + name, lua_tostring(L, -2)); + lua_pushstring(L, name); + lua_call(L, 1, 1); /* call it */ + if (lua_isfunction(L, -1)) /* did it find module? */ + break; /* module loaded successfully */ + else if (lua_isstring(L, -1)) /* loader returned error message? */ + lua_concat(L, 2); /* accumulate it */ + else + lua_pop(L, 1); + } + lua_pushlightuserdata(L, sentinel); + lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */ + lua_pushstring(L, name); /* pass name as argument to module */ + lua_call(L, 1, 1); /* run loaded module */ + if (!lua_isnil(L, -1)) /* non-nil return? */ + lua_setfield(L, 2, name); /* _LOADED[name] = returned value */ + lua_getfield(L, 2, name); + if (lua_touserdata(L, -1) == sentinel) { /* module did not set a value? */ + lua_pushboolean(L, 1); /* use true as result */ + lua_pushvalue(L, -1); /* extra copy to be returned */ + lua_setfield(L, 2, name); /* _LOADED[name] = true */ + } + return 1; +} + +/* ------------------------------------------------------------------------ */ + +static void setfenv(lua_State *L) +{ + lua_Debug ar; + if (lua_getstack(L, 1, &ar) == 0 || + lua_getinfo(L, "f", &ar) == 0 || /* get calling function */ + lua_iscfunction(L, -1)) + luaL_error(L, LUA_QL("module") " not called from a Lua function"); + lua_pushvalue(L, -2); + lua_setfenv(L, -2); + lua_pop(L, 1); +} + +static void dooptions(lua_State *L, int n) +{ + int i; + for (i = 2; i <= n; i++) { + lua_pushvalue(L, i); /* get option (a function) */ + lua_pushvalue(L, -2); /* module */ + lua_call(L, 1, 0); + } +} + +static void modinit(lua_State *L, const char *modname) +{ + const char *dot; + lua_pushvalue(L, -1); + lua_setfield(L, -2, "_M"); /* module._M = module */ + lua_pushstring(L, modname); + lua_setfield(L, -2, "_NAME"); + dot = strrchr(modname, '.'); /* look for last dot in module name */ + if (dot == NULL) dot = modname; else dot++; + /* set _PACKAGE as package name (full module name minus last part) */ + lua_pushlstring(L, modname, (size_t)(dot - modname)); + lua_setfield(L, -2, "_PACKAGE"); +} + +static int lj_cf_package_module(lua_State *L) +{ + const char *modname = luaL_checkstring(L, 1); + int loaded = lua_gettop(L) + 1; /* index of _LOADED table */ + lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); + lua_getfield(L, loaded, modname); /* get _LOADED[modname] */ + if (!lua_istable(L, -1)) { /* not found? */ + lua_pop(L, 1); /* remove previous result */ + /* try global variable (and create one if it does not exist) */ + if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL) + lj_err_callerv(L, LJ_ERR_BADMODN, modname); + lua_pushvalue(L, -1); + lua_setfield(L, loaded, modname); /* _LOADED[modname] = new table */ + } + /* check whether table already has a _NAME field */ + lua_getfield(L, -1, "_NAME"); + if (!lua_isnil(L, -1)) { /* is table an initialized module? 
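E.g. on a repeated module("a.b") call; _NAME, _M and _PACKAGE are then left untouched.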
*/ + lua_pop(L, 1); + } else { /* no; initialize it */ + lua_pop(L, 1); + modinit(L, modname); + } + lua_pushvalue(L, -1); + setfenv(L); + dooptions(L, loaded - 1); + return 0; +} + +static int lj_cf_package_seeall(lua_State *L) +{ + luaL_checktype(L, 1, LUA_TTABLE); + if (!lua_getmetatable(L, 1)) { + lua_createtable(L, 0, 1); /* create new metatable */ + lua_pushvalue(L, -1); + lua_setmetatable(L, 1); + } + lua_pushvalue(L, LUA_GLOBALSINDEX); + lua_setfield(L, -2, "__index"); /* mt.__index = _G */ + return 0; +} + +/* ------------------------------------------------------------------------ */ + +#define AUXMARK "\1" + +static void setpath(lua_State *L, const char *fieldname, const char *envname, + const char *def) +{ + const char *path = getenv(envname); + if (path == NULL) { + lua_pushstring(L, def); + } else { + path = luaL_gsub(L, path, LUA_PATHSEP LUA_PATHSEP, + LUA_PATHSEP AUXMARK LUA_PATHSEP); + luaL_gsub(L, path, AUXMARK, def); + lua_remove(L, -2); + } + setprogdir(L); + lua_setfield(L, -2, fieldname); +} + +static const luaL_Reg package_lib[] = { + { "loadlib", lj_cf_package_loadlib }, + { "seeall", lj_cf_package_seeall }, + { NULL, NULL } +}; + +static const luaL_Reg package_global[] = { + { "module", lj_cf_package_module }, + { "require", lj_cf_package_require }, + { NULL, NULL } +}; + +static const lua_CFunction package_loaders[] = +{ + lj_cf_package_loader_preload, + lj_cf_package_loader_lua, + lj_cf_package_loader_c, + lj_cf_package_loader_croot, + NULL +}; + +LUALIB_API int luaopen_package(lua_State *L) +{ + int i; + luaL_newmetatable(L, "_LOADLIB"); + lua_pushcfunction(L, lj_cf_package_unloadlib); + lua_setfield(L, -2, "__gc"); + luaL_register(L, LUA_LOADLIBNAME, package_lib); + lua_pushvalue(L, -1); + lua_replace(L, LUA_ENVIRONINDEX); + lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0); + for (i = 0; package_loaders[i] != NULL; i++) { + lua_pushcfunction(L, package_loaders[i]); + lua_rawseti(L, -2, i+1); + } + lua_setfield(L, -2, "loaders"); + setpath(L, "path", LUA_PATH, LUA_PATH_DEFAULT); + setpath(L, "cpath", LUA_CPATH, LUA_CPATH_DEFAULT); + lua_pushliteral(L, LUA_PATH_CONFIG); + lua_setfield(L, -2, "config"); + luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); + lua_setfield(L, -2, "loaded"); + lua_newtable(L); + lua_setfield(L, -2, "preload"); + lua_pushvalue(L, LUA_GLOBALSINDEX); + luaL_register(L, NULL, package_global); + lua_pop(L, 1); + return 1; +} + diff --git a/src/lib_string.c b/src/lib_string.c new file mode 100644 index 0000000000..fdd7fbcb01 --- /dev/null +++ b/src/lib_string.c @@ -0,0 +1,790 @@ +/* +** String library. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Major portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +*/ + +#include <stdio.h> + +#define lib_string_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" + +#include "lj_obj.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_state.h" +#include "lj_ff.h" +#include "lj_ctype.h" +#include "lj_lib.h" + +/* ------------------------------------------------------------------------ */ + +#define LJLIB_MODULE_string + +LJLIB_ASM(string_len) LJLIB_REC(.) 
+{ + lj_lib_checkstr(L, 1); + return FFH_RETRY; +} + +LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) +{ + GCstr *s = lj_lib_checkstr(L, 1); + int32_t len = (int32_t)s->len; + int32_t start = lj_lib_optint(L, 2, 1); + int32_t stop = lj_lib_optint(L, 3, start); + int32_t n, i; + const unsigned char *p; + if (stop < 0) stop += len+1; + if (start < 0) start += len+1; + if (start <= 0) start = 1; + if (stop > len) stop = len; + if (start > stop) return FFH_RES(0); /* Empty interval: return no results. */ + start--; + n = stop - start; + if ((uint32_t)n > LUAI_MAXCSTACK) + lj_err_caller(L, LJ_ERR_STRSLC); + lj_state_checkstack(L, (MSize)n); + p = (const unsigned char *)strdata(s) + start; + for (i = 0; i < n; i++) + setintV(L->base + i-1, p[i]); + return FFH_RES(n); +} + +LJLIB_ASM(string_char) +{ + int i, nargs = cast_int(L->top - L->base); + char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (size_t)nargs); + for (i = 1; i <= nargs; i++) { + int32_t k = lj_lib_checkint(L, i); + if (!checku8(k)) + lj_err_arg(L, i, LJ_ERR_BADVAL); + buf[i-1] = (char)k; + } + setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs)); + return FFH_RES(1); +} + +LJLIB_ASM(string_sub) LJLIB_REC(string_range 1) +{ + lj_lib_checkstr(L, 1); + lj_lib_checkint(L, 2); + setintV(L->base+2, lj_lib_optint(L, 3, -1)); + return FFH_RETRY; +} + +LJLIB_ASM(string_rep) +{ + GCstr *s = lj_lib_checkstr(L, 1); + int32_t len = (int32_t)s->len; + int32_t k = lj_lib_checkint(L, 2); + int64_t tlen = (int64_t)k * len; + const char *src; + char *buf; + if (k <= 0) return FFH_RETRY; + if (tlen > LJ_MAX_STR) + lj_err_caller(L, LJ_ERR_STROV); + buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)tlen); + if (len <= 1) return FFH_RETRY; /* ASM code only needed buffer resize. */ + src = strdata(s); + do { + int32_t i = 0; + do { *buf++ = src[i++]; } while (i < len); + } while (--k > 0); + setstrV(L, L->base-1, lj_str_new(L, G(L)->tmpbuf.buf, (size_t)tlen)); + return FFH_RES(1); +} + +LJLIB_ASM(string_reverse) +{ + GCstr *s = lj_lib_checkstr(L, 1); + lj_str_needbuf(L, &G(L)->tmpbuf, s->len); + return FFH_RETRY; +} +LJLIB_ASM_(string_lower) +LJLIB_ASM_(string_upper) + +/* ------------------------------------------------------------------------ */ + +LJLIB_CF(string_dump) +{ + lj_err_caller(L, LJ_ERR_STRDUMP); + return 0; /* unreachable */ +} + +/* ------------------------------------------------------------------------ */ + +/* macro to `unsign' a character */ +#define uchar(c) ((unsigned char)(c)) + +#define CAP_UNFINISHED (-1) +#define CAP_POSITION (-2) + +typedef struct MatchState { + const char *src_init; /* init of source string */ + const char *src_end; /* end (`\0') of source string */ + lua_State *L; + int level; /* total number of captures (finished or unfinished) */ + struct { + const char *init; + ptrdiff_t len; + } capture[LUA_MAXCAPTURES]; +} MatchState; + +#define L_ESC '%' +#define SPECIALS "^$*+?.([%-" + +static int check_capture(MatchState *ms, int l) +{ + l -= '1'; + if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) + lj_err_caller(ms->L, LJ_ERR_STRCAPI); + return l; +} + +static int capture_to_close(MatchState *ms) +{ + int level = ms->level; + for (level--; level>=0; level--) + if (ms->capture[level].len == CAP_UNFINISHED) return level; + lj_err_caller(ms->L, LJ_ERR_STRPATC); + return 0; /* unreachable */ +} + +static const char *classend(MatchState *ms, const char *p) +{ + switch (*p++) { + case L_ESC: + if (*p == '\0') + lj_err_caller(ms->L, LJ_ERR_STRPATE); + return p+1; + case '[': + if (*p == '^') p++; + do { /* 
look for a `]' */ + if (*p == '\0') + lj_err_caller(ms->L, LJ_ERR_STRPATM); + if (*(p++) == L_ESC && *p != '\0') + p++; /* skip escapes (e.g. `%]') */ + } while (*p != ']'); + return p+1; + default: + return p; + } +} + +static const unsigned char match_class_map[32] = { + 0, LJ_CTYPE_ALPHA, 0, LJ_CTYPE_CNTRL, LJ_CTYPE_DIGIT, 0,0,0,0,0,0,0, + LJ_CTYPE_LOWER, 0,0,0, LJ_CTYPE_PUNCT, 0,0, LJ_CTYPE_SPACE, 0, + LJ_CTYPE_UPPER, 0, LJ_CTYPE_ALNUM, LJ_CTYPE_XDIGIT, 0,0,0,0,0,0,0 +}; + +static int match_class(int c, int cl) +{ + if ((cl & 0xc0) == 0x40) { + int t = match_class_map[(cl&0x1f)]; + if (t) { + t = lj_ctype_isa(c, t); + return (cl & 0x20) ? t : !t; + } + if (cl == 'z') return c == 0; + if (cl == 'Z') return c != 0; + } + return (cl == c); +} + +static int matchbracketclass(int c, const char *p, const char *ec) +{ + int sig = 1; + if (*(p+1) == '^') { + sig = 0; + p++; /* skip the `^' */ + } + while (++p < ec) { + if (*p == L_ESC) { + p++; + if (match_class(c, uchar(*p))) + return sig; + } + else if ((*(p+1) == '-') && (p+2 < ec)) { + p+=2; + if (uchar(*(p-2)) <= c && c <= uchar(*p)) + return sig; + } + else if (uchar(*p) == c) return sig; + } + return !sig; +} + +static int singlematch(int c, const char *p, const char *ep) +{ + switch (*p) { + case '.': return 1; /* matches any char */ + case L_ESC: return match_class(c, uchar(*(p+1))); + case '[': return matchbracketclass(c, p, ep-1); + default: return (uchar(*p) == c); + } +} + +static const char *match(MatchState *ms, const char *s, const char *p); + +static const char *matchbalance(MatchState *ms, const char *s, const char *p) +{ + if (*p == 0 || *(p+1) == 0) + lj_err_caller(ms->L, LJ_ERR_STRPATU); + if (*s != *p) { + return NULL; + } else { + int b = *p; + int e = *(p+1); + int cont = 1; + while (++s < ms->src_end) { + if (*s == e) { + if (--cont == 0) return s+1; + } else if (*s == b) { + cont++; + } + } + } + return NULL; /* string ends out of balance */ +} + +static const char *max_expand(MatchState *ms, const char *s, + const char *p, const char *ep) +{ + ptrdiff_t i = 0; /* counts maximum expand for item */ + while ((s+i)<ms->src_end && singlematch(uchar(*(s+i)), p, ep)) + i++; + /* keeps trying to match with the maximum repetitions */ + while (i>=0) { + const char *res = match(ms, (s+i), ep+1); + if (res) return res; + i--; /* else didn't match; reduce 1 repetition to try again */ + } + return NULL; +} + +static const char *min_expand(MatchState *ms, const char *s, + const char *p, const char *ep) +{ + for (;;) { + const char *res = match(ms, s, ep+1); + if (res != NULL) + return res; + else if (s<ms->src_end && singlematch(uchar(*s), p, ep)) + s++; /* try with one more repetition */ + else + return NULL; + } +} + +static const char *start_capture(MatchState *ms, const char *s, + const char *p, int what) +{ + const char *res; + int level = ms->level; + if (level >= LUA_MAXCAPTURES) lj_err_caller(ms->L, LJ_ERR_STRCAPN); + ms->capture[level].init = s; + ms->capture[level].len = what; + ms->level = level+1; + if ((res=match(ms, s, p)) == NULL) /* match failed? */ + ms->level--; /* undo capture */ + return res; +} + +static const char *end_capture(MatchState *ms, const char *s, + const char *p) +{ + int l = capture_to_close(ms); + const char *res; + ms->capture[l].len = s - ms->capture[l].init; /* close capture */ + if ((res = match(ms, s, p)) == NULL) /* match failed? 
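Then the pattern tail cannot match with this capture closed; reopen it so backtracking can retry.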
*/ + ms->capture[l].len = CAP_UNFINISHED; /* undo capture */ + return res; +} + +static const char *match_capture(MatchState *ms, const char *s, int l) +{ + size_t len; + l = check_capture(ms, l); + len = (size_t)ms->capture[l].len; + if ((size_t)(ms->src_end-s) >= len && + memcmp(ms->capture[l].init, s, len) == 0) + return s+len; + else + return NULL; +} + +static const char *match(MatchState *ms, const char *s, const char *p) +{ + init: /* using goto's to optimize tail recursion */ + switch (*p) { + case '(': /* start capture */ + if (*(p+1) == ')') /* position capture? */ + return start_capture(ms, s, p+2, CAP_POSITION); + else + return start_capture(ms, s, p+1, CAP_UNFINISHED); + case ')': /* end capture */ + return end_capture(ms, s, p+1); + case L_ESC: + switch (*(p+1)) { + case 'b': /* balanced string? */ + s = matchbalance(ms, s, p+2); + if (s == NULL) return NULL; + p+=4; + goto init; /* else return match(ms, s, p+4); */ + case 'f': { /* frontier? */ + const char *ep; char previous; + p += 2; + if (*p != '[') + lj_err_caller(ms->L, LJ_ERR_STRPATB); + ep = classend(ms, p); /* points to what is next */ + previous = (s == ms->src_init) ? '\0' : *(s-1); + if (matchbracketclass(uchar(previous), p, ep-1) || + !matchbracketclass(uchar(*s), p, ep-1)) return NULL; + p=ep; + goto init; /* else return match(ms, s, ep); */ + } + default: + if (lj_ctype_isdigit(uchar(*(p+1)))) { /* capture results (%0-%9)? */ + s = match_capture(ms, s, uchar(*(p+1))); + if (s == NULL) return NULL; + p+=2; + goto init; /* else return match(ms, s, p+2) */ + } + goto dflt; /* case default */ + } + case '\0': /* end of pattern */ + return s; /* match succeeded */ + case '$': + if (*(p+1) == '\0') /* is the `$' the last char in pattern? */ + return (s == ms->src_end) ? s : NULL; /* check end of string */ + else + goto dflt; + default: dflt: { /* it is a pattern item */ + const char *ep = classend(ms, p); /* points to what is next */ + int m = s<ms->src_end && singlematch(uchar(*s), p, ep); + switch (*ep) { + case '?': { /* optional */ + const char *res; + if (m && ((res=match(ms, s+1, ep+1)) != NULL)) + return res; + p=ep+1; + goto init; /* else return match(ms, s, ep+1); */ + } + case '*': /* 0 or more repetitions */ + return max_expand(ms, s, p, ep); + case '+': /* 1 or more repetitions */ + return (m ? 
max_expand(ms, s+1, p, ep) : NULL); + case '-': /* 0 or more repetitions (minimum) */ + return min_expand(ms, s, p, ep); + default: + if (!m) return NULL; + s++; p=ep; + goto init; /* else return match(ms, s+1, ep); */ + } + } + } +} + +static const char *lmemfind(const char *s1, size_t l1, + const char *s2, size_t l2) +{ + if (l2 == 0) { + return s1; /* empty strings are everywhere */ + } else if (l2 > l1) { + return NULL; /* avoids a negative `l1' */ + } else { + const char *init; /* to search for a `*s2' inside `s1' */ + l2--; /* 1st char will be checked by `memchr' */ + l1 = l1-l2; /* `s2' cannot be found after that */ + while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { + init++; /* 1st char is already checked */ + if (memcmp(init, s2+1, l2) == 0) { + return init-1; + } else { /* correct `l1' and `s1' to try again */ + l1 -= (size_t)(init-s1); + s1 = init; + } + } + return NULL; /* not found */ + } +} + +static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) +{ + if (i >= ms->level) { + if (i == 0) /* ms->level == 0, too */ + lua_pushlstring(ms->L, s, (size_t)(e - s)); /* add whole match */ + else + lj_err_caller(ms->L, LJ_ERR_STRCAPI); + } else { + ptrdiff_t l = ms->capture[i].len; + if (l == CAP_UNFINISHED) lj_err_caller(ms->L, LJ_ERR_STRCAPU); + if (l == CAP_POSITION) + lua_pushinteger(ms->L, ms->capture[i].init - ms->src_init + 1); + else + lua_pushlstring(ms->L, ms->capture[i].init, (size_t)l); + } +} + +static int push_captures(MatchState *ms, const char *s, const char *e) +{ + int i; + int nlevels = (ms->level == 0 && s) ? 1 : ms->level; + luaL_checkstack(ms->L, nlevels, "too many captures"); + for (i = 0; i < nlevels; i++) + push_onecapture(ms, i, s, e); + return nlevels; /* number of strings pushed */ +} + +static ptrdiff_t posrelat(ptrdiff_t pos, size_t len) +{ + /* relative string position: negative means back from end */ + if (pos < 0) pos += (ptrdiff_t)len + 1; + return (pos >= 0) ? pos : 0; +} + +static int str_find_aux(lua_State *L, int find) +{ + size_t l1, l2; + const char *s = luaL_checklstring(L, 1, &l1); + const char *p = luaL_checklstring(L, 2, &l2); + ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; + if (init < 0) + init = 0; + else if ((size_t)(init) > l1) + init = (ptrdiff_t)l1; + if (find && (lua_toboolean(L, 4) || /* explicit request? */ + strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ + /* do a plain search */ + const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2); + if (s2) { + lua_pushinteger(L, s2-s+1); + lua_pushinteger(L, s2-s+(ptrdiff_t)l2); + return 2; + } + } else { + MatchState ms; + int anchor = (*p == '^') ? 
(p++, 1) : 0; + const char *s1=s+init; + ms.L = L; + ms.src_init = s; + ms.src_end = s+l1; + do { + const char *res; + ms.level = 0; + if ((res=match(&ms, s1, p)) != NULL) { + if (find) { + lua_pushinteger(L, s1-s+1); /* start */ + lua_pushinteger(L, res-s); /* end */ + return push_captures(&ms, NULL, 0) + 2; + } else { + return push_captures(&ms, s1, res); + } + } + } while (s1++ < ms.src_end && !anchor); + } + lua_pushnil(L); /* not found */ + return 1; +} + +LJLIB_CF(string_find) +{ + return str_find_aux(L, 1); +} + +LJLIB_CF(string_match) +{ + return str_find_aux(L, 0); +} + +LJLIB_NOREG LJLIB_CF(string_gmatch_aux) +{ + const char *p = strVdata(lj_lib_upvalue(L, 2)); + GCstr *str = strV(lj_lib_upvalue(L, 1)); + const char *s = strdata(str); + TValue *tvpos = lj_lib_upvalue(L, 3); + const char *src = s + tvpos->u32.lo; + MatchState ms; + ms.L = L; + ms.src_init = s; + ms.src_end = s + str->len; + for (; src <= ms.src_end; src++) { + const char *e; + ms.level = 0; + if ((e = match(&ms, src, p)) != NULL) { + int32_t pos = (int32_t)(e - s); + if (e == src) pos++; /* Ensure progress for empty match. */ + tvpos->u32.lo = (uint32_t)pos; + return push_captures(&ms, src, e); + } + } + return 0; /* not found */ +} + +LJLIB_CF(string_gmatch) +{ + lj_lib_checkstr(L, 1); + lj_lib_checkstr(L, 2); + L->top = L->base+3; + (L->top-1)->u64 = 0; + lua_pushcclosure(L, lj_cf_string_gmatch_aux, 3); + funcV(L->top-1)->c.ffid = FF_string_gmatch_aux; + return 1; +} + +static void add_s(MatchState *ms, luaL_Buffer *b, const char *s, const char *e) +{ + size_t l, i; + const char *news = lua_tolstring(ms->L, 3, &l); + for (i = 0; i < l; i++) { + if (news[i] != L_ESC) { + luaL_addchar(b, news[i]); + } else { + i++; /* skip ESC */ + if (!lj_ctype_isdigit(uchar(news[i]))) { + luaL_addchar(b, news[i]); + } else if (news[i] == '0') { + luaL_addlstring(b, s, (size_t)(e - s)); + } else { + push_onecapture(ms, news[i] - '1', s, e); + luaL_addvalue(b); /* add capture to accumulated result */ + } + } + } +} + +static void add_value(MatchState *ms, luaL_Buffer *b, + const char *s, const char *e) +{ + lua_State *L = ms->L; + switch (lua_type(L, 3)) { + case LUA_TNUMBER: + case LUA_TSTRING: { + add_s(ms, b, s, e); + return; + } + case LUA_TFUNCTION: { + int n; + lua_pushvalue(L, 3); + n = push_captures(ms, s, e); + lua_call(L, n, 1); + break; + } + case LUA_TTABLE: { + push_onecapture(ms, 0, s, e); + lua_gettable(L, 3); + break; + } + } + if (!lua_toboolean(L, -1)) { /* nil or false? */ + lua_pop(L, 1); + lua_pushlstring(L, s, (size_t)(e - s)); /* keep original text */ + } else if (!lua_isstring(L, -1)) { + lj_err_callerv(L, LJ_ERR_STRGSRV, luaL_typename(L, -1)); + } + luaL_addvalue(b); /* add result to accumulator */ +} + +LJLIB_CF(string_gsub) +{ + size_t srcl; + const char *src = luaL_checklstring(L, 1, &srcl); + const char *p = luaL_checkstring(L, 2); + int tr = lua_type(L, 3); + int max_s = luaL_optint(L, 4, (int)(srcl+1)); + int anchor = (*p == '^') ? (p++, 1) : 0; + int n = 0; + MatchState ms; + luaL_Buffer b; + if (!(tr == LUA_TNUMBER || tr == LUA_TSTRING || + tr == LUA_TFUNCTION || tr == LUA_TTABLE)) + lj_err_arg(L, 3, LJ_ERR_NOSFT); + luaL_buffinit(L, &b); + ms.L = L; + ms.src_init = src; + ms.src_end = src+srcl; + while (n < max_s) { + const char *e; + ms.level = 0; + e = match(&ms, src, p); + if (e) { + n++; + add_value(&ms, &b, src, e); + } + if (e && e>src) /* non empty match? 
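Then continue right after it. An empty match instead falls through and copies one source char, guaranteeing progress: e.g. ("abc"):gsub("x*", "-") yields "-a-b-c-" with 4 substitutions.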
*/ + src = e; /* skip it */ + else if (src < ms.src_end) + luaL_addchar(&b, *src++); + else + break; + if (anchor) + break; + } + luaL_addlstring(&b, src, (size_t)(ms.src_end-src)); + luaL_pushresult(&b); + lua_pushinteger(L, n); /* number of substitutions */ + return 2; +} + +/* ------------------------------------------------------------------------ */ + +/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ +#define MAX_FMTITEM 512 +/* valid flags in a format specification */ +#define FMT_FLAGS "-+ #0" +/* +** maximum size of each format specification (such as '%-099.99d') +** (+10 accounts for %99.99x plus margin of error) +*/ +#define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10) + +static void addquoted(lua_State *L, luaL_Buffer *b, int arg) +{ + GCstr *str = lj_lib_checkstr(L, arg); + int32_t len = (int32_t)str->len; + const char *s = strdata(str); + luaL_addchar(b, '"'); + while (len--) { + switch (*s) { + case '"': case '\\': case '\n': + luaL_addchar(b, '\\'); + luaL_addchar(b, *s); + break; + case '\r': + luaL_addlstring(b, "\\r", 2); + break; + case '\0': + luaL_addlstring(b, "\\000", 4); + break; + default: + luaL_addchar(b, *s); + break; + } + s++; + } + luaL_addchar(b, '"'); +} + +static const char *scanformat(lua_State *L, const char *strfrmt, char *form) +{ + const char *p = strfrmt; + while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */ + if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS)) + lj_err_caller(L, LJ_ERR_STRFMTR); + if (lj_ctype_isdigit(uchar(*p))) p++; /* skip width */ + if (lj_ctype_isdigit(uchar(*p))) p++; /* (2 digits at most) */ + if (*p == '.') { + p++; + if (lj_ctype_isdigit(uchar(*p))) p++; /* skip precision */ + if (lj_ctype_isdigit(uchar(*p))) p++; /* (2 digits at most) */ + } + if (lj_ctype_isdigit(uchar(*p))) + lj_err_caller(L, LJ_ERR_STRFMTW); + *(form++) = '%'; + strncpy(form, strfrmt, (size_t)(p - strfrmt + 1)); + form += p - strfrmt + 1; + *form = '\0'; + return p; +} + +static void addintlen(char *form) +{ + size_t l = strlen(form); + char spec = form[l - 1]; + strcpy(form + l - 1, LUA_INTFRMLEN); + form[l + sizeof(LUA_INTFRMLEN) - 2] = spec; + form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0'; +} + +LJLIB_CF(string_format) +{ + int arg = 1; + GCstr *fmt = lj_lib_checkstr(L, arg); + const char *strfrmt = strdata(fmt); + const char *strfrmt_end = strfrmt + fmt->len; + luaL_Buffer b; + luaL_buffinit(L, &b); + while (strfrmt < strfrmt_end) { + if (*strfrmt != L_ESC) { + luaL_addchar(&b, *strfrmt++); + } else if (*++strfrmt == L_ESC) { + luaL_addchar(&b, *strfrmt++); /* %% */ + } else { /* format item */ + char form[MAX_FMTSPEC]; /* to store the format (`%...') */ + char buff[MAX_FMTITEM]; /* to store the formatted item */ + arg++; + strfrmt = scanformat(L, strfrmt, form); + switch (*strfrmt++) { + case 'c': + sprintf(buff, form, lj_lib_checkint(L, arg)); + break; + case 'd': case 'i': + addintlen(form); + sprintf(buff, form, (LUA_INTFRM_T)lj_lib_checknum(L, arg)); + break; + case 'o': case 'u': case 'x': case 'X': + addintlen(form); + sprintf(buff, form, (unsigned LUA_INTFRM_T)lj_lib_checknum(L, arg)); + break; + case 'e': case 'E': case 'f': case 'g': case 'G': + sprintf(buff, form, (double)lj_lib_checknum(L, arg)); + break; + case 'q': + addquoted(L, &b, arg); + continue; + case 'p': + lj_str_pushf(L, "%p", lua_topointer(L, arg)); + luaL_addvalue(&b); + continue; + case 's': { + GCstr *str = lj_lib_checkstr(L, arg); + if (!strchr(form, '.') && str->len >= 100) { + /* no precision and string is too 
long to be formatted; + keep original string */ + setstrV(L, L->top++, str); + luaL_addvalue(&b); + continue; + } + sprintf(buff, form, strdata(str)); + break; + } + default: + lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt-1)); + break; + } + luaL_addlstring(&b, buff, strlen(buff)); + } + } + luaL_pushresult(&b); + return 1; +} + +/* ------------------------------------------------------------------------ */ + +#include "lj_libdef.h" + +LUALIB_API int luaopen_string(lua_State *L) +{ + GCtab *mt; + LJ_LIB_REG(L, string); +#if defined(LUA_COMPAT_GFIND) + lua_getfield(L, -1, "gmatch"); + lua_setfield(L, -2, "gfind"); +#endif + mt = lj_tab_new(L, 0, 1); + /* NOBARRIER: G(L)->basemt[] is a GC root. */ + setgcref(G(L)->basemt[~LJ_TSTR], obj2gco(mt)); + settabV(L, lj_tab_setstr(L, mt, strref(G(L)->mmname[MM_index])), + tabV(L->top-1)); + mt->nomm = cast_byte(~(1u<<MM_index)); + return 1; +} + diff --git a/src/lib_table.c b/src/lib_table.c new file mode 100644 --- /dev/null +++ b/src/lib_table.c +/* +** Table library. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Major portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +*/ + +#define lib_table_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_tab.h" +#include "lj_lib.h" + +/* ------------------------------------------------------------------------ */ + +#define LJLIB_MODULE_table + +LJLIB_CF(table_foreachi) +{ + GCtab *t = lj_lib_checktab(L, 1); + GCfunc *func = lj_lib_checkfunc(L, 2); + MSize i, n = lj_tab_len(t); + for (i = 1; i <= n; i++) { + cTValue *val; + setfuncV(L, L->top, func); + setintV(L->top+1, i); + val = lj_tab_getint(t, (int32_t)i); + if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); } + L->top += 3; + lua_call(L, 2, 1); + if (!tvisnil(L->top-1)) + return 1; + L->top--; + } + return 0; +} + +LJLIB_CF(table_foreach) +{ + GCtab *t = lj_lib_checktab(L, 1); + GCfunc *func = lj_lib_checkfunc(L, 2); + L->top = L->base+3; + setnilV(L->top-1); + while (lj_tab_next(L, t, L->top-1)) { + copyTV(L, L->top+2, L->top); + copyTV(L, L->top+1, L->top-1); + setfuncV(L, L->top, func); + L->top += 3; + lua_call(L, 2, 1); + if (!tvisnil(L->top-1)) + return 1; + L->top--; + } + return 0; +} + +LJLIB_ASM(table_getn) LJLIB_REC(.) +{ + lj_lib_checktab(L, 1); + return FFH_UNREACHABLE; +} + +LJLIB_CF(table_maxn) +{ + GCtab *t = lj_lib_checktab(L, 1); + TValue *array = tvref(t->array); + Node *node; + lua_Number m = 0; + ptrdiff_t i; + for (i = (ptrdiff_t)t->asize-1; i >= 0; i--) + if (!tvisnil(&array[i])) { + m = (lua_Number)i; + break; + } + node = noderef(t->node); + for (i = 0; i <= (ptrdiff_t)t->hmask; i++) + if (tvisnum(&node[i].key) && numV(&node[i].key) > m) + m = numV(&node[i].key); + setnumV(L->top-1, m); + return 1; +} + +LJLIB_CF(table_insert) +{ + GCtab *t = lj_lib_checktab(L, 1); + int32_t n, i = (int32_t)lj_tab_len(t) + 1; + int nargs = (int)((char *)L->top - (char *)L->base); + if (nargs != 2*sizeof(TValue)) { + if (nargs != 3*sizeof(TValue)) + lj_err_caller(L, LJ_ERR_TABINS); + /* NOBARRIER: This just moves existing elements around. */ + for (n = lj_lib_checkint(L, 2); i > n; i--) { + /* The set may invalidate the get pointer, so need to do it first! */ + TValue *dst = lj_tab_setint(L, t, i); + cTValue *src = lj_tab_getint(t, i-1); + if (src) { + copyTV(L, dst, src); + } else { + setnilV(dst); + } + } + i = n; + } + { + TValue *dst = lj_tab_setint(L, t, i); + copyTV(L, dst, L->top-1); + lj_gc_barriert(L, t, dst); + } + return 0; +} + +LJLIB_CF(table_remove) +{ + GCtab *t = lj_lib_checktab(L, 1); + int32_t e = (int32_t)lj_tab_len(t); + int32_t pos = lj_lib_optint(L, 2, e); + if (!(1 <= pos && pos <= e)) /* position is outside bounds? */ + return 0; /* nothing to remove */ + lua_rawgeti(L, 1, pos); + /* NOBARRIER: This just moves existing elements around. */ + for (; pos < e; pos++) { + cTValue *src = lj_tab_getint(t, pos+1); + TValue *dst = lj_tab_setint(L, t, pos); + if (src) { + copyTV(L, dst, src); + } else { + setnilV(dst); + } + } + setnilV(lj_tab_setint(L, t, e)); + return 1; +} + +LJLIB_CF(table_concat) +{ + luaL_Buffer b; + GCtab *t = lj_lib_checktab(L, 1); + GCstr *sep = lj_lib_optstr(L, 2); + MSize seplen = sep ? sep->len : 0; + int32_t i = lj_lib_optint(L, 3, 1); + int32_t e = L->base+3 < L->top ?
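/* Worked example for table_insert() above, assuming t = {10, 20, 30}:
**
**   table.insert(t, 40)    -- two args: append, t[4] = 40
**   table.insert(t, 1, 5)  -- three args: shift t[3]->t[4], t[2]->t[3],
**                          --             t[1]->t[2], then set t[1] = 5
**
** The shift runs top-down (i counting down to n) precisely so that each
** element is copied out of its slot before that slot is overwritten.
*/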
lj_lib_checkint(L, 4) : + (int32_t)lj_tab_len(t); + luaL_buffinit(L, &b); + if (i <= e) { + for (;;) { + cTValue *o; + lua_rawgeti(L, 1, i); + o = L->top-1; + if (!(tvisstr(o) || tvisnum(o))) + lj_err_callerv(L, LJ_ERR_TABCAT, typename(o), i); + luaL_addvalue(&b); + if (i++ == e) break; + if (seplen) + luaL_addlstring(&b, strdata(sep), seplen); + } + } + luaL_pushresult(&b); + return 1; +} + +/* ------------------------------------------------------------------------ */ + +static void set2(lua_State *L, int i, int j) +{ + lua_rawseti(L, 1, i); + lua_rawseti(L, 1, j); +} + +static int sort_comp(lua_State *L, int a, int b) +{ + if (!lua_isnil(L, 2)) { /* function? */ + int res; + lua_pushvalue(L, 2); + lua_pushvalue(L, a-1); /* -1 to compensate function */ + lua_pushvalue(L, b-2); /* -2 to compensate function and `a' */ + lua_call(L, 2, 1); + res = lua_toboolean(L, -1); + lua_pop(L, 1); + return res; + } else { /* a < b? */ + return lua_lessthan(L, a, b); + } +} + +static void auxsort(lua_State *L, int l, int u) +{ + while (l < u) { /* for tail recursion */ + int i, j; + /* sort elements a[l], a[(l+u)/2] and a[u] */ + lua_rawgeti(L, 1, l); + lua_rawgeti(L, 1, u); + if (sort_comp(L, -1, -2)) /* a[u] < a[l]? */ + set2(L, l, u); /* swap a[l] - a[u] */ + else + lua_pop(L, 2); + if (u-l == 1) break; /* only 2 elements */ + i = (l+u)/2; + lua_rawgeti(L, 1, i); + lua_rawgeti(L, 1, l); + if (sort_comp(L, -2, -1)) { /* a[i] < a[l]? */ + set2(L, i, l); /* swap a[i] - a[l] */ + } else { + lua_pop(L, 1); /* remove a[i] */ + lua_rawgeti(L, 1, u); + if (sort_comp(L, -1, -2)) /* a[u] < a[i]? */ + set2(L, i, u); /* swap a[u] - a[i] */ + else + lua_pop(L, 2); + } + if (u-l == 2) break; /* only 3 elements */ + lua_rawgeti(L, 1, i); /* get middle element (pivot P) */ + lua_pushvalue(L, -1); + lua_rawgeti(L, 1, u-1); + set2(L, i, u-1); /* a[i] = a[u-1]; a[u-1] = P */ + /* a[l] <= P == a[u-1] <= a[u], only need to sort from l+1 to u-2 */ + i = l; j = u-1; + for (;;) { /* invariant: a[l..i] <= P <= a[j..u] */ + /* repeat ++i until a[i] >= P */ + while (lua_rawgeti(L, 1, ++i), sort_comp(L, -1, -2)) { + if (i>u) lj_err_caller(L, LJ_ERR_TABSORT); + lua_pop(L, 1); /* remove a[i] */ + } + /* repeat --j until a[j] <= P */ + while (lua_rawgeti(L, 1, --j), sort_comp(L, -3, -1)) { + if (j<l) lj_err_caller(L, LJ_ERR_TABSORT); + lua_pop(L, 1); /* remove a[j] */ + } + if (j<i) { + lua_pop(L, 3); /* pop pivot, a[i], a[j] */ + break; + } + set2(L, i, j); + } + /* swap pivot (a[u-1]) with a[i] to satisfy pos. of it in sorted array */ + lua_rawgeti(L, 1, u-1); + lua_rawgeti(L, 1, i); + set2(L, u-1, i); + /* a[l..i-1] <= a[i] == P <= a[i+1..u] */ + /* adjust so that smaller half is in [j..i] and larger one in [l..u] */ + if (i-l < u-i) { + j = l; i = i-1; l = i+2; + } else { + j = i+1; i = u; u = j-2; + } + auxsort(L, j, i); /* call recursively the smaller one */ + } /* repeat the routine for the larger one */ +} + +LJLIB_CF(table_sort) +{ + GCtab *t = lj_lib_checktab(L, 1); + int32_t n = (int32_t)lj_tab_len(t); + lua_settop(L, 2); + if (!tvisnil(L->base+1)) + lj_lib_checkfunc(L, 2); + auxsort(L, 1, n); + return 0; +} + +/* ------------------------------------------------------------------------ */ + +#include "lj_libdef.h" + +LUALIB_API int luaopen_table(lua_State *L) +{ + LJ_LIB_REG(L, table); + return 1; +} + diff --git a/src/lj.supp b/src/lj.supp new file mode 100644 index 0000000000..9a1379d711 --- /dev/null +++ b/src/lj.supp @@ -0,0 +1,6 @@ +# Valgrind suppression file for LuaJIT 2.x. +{ + Optimized string compare + Memcheck:Addr4 + fun:lj_str_cmp +} diff --git a/src/lj_alloc.c b/src/lj_alloc.c new file mode 100644 index 0000000000..8ad4f8fbfa --- /dev/null +++ b/src/lj_alloc.c @@ -0,0 +1,1232 @@ +/* +** Bundled memory allocator. +** +** Beware: this is a HEAVILY CUSTOMIZED version of dlmalloc. +** The original bears the following remark: +** +** This is a version (aka dlmalloc) of malloc/free/realloc written by +** Doug Lea and released to the public domain, as explained at +** http://creativecommons.org/licenses/publicdomain. +** +** * Version pre-2.8.4 Wed Mar 29 19:46:29 2006 (dl at gee) +** +** No additional copyright is claimed over the customizations. +** Please do NOT bother the original author about this version here! +** +** If you want to use dlmalloc in another project, you should get +** the original from: ftp://gee.cs.oswego.edu/pub/misc/ +** For thread-safe derivatives, take a look at: +** - ptmalloc: http://www.malloc.de/ +** - nedmalloc: http://www.nedprod.com/programs/portable/nedmalloc/ +*/ + +#define lj_alloc_c +#define LUA_CORE + +/* To get the mremap prototype. Must be defined before any system includes.
*/ +#if defined(__linux__) && !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif + +#include "lj_def.h" +#include "lj_arch.h" +#include "lj_alloc.h" + +#ifndef LUAJIT_USE_SYSMALLOC + +#define MAX_SIZE_T (~(size_t)0) +#define MALLOC_ALIGNMENT ((size_t)8U) + +#define DEFAULT_GRANULARITY ((size_t)128U * (size_t)1024U) +#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) +#define DEFAULT_MMAP_THRESHOLD ((size_t)128U * (size_t)1024U) +#define MAX_RELEASE_CHECK_RATE 255 + +/* ------------------- size_t and alignment properties -------------------- */ + +/* The byte and bit size of a size_t */ +#define SIZE_T_SIZE (sizeof(size_t)) +#define SIZE_T_BITSIZE (sizeof(size_t) << 3) + +/* Some constants coerced to size_t */ +/* Annoying but necessary to avoid errors on some platforms */ +#define SIZE_T_ZERO ((size_t)0) +#define SIZE_T_ONE ((size_t)1) +#define SIZE_T_TWO ((size_t)2) +#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) +#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) +#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) + +/* The bit mask value corresponding to MALLOC_ALIGNMENT */ +#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE) + +/* the number of bytes to offset an address to align it */ +#define align_offset(A)\ + ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\ + ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) + +/* -------------------------- MMAP support ------------------------------- */ + +#define MFAIL ((void *)(MAX_SIZE_T)) +#define CMFAIL ((char *)(MFAIL)) /* defined for convenience */ + +#define IS_DIRECT_BIT (SIZE_T_ONE) + +#ifdef LUA_USE_WIN + +#if LJ_64 +#error "missing support for WIN64 to allocate in lower 2G" +#endif + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +/* Win32 MMAP via VirtualAlloc */ +static LJ_AINLINE void *CALL_MMAP(size_t size) +{ + void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); + return (ptr != 0)? ptr: MFAIL; +} + +/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ +static LJ_AINLINE void *DIRECT_MMAP(size_t size) +{ + void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, + PAGE_READWRITE); + return (ptr != 0)? ptr: MFAIL; +} + +/* This function supports releasing coalesced segments */ +static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) +{ + MEMORY_BASIC_INFORMATION minfo; + char *cptr = (char *)ptr; + while (size) { + if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) + return -1; + if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || + minfo.State != MEM_COMMIT || minfo.RegionSize > size) + return -1; + if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) + return -1; + cptr += minfo.RegionSize; + size -= minfo.RegionSize; + } + return 0; +} + +#else + +#include <sys/mman.h> + +#define MMAP_PROT (PROT_READ|PROT_WRITE) +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +#define MAP_ANONYMOUS MAP_ANON +#endif /* MAP_ANON */ + +#if LJ_64 +#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS|MAP_32BIT) +#else +#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) +#endif + +#define CALL_MMAP(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) +#define DIRECT_MMAP(s) CALL_MMAP(s) +#define CALL_MUNMAP(a, s) munmap((a), (s)) + +#ifdef __linux__ +/* Need to define _GNU_SOURCE to get the mremap prototype.
*/ +#define CALL_MREMAP(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) +#endif + +#endif + +#ifndef CALL_MREMAP +#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) +#endif + +/* ----------------------- Chunk representations ------------------------ */ + +struct malloc_chunk { + size_t prev_foot; /* Size of previous chunk (if free). */ + size_t head; /* Size and inuse bits. */ + struct malloc_chunk *fd; /* double links -- used only if free. */ + struct malloc_chunk *bk; +}; + +typedef struct malloc_chunk mchunk; +typedef struct malloc_chunk *mchunkptr; +typedef struct malloc_chunk *sbinptr; /* The type of bins of chunks */ +typedef unsigned int bindex_t; /* Described below */ +typedef unsigned int binmap_t; /* Described below */ +typedef unsigned int flag_t; /* The type of various bit flag sets */ + +/* ------------------- Chunks sizes and alignments ----------------------- */ + +#define MCHUNK_SIZE (sizeof(mchunk)) + +#define CHUNK_OVERHEAD (SIZE_T_SIZE) + +/* Direct chunks need a second word of overhead ... */ +#define DIRECT_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) +/* ... and additional padding for fake next-chunk at foot */ +#define DIRECT_FOOT_PAD (FOUR_SIZE_T_SIZES) + +/* The smallest size we can malloc is an aligned minimal chunk */ +#define MIN_CHUNK_SIZE\ + ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +/* conversion from malloc headers to user pointers, and back */ +#define chunk2mem(p) ((void *)((char *)(p) + TWO_SIZE_T_SIZES)) +#define mem2chunk(mem) ((mchunkptr)((char *)(mem) - TWO_SIZE_T_SIZES)) +/* chunk associated with aligned address A */ +#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A))) + +/* Bounds on request (not chunk) sizes. */ +#define MAX_REQUEST ((~MIN_CHUNK_SIZE+1) << 2) +#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE) + +/* pad request bytes into a usable size */ +#define pad_request(req) \ + (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +/* pad request, checking for minimum (but not maximum) */ +#define request2size(req) \ + (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req)) + +/* ------------------ Operations on head and foot fields ----------------- */ + +#define PINUSE_BIT (SIZE_T_ONE) +#define CINUSE_BIT (SIZE_T_TWO) +#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT) + +/* Head value for fenceposts */ +#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE) + +/* extraction of fields from head words */ +#define cinuse(p) ((p)->head & CINUSE_BIT) +#define pinuse(p) ((p)->head & PINUSE_BIT) +#define chunksize(p) ((p)->head & ~(INUSE_BITS)) + +#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT) +#define clear_cinuse(p) ((p)->head &= ~CINUSE_BIT) + +/* Treat space at ptr +/- offset as a chunk */ +#define chunk_plus_offset(p, s) ((mchunkptr)(((char *)(p)) + (s))) +#define chunk_minus_offset(p, s) ((mchunkptr)(((char *)(p)) - (s))) + +/* Ptr to next or previous physical malloc_chunk. 
*/ +#define next_chunk(p) ((mchunkptr)(((char *)(p)) + ((p)->head & ~INUSE_BITS))) +#define prev_chunk(p) ((mchunkptr)(((char *)(p)) - ((p)->prev_foot) )) + +/* extract next chunk's pinuse bit */ +#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT) + +/* Get/set size at footer */ +#define get_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot) +#define set_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot = (s)) + +/* Set size, pinuse bit, and foot */ +#define set_size_and_pinuse_of_free_chunk(p, s)\ + ((p)->head = (s|PINUSE_BIT), set_foot(p, s)) + +/* Set size, pinuse bit, foot, and clear next pinuse */ +#define set_free_with_pinuse(p, s, n)\ + (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s)) + +#define is_direct(p)\ + (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_DIRECT_BIT)) + +/* Get the internal overhead associated with chunk p */ +#define overhead_for(p)\ + (is_direct(p)? DIRECT_CHUNK_OVERHEAD : CHUNK_OVERHEAD) + +/* ---------------------- Overlaid data structures ----------------------- */ + +struct malloc_tree_chunk { + /* The first four fields must be compatible with malloc_chunk */ + size_t prev_foot; + size_t head; + struct malloc_tree_chunk *fd; + struct malloc_tree_chunk *bk; + + struct malloc_tree_chunk *child[2]; + struct malloc_tree_chunk *parent; + bindex_t index; +}; + +typedef struct malloc_tree_chunk tchunk; +typedef struct malloc_tree_chunk *tchunkptr; +typedef struct malloc_tree_chunk *tbinptr; /* The type of bins of trees */ + +/* A little helper macro for trees */ +#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1]) + +/* ----------------------------- Segments -------------------------------- */ + +struct malloc_segment { + char *base; /* base address */ + size_t size; /* allocated size */ + struct malloc_segment *next; /* ptr to next segment */ +}; + +typedef struct malloc_segment msegment; +typedef struct malloc_segment *msegmentptr; + +/* ---------------------------- malloc_state ----------------------------- */ + +/* Bin types, widths and sizes */ +#define NSMALLBINS (32U) +#define NTREEBINS (32U) +#define SMALLBIN_SHIFT (3U) +#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT) +#define TREEBIN_SHIFT (8U) +#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT) +#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE) +#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD) + +struct malloc_state { + binmap_t smallmap; + binmap_t treemap; + size_t dvsize; + size_t topsize; + mchunkptr dv; + mchunkptr top; + size_t trim_check; + size_t release_checks; + mchunkptr smallbins[(NSMALLBINS+1)*2]; + tbinptr treebins[NTREEBINS]; + msegment seg; +}; + +typedef struct malloc_state *mstate; + +#define is_initialized(M) ((M)->top != 0) + +/* -------------------------- system alloc setup ------------------------- */ + +/* page-align a size */ +#define page_align(S)\ + (((S) + (LJ_PAGESIZE - SIZE_T_ONE)) & ~(LJ_PAGESIZE - SIZE_T_ONE)) + +/* granularity-align a size */ +#define granularity_align(S)\ + (((S) + (DEFAULT_GRANULARITY - SIZE_T_ONE))\ + & ~(DEFAULT_GRANULARITY - SIZE_T_ONE)) + +#ifdef LUA_USE_WIN +#define mmap_align(S) granularity_align(S) +#else +#define mmap_align(S) page_align(S) +#endif + +/* True if segment S holds address A */ +#define segment_holds(S, A)\ + ((char *)(A) >= S->base && (char *)(A) < S->base + S->size) + +/* Return segment holding given address */ +static msegmentptr segment_holding(mstate m, char *addr) +{ + msegmentptr sp = &m->seg; + for (;;) { + if (addr >= 
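/* Worked example of the head-field encoding above: chunk sizes are multiples
** of MALLOC_ALIGNMENT (8), so the two low bits are free for flags. A 104-byte
** chunk that is in use, directly after another in-use chunk, has
**
**   head = 104 | CINUSE_BIT | PINUSE_BIT = 0x68 | 0x2 | 0x1 = 0x6b
**   chunksize(p) = head & ~INUSE_BITS    = 104
*/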
sp->base && addr < sp->base + sp->size) + return sp; + if ((sp = sp->next) == 0) + return 0; + } +} + +/* Return true if segment contains a segment link */ +static int has_segment_link(mstate m, msegmentptr ss) +{ + msegmentptr sp = &m->seg; + for (;;) { + if ((char *)sp >= ss->base && (char *)sp < ss->base + ss->size) + return 1; + if ((sp = sp->next) == 0) + return 0; + } +} + +/* + TOP_FOOT_SIZE is padding at the end of a segment, including space + that may be needed to place segment records and fenceposts when new + noncontiguous segments are added. +*/ +#define TOP_FOOT_SIZE\ + (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) + +/* ---------------------------- Indexing Bins ---------------------------- */ + +#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) +#define small_index(s) ((s) >> SMALLBIN_SHIFT) +#define small_index2size(i) ((i) << SMALLBIN_SHIFT) +#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) + +/* addressing by index. See above about smallbin repositioning */ +#define smallbin_at(M, i) ((sbinptr)((char *)&((M)->smallbins[(i)<<1]))) +#define treebin_at(M,i) (&((M)->treebins[i])) + +/* assign tree index for size S to variable I */ +#define compute_tree_index(S, I)\ +{\ + unsigned int X = S >> TREEBIN_SHIFT;\ + if (X == 0) {\ + I = 0;\ + } else if (X > 0xFFFF) {\ + I = NTREEBINS-1;\ + } else {\ + unsigned int K = lj_fls(X);\ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +/* Bit representing maximum resolved size in a treebin at i */ +#define bit_for_tree_index(i) \ + (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2) + +/* Shift placing maximum resolved bit in a treebin at i as sign bit */ +#define leftshift_for_tree_index(i) \ + ((i == NTREEBINS-1)? 
0 : \ + ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2))) + +/* The size of the smallest chunk held in bin with index i */ +#define minsize_for_tree_index(i) \ + ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \ + (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1))) + +/* ------------------------ Operations on bin maps ----------------------- */ + +/* bit corresponding to given index */ +#define idx2bit(i) ((binmap_t)(1) << (i)) + +/* Mark/Clear bits with given index */ +#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i)) +#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i)) +#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i)) + +#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i)) +#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i)) +#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i)) + +/* mask with all bits to left of least bit of x on */ +#define left_bits(x) ((x<<1) | (~(x<<1)+1)) + +/* Set cinuse bit and pinuse bit of next chunk */ +#define set_inuse(M,p,s)\ + ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ + ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set cinuse and pinuse of this chunk and pinuse of next chunk */ +#define set_inuse_and_pinuse(M,p,s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set size, cinuse and pinuse bit of this chunk */ +#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT)) + +/* ----------------------- Operations on smallbins ----------------------- */ + +/* Link a free chunk into a smallbin */ +#define insert_small_chunk(M, P, S) {\ + bindex_t I = small_index(S);\ + mchunkptr B = smallbin_at(M, I);\ + mchunkptr F = B;\ + if (!smallmap_is_marked(M, I))\ + mark_smallmap(M, I);\ + else\ + F = B->fd;\ + B->fd = P;\ + F->bk = P;\ + P->fd = F;\ + P->bk = B;\ +} + +/* Unlink a chunk from a smallbin */ +#define unlink_small_chunk(M, P, S) {\ + mchunkptr F = P->fd;\ + mchunkptr B = P->bk;\ + bindex_t I = small_index(S);\ + if (F == B) {\ + clear_smallmap(M, I);\ + } else {\ + F->bk = B;\ + B->fd = F;\ + }\ +} + +/* Unlink the first chunk from a smallbin */ +#define unlink_first_small_chunk(M, B, P, I) {\ + mchunkptr F = P->fd;\ + if (B == F) {\ + clear_smallmap(M, I);\ + } else {\ + B->fd = F;\ + F->bk = B;\ + }\ +} + +/* Replace dv node, binning the old one */ +/* Used only when dvsize known to be small */ +#define replace_dv(M, P, S) {\ + size_t DVS = M->dvsize;\ + if (DVS != 0) {\ + mchunkptr DV = M->dv;\ + insert_small_chunk(M, DV, DVS);\ + }\ + M->dvsize = S;\ + M->dv = P;\ +} + +/* ------------------------- Operations on trees ------------------------- */ + +/* Insert chunk into tree */ +#define insert_large_chunk(M, X, S) {\ + tbinptr *H;\ + bindex_t I;\ + compute_tree_index(S, I);\ + H = treebin_at(M, I);\ + X->index = I;\ + X->child[0] = X->child[1] = 0;\ + if (!treemap_is_marked(M, I)) {\ + mark_treemap(M, I);\ + *H = X;\ + X->parent = (tchunkptr)H;\ + X->fd = X->bk = X;\ + } else {\ + tchunkptr T = *H;\ + size_t K = S << leftshift_for_tree_index(I);\ + for (;;) {\ + if (chunksize(T) != S) {\ + tchunkptr *C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\ + K <<= 1;\ + if (*C != 0) {\ + T = *C;\ + } else {\ + *C = X;\ + X->parent = T;\ + X->fd = X->bk = X;\ + break;\ + }\ + } else {\ + tchunkptr F = T->fd;\ + T->fd = F->bk = X;\ + X->fd = F;\ + X->bk = T;\ + X->parent = 0;\ + break;\ + }\ + }\ + }\ +} + +#define unlink_large_chunk(M, X) {\ + tchunkptr XP = 
X->parent;\ + tchunkptr R;\ + if (X->bk != X) {\ + tchunkptr F = X->fd;\ + R = X->bk;\ + F->bk = R;\ + R->fd = F;\ + } else {\ + tchunkptr *RP;\ + if (((R = *(RP = &(X->child[1]))) != 0) ||\ + ((R = *(RP = &(X->child[0]))) != 0)) {\ + tchunkptr *CP;\ + while ((*(CP = &(R->child[1])) != 0) ||\ + (*(CP = &(R->child[0])) != 0)) {\ + R = *(RP = CP);\ + }\ + *RP = 0;\ + }\ + }\ + if (XP != 0) {\ + tbinptr *H = treebin_at(M, X->index);\ + if (X == *H) {\ + if ((*H = R) == 0) \ + clear_treemap(M, X->index);\ + } else {\ + if (XP->child[0] == X) \ + XP->child[0] = R;\ + else \ + XP->child[1] = R;\ + }\ + if (R != 0) {\ + tchunkptr C0, C1;\ + R->parent = XP;\ + if ((C0 = X->child[0]) != 0) {\ + R->child[0] = C0;\ + C0->parent = R;\ + }\ + if ((C1 = X->child[1]) != 0) {\ + R->child[1] = C1;\ + C1->parent = R;\ + }\ + }\ + }\ +} + +/* Relays to large vs small bin operations */ + +#define insert_chunk(M, P, S)\ + if (is_small(S)) { insert_small_chunk(M, P, S)\ + } else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); } + +#define unlink_chunk(M, P, S)\ + if (is_small(S)) { unlink_small_chunk(M, P, S)\ + } else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); } + +/* ----------------------- Direct-mmapping chunks ----------------------- */ + +static void *direct_alloc(size_t nb) +{ + size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */ + char *mm = (char *)(DIRECT_MMAP(mmsize)); + if (mm != CMFAIL) { + size_t offset = align_offset(chunk2mem(mm)); + size_t psize = mmsize - offset - DIRECT_FOOT_PAD; + mchunkptr p = (mchunkptr)(mm + offset); + p->prev_foot = offset | IS_DIRECT_BIT; + p->head = psize|CINUSE_BIT; + chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; + return chunk2mem(p); + } + } + return NULL; +} + +static mchunkptr direct_resize(mchunkptr oldp, size_t nb) +{ + size_t oldsize = chunksize(oldp); + if (is_small(nb)) /* Can't shrink direct regions below small size */ + return NULL; + /* Keep old chunk if big enough but not too big */ + if (oldsize >= nb + SIZE_T_SIZE && + (oldsize - nb) <= (DEFAULT_GRANULARITY << 1)) { + return oldp; + } else { + size_t offset = oldp->prev_foot & ~IS_DIRECT_BIT; + size_t oldmmsize = oldsize + offset + DIRECT_FOOT_PAD; + size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + char *cp = (char *)CALL_MREMAP((char *)oldp - offset, + oldmmsize, newmmsize, 1); + if (cp != CMFAIL) { + mchunkptr newp = (mchunkptr)(cp + offset); + size_t psize = newmmsize - offset - DIRECT_FOOT_PAD; + newp->head = psize|CINUSE_BIT; + chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; + return newp; + } + } + return NULL; +} + +/* -------------------------- mspace management -------------------------- */ + +/* Initialize top chunk and its size */ +static void init_top(mstate m, mchunkptr p, size_t psize) +{ + /* Ensure alignment */ + size_t offset = align_offset(chunk2mem(p)); + p = (mchunkptr)((char *)p + offset); + psize -= offset; + + m->top = p; + m->topsize = psize; + p->head = psize | PINUSE_BIT; + /* set size of fake trailing chunk holding overhead space only once */ + chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE; + m->trim_check = DEFAULT_TRIM_THRESHOLD; /* reset on each update */ +} + +/* Initialize bins for a new mstate that is otherwise zeroed out */ +static void init_bins(mstate m) +{ + /* Establish circular links for smallbins */ + 
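/* The smallbin heads set up below are sentinels: bin->fd == bin->bk == bin
** means "empty", so insert_small_chunk()/unlink_small_chunk() never test for
** NULL. The same idiom in miniature:
**
**   struct node { struct node *fd, *bk; };
**   void list_init(struct node *b) { b->fd = b->bk = b; }
**   void list_push(struct node *b, struct node *n) {
**     n->fd = b->fd; n->bk = b; b->fd->bk = n; b->fd = n;
**   }
**   int list_empty(struct node *b) { return b->fd == b; }
*/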
bindex_t i; + for (i = 0; i < NSMALLBINS; i++) { + sbinptr bin = smallbin_at(m,i); + bin->fd = bin->bk = bin; + } +} + +/* Allocate chunk and prepend remainder with chunk in successor base. */ +static void *prepend_alloc(mstate m, char *newbase, char *oldbase, size_t nb) +{ + mchunkptr p = align_as_chunk(newbase); + mchunkptr oldfirst = align_as_chunk(oldbase); + size_t psize = (size_t)((char *)oldfirst - (char *)p); + mchunkptr q = chunk_plus_offset(p, nb); + size_t qsize = psize - nb; + set_size_and_pinuse_of_inuse_chunk(m, p, nb); + + /* consolidate remainder with first chunk of old base */ + if (oldfirst == m->top) { + size_t tsize = m->topsize += qsize; + m->top = q; + q->head = tsize | PINUSE_BIT; + } else if (oldfirst == m->dv) { + size_t dsize = m->dvsize += qsize; + m->dv = q; + set_size_and_pinuse_of_free_chunk(q, dsize); + } else { + if (!cinuse(oldfirst)) { + size_t nsize = chunksize(oldfirst); + unlink_chunk(m, oldfirst, nsize); + oldfirst = chunk_plus_offset(oldfirst, nsize); + qsize += nsize; + } + set_free_with_pinuse(q, qsize, oldfirst); + insert_chunk(m, q, qsize); + } + + return chunk2mem(p); +} + +/* Add a segment to hold a new noncontiguous region */ +static void add_segment(mstate m, char *tbase, size_t tsize) +{ + /* Determine locations and sizes of segment, fenceposts, old top */ + char *old_top = (char *)m->top; + msegmentptr oldsp = segment_holding(m, old_top); + char *old_end = oldsp->base + oldsp->size; + size_t ssize = pad_request(sizeof(struct malloc_segment)); + char *rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + size_t offset = align_offset(chunk2mem(rawsp)); + char *asp = rawsp + offset; + char *csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp; + mchunkptr sp = (mchunkptr)csp; + msegmentptr ss = (msegmentptr)(chunk2mem(sp)); + mchunkptr tnext = chunk_plus_offset(sp, ssize); + mchunkptr p = tnext; + + /* reset top to new space */ + init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); + + /* Set up segment record */ + set_size_and_pinuse_of_inuse_chunk(m, sp, ssize); + *ss = m->seg; /* Push current record */ + m->seg.base = tbase; + m->seg.size = tsize; + m->seg.next = ss; + + /* Insert trailing fenceposts */ + for (;;) { + mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE); + p->head = FENCEPOST_HEAD; + if ((char *)(&(nextp->head)) < old_end) + p = nextp; + else + break; + } + + /* Insert the rest of old top into a bin as an ordinary free chunk */ + if (csp != old_top) { + mchunkptr q = (mchunkptr)old_top; + size_t psize = (size_t)(csp - old_top); + mchunkptr tn = chunk_plus_offset(q, psize); + set_free_with_pinuse(q, psize, tn); + insert_chunk(m, q, psize); + } +} + +/* -------------------------- System allocation -------------------------- */ + +static void *alloc_sys(mstate m, size_t nb) +{ + char *tbase = CMFAIL; + size_t tsize = 0; + + /* Directly map large chunks */ + if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) { + void *mem = direct_alloc(nb); + if (mem != 0) + return mem; + } + + { + size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE; + size_t rsize = granularity_align(req); + if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */ + char *mp = (char *)(CALL_MMAP(rsize)); + if (mp != CMFAIL) { + tbase = mp; + tsize = rsize; + } + } + } + + if (tbase != CMFAIL) { + msegmentptr sp = &m->seg; + /* Try to merge with an existing segment */ + while (sp != 0 && tbase != sp->base + sp->size) + sp = sp->next; + if (sp != 0 && segment_holds(sp, m->top)) { /* append */ + sp->size += tsize; + init_top(m, m->top, m->topsize + 
tsize); + } else { + sp = &m->seg; + while (sp != 0 && sp->base != tbase + tsize) + sp = sp->next; + if (sp != 0) { + char *oldbase = sp->base; + sp->base = tbase; + sp->size += tsize; + return prepend_alloc(m, tbase, oldbase, nb); + } else { + add_segment(m, tbase, tsize); + } + } + + if (nb < m->topsize) { /* Allocate from new or extended top space */ + size_t rsize = m->topsize -= nb; + mchunkptr p = m->top; + mchunkptr r = m->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(m, p, nb); + return chunk2mem(p); + } + } + + return NULL; +} + +/* ----------------------- system deallocation -------------------------- */ + +/* Unmap and unlink any mmapped segments that don't contain used chunks */ +static size_t release_unused_segments(mstate m) +{ + size_t released = 0; + size_t nsegs = 0; + msegmentptr pred = &m->seg; + msegmentptr sp = pred->next; + while (sp != 0) { + char *base = sp->base; + size_t size = sp->size; + msegmentptr next = sp->next; + nsegs++; + { + mchunkptr p = align_as_chunk(base); + size_t psize = chunksize(p); + /* Can unmap if first chunk holds entire segment and not pinned */ + if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) { + tchunkptr tp = (tchunkptr)p; + if (p == m->dv) { + m->dv = 0; + m->dvsize = 0; + } else { + unlink_large_chunk(m, tp); + } + if (CALL_MUNMAP(base, size) == 0) { + released += size; + /* unlink obsoleted record */ + sp = pred; + sp->next = next; + } else { /* back out if cannot unmap */ + insert_large_chunk(m, tp, psize); + } + } + } + pred = sp; + sp = next; + } + /* Reset check counter */ + m->release_checks = nsegs > MAX_RELEASE_CHECK_RATE ? + nsegs : MAX_RELEASE_CHECK_RATE; + return released; +} + +static int alloc_trim(mstate m, size_t pad) +{ + size_t released = 0; + if (pad < MAX_REQUEST && is_initialized(m)) { + pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ + + if (m->topsize > pad) { + /* Shrink top space in granularity-size units, keeping at least one */ + size_t unit = DEFAULT_GRANULARITY; + size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - + SIZE_T_ONE) * unit; + msegmentptr sp = segment_holding(m, (char *)m->top); + + if (sp->size >= extra && + !has_segment_link(m, sp)) { /* can't shrink if pinned */ + size_t newsize = sp->size - extra; + /* Prefer mremap, fall back to munmap */ + if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) || + (CALL_MUNMAP(sp->base + newsize, extra) == 0)) { + released = extra; + } + } + + if (released != 0) { + sp->size -= released; + init_top(m, m->top, m->topsize - released); + } + } + + /* Unmap any unused mmapped segments */ + released += release_unused_segments(m); + + /* On failure, disable autotrim to avoid repeated failed future calls */ + if (released == 0 && m->topsize > m->trim_check) + m->trim_check = MAX_SIZE_T; + } + + return (released != 0)? 
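/* Worked example for the shrink computation in alloc_trim() above, with the
** default 128K granularity: if topsize = 1M and pad = 40K, then
**
**   extra = ((1M - 40K + 128K-1)/128K - 1) * 128K = (8 - 1) * 128K = 896K
**
** so 896K go back to the OS and 128K of top remain; the "- 1" term always
** holds back at least one full granularity unit above the padded request.
*/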
1 : 0; +} + +/* ---------------------------- malloc support --------------------------- */ + +/* allocate a large request from the best fitting chunk in a treebin */ +static void *tmalloc_large(mstate m, size_t nb) +{ + tchunkptr v = 0; + size_t rsize = ~nb+1; /* Unsigned negation */ + tchunkptr t; + bindex_t idx; + compute_tree_index(nb, idx); + + if ((t = *treebin_at(m, idx)) != 0) { + /* Traverse tree for this bin looking for node with size == nb */ + size_t sizebits = nb << leftshift_for_tree_index(idx); + tchunkptr rst = 0; /* The deepest untaken right subtree */ + for (;;) { + tchunkptr rt; + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + v = t; + if ((rsize = trem) == 0) + break; + } + rt = t->child[1]; + t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; + if (rt != 0 && rt != t) + rst = rt; + if (t == 0) { + t = rst; /* set t to least subtree holding sizes > nb */ + break; + } + sizebits <<= 1; + } + } + + if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ + binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; + if (leftbits != 0) + t = *treebin_at(m, lj_ffs(leftbits)); + } + + while (t != 0) { /* find smallest of tree or subtree */ + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + t = leftmost_child(t); + } + + /* If dv is a better fit, return NULL so malloc will use it */ + if (v != 0 && rsize < (size_t)(m->dvsize - nb)) { + mchunkptr r = chunk_plus_offset(v, nb); + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) { + set_inuse_and_pinuse(m, v, (rsize + nb)); + } else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + insert_chunk(m, r, rsize); + } + return chunk2mem(v); + } + return NULL; +} + +/* allocate a small request from the best fitting chunk in a treebin */ +static void *tmalloc_small(mstate m, size_t nb) +{ + tchunkptr t, v; + mchunkptr r; + size_t rsize; + bindex_t i = lj_ffs(m->treemap); + + v = t = *treebin_at(m, i); + rsize = chunksize(t) - nb; + + while ((t = leftmost_child(t)) != 0) { + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + } + + r = chunk_plus_offset(v, nb); + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) { + set_inuse_and_pinuse(m, v, (rsize + nb)); + } else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(m, r, rsize); + } + return chunk2mem(v); +} + +/* ----------------------------------------------------------------------- */ + +void *lj_alloc_create(void) +{ + size_t tsize = DEFAULT_GRANULARITY; + char *tbase = (char *)(CALL_MMAP(tsize)); + if (tbase != CMFAIL) { + size_t msize = pad_request(sizeof(struct malloc_state)); + mchunkptr mn; + mchunkptr msp = align_as_chunk(tbase); + mstate m = (mstate)(chunk2mem(msp)); + memset(m, 0, msize); + msp->head = (msize|PINUSE_BIT|CINUSE_BIT); + m->seg.base = tbase; + m->seg.size = tsize; + m->release_checks = MAX_RELEASE_CHECK_RATE; + init_bins(m); + mn = next_chunk(mem2chunk(m)); + init_top(m, mn, (size_t)((tbase + tsize) - (char *)mn) - TOP_FOOT_SIZE); + return m; + } + return NULL; +} + +void lj_alloc_destroy(void *msp) +{ + mstate ms = (mstate)msp; + msegmentptr sp = &ms->seg; + while (sp != 0) { + char *base = sp->base; + size_t size = sp->size; + sp = sp->next; + CALL_MUNMAP(base, size); + } +} + +static LJ_NOINLINE void *lj_alloc_malloc(void *msp, size_t nsize) +{ + mstate ms = (mstate)msp; + void *mem; + size_t nb; + if (nsize <= MAX_SMALL_REQUEST) { + 
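/* (Worked example for the small-request path below, on a 32-bit build where
** SIZE_T_SIZE == 4 and MIN_CHUNK_SIZE == 16: a request of nsize = 21 pads to
** nb = (21+4+7) & ~7 = 32, so idx = small_index(32) = 32 >> 3 = 4, and the
** (smallbits & 0x3U) test covers both the 32- and the 40-byte bin at once.) */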
bindex_t idx; + binmap_t smallbits; + nb = (nsize < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(nsize); + idx = small_index(nb); + smallbits = ms->smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */ + mchunkptr b, p; + idx += ~smallbits & 1; /* Uses next bin if idx empty */ + b = smallbin_at(ms, idx); + p = b->fd; + unlink_first_small_chunk(ms, b, p, idx); + set_inuse_and_pinuse(ms, p, small_index2size(idx)); + mem = chunk2mem(p); + return mem; + } else if (nb > ms->dvsize) { + if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ + mchunkptr b, p, r; + size_t rsize; + binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); + bindex_t i = lj_ffs(leftbits); + b = smallbin_at(ms, i); + p = b->fd; + unlink_first_small_chunk(ms, b, p, i); + rsize = small_index2size(i) - nb; + /* Fit here cannot be remainderless if 4byte sizes */ + if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) { + set_inuse_and_pinuse(ms, p, small_index2size(i)); + } else { + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + r = chunk_plus_offset(p, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(ms, r, rsize); + } + mem = chunk2mem(p); + return mem; + } else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) { + return mem; + } + } + } else if (nsize >= MAX_REQUEST) { + nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */ + } else { + nb = pad_request(nsize); + if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) { + return mem; + } + } + + if (nb <= ms->dvsize) { + size_t rsize = ms->dvsize - nb; + mchunkptr p = ms->dv; + if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ + mchunkptr r = ms->dv = chunk_plus_offset(p, nb); + ms->dvsize = rsize; + set_size_and_pinuse_of_free_chunk(r, rsize); + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + } else { /* exhaust dv */ + size_t dvs = ms->dvsize; + ms->dvsize = 0; + ms->dv = 0; + set_inuse_and_pinuse(ms, p, dvs); + } + mem = chunk2mem(p); + return mem; + } else if (nb < ms->topsize) { /* Split top */ + size_t rsize = ms->topsize -= nb; + mchunkptr p = ms->top; + mchunkptr r = ms->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + mem = chunk2mem(p); + return mem; + } + return alloc_sys(ms, nb); +} + +static LJ_NOINLINE void *lj_alloc_free(void *msp, void *ptr) +{ + if (ptr != 0) { + mchunkptr p = mem2chunk(ptr); + mstate fm = (mstate)msp; + size_t psize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + size_t prevsize = p->prev_foot; + if ((prevsize & IS_DIRECT_BIT) != 0) { + prevsize &= ~IS_DIRECT_BIT; + psize += prevsize + DIRECT_FOOT_PAD; + CALL_MUNMAP((char *)p - prevsize, psize); + return NULL; + } else { + mchunkptr prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + /* consolidate backward */ + if (p != fm->dv) { + unlink_chunk(fm, p, prevsize); + } else if ((next->head & INUSE_BITS) == INUSE_BITS) { + fm->dvsize = psize; + set_free_with_pinuse(p, psize, next); + return NULL; + } + } + } + if (!cinuse(next)) { /* consolidate forward */ + if (next == fm->top) { + size_t tsize = fm->topsize += psize; + fm->top = p; + p->head = tsize | PINUSE_BIT; + if (p == fm->dv) { + fm->dv = 0; + fm->dvsize = 0; + } + if (tsize > fm->trim_check) + alloc_trim(fm, 0); + return NULL; + } else if (next == fm->dv) { + size_t dsize = fm->dvsize += psize; + fm->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + return NULL; + } else { + size_t nsize = 
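/* (Order of consolidation attempts in lj_alloc_free(): a direct chunk is
** munmapped outright; otherwise the chunk merges backward into a free
** predecessor, then forward into top, into the designated victim dv, or
** into a free successor; whatever remains is rebinned by its merged size.) */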
chunksize(next); + psize += nsize; + unlink_chunk(fm, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == fm->dv) { + fm->dvsize = psize; + return NULL; + } + } + } else { + set_free_with_pinuse(p, psize, next); + } + + if (is_small(psize)) { + insert_small_chunk(fm, p, psize); + } else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(fm, tp, psize); + if (--fm->release_checks == 0) + release_unused_segments(fm); + } + } + return NULL; +} + +static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize) +{ + if (nsize >= MAX_REQUEST) { + return NULL; + } else { + mstate m = (mstate)msp; + mchunkptr oldp = mem2chunk(ptr); + size_t oldsize = chunksize(oldp); + mchunkptr next = chunk_plus_offset(oldp, oldsize); + mchunkptr newp = 0; + size_t nb = request2size(nsize); + + /* Try to either shrink or extend into top. Else malloc-copy-free */ + if (is_direct(oldp)) { + newp = direct_resize(oldp, nb); /* this may return NULL. */ + } else if (oldsize >= nb) { /* already big enough */ + size_t rsize = oldsize - nb; + newp = oldp; + if (rsize >= MIN_CHUNK_SIZE) { + mchunkptr remainder = chunk_plus_offset(newp, nb); + set_inuse(m, newp, nb); + set_inuse(m, remainder, rsize); + lj_alloc_free(m, chunk2mem(remainder)); + } + } else if (next == m->top && oldsize + m->topsize > nb) { + /* Expand into top */ + size_t newsize = oldsize + m->topsize; + size_t newtopsize = newsize - nb; + mchunkptr newtop = chunk_plus_offset(oldp, nb); + set_inuse(m, oldp, nb); + newtop->head = newtopsize |PINUSE_BIT; + m->top = newtop; + m->topsize = newtopsize; + newp = oldp; + } + + if (newp != 0) { + return chunk2mem(newp); + } else { + void *newmem = lj_alloc_malloc(m, nsize); + if (newmem != 0) { + size_t oc = oldsize - overhead_for(oldp); + memcpy(newmem, ptr, oc < nsize ? oc : nsize); + lj_alloc_free(m, ptr); + } + return newmem; + } + } +} + +void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize) +{ + (void)osize; + if (nsize == 0) { + return lj_alloc_free(msp, ptr); + } else if (ptr == NULL) { + return lj_alloc_malloc(msp, nsize); + } else { + return lj_alloc_realloc(msp, ptr, nsize); + } +} + +#endif diff --git a/src/lj_alloc.h b/src/lj_alloc.h new file mode 100644 index 0000000000..f87a7cf342 --- /dev/null +++ b/src/lj_alloc.h @@ -0,0 +1,17 @@ +/* +** Bundled memory allocator. +** Donated to the public domain. +*/ + +#ifndef _LJ_ALLOC_H +#define _LJ_ALLOC_H + +#include "lj_def.h" + +#ifndef LUAJIT_USE_SYSMALLOC +LJ_FUNC void *lj_alloc_create(void); +LJ_FUNC void lj_alloc_destroy(void *msp); +LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize); +#endif + +#endif diff --git a/src/lj_api.c b/src/lj_api.c new file mode 100644 index 0000000000..ea4eaf6683 --- /dev/null +++ b/src/lj_api.c @@ -0,0 +1,1046 @@ +/* +** Public Lua/C API. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Major portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. 
See Copyright Notice in lua.h +*/ + +#define lj_api_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_func.h" +#include "lj_udata.h" +#include "lj_meta.h" +#include "lj_state.h" +#include "lj_frame.h" +#include "lj_trace.h" +#include "lj_vm.h" +#include "lj_lex.h" +#include "lj_parse.h" + +/* -- Common helper functions --------------------------------------------- */ + +#define api_checknelems(L, n) api_check(L, (n) <= (L->top - L->base)) +#define api_checkvalidindex(L, i) api_check(L, (i) != niltv(L)) + +static TValue *index2adr(lua_State *L, int idx) +{ + if (idx > 0) { + TValue *o = L->base + (idx - 1); + return o < L->top ? o : niltv(L); + } else if (idx > LUA_REGISTRYINDEX) { + api_check(L, idx != 0 && -idx <= L->top - L->base); + return L->top + idx; + } else if (idx == LUA_GLOBALSINDEX) { + TValue *o = &G(L)->tmptv; + settabV(L, o, tabref(L->env)); + return o; + } else if (idx == LUA_REGISTRYINDEX) { + return registry(L); + } else { + GCfunc *fn = curr_func(L); + api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn)); + if (idx == LUA_ENVIRONINDEX) { + TValue *o = &G(L)->tmptv; + settabV(L, o, tabref(fn->c.env)); + return o; + } else { + idx = LUA_GLOBALSINDEX - idx; + return idx <= fn->c.nupvalues ? &fn->c.upvalue[idx-1] : niltv(L); + } + } +} + +static TValue *stkindex2adr(lua_State *L, int idx) +{ + if (idx > 0) { + TValue *o = L->base + (idx - 1); + return o < L->top ? o : niltv(L); + } else { + api_check(L, idx != 0 && -idx <= L->top - L->base); + return L->top + idx; + } +} + +static GCtab *getcurrenv(lua_State *L) +{ + GCfunc *fn = curr_func(L); + return fn->c.gct == ~LJ_TFUNC ? tabref(fn->c.env) : tabref(L->env); +} + +/* -- Miscellaneous API functions ----------------------------------------- */ + +LUA_API int lua_status(lua_State *L) +{ + return L->status; +} + +LUA_API int lua_checkstack(lua_State *L, int size) +{ + if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) { + return 0; /* Stack overflow. */ + } else if (size > 0) { + lj_state_checkstack(L, (MSize)size); + } + return 1; +} + +LUA_API void lua_xmove(lua_State *from, lua_State *to, int n) +{ + TValue *f, *t; + if (from == to) return; + api_checknelems(from, n); + api_check(from, G(from) == G(to)); + lj_state_checkstack(to, (MSize)n); + f = from->top; + t = to->top = to->top + n; + while (--n >= 0) copyTV(to, --t, --f); + from->top = f; +} + +/* -- Stack manipulation -------------------------------------------------- */ + +LUA_API int lua_gettop(lua_State *L) +{ + return cast_int(L->top - L->base); +} + +LUA_API void lua_settop(lua_State *L, int idx) +{ + if (idx >= 0) { + api_check(L, idx <= L->maxstack - L->base); + if (L->base + idx > L->top) { + if (L->base + idx >= L->maxstack) + lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base)); + do { setnilV(L->top++); } while (L->top < L->base + idx); + } else { + L->top = L->base + idx; + } + } else { + api_check(L, -(idx+1) <= (L->top - L->base)); + L->top += idx+1; /* Shrinks top (idx < 0). 
*/ + } +} + +LUA_API void lua_remove(lua_State *L, int idx) +{ + TValue *p = stkindex2adr(L, idx); + api_checkvalidindex(L, p); + while (++p < L->top) copyTV(L, p-1, p); + L->top--; +} + +LUA_API void lua_insert(lua_State *L, int idx) +{ + TValue *q, *p = stkindex2adr(L, idx); + api_checkvalidindex(L, p); + for (q = L->top; q > p; q--) copyTV(L, q, q-1); + copyTV(L, p, L->top); +} + +LUA_API void lua_replace(lua_State *L, int idx) +{ + api_checknelems(L, 1); + if (idx == LUA_GLOBALSINDEX) { + api_check(L, tvistab(L->top-1)); + /* NOBARRIER: A thread (i.e. L) is never black. */ + setgcref(L->env, obj2gco(tabV(L->top-1))); + } else if (idx == LUA_ENVIRONINDEX) { + GCfunc *fn = curr_func(L); + if (fn->c.gct != ~LJ_TFUNC) + lj_err_msg(L, LJ_ERR_NOENV); + api_check(L, tvistab(L->top-1)); + setgcref(fn->c.env, obj2gco(tabV(L->top-1))); + lj_gc_barrier(L, fn, L->top-1); + } else { + TValue *o = index2adr(L, idx); + api_checkvalidindex(L, o); + copyTV(L, o, L->top-1); + if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ + lj_gc_barrier(L, curr_func(L), L->top-1); + } + L->top--; +} + +LUA_API void lua_pushvalue(lua_State *L, int idx) +{ + copyTV(L, L->top, index2adr(L, idx)); + incr_top(L); +} + +/* -- Stack getters ------------------------------------------------------- */ + +LUA_API int lua_type(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + if (tvisnum(o)) { + return LUA_TNUMBER; +#if LJ_64 + } else if (tvislightud(o)) { + return LUA_TLIGHTUSERDATA; +#endif + } else if (o == niltv(L)) { + return LUA_TNONE; + } else { /* Magic internal/external tag conversion. ORDER LJ_T */ + int t = ~itype(o); + return (int)(((t < 8 ? 0x98a42110 : 0x75b6) >> 4*(t&7)) & 15u); + } +} + +LUA_API const char *lua_typename(lua_State *L, int t) +{ + UNUSED(L); + return lj_obj_typename[t+1]; +} + +LUA_API int lua_iscfunction(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + return !isluafunc(funcV(o)); +} + +LUA_API int lua_isnumber(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + TValue tmp; + return (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))); +} + +LUA_API int lua_isstring(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + return (tvisstr(o) || tvisnum(o)); +} + +LUA_API int lua_isuserdata(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + return (tvisudata(o) || tvislightud(o)); +} + +LUA_API int lua_rawequal(lua_State *L, int idx1, int idx2) +{ + cTValue *o1 = index2adr(L, idx1); + cTValue *o2 = index2adr(L, idx2); + return (o1 == niltv(L) || o2 == niltv(L)) ? 
0 : lj_obj_equal(o1, o2); +} + +LUA_API int lua_equal(lua_State *L, int idx1, int idx2) +{ + cTValue *o1 = index2adr(L, idx1); + cTValue *o2 = index2adr(L, idx2); + if (tvisnum(o1) && tvisnum(o2)) { + return numV(o1) == numV(o2); + } else if (itype(o1) != itype(o2)) { + return 0; + } else if (tvispri(o1)) { + return o1 != niltv(L) && o2 != niltv(L); +#if LJ_64 + } else if (tvislightud(o1)) { + return o1->u64 == o2->u64; +#endif + } else if (gcrefeq(o1->gcr, o2->gcr)) { + return 1; + } else if (!tvistabud(o1)) { + return 0; + } else { + TValue *base = lj_meta_equal(L, gcV(o1), gcV(o2), 0); + if ((uintptr_t)base <= 1) { + return (int)(uintptr_t)base; + } else { + L->top = base+2; + lj_vm_call(L, base, 1+1); + L->top -= 2; + return tvistruecond(L->top+1); + } + } +} + +LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2) +{ + cTValue *o1 = index2adr(L, idx1); + cTValue *o2 = index2adr(L, idx2); + if (o1 == niltv(L) || o2 == niltv(L)) { + return 0; + } else if (tvisnum(o1) && tvisnum(o2)) { + return numV(o1) < numV(o2); + } else { + TValue *base = lj_meta_comp(L, o1, o2, 0); + if ((uintptr_t)base <= 1) { + return (int)(uintptr_t)base; + } else { + L->top = base+2; + lj_vm_call(L, base, 1+1); + L->top -= 2; + return tvistruecond(L->top+1); + } + } +} + +LUA_API lua_Number lua_tonumber(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + TValue tmp; + if (LJ_LIKELY(tvisnum(o))) + return numV(o); + else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) + return numV(&tmp); + else + return 0; +} + +LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + TValue tmp; + lua_Number n; + if (LJ_LIKELY(tvisnum(o))) + n = numV(o); + else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) + n = numV(&tmp); + else + return 0; +#if LJ_64 + return (lua_Integer)n; +#else + return lj_num2int(n); +#endif +} + +LUA_API int lua_toboolean(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + return tvistruecond(o); +} + +LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len) +{ + TValue *o = index2adr(L, idx); + GCstr *s; + if (LJ_LIKELY(tvisstr(o))) { + s = strV(o); + } else if (tvisnum(o)) { + lj_gc_check(L); + o = index2adr(L, idx); /* GC may move the stack. */ + s = lj_str_fromnum(L, &o->n); + } else { + if (len != NULL) *len = 0; + return NULL; + } + if (len != NULL) *len = s->len; + return strdata(s); +} + +LUA_API size_t lua_objlen(lua_State *L, int idx) +{ + TValue *o = index2adr(L, idx); + if (tvisstr(o)) + return strV(o)->len; + else if (tvistab(o)) + return cast(size_t, lj_tab_len(tabV(o))); + else if (tvisudata(o)) + return udataV(o)->len; + else if (tvisnum(o)) + return lj_str_fromnum(L, &o->n)->len; + else + return 0; +} + +LUA_API lua_CFunction lua_tocfunction(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + return funcV(o)->c.gate == lj_gate_c ? funcV(o)->c.f : NULL; +} + +LUA_API void *lua_touserdata(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + if (tvisudata(o)) + return uddata(udataV(o)); + else if (tvislightud(o)) + return lightudV(o); + else + return NULL; +} + +LUA_API lua_State *lua_tothread(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + return (!tvisthread(o)) ? 
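/* Note the re-fetch in lua_tolstring() above: lj_gc_check() may run a GC
** step, and the GC can move the stack, so the slot pointer is recomputed
** before the number->string conversion. Typical use:
**
**   lua_pushnumber(L, 3.5);
**   size_t len;
**   const char *s = lua_tolstring(L, -1, &len);  (s == "3.5", len == 3)
*/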
NULL : threadV(o); +} + +LUA_API const void *lua_topointer(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + if (tvisudata(o)) + return uddata(udataV(o)); + else if (tvislightud(o)) + return lightudV(o); + else if (tvisgcv(o)) + return gcV(o); + else + return NULL; +} + +/* -- Stack setters (object creation) ------------------------------------- */ + +LUA_API void lua_pushnil(lua_State *L) +{ + setnilV(L->top); + incr_top(L); +} + +LUA_API void lua_pushnumber(lua_State *L, lua_Number n) +{ + setnumV(L->top, n); + if (LJ_UNLIKELY(tvisnan(L->top))) + setnanV(L->top); /* Canonicalize injected NaNs. */ + incr_top(L); +} + +LUA_API void lua_pushinteger(lua_State *L, lua_Integer n) +{ + setnumV(L->top, cast_num(n)); + incr_top(L); +} + +LUA_API void lua_pushlstring(lua_State *L, const char *str, size_t len) +{ + GCstr *s; + lj_gc_check(L); + s = lj_str_new(L, str, len); + setstrV(L, L->top, s); + incr_top(L); +} + +LUA_API void lua_pushstring(lua_State *L, const char *str) +{ + if (str == NULL) { + setnilV(L->top); + } else { + GCstr *s; + lj_gc_check(L); + s = lj_str_newz(L, str); + setstrV(L, L->top, s); + } + incr_top(L); +} + +LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt, + va_list argp) +{ + lj_gc_check(L); + return lj_str_pushvf(L, fmt, argp); +} + +LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) +{ + const char *ret; + va_list argp; + lj_gc_check(L); + va_start(argp, fmt); + ret = lj_str_pushvf(L, fmt, argp); + va_end(argp); + return ret; +} + +LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n) +{ + GCfunc *fn; + lj_gc_check(L); + api_checknelems(L, n); + fn = lj_func_newC(L, (MSize)n, getcurrenv(L)); + fn->c.f = f; + L->top -= n; + while (n--) + copyTV(L, &fn->c.upvalue[n], L->top+n); + setfuncV(L, L->top, fn); + lua_assert(iswhite(obj2gco(fn))); + incr_top(L); +} + +LUA_API void lua_pushboolean(lua_State *L, int b) +{ + setboolV(L->top, (b != 0)); + incr_top(L); +} + +LUA_API void lua_pushlightuserdata(lua_State *L, void *p) +{ + setlightudV(L->top, checklightudptr(L, p)); + incr_top(L); +} + +LUA_API void lua_createtable(lua_State *L, int narray, int nrec) +{ + GCtab *t; + lj_gc_check(L); + t = lj_tab_new(L, (uint32_t)(narray > 0 ? 
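/* Sketch of lua_pushcclosure() above from the embedder's side; my_fn is a
** hypothetical lua_CFunction that reads its upvalue via lua_upvalueindex(1):
**
**   lua_pushinteger(L, 42);         (upvalue 1)
**   lua_pushcclosure(L, my_fn, 1);  (pops 42 into the new closure)
**   lua_setglobal(L, "f");
*/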
narray+1 : 0), hsize2hbits(nrec)); + settabV(L, L->top, t); + incr_top(L); +} + +LUALIB_API int luaL_newmetatable(lua_State *L, const char *tname) +{ + GCtab *regt = tabV(registry(L)); + TValue *tv = lj_tab_setstr(L, regt, lj_str_newz(L, tname)); + if (tvisnil(tv)) { + GCtab *mt = lj_tab_new(L, 0, 1); + settabV(L, tv, mt); + settabV(L, L->top++, mt); + lj_gc_objbarriert(L, regt, mt); + return 1; + } else { + copyTV(L, L->top++, tv); + return 0; + } +} + +LUA_API int lua_pushthread(lua_State *L) +{ + setthreadV(L, L->top, L); + incr_top(L); + return (mainthread(G(L)) == L); +} + +LUA_API lua_State *lua_newthread(lua_State *L) +{ + lua_State *L1; + lj_gc_check(L); + L1 = lj_state_new(L); + setthreadV(L, L->top, L1); + incr_top(L); + return L1; +} + +LUA_API void *lua_newuserdata(lua_State *L, size_t size) +{ + GCudata *ud; + lj_gc_check(L); + if (size > LJ_MAX_UDATA) + lj_err_msg(L, LJ_ERR_UDATAOV); + ud = lj_udata_new(L, (MSize)size, getcurrenv(L)); + setudataV(L, L->top, ud); + incr_top(L); + return uddata(ud); +} + +LUA_API void lua_concat(lua_State *L, int n) +{ + api_checknelems(L, n); + if (n >= 2) { + n--; + do { + TValue *top = lj_meta_cat(L, L->top-1, n); + if (top == NULL) { + L->top -= n; + break; + } + n -= cast_int(L->top - top); + L->top = top+2; + lj_vm_call(L, top, 1+1); + L->top--; + copyTV(L, L->top-1, L->top); + } while (--n > 0); + } else if (n == 0) { /* Push empty string. */ + setstrV(L, L->top, lj_str_new(L, "", 0)); + incr_top(L); + } + /* else n == 1: nothing to do. */ +} + +/* -- Object getters ------------------------------------------------------ */ + +LUA_API void lua_gettable(lua_State *L, int idx) +{ + cTValue *v, *t = index2adr(L, idx); + api_checkvalidindex(L, t); + v = lj_meta_tget(L, t, L->top-1); + if (v == NULL) { + L->top += 2; + lj_vm_call(L, L->top-2, 1+1); + L->top -= 2; + v = L->top+1; + } + copyTV(L, L->top-1, v); +} + +LUA_API void lua_getfield(lua_State *L, int idx, const char *k) +{ + cTValue *v, *t = index2adr(L, idx); + TValue key; + api_checkvalidindex(L, t); + setstrV(L, &key, lj_str_newz(L, k)); + v = lj_meta_tget(L, t, &key); + if (v == NULL) { + L->top += 2; + lj_vm_call(L, L->top-2, 1+1); + L->top -= 2; + v = L->top+1; + } + copyTV(L, L->top, v); + incr_top(L); +} + +LUA_API void lua_rawget(lua_State *L, int idx) +{ + cTValue *t = index2adr(L, idx); + api_check(L, tvistab(t)); + copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1)); +} + +LUA_API void lua_rawgeti(lua_State *L, int idx, int n) +{ + cTValue *v, *t = index2adr(L, idx); + api_check(L, tvistab(t)); + v = lj_tab_getint(tabV(t), n); + if (v) { + copyTV(L, L->top, v); + } else { + setnilV(L->top); + } + incr_top(L); +} + +LUA_API int lua_getmetatable(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + GCtab *mt = NULL; + if (tvistab(o)) + mt = tabref(tabV(o)->metatable); + else if (tvisudata(o)) + mt = tabref(udataV(o)->metatable); + else + mt = tabref(G(L)->basemt[itypemap(o)]); + if (mt == NULL) + return 0; + settabV(L, L->top, mt); + incr_top(L); + return 1; +} + +LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field) +{ + if (lua_getmetatable(L, idx)) { + cTValue *tv = lj_tab_getstr(tabV(L->top-1), lj_str_newz(L, field)); + if (tv && !tvisnil(tv)) { + copyTV(L, L->top-1, tv); + return 1; + } + L->top--; + } + return 0; +} + +LUA_API void lua_getfenv(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + api_checkvalidindex(L, o); + if (tvisfunc(o)) { + settabV(L, L->top, tabref(funcV(o)->c.env)); + } else if (tvisudata(o)) { + settabV(L, 
L->top, tabref(udataV(o)->env)); + } else if (tvisthread(o)) { + settabV(L, L->top, tabref(threadV(o)->env)); + } else { + setnilV(L->top); + } + incr_top(L); +} + +LUA_API int lua_next(lua_State *L, int idx) +{ + cTValue *t = index2adr(L, idx); + int more; + api_check(L, tvistab(t)); + more = lj_tab_next(L, tabV(t), L->top-1); + if (more) { + incr_top(L); /* Return new key and value slot. */ + } else { /* End of traversal. */ + L->top--; /* Remove key slot. */ + } + return more; +} + +static const char *aux_upvalue(cTValue *f, uint32_t idx, TValue **val) +{ + GCfunc *fn; + if (!tvisfunc(f)) return NULL; + fn = funcV(f); + if (isluafunc(fn)) { + GCproto *pt = funcproto(fn); + if (idx < pt->sizeuvname) { + *val = gcref(fn->l.uvptr[idx])->uv.v; + return strdata(pt->uvname[idx]); + } + } else { + if (idx < fn->c.nupvalues) { + *val = &fn->c.upvalue[idx]; + return ""; + } + } + return NULL; +} + +LUA_API const char *lua_getupvalue(lua_State *L, int idx, int n) +{ + TValue *val; + const char *name = aux_upvalue(index2adr(L, idx), (uint32_t)(n-1), &val); + if (name) { + copyTV(L, L->top, val); + incr_top(L); + } + return name; +} + +LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) +{ + cTValue *o = index2adr(L, idx); + if (tvisudata(o)) { + GCudata *ud = udataV(o); + cTValue *tv = lj_tab_getstr(tabV(registry(L)), lj_str_newz(L, tname)); + if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable)) + return uddata(ud); + } + lj_err_argtype(L, idx, tname); + return NULL; /* unreachable */ +} + +/* -- Object setters ------------------------------------------------------ */ + +LUA_API void lua_settable(lua_State *L, int idx) +{ + TValue *o; + cTValue *t = index2adr(L, idx); + api_checknelems(L, 2); + api_checkvalidindex(L, t); + o = lj_meta_tset(L, t, L->top-2); + if (o) { + /* NOBARRIER: lj_meta_tset ensures the table is not black. */ + copyTV(L, o, L->top-1); + L->top -= 2; + } else { + L->top += 3; + copyTV(L, L->top-1, L->top-6); + lj_vm_call(L, L->top-3, 0+1); + L->top -= 3; + } +} + +LUA_API void lua_setfield(lua_State *L, int idx, const char *k) +{ + TValue *o; + TValue key; + cTValue *t = index2adr(L, idx); + api_checknelems(L, 1); + api_checkvalidindex(L, t); + setstrV(L, &key, lj_str_newz(L, k)); + o = lj_meta_tset(L, t, &key); + if (o) { + L->top--; + /* NOBARRIER: lj_meta_tset ensures the table is not black. 
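+** (Storing a possibly-white value into a black table would break the
+** tri-color invariant; since the table cannot be black here, the write
+** barrier for this store can safely be omitted.)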
*/ + copyTV(L, o, L->top); + } else { + L->top += 3; + copyTV(L, L->top-1, L->top-6); + lj_vm_call(L, L->top-3, 0+1); + L->top -= 2; + } +} + +LUA_API void lua_rawset(lua_State *L, int idx) +{ + GCtab *t = tabV(index2adr(L, idx)); + TValue *dst, *key; + api_checknelems(L, 2); + key = L->top-2; + dst = lj_tab_set(L, t, key); + copyTV(L, dst, key+1); + lj_gc_barriert(L, t, dst); + L->top = key; +} + +LUA_API void lua_rawseti(lua_State *L, int idx, int n) +{ + GCtab *t = tabV(index2adr(L, idx)); + TValue *dst, *src; + api_checknelems(L, 1); + dst = lj_tab_setint(L, t, n); + src = L->top-1; + copyTV(L, dst, src); + lj_gc_barriert(L, t, dst); + L->top = src; +} + +LUA_API int lua_setmetatable(lua_State *L, int idx) +{ + global_State *g; + GCtab *mt; + cTValue *o = index2adr(L, idx); + api_checknelems(L, 1); + api_checkvalidindex(L, o); + if (tvisnil(L->top-1)) { + mt = NULL; + } else { + api_check(L, tvistab(L->top-1)); + mt = tabV(L->top-1); + } + g = G(L); + if (tvistab(o)) { + setgcref(tabV(o)->metatable, obj2gco(mt)); + if (mt) + lj_gc_objbarriert(L, tabV(o), mt); + } else if (tvisudata(o)) { + setgcref(udataV(o)->metatable, obj2gco(mt)); + if (mt) + lj_gc_objbarrier(L, udataV(o), mt); + } else { + /* Flush cache, since traces specialize to basemt. But not during __gc. */ + if (lj_trace_flushall(L)) + lj_err_caller(L, LJ_ERR_NOGCMM); + if (tvisbool(o)) { + /* NOBARRIER: g->basemt[] is a GC root. */ + setgcref(g->basemt[~LJ_TTRUE], obj2gco(mt)); + setgcref(g->basemt[~LJ_TFALSE], obj2gco(mt)); + } else { + /* NOBARRIER: g->basemt[] is a GC root. */ + setgcref(g->basemt[itypemap(o)], obj2gco(mt)); + } + } + L->top--; + return 1; +} + +LUA_API int lua_setfenv(lua_State *L, int idx) +{ + cTValue *o = index2adr(L, idx); + GCtab *t; + api_checknelems(L, 1); + api_checkvalidindex(L, o); + api_check(L, tvistab(L->top-1)); + t = tabV(L->top-1); + if (tvisfunc(o)) { + setgcref(funcV(o)->c.env, obj2gco(t)); + } else if (tvisudata(o)) { + setgcref(udataV(o)->env, obj2gco(t)); + } else if (tvisthread(o)) { + setgcref(threadV(o)->env, obj2gco(t)); + } else { + L->top--; + return 0; + } + lj_gc_objbarrier(L, gcV(o), t); + L->top--; + return 1; +} + +LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n) +{ + cTValue *f = index2adr(L, idx); + TValue *val; + const char *name; + api_checknelems(L, 1); + name = aux_upvalue(f, (uint32_t)(n-1), &val); + if (name) { + L->top--; + copyTV(L, val, L->top); + lj_gc_barrier(L, funcV(f), L->top); + } + return name; +} + +/* -- Calls --------------------------------------------------------------- */ + +LUA_API void lua_call(lua_State *L, int nargs, int nresults) +{ + api_checknelems(L, nargs+1); + lj_vm_call(L, L->top - nargs, nresults+1); +} + +LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) +{ + global_State *g = G(L); + uint8_t oldh = hook_save(g); + ptrdiff_t ef; + int status; + api_checknelems(L, nargs+1); + if (errfunc == 0) { + ef = 0; + } else { + cTValue *o = stkindex2adr(L, errfunc); + api_checkvalidindex(L, o); + ef = savestack(L, o); + } + status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef); + if (status) hook_restore(g, oldh); + return status; +} + +static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud) +{ + GCfunc *fn; + fn = lj_func_newC(L, 0, getcurrenv(L)); + fn->c.f = func; + setfuncV(L, L->top, fn); + setlightudV(L->top+1, checklightudptr(L, ud)); + cframe_nres(L->cframe) = 1+0; /* Zero results. */ + L->top += 2; + return L->top-1; /* Now call the newly allocated C function. 
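+** The closure sits just below the returned slot and takes the pushed
+** lightuserdata as its single argument; cframe_nres was set above for
+** zero results.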
*/ +} + +LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) +{ + global_State *g = G(L); + uint8_t oldh = hook_save(g); + int status = lj_vm_cpcall(L, cpcall, func, ud); + if (status) hook_restore(g, oldh); + return status; +} + +LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) +{ + if (luaL_getmetafield(L, idx, field)) { + TValue *base = L->top--; + copyTV(L, base, index2adr(L, idx)); + L->top = base+1; + lj_vm_call(L, base, 1+1); + return 1; + } + return 0; +} + +/* -- Coroutine yield and resume ------------------------------------------ */ + +LUA_API int lua_yield(lua_State *L, int nresults) +{ + void *cf = L->cframe; + cTValue *f; + if (!cframe_canyield(cf)) + lj_err_msg(L, LJ_ERR_CYIELD); + f = L->top - nresults; + if (f > L->base) { + TValue *t = L->base; + while (--nresults >= 0) copyTV(L, t++, f++); + L->top = t; + } + L->cframe = NULL; + L->status = LUA_YIELD; + lj_vm_unwind_c(cf, LUA_YIELD); + return -1; /* unreachable */ +} + +LUA_API int lua_resume(lua_State *L, int nargs) +{ + if (L->cframe == NULL && L->status <= LUA_YIELD) + return lj_vm_resume(L, L->top - nargs, 0, 0); + L->top = L->base; + setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP)); + incr_top(L); + return LUA_ERRRUN; +} + +/* -- Load and dump Lua code ---------------------------------------------- */ + +static TValue *cpparser(lua_State *L, lua_CFunction dummy, void *ud) +{ + LexState *ls = cast(LexState *, ud); + GCfunc *fn; + UNUSED(dummy); + cframe_errfunc(L->cframe) = -1; /* Inherit error function. */ + lj_lex_start(L, ls); + fn = lj_func_newL(L, lj_parse(ls), tabref(L->env)); + /* Parser may realloc stack. Don't combine above/below into one statement. */ + setfuncV(L, L->top++, fn); + return NULL; +} + +LUA_API int lua_load(lua_State *L, lua_Reader reader, void *data, + const char *chunkname) +{ + LexState ls; + int status; + global_State *g; + ls.rfunc = reader; + ls.rdata = data; + ls.chunkarg = chunkname ? chunkname : "?"; + lj_str_initbuf(L, &ls.sb); + status = lj_vm_cpcall(L, cpparser, NULL, &ls); + g = G(L); + lj_str_freebuf(g, &ls.sb); + lj_gc_check(L); + return status; +} + +LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data) +{ + api_checknelems(L, 1); + UNUSED(L); UNUSED(writer); UNUSED(data); + return 1; /* Error, not supported. */ +} + +/* -- GC and memory management -------------------------------------------- */ + +LUA_API int lua_gc(lua_State *L, int what, int data) +{ + global_State *g = G(L); + int res = 0; + switch (what) { + case LUA_GCSTOP: + g->gc.threshold = LJ_MAX_MEM; + break; + case LUA_GCRESTART: + g->gc.threshold = g->gc.total; + break; + case LUA_GCCOLLECT: + lj_gc_fullgc(L); + break; + case LUA_GCCOUNT: + res = cast_int(g->gc.total >> 10); + break; + case LUA_GCCOUNTB: + res = cast_int(g->gc.total & 0x3ff); + break; + case LUA_GCSTEP: { + MSize a = (MSize)data << 10; + g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0; + while (g->gc.total >= g->gc.threshold) + if (lj_gc_step(L)) { + res = 1; + break; + } + break; + } + case LUA_GCSETPAUSE: + res = cast_int(g->gc.pause); + g->gc.pause = (MSize)data; + break; + case LUA_GCSETSTEPMUL: + res = cast_int(g->gc.stepmul); + g->gc.stepmul = (MSize)data; + break; + default: + res = -1; /* Invalid option. 
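+** Returning -1 for an unknown option matches the reference Lua 5.1
+** implementation, which also does not raise an error here.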
*/ + } + return res; +} + +LUA_API lua_Alloc lua_getallocf(lua_State *L, void **ud) +{ + global_State *g = G(L); + if (ud) *ud = g->allocd; + return g->allocf; +} + +LUA_API void lua_setallocf(lua_State *L, lua_Alloc f, void *ud) +{ + global_State *g = G(L); + g->allocd = ud; + g->allocf = f; +} + diff --git a/src/lj_arch.h b/src/lj_arch.h new file mode 100644 index 0000000000..abdb5af914 --- /dev/null +++ b/src/lj_arch.h @@ -0,0 +1,88 @@ +/* +** Target architecture selection. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_ARCH_H +#define _LJ_ARCH_H + +#include "lua.h" + + +/* Target endianess. */ +#define LUAJIT_LE 0 +#define LUAJIT_BE 1 + +/* Target architectures. */ +#define LUAJIT_ARCH_X86 1 +#define LUAJIT_ARCH_x86 1 +#define LUAJIT_ARCH_X64 2 +#define LUAJIT_ARCH_x64 2 + + +/* Select native target if no target defined. */ +#ifndef LUAJIT_TARGET + +#if defined(__i386) || defined(__i386__) || defined(_M_IX86) +#define LUAJIT_TARGET LUAJIT_ARCH_X86 +#elif defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +#define LUAJIT_TARGET LUAJIT_ARCH_X64 +#else +#error "No support for this architecture (yet)" +#endif + +#endif + +/* Set target properties. */ +#if LUAJIT_TARGET == LUAJIT_ARCH_X86 +#define LJ_ARCH_NAME "x86" +#define LJ_ARCH_BITS 32 +#define LJ_ARCH_ENDIAN LUAJIT_LE +#define LJ_TARGET_X86 1 +#define LJ_TARGET_X86ORX64 1 +#define LJ_PAGESIZE 4096 +#elif LUAJIT_TARGET == LUAJIT_ARCH_X64 +#define LJ_ARCH_NAME "x64" +#define LJ_ARCH_BITS 64 +#define LJ_ARCH_ENDIAN LUAJIT_LE +#define LJ_TARGET_X64 1 +#define LJ_TARGET_X86ORX64 1 +#define LJ_PAGESIZE 4096 +#error "No support for x64 architecture (yet)" +#else +#error "No target architecture defined" +#endif + +/* Disable or enable the JIT compiler. */ +#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) +#define LJ_HASJIT 0 +#else +#define LJ_HASJIT 1 +#endif + +#if LJ_ARCH_ENDIAN == LUAJIT_BE +#define LJ_ENDIAN_SELECT(le, be) be +#define LJ_ENDIAN_LOHI(lo, hi) hi lo +#else +#define LJ_ENDIAN_SELECT(le, be) le +#define LJ_ENDIAN_LOHI(lo, hi) lo hi +#endif + +#if LJ_ARCH_BITS == 32 +#define LJ_32 1 +#define LJ_64 0 +#elif LJ_ARCH_BITS == 64 +#define LJ_32 0 +#define LJ_64 1 +#else +#error "Bad LJ_ARCH_BITS setting" +#endif + +/* Whether target CPU masks the shift count by the operand length or not. */ +#if LJ_TARGET_X86ORX64 +#define LJ_TARGET_MASKEDSHIFT 1 +#else +#define LJ_TARGET_MASKEDSHIFT 0 +#endif + +#endif diff --git a/src/lj_asm.c b/src/lj_asm.c new file mode 100644 index 0000000000..b89b854382 --- /dev/null +++ b/src/lj_asm.c @@ -0,0 +1,3324 @@ +/* +** IR assembler (SSA IR -> machine code). +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_asm_c +#define LUA_CORE + +#include "lj_obj.h" + +#if LJ_HASJIT + +#include "lj_gc.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_ir.h" +#include "lj_jit.h" +#include "lj_iropt.h" +#include "lj_mcode.h" +#include "lj_iropt.h" +#include "lj_trace.h" +#include "lj_snap.h" +#include "lj_asm.h" +#include "lj_dispatch.h" +#include "lj_vm.h" +#include "lj_target.h" + +/* -- Assembler state and common macros ----------------------------------- */ + +/* Assembler state. */ +typedef struct ASMState { + RegCost cost[RID_MAX]; /* Reference and blended allocation cost for regs. */ + + MCode *mcp; /* Current MCode pointer (grows down). */ + MCode *mclim; /* Lower limit for MCode memory + red zone. */ + + IRIns *ir; /* Copy of pointer to IR instructions/constants. 
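+** The IR(ref) macro below indexes this array with biased references:
+** constants sit below REF_BIAS, instructions at or above it.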
*/ + jit_State *J; /* JIT compiler state. */ + + x86ModRM mrm; /* Fused x86 address operand. */ + + RegSet freeset; /* Set of free registers. */ + RegSet modset; /* Set of registers modified inside the loop. */ + RegSet phiset; /* Set of PHI registers. */ + + uint32_t flags; /* Copy of JIT compiler flags. */ + int loopinv; /* Loop branch inversion (0:no, 1:yes, 2:yes+CC_P). */ + + int32_t evenspill; /* Next even spill slot. */ + int32_t oddspill; /* Next odd spill slot (or 0). */ + + IRRef curins; /* Reference of current instruction. */ + IRRef stopins; /* Stop assembly before hitting this instruction. */ + IRRef orignins; /* Original T->nins. */ + + IRRef snapref; /* Current snapshot is active after this reference. */ + IRRef snaprename; /* Rename highwater mark for snapshot check. */ + SnapNo snapno; /* Current snapshot number. */ + SnapNo loopsnapno; /* Loop snapshot number. */ + + Trace *T; /* Trace to assemble. */ + Trace *parent; /* Parent trace (or NULL). */ + + IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */ + IRRef sectref; /* Section base reference (loopref or 0). */ + IRRef loopref; /* Reference of LOOP instruction (or 0). */ + + BCReg topslot; /* Number of slots for stack check (unless 0). */ + MSize gcsteps; /* Accumulated number of GC steps (per section). */ + + MCode *mcbot; /* Bottom of reserved MCode. */ + MCode *mctop; /* Top of generated MCode. */ + MCode *mcloop; /* Pointer to loop MCode (or NULL). */ + MCode *invmcp; /* Points to invertible loop branch (or NULL). */ + MCode *testmcp; /* Pending opportunity to remove test r,r. */ + MCode *realign; /* Realign loop if not NULL. */ + + IRRef1 phireg[RID_MAX]; /* PHI register references. */ + uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent slot to RegSP map. */ +} ASMState; + +#define IR(ref) (&as->ir[(ref)]) + +/* Check for variant to invariant references. */ +#define iscrossref(as, ref) ((ref) < as->sectref) + +/* Inhibit memory op fusion from variant to invariant references. */ +#define FUSE_DISABLED (~(IRRef)0) +#define mayfuse(as, ref) ((ref) > as->fuseref) +#define neverfuse(as) (as->fuseref == FUSE_DISABLED) +#define opisfusableload(o) \ + ((o) == IR_ALOAD || (o) == IR_HLOAD || (o) == IR_ULOAD || \ + (o) == IR_FLOAD || (o) == IR_SLOAD || (o) == IR_XLOAD) + +/* Instruction selection for XMM moves. */ +#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS) +#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD) + +/* Sparse limit checks using a red zone before the actual limit. 
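+** Emitters may write a bounded number of bytes between two checkmclim()
+** calls, so checking against mcbot+MCLIM_REDZONE instead of mcbot
+** keeps any writes past the check point inside the reserved area.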
*/ +#define MCLIM_REDZONE 64 +#define checkmclim(as) \ + if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as) + +static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as) +{ + lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE)); +} + +/* -- Emit x86 instructions ----------------------------------------------- */ + +#define MODRM(mode, r1, r2) ((MCode)((mode)+(((r1)&7)<<3)+((r2)&7))) + +#if LJ_64 +#define REXRB(p, rr, rb) \ + { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \ + if (rex != 0x40) *--(p) = rex; } +#define FORCE_REX 0x200 +#else +#define REXRB(p, rr, rb) ((void)0) +#define FORCE_REX 0 +#endif + +#define emit_i8(as, i) (*--as->mcp = (MCode)(i)) +#define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4) + +#define emit_x87op(as, xo) \ + (*(uint16_t *)(as->mcp-2) = (uint16_t)(xo), as->mcp -= 2) + +/* op */ +static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, + MCode *p, int delta) +{ + int n = (int8_t)xo; +#if defined(__GNUC__) + if (__builtin_constant_p(xo) && n == -2) + p[delta-2] = (MCode)(xo >> 24); + else if (__builtin_constant_p(xo) && n == -3) + *(uint16_t *)(p+delta-3) = (uint16_t)(xo >> 16); + else +#endif + *(uint32_t *)(p+delta-5) = (uint32_t)xo; + p += n + delta; +#if LJ_64 + { + uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1); + if (rex != 0x40) { + if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); } + *--p = (MCode)rex; + } + } +#else + UNUSED(rr); UNUSED(rb); UNUSED(rx); +#endif + return p; +} + +/* op + modrm */ +#define emit_opm(xo, mode, rr, rb, p, delta) \ + (p[(delta)-1] = MODRM((mode), (rr), (rb)), \ + emit_op((xo), (rr), (rb), 0, (p), (delta))) + +/* op + modrm + sib */ +#define emit_opmx(xo, mode, scale, rr, rb, rx, p) \ + (p[-1] = MODRM((scale), (rx), (rb)), \ + p[-2] = MODRM((mode), (rr), RID_ESP), \ + emit_op((xo), (rr), (rb), (rx), (p), -1)) + +/* op r1, r2 */ +static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2) +{ + MCode *p = as->mcp; + as->mcp = emit_opm(xo, XM_REG, r1, r2, p, 0); +} + +#if LJ_64 && defined(LUA_USE_ASSERT) +/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). 
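+** The CPU sign-extends 32 bit displacements and immediates to 64 bits,
+** so an address with bit 31 set would alias the top of the address space.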
*/ +static int32_t ptr2addr(void *p) +{ + lua_assert((uintptr_t)p < (uintptr_t)0x80000000); + return i32ptr(p); +} +#else +#define ptr2addr(p) (i32ptr((p))) +#endif + +/* op r, [addr] */ +static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) +{ + MCode *p = as->mcp; + *(int32_t *)(p-4) = ptr2addr(addr); +#if LJ_64 + p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); + as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); +#else + as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); +#endif +} + +/* op r, [base+ofs] */ +static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) +{ + MCode *p = as->mcp; + x86Mode mode; + if (ra_hasreg(rb)) { + if (ofs == 0 && (rb&7) != RID_EBP) { + mode = XM_OFS0; + } else if (checki8(ofs)) { + *--p = (MCode)ofs; + mode = XM_OFS8; + } else { + p -= 4; + *(int32_t *)p = ofs; + mode = XM_OFS32; + } + if ((rb&7) == RID_ESP) + *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP); + } else { + *(int32_t *)(p-4) = ofs; +#if LJ_64 + p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); + p -= 5; + rb = RID_ESP; +#else + p -= 4; + rb = RID_EBP; +#endif + mode = XM_OFS0; + } + as->mcp = emit_opm(xo, mode, rr, rb, p, 0); +} + +/* op r, [base+idx*scale+ofs] */ +static void emit_rmrxo(ASMState *as, x86Op xo, Reg rr, Reg rb, Reg rx, + x86Mode scale, int32_t ofs) +{ + MCode *p = as->mcp; + x86Mode mode; + if (ofs == 0 && (rb&7) != RID_EBP) { + mode = XM_OFS0; + } else if (checki8(ofs)) { + mode = XM_OFS8; + *--p = (MCode)ofs; + } else { + mode = XM_OFS32; + p -= 4; + *(int32_t *)p = ofs; + } + as->mcp = emit_opmx(xo, mode, scale, rr, rb, rx, p); +} + +/* op r, i */ +static void emit_gri(ASMState *as, x86Group xg, Reg rb, int32_t i) +{ + MCode *p = as->mcp; + if (checki8(i)) { + p -= 3; + p[2] = (MCode)i; + p[0] = (MCode)(xg >> 16); + } else { + p -= 6; + *(int32_t *)(p+2) = i; + p[0] = (MCode)(xg >> 8); + } + p[1] = MODRM(XM_REG, xg, rb); + REXRB(p, 0, rb); + as->mcp = p; +} + +/* op [base+ofs], i */ +static void emit_gmroi(ASMState *as, x86Group xg, Reg rb, int32_t ofs, + int32_t i) +{ + x86Op xo; + if (checki8(i)) { + emit_i8(as, i); + xo = (x86Op)(((xg >> 16) << 24)+0xfe); + } else { + emit_i32(as, i); + xo = (x86Op)(((xg >> 8) << 24)+0xfe); + } + emit_rmro(as, xo, (Reg)xg, rb, ofs); +} + +#define emit_shifti(as, xg, r, i) \ + (emit_i8(as, (i)), emit_rr(as, XO_SHIFTi, (Reg)(xg), (r))) + +/* op r, rm/mrm */ +static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb) +{ + MCode *p = as->mcp; + x86Mode mode = XM_REG; + if (rb == RID_MRM) { + rb = as->mrm.base; + if (rb == RID_NONE) { + rb = RID_EBP; + mode = XM_OFS0; + p -= 4; + *(int32_t *)p = as->mrm.ofs; + if (as->mrm.idx != RID_NONE) + goto mrmidx; +#if LJ_64 + *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); + rb = RID_ESP; +#endif + } else { + if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { + mode = XM_OFS0; + } else if (checki8(as->mrm.ofs)) { + *--p = (MCode)as->mrm.ofs; + mode = XM_OFS8; + } else { + p -= 4; + *(int32_t *)p = as->mrm.ofs; + mode = XM_OFS32; + } + if (as->mrm.idx != RID_NONE) { + mrmidx: + as->mcp = emit_opmx(xo, mode, as->mrm.scale, rr, rb, as->mrm.idx, p); + return; + } + if ((rb&7) == RID_ESP) + *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP); + } + } + as->mcp = emit_opm(xo, mode, rr, rb, p, 0); +} + +static void emit_addptr(ASMState *as, Reg r, int32_t ofs) +{ + if (ofs) { + if ((as->flags & JIT_F_LEA_AGU)) + emit_rmro(as, XO_LEA, r, r, ofs); + else + emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs); + } +} + +/* -- Emit moves ---------------------------------------------------------- */ + +/* Generic move 
between two regs. */ +static void emit_movrr(ASMState *as, Reg r1, Reg r2) +{ + emit_rr(as, r1 < RID_MAX_GPR ? XO_MOV : XMM_MOVRR(as), r1, r2); +} + +/* Generic move from [base+ofs]. */ +static void emit_movrmro(ASMState *as, Reg rr, Reg rb, int32_t ofs) +{ + emit_rmro(as, rr < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), rr, rb, ofs); +} + +/* mov [base+ofs], i */ +static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) +{ + emit_i32(as, i); + emit_rmro(as, XO_MOVmi, 0, base, ofs); +} + +/* mov [base+ofs], r */ +#define emit_movtomro(as, r, base, ofs) \ + emit_rmro(as, XO_MOVto, (r), (base), (ofs)) + +/* Get/set global_State fields. */ +#define emit_opgl(as, xo, r, field) \ + emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) +#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field) +#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field) +#define emit_setgli(as, field, i) \ + (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, field)) + +/* mov r, i / xor r, r */ +static void emit_loadi(ASMState *as, Reg r, int32_t i) +{ + if (i == 0) { + emit_rr(as, XO_ARITH(XOg_XOR), r, r); + } else { + MCode *p = as->mcp; + *(int32_t *)(p-4) = i; + p[-5] = (MCode)(XI_MOVri+(r&7)); + p -= 5; + REXRB(p, 0, r); + as->mcp = p; + } +} + +/* mov r, addr */ +#define emit_loada(as, r, addr) \ + emit_loadi(as, (r), ptr2addr((addr))) + +/* movsd r, [&tv->n] / xorps r, r */ +static void emit_loadn(ASMState *as, Reg r, cTValue *tv) +{ + if (tvispzero(tv)) /* Use xor only for +0. */ + emit_rr(as, XO_XORPS, r, r); + else + emit_rma(as, XMM_MOVRM(as), r, &tv->n); +} + +/* -- Emit branches ------------------------------------------------------- */ + +/* Label for short jumps. */ +typedef MCode *MCLabel; + +/* jcc short target */ +static void emit_sjcc(ASMState *as, int cc, MCLabel target) +{ + MCode *p = as->mcp; + p[-1] = (MCode)(int8_t)(target-p); + p[-2] = (MCode)(XI_JCCs+(cc&15)); + as->mcp = p - 2; +} + +/* jcc short (pending target) */ +static MCLabel emit_sjcc_label(ASMState *as, int cc) +{ + MCode *p = as->mcp; + p[-1] = 0; + p[-2] = (MCode)(XI_JCCs+(cc&15)); + as->mcp = p - 2; + return p; +} + +/* Fixup jcc short target. */ +static void emit_sfixup(ASMState *as, MCLabel source) +{ + source[-1] = (MCode)(as->mcp-source); +} + +/* Return label pointing to current PC. */ +#define emit_label(as) ((as)->mcp) + +/* jcc target */ +static void emit_jcc(ASMState *as, int cc, MCode *target) +{ + MCode *p = as->mcp; + int32_t addr = (int32_t)(target - p); + *(int32_t *)(p-4) = addr; + p[-5] = (MCode)(XI_JCCn+(cc&15)); + p[-6] = 0x0f; + as->mcp = p - 6; +} + +/* call target */ +static void emit_call_(ASMState *as, MCode *target) +{ + MCode *p = as->mcp; + *(int32_t *)(p-4) = (int32_t)(target - p); + p[-5] = XI_CALL; + as->mcp = p - 5; +} + +#define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f)) + +/* Argument setup for C calls. Up to 3 args need no stack adjustment. 
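+** emit_setargr/emit_setargi below simply store argument N to
+** [esp+(N-1)*4]; calls with more arguments would have to adjust esp.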
*/
+#define emit_setargr(as, narg, r) \
+  emit_movtomro(as, (r), RID_ESP, ((narg)-1)*4);
+#define emit_setargi(as, narg, imm) \
+  emit_movmroi(as, RID_ESP, ((narg)-1)*4, (imm))
+#define emit_setargp(as, narg, ptr) \
+  emit_setargi(as, (narg), ptr2addr((ptr)))
+
+/* -- Register allocator debugging ---------------------------------------- */
+
+/* #define LUAJIT_DEBUG_RA */
+
+#ifdef LUAJIT_DEBUG_RA
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#define RIDNAME(name) #name,
+static const char *const ra_regname[] = {
+  GPRDEF(RIDNAME)
+  FPRDEF(RIDNAME)
+  "mrm",
+  NULL
+};
+#undef RIDNAME
+
+static char ra_dbg_buf[65536];
+static char *ra_dbg_p;
+static char *ra_dbg_merge;
+static MCode *ra_dbg_mcp;
+
+static void ra_dstart(void)
+{
+  ra_dbg_p = ra_dbg_buf;
+  ra_dbg_merge = NULL;
+  ra_dbg_mcp = NULL;
+}
+
+static void ra_dflush(void)
+{
+  fwrite(ra_dbg_buf, 1, (size_t)(ra_dbg_p-ra_dbg_buf), stdout);
+  ra_dstart();
+}
+
+static void ra_dprintf(ASMState *as, const char *fmt, ...)
+{
+  char *p;
+  va_list argp;
+  va_start(argp, fmt);
+  p = ra_dbg_mcp == as->mcp ? ra_dbg_merge : ra_dbg_p;
+  ra_dbg_mcp = NULL;
+  p += sprintf(p, "%08x \e[36m%04d ", (uintptr_t)as->mcp, as->curins-REF_BIAS);
+  for (;;) {
+    const char *e = strchr(fmt, '$');
+    if (e == NULL) break;
+    memcpy(p, fmt, (size_t)(e-fmt));
+    p += e-fmt;
+    if (e[1] == 'r') {
+      Reg r = va_arg(argp, Reg) & RID_MASK;
+      if (r <= RID_MAX) {
+        const char *q;
+        for (q = ra_regname[r]; *q; q++)
+          *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
+      } else {
+        *p++ = '?';
+        lua_assert(0);
+      }
+    } else if (e[1] == 'f' || e[1] == 'i') {
+      IRRef ref;
+      if (e[1] == 'f')
+        ref = va_arg(argp, IRRef);
+      else
+        ref = va_arg(argp, IRIns *) - as->ir;
+      if (ref >= REF_BIAS)
+        p += sprintf(p, "%04d", ref - REF_BIAS);
+      else
+        p += sprintf(p, "K%03d", REF_BIAS - ref);
+    } else if (e[1] == 's') {
+      uint32_t slot = va_arg(argp, uint32_t);
+      p += sprintf(p, "[esp+0x%x]", sps_scale(slot));
+    } else {
+      lua_assert(0);
+    }
+    fmt = e+2;
+  }
+  va_end(argp);
+  while (*fmt)
+    *p++ = *fmt++;
+  *p++ = '\e'; *p++ = '['; *p++ = 'm'; *p++ = '\n';
+  if (p > ra_dbg_buf+sizeof(ra_dbg_buf)-256) {
+    fwrite(ra_dbg_buf, 1, (size_t)(p-ra_dbg_buf), stdout);
+    p = ra_dbg_buf;
+  }
+  ra_dbg_p = p;
+}
+
+#define RA_DBG_START() ra_dstart()
+#define RA_DBG_FLUSH() ra_dflush()
+#define RA_DBG_REF() \
+  do { char *_p = ra_dbg_p; ra_dprintf(as, ""); \
+       ra_dbg_merge = _p; ra_dbg_mcp = as->mcp; } while (0)
+#define RA_DBGX(x) ra_dprintf x
+
+#else
+#define RA_DBG_START() ((void)0)
+#define RA_DBG_FLUSH() ((void)0)
+#define RA_DBG_REF() ((void)0)
+#define RA_DBGX(x) ((void)0)
+#endif
+
+/* -- Register allocator -------------------------------------------------- */
+
+#define ra_free(as, r) rset_set(as->freeset, (r))
+#define ra_modified(as, r) rset_set(as->modset, (r))
+
+#define ra_used(ir) (ra_hasreg((ir)->r) || ra_hasspill((ir)->s))
+
+/* Setup register allocator. */
+static void ra_setup(ASMState *as)
+{
+  /* Initially all regs (except the stack pointer) are free for use. */
+  as->freeset = RSET_ALL;
+  as->modset = RSET_EMPTY;
+  as->phiset = RSET_EMPTY;
+  memset(as->phireg, 0, sizeof(as->phireg));
+  memset(as->cost, 0, sizeof(as->cost));
+  as->cost[RID_ESP] = REGCOST(~0u, 0u);
+
+  /* Start slots for spill slot allocation. */
+  as->evenspill = (SPS_FIRST+1)&~1;
+  as->oddspill = (SPS_FIRST&1) ? SPS_FIRST : 0;
+}
+
+/* Rematerialize constants. 
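+** Constants are cheaper to recreate than to spill: freeing the register
+** just re-emits the materializing load (code is generated backwards, so
+** the load lands right where the old value was last needed).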
*/ +static Reg ra_rematk(ASMState *as, IRIns *ir) +{ + Reg r = ir->r; + lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); + ra_free(as, r); + ra_modified(as, r); + ir->r = RID_INIT; /* Do not keep any hint. */ + RA_DBGX((as, "remat $i $r", ir, r)); + if (ir->o == IR_KNUM) { + emit_loadn(as, r, ir_knum(ir)); + } else if (ir->o == IR_BASE) { + ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ + emit_getgl(as, r, jit_base); + } else { + lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || + ir->o == IR_KPTR || ir->o == IR_KNULL); + emit_loadi(as, r, ir->i); + } + return r; +} + +/* Force a spill. Allocate a new spill slot if needed. */ +static int32_t ra_spill(ASMState *as, IRIns *ir) +{ + int32_t slot = ir->s; + if (!ra_hasspill(slot)) { + if (irt_isnum(ir->t)) { + slot = as->evenspill; + as->evenspill += 2; + } else if (as->oddspill) { + slot = as->oddspill; + as->oddspill = 0; + } else { + slot = as->evenspill; + as->oddspill = slot+1; + as->evenspill += 2; + } + if (as->evenspill > 256) + lj_trace_err(as->J, LJ_TRERR_SPILLOV); + ir->s = (uint8_t)slot; + } + return sps_scale(slot); +} + +/* Restore a register (marked as free). Rematerialize or force a spill. */ +static Reg ra_restore(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (irref_isk(ref) || ref == REF_BASE) { + return ra_rematk(as, ir); + } else { + Reg r = ir->r; + lua_assert(ra_hasreg(r)); + ra_free(as, r); + ra_modified(as, r); + ra_sethint(ir->r, r); /* Keep hint. */ + RA_DBGX((as, "restore $i $r", ir, r)); + emit_movrmro(as, r, RID_ESP, ra_spill(as, ir)); /* Force a spill. */ + return r; + } +} + +/* Save a register to a spill slot. */ +static LJ_AINLINE void ra_save(ASMState *as, IRIns *ir, Reg r) +{ + RA_DBGX((as, "save $i $r", ir, r)); + emit_rmro(as, r < RID_MAX_GPR ? XO_MOVto : XO_MOVSDto, + r, RID_ESP, sps_scale(ir->s)); +} + +#define MINCOST(r) \ + if (LJ_LIKELY(allow&RID2RSET(r)) && as->cost[r] < cost) \ + cost = as->cost[r] + +/* Evict the register with the lowest cost, forcing a restore. */ +static Reg ra_evict(ASMState *as, RegSet allow) +{ + RegCost cost = ~(RegCost)0; + if (allow < RID2RSET(RID_MAX_GPR)) { + MINCOST(RID_EAX);MINCOST(RID_ECX);MINCOST(RID_EDX);MINCOST(RID_EBX); + MINCOST(RID_EBP);MINCOST(RID_ESI);MINCOST(RID_EDI); +#if LJ_64 + MINCOST(RID_R8D);MINCOST(RID_R9D);MINCOST(RID_R10D);MINCOST(RID_R11D); + MINCOST(RID_R12D);MINCOST(RID_R13D);MINCOST(RID_R14D);MINCOST(RID_R15D); +#endif + } else { + MINCOST(RID_XMM0);MINCOST(RID_XMM1);MINCOST(RID_XMM2);MINCOST(RID_XMM3); + MINCOST(RID_XMM4);MINCOST(RID_XMM5);MINCOST(RID_XMM6);MINCOST(RID_XMM7); +#if LJ_64 + MINCOST(RID_XMM8);MINCOST(RID_XMM9);MINCOST(RID_XMM10);MINCOST(RID_XMM11); + MINCOST(RID_XMM12);MINCOST(RID_XMM13);MINCOST(RID_XMM14);MINCOST(RID_XMM15); +#endif + } + lua_assert(allow != RSET_EMPTY); + lua_assert(regcost_ref(cost) >= as->T->nk && regcost_ref(cost) < as->T->nins); + return ra_restore(as, regcost_ref(cost)); +} + +/* Pick any register (marked as free). Evict on-demand. */ +static LJ_AINLINE Reg ra_pick(ASMState *as, RegSet allow) +{ + RegSet pick = as->freeset & allow; + if (!pick) + return ra_evict(as, allow); + else + return rset_picktop(pick); +} + +/* Get a scratch register (marked as free). */ +static LJ_AINLINE Reg ra_scratch(ASMState *as, RegSet allow) +{ + Reg r = ra_pick(as, allow); + ra_modified(as, r); + RA_DBGX((as, "scratch $r", r)); + return r; +} + +/* Evict all registers from a set (if not free). 
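+** Typically used before C calls, e.g. ra_evictset(as, RSET_SCRATCH)
+** moves all live caller-saved values to spill slots or remats them.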
*/ +static void ra_evictset(ASMState *as, RegSet drop) +{ + as->modset |= drop; + drop &= ~as->freeset; + while (drop) { + Reg r = rset_picktop(drop); + ra_restore(as, regcost_ref(as->cost[r])); + rset_clear(drop, r); + checkmclim(as); + } +} + +/* Allocate a register for ref from the allowed set of registers. +** Note: this function assumes the ref does NOT have a register yet! +** Picks an optimal register, sets the cost and marks the register as non-free. +*/ +static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow) +{ + IRIns *ir = IR(ref); + RegSet pick = as->freeset & allow; + Reg r; + lua_assert(ra_noreg(ir->r)); + if (pick) { + /* First check register hint from propagation or PHI. */ + if (ra_hashint(ir->r)) { + r = ra_gethint(ir->r); + if (rset_test(pick, r)) /* Use hint register if possible. */ + goto found; + /* Rematerialization is cheaper than missing a hint. */ + if (rset_test(allow, r) && irref_isk(regcost_ref(as->cost[r]))) { + ra_rematk(as, IR(regcost_ref(as->cost[r]))); + goto found; + } + RA_DBGX((as, "hintmiss $f $r", ref, r)); + } + /* Invariants should preferably get unused registers. */ + if (ref < as->loopref && !irt_isphi(ir->t)) + r = rset_pickbot(pick); + else + r = rset_picktop(pick); + } else { + r = ra_evict(as, allow); + } +found: + RA_DBGX((as, "alloc $f $r", ref, r)); + ir->r = (uint8_t)r; + rset_clear(as->freeset, r); + as->cost[r] = REGCOST_REF_T(ref, irt_t(ir->t)); + return r; +} + +/* Allocate a register on-demand. */ +static LJ_INLINE Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow) +{ + Reg r = IR(ref)->r; + /* Note: allow is ignored if the register is already allocated. */ + if (ra_noreg(r)) r = ra_allocref(as, ref, allow); + return r; +} + +/* Rename register allocation and emit move. */ +static void ra_rename(ASMState *as, Reg down, Reg up) +{ + IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]); + IR(ref)->r = (uint8_t)up; + as->cost[down] = 0; + lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR)); + lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up)); + rset_set(as->freeset, down); /* 'down' is free ... */ + rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ + RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); + emit_movrr(as, down, up); /* Backwards code generation needs inverse move. */ + if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ + lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); + ren = tref_ref(lj_ir_emit(as->J)); + as->ir = as->T->ir; /* The IR may have been reallocated. */ + IR(ren)->r = (uint8_t)down; + IR(ren)->s = SPS_NONE; + } +} + +/* Pick a destination register (marked as free). +** Caveat: allow is ignored if there's already a destination register. +** Use ra_destreg() to get a specific register. +*/ +static Reg ra_dest(ASMState *as, IRIns *ir, RegSet allow) +{ + Reg dest = ir->r; + if (ra_hasreg(dest)) { + ra_free(as, dest); + ra_modified(as, dest); + } else { + dest = ra_scratch(as, allow); + } + if (LJ_UNLIKELY(ra_hasspill(ir->s))) ra_save(as, ir, dest); + return dest; +} + +/* Force a specific destination register (marked as free). */ +static void ra_destreg(ASMState *as, IRIns *ir, Reg r) +{ + Reg dest = ra_dest(as, ir, RID2RSET(r)); + if (dest != r) { + ra_scratch(as, RID2RSET(r)); + emit_movrr(as, dest, r); + } +} + +/* Propagate dest register to left reference. Emit moves as needed. +** This is a required fixup step for all 2-operand machine instructions. 
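+** x86 arithmetic overwrites its left operand; if the left value lives
+** in the PHI register of the destination, a rename is used instead of
+** a move (see the end of the function).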
+*/ +static void ra_left(ASMState *as, Reg dest, IRRef lref) +{ + IRIns *ir = IR(lref); + Reg left = ir->r; + if (ra_noreg(left)) { + if (irref_isk(lref)) { + if (ir->o == IR_KNUM) { + cTValue *tv = ir_knum(ir); + /* FP remat needs a load except for +0. Still better than eviction. */ + if (tvispzero(tv) || !(as->freeset & RSET_FPR)) { + emit_loadn(as, dest, tv); + return; + } + } else { + lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || + ir->o == IR_KPTR || ir->o == IR_KNULL); + emit_loadi(as, dest, ir->i); + return; + } + } + if (!ra_hashint(left) && !iscrossref(as, lref)) + ra_sethint(ir->r, dest); /* Propagate register hint. */ + left = ra_allocref(as, lref, dest < RID_MAX_GPR ? RSET_GPR : RSET_FPR); + } + /* Move needed for true 3-operand instruction: y=a+b ==> y=a; y+=b. */ + if (dest != left) { + /* Use register renaming if dest is the PHI reg. */ + if (irt_isphi(ir->t) && as->phireg[dest] == lref) { + ra_modified(as, left); + ra_rename(as, left, dest); + } else { + emit_movrr(as, dest, left); + } + } +} + +/* -- Exit stubs ---------------------------------------------------------- */ + +/* Generate an exit stub group at the bottom of the reserved MCode memory. */ +static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) +{ + ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff; + MCode *mxp = as->mcbot; + MCode *mxpstart = mxp; + if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop) + asm_mclimit(as); + /* Push low byte of exitno for each exit stub. */ + *mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs; + for (i = 1; i < EXITSTUBS_PER_GROUP; i++) { + *mxp++ = XI_JMPs; *mxp++ = (MCode)((2+2)*(EXITSTUBS_PER_GROUP - i) - 2); + *mxp++ = XI_PUSHi8; *mxp++ = (MCode)(groupofs + i); + } + /* Push the high byte of the exitno for each exit stub group. */ + *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); + /* Store DISPATCH in ExitInfo->dispatch. Account for the two push ops. */ + *mxp++ = XI_MOVmi; + *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); + *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); + *mxp++ = 2*sizeof(void *); + *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; + /* Jump to exit handler which fills in the ExitState. */ + *mxp++ = XI_JMP; mxp += 4; + *((int32_t *)(mxp-4)) = (int32_t)((MCode *)lj_vm_exit_handler - mxp); + /* Commit the code for this group (even if assembly fails later on). */ + lj_mcode_commitbot(as->J, mxp); + as->mcbot = mxp; + as->mclim = as->mcbot + MCLIM_REDZONE; + return mxpstart; +} + +/* Setup all needed exit stubs. */ +static void asm_exitstub_setup(ASMState *as, ExitNo nexits) +{ + ExitNo i; + if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) + lj_trace_err(as->J, LJ_TRERR_SNAPOV); + for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++) + if (as->J->exitstubgroup[i] == NULL) + as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); +} + +/* -- Snapshot and guard handling ----------------------------------------- */ + +/* Can we rematerialize a KNUM instead of forcing a spill? */ +static int asm_snap_canremat(ASMState *as) +{ + Reg r; + for (r = RID_MIN_FPR; r < RID_MAX_FPR; r++) + if (irref_isk(regcost_ref(as->cost[r]))) + return 1; + return 0; +} + +/* Allocate registers or spill slots for refs escaping to a snapshot. 
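+** Every value the exit handler must write back to the Lua stack needs
+** a known location (register or spill slot) before the guard is emitted.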
*/ +static void asm_snap_alloc(ASMState *as) +{ + SnapShot *snap = &as->T->snap[as->snapno]; + IRRef2 *map = &as->T->snapmap[snap->mapofs]; + BCReg s, nslots = snap->nslots; + for (s = 0; s < nslots; s++) { + IRRef ref = snap_ref(map[s]); + if (!irref_isk(ref)) { + IRIns *ir = IR(ref); + if (!ra_used(ir) && ir->o != IR_FRAME) { + RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; + /* Not a var-to-invar ref and got a free register (or a remat)? */ + if ((!iscrossref(as, ref) || irt_isphi(ir->t)) && + ((as->freeset & allow) || + (allow == RSET_FPR && asm_snap_canremat(as)))) { + ra_allocref(as, ref, allow); /* Allocate a register. */ + checkmclim(as); + RA_DBGX((as, "snapreg $f $r", ref, ir->r)); + } else { + ra_spill(as, ir); /* Otherwise force a spill slot. */ + RA_DBGX((as, "snapspill $f $s", ref, ir->s)); + } + } + } + } +} + +/* All guards for a snapshot use the same exitno. This is currently the +** same as the snapshot number. Since the exact origin of the exit cannot +** be determined, all guards for the same snapshot must exit with the same +** RegSP mapping. +** A renamed ref which has been used in a prior guard for the same snapshot +** would cause an inconsistency. The easy way out is to force a spill slot. +*/ +static int asm_snap_checkrename(ASMState *as, IRRef ren) +{ + SnapShot *snap = &as->T->snap[as->snapno]; + IRRef2 *map = &as->T->snapmap[snap->mapofs]; + BCReg s, nslots = snap->nslots; + for (s = 0; s < nslots; s++) { + IRRef ref = snap_ref(map[s]); + if (ref == ren) { + IRIns *ir = IR(ref); + ra_spill(as, ir); /* Register renamed, so force a spill slot. */ + RA_DBGX((as, "snaprensp $f $s", ref, ir->s)); + return 1; /* Found. */ + } + } + return 0; /* Not found. */ +} + +/* Prepare snapshot for next guard instruction. */ +static void asm_snap_prep(ASMState *as) +{ + if (as->curins < as->snapref) { + do { + lua_assert(as->snapno != 0); + as->snapno--; + as->snapref = as->T->snap[as->snapno].ref; + } while (as->curins < as->snapref); + asm_snap_alloc(as); + as->snaprename = as->T->nins; + } else { + /* Process any renames above the highwater mark. */ + for (; as->snaprename < as->T->nins; as->snaprename++) { + IRIns *ir = IR(as->snaprename); + if (asm_snap_checkrename(as, ir->op1)) + ir->op2 = REF_BIAS-1; /* Kill rename. */ + } + } +} + +/* Emit conditional branch to exit for guard. +** It's important to emit this *after* all registers have been allocated, +** because rematerializations may invalidate the flags. +*/ +static void asm_guardcc(ASMState *as, int cc) +{ + MCode *target = exitstub_addr(as->J, as->snapno); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; + *(int32_t *)(p+1) = target - (p+5); + target = p; + cc ^= 1; + if (as->realign) { + emit_sjcc(as, cc, target); + return; + } + } + emit_jcc(as, cc, target); +} + +/* -- Memory operand fusion ----------------------------------------------- */ + +/* Arch-specific field offsets. */ +static const uint8_t field_ofs[IRFL__MAX+1] = { +#define FLOFS(name, type, field) (uint8_t)offsetof(type, field), +IRFLDEF(FLOFS) +#undef FLOFS + 0 +}; + +/* Limit linear search to this distance. Avoids O(n^2) behavior. */ +#define CONFLICT_SEARCH_LIM 15 + +/* Check if there's no conflicting instruction between curins and ref. */ +static int noconflict(ASMState *as, IRRef ref, IROp conflict) +{ + IRIns *ir = as->ir; + IRRef i = as->curins; + if (i > ref + CONFLICT_SEARCH_LIM) + return 0; /* Give up, ref is too far away. */ + while (--i > ref) + if (ir[i].o == conflict) + return 0; /* Conflict found. 
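+** (Fusing a load into a later instruction effectively moves the load
+** forward in time; an intervening store of the same kind would make
+** that transformation unsound.)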
*/ + return 1; /* Ok, no conflict. */ +} + +/* Fuse array reference into memory operand. */ +static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow) +{ + IRIns *irb = IR(ir->op1); + IRIns *ira, *irx; + lua_assert(ir->o == IR_AREF); + lua_assert(irb->o == IR_FLOAD && irb->op2 == IRFL_TAB_ARRAY); + ira = IR(irb->op1); + if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && + noconflict(as, irb->op1, IR_NEWREF)) { + /* We can avoid the FLOAD of t->array for colocated arrays. */ + as->mrm.base = (uint8_t)ra_alloc1(as, irb->op1, allow); /* Table obj. */ + as->mrm.ofs = -(int32_t)(ira->op1*sizeof(TValue)); /* Ofs to colo array. */ + } else { + as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); /* Array base. */ + as->mrm.ofs = 0; + } + irx = IR(ir->op2); + if (irref_isk(ir->op2)) { + as->mrm.ofs += 8*irx->i; + as->mrm.idx = RID_NONE; + } else { + rset_clear(allow, as->mrm.base); + as->mrm.scale = XM_SCALE8; + /* Fuse a constant ADD (e.g. t[i+1]) into the offset. + ** Doesn't help much without ABCelim, but reduces register pressure. + */ + if (mayfuse(as, ir->op2) && ra_noreg(irx->r) && + irx->o == IR_ADD && irref_isk(irx->op2)) { + as->mrm.ofs += 8*IR(irx->op2)->i; + as->mrm.idx = (uint8_t)ra_alloc1(as, irx->op1, allow); + } else { + as->mrm.idx = (uint8_t)ra_alloc1(as, ir->op2, allow); + } + } +} + +/* Fuse array/hash/upvalue reference into memory operand. +** Caveat: this may allocate GPRs for the base/idx registers. Be sure to +** pass the final allow mask, excluding any GPRs used for other inputs. +** In particular: 2-operand GPR instructions need to call ra_dest() first! +*/ +static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) +{ + IRIns *ir = IR(ref); + if (ra_noreg(ir->r)) { + switch ((IROp)ir->o) { + case IR_AREF: + if (mayfuse(as, ref)) { + asm_fusearef(as, ir, allow); + return; + } + break; + case IR_HREFK: + if (mayfuse(as, ref)) { + as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); + as->mrm.ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); + as->mrm.idx = RID_NONE; + return; + } + break; + case IR_UREFC: + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + GCupval *uv = &gcref(fn->l.uvptr[ir->op2])->uv; + as->mrm.ofs = ptr2addr(&uv->tv); + as->mrm.base = as->mrm.idx = RID_NONE; + return; + } + break; + default: + lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO); + break; + } + } + as->mrm.base = (uint8_t)ra_alloc1(as, ref, allow); + as->mrm.ofs = 0; + as->mrm.idx = RID_NONE; +} + +/* Fuse FLOAD/FREF reference into memory operand. */ +static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) +{ + lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); + as->mrm.ofs = field_ofs[ir->op2]; + as->mrm.idx = RID_NONE; + if (irref_isk(ir->op1)) { + as->mrm.ofs += IR(ir->op1)->i; + as->mrm.base = RID_NONE; + } else { + as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); + } +} + +/* Fuse string reference into memory operand. */ +static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) +{ + IRIns *irr; + lua_assert(ir->o == IR_STRREF); + as->mrm.idx = as->mrm.base = RID_NONE; + as->mrm.scale = XM_SCALE1; + as->mrm.ofs = sizeof(GCstr); + if (irref_isk(ir->op1)) { + as->mrm.ofs += IR(ir->op1)->i; + } else { + Reg r = ra_alloc1(as, ir->op1, allow); + rset_clear(allow, r); + as->mrm.base = (uint8_t)r; + } + irr = IR(ir->op2); + if (irref_isk(ir->op2)) { + as->mrm.ofs += irr->i; + } else { + Reg r; + /* Fuse a constant add into the offset, e.g. string.sub(s, i+10). 
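+** The constant operand of the ADD folds into the displacement of the
+** memory operand, so no extra register is needed for the sum.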
*/ + if (mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) { + as->mrm.ofs += IR(irr->op2)->i; + r = ra_alloc1(as, irr->op1, allow); + } else { + r = ra_alloc1(as, ir->op2, allow); + } + if (as->mrm.base == RID_NONE) + as->mrm.base = (uint8_t)r; + else + as->mrm.idx = (uint8_t)r; + } +} + +/* Fuse load into memory operand. */ +static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) +{ + IRIns *ir = IR(ref); + if (ra_hasreg(ir->r)) { + if (allow != RSET_EMPTY) return ir->r; /* Fast path. */ + fusespill: + /* Force a spill if only memory operands are allowed (asm_x87load). */ + as->mrm.base = RID_ESP; + as->mrm.ofs = ra_spill(as, ir); + as->mrm.idx = RID_NONE; + return RID_MRM; + } + if (ir->o == IR_KNUM) { + lua_assert(allow != RSET_EMPTY); + if (!(as->freeset & ~as->modset & RSET_FPR)) { + as->mrm.ofs = ptr2addr(ir_knum(ir)); + as->mrm.base = as->mrm.idx = RID_NONE; + return RID_MRM; + } + } else if (mayfuse(as, ref)) { + RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; + if (ir->o == IR_SLOAD) { + if (!irt_isint(ir->t) && !(ir->op2 & IRSLOAD_PARENT)) { + as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); + as->mrm.ofs = 8*((int32_t)ir->op1-1); + as->mrm.idx = RID_NONE; + return RID_MRM; + } + } else if (ir->o == IR_FLOAD) { + /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). */ + if (irt_isint(ir->t) && noconflict(as, ref, IR_FSTORE)) { + asm_fusefref(as, ir, xallow); + return RID_MRM; + } + } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { + if (noconflict(as, ref, ir->o + IRDELTA_L2S)) { + asm_fuseahuref(as, ir->op1, xallow); + return RID_MRM; + } + } else if (ir->o == IR_XLOAD) { + /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). + ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). + */ + if (irt_isint(ir->t)) { + asm_fusestrref(as, IR(ir->op1), xallow); + return RID_MRM; + } + } + } + if (!(as->freeset & allow) && + (allow == RSET_EMPTY || ra_hasspill(ir->s) || ref < as->loopref)) + goto fusespill; + return ra_allocref(as, ref, allow); +} + +/* -- Type conversions ---------------------------------------------------- */ + +static void asm_tonum(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = asm_fuseload(as, ir->op1, RSET_GPR); + emit_mrm(as, XO_CVTSI2SD, dest, left); + if (!(as->flags & JIT_F_SPLIT_XMM)) + emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ +} + +static void asm_tointg(ASMState *as, IRIns *ir, Reg left) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); + Reg dest = ra_dest(as, ir, RSET_GPR); + asm_guardcc(as, CC_P); + asm_guardcc(as, CC_NE); + emit_rr(as, XO_UCOMISD, left, tmp); + emit_rr(as, XO_CVTSI2SD, tmp, dest); + if (!(as->flags & JIT_F_SPLIT_XMM)) + emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */ + emit_rr(as, XO_CVTTSD2SI, dest, left); + /* Can't fuse since left is needed twice. */ +} + +static void asm_toint(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = asm_fuseload(as, ir->op1, RSET_FPR); + emit_mrm(as, XO_CVTSD2SI, dest, left); +} + +static void asm_tobit(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg tmp = ra_noreg(IR(ir->op1)->r) ? 
+ ra_alloc1(as, ir->op1, RSET_FPR) : + ra_scratch(as, RSET_FPR); + Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); + emit_rr(as, XO_MOVDto, tmp, dest); + emit_mrm(as, XO_ADDSD, tmp, right); + ra_left(as, tmp, ir->op1); +} + +static void asm_strto(ASMState *as, IRIns *ir) +{ + Reg str; + int32_t ofs; + RegSet drop = RSET_SCRATCH; + /* Force a spill slot for the destination register (if any). */ + if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r)) + rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */ + ra_evictset(as, drop); + asm_guardcc(as, CC_E); + emit_rr(as, XO_TEST, RID_RET, RID_RET); + /* int lj_str_numconv(const char *s, TValue *n) */ + emit_call(as, lj_str_numconv); + ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ + if (ofs == 0) { + emit_setargr(as, 2, RID_ESP); + } else { + emit_setargr(as, 2, RID_RET); + emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ofs); + } + emit_setargr(as, 1, RID_RET); + str = ra_alloc1(as, ir->op1, RSET_GPR); + emit_rmro(as, XO_LEA, RID_RET, str, sizeof(GCstr)); +} + +static void asm_tostr(ASMState *as, IRIns *ir) +{ + IRIns *irl = IR(ir->op1); + ra_destreg(as, ir, RID_RET); + ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); + as->gcsteps++; + if (irt_isnum(irl->t)) { + /* GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) */ + emit_call(as, lj_str_fromnum); + emit_setargr(as, 1, RID_RET); + emit_getgl(as, RID_RET, jit_L); + emit_setargr(as, 2, RID_RET); + emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ra_spill(as, irl)); + } else { + /* GCstr *lj_str_fromint(lua_State *L, int32_t k) */ + emit_call(as, lj_str_fromint); + emit_setargr(as, 1, RID_RET); + emit_getgl(as, RID_RET, jit_L); + emit_setargr(as, 2, ra_alloc1(as, ir->op1, RSET_GPR)); + } +} + +/* -- Memory references --------------------------------------------------- */ + +static void asm_aref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + asm_fusearef(as, ir, RSET_GPR); + if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0)) + emit_mrm(as, XO_LEA, dest, RID_MRM); + else if (as->mrm.base != dest) + emit_rr(as, XO_MOV, dest, as->mrm.base); +} + +/* Must match with hashkey() and hashrot() in lj_tab.c. */ +static uint32_t ir_khash(IRIns *ir) +{ + uint32_t lo, hi; + if (irt_isstr(ir->t)) { + return ir_kstr(ir)->hash; + } else if (irt_isnum(ir->t)) { + lo = ir_knum(ir)->u32.lo; + hi = ir_knum(ir)->u32.hi & 0x7fffffff; + } else if (irt_ispri(ir->t)) { + lua_assert(!irt_isnil(ir->t)); + return irt_type(ir->t)-IRT_FALSE; + } else { + lua_assert(irt_isaddr(ir->t)); + lo = u32ptr(ir_kgc(ir)); + hi = lo - 0x04c11db7; + } + lo ^= hi; hi = lj_rol(hi, 14); + lo -= hi; hi = lj_rol(hi, 5); + hi ^= lo; hi -= lj_rol(lo, 27); + return hi; +} + +/* Merge NE(HREF, niltv) check. */ +static MCode *merge_href_niltv(ASMState *as, IRIns *ir) +{ + /* Assumes nothing else generates NE of HREF. */ + if (ir[1].o == IR_NE && ir[1].op1 == as->curins) { + if (LJ_64 && *as->mcp != XI_ARITHi) + as->mcp += 7+6; + else + as->mcp += 6+6; /* Kill cmp reg, imm32 + jz exit. */ + return as->mcp + *(int32_t *)(as->mcp-4); /* Return exit address. */ + } + return NULL; +} + +/* Inlined hash lookup. Specialized for key type and for const keys. +** The equivalent C code is: +** Node *n = hashkey(t, key); +** do { +** if (lj_obj_equal(&n->key, key)) return &n->val; +** } while ((n = nextnode(n))); +** return niltv(L); +*/ +static void asm_href(ASMState *as, IRIns *ir) +{ + MCode *nilexit = merge_href_niltv(as, ir); /* Do this before any restores. 
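+** (merge_href_niltv() inspects and skips the code at as->mcp; a restore
+** emitted first would sit between mcp and the NE check to be merged.)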
*/ + RegSet allow = RSET_GPR; + Reg dest = ra_dest(as, ir, allow); + Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); + Reg key = RID_NONE, tmp = RID_NONE; + IRIns *irkey = IR(ir->op2); + int isk = irref_isk(ir->op2); + IRType1 kt = irkey->t; + uint32_t khash; + MCLabel l_end, l_loop, l_next; + + if (!isk) { + rset_clear(allow, tab); + key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); + if (!irt_isstr(kt)) + tmp = ra_scratch(as, rset_exclude(allow, key)); + } + + /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */ + l_end = emit_label(as); + if (nilexit) + emit_jcc(as, CC_E, nilexit); /* XI_JMP is not found by lj_asm_patchexit. */ + else + emit_loada(as, dest, niltvg(J2G(as->J))); + + /* Follow hash chain until the end. */ + l_loop = emit_sjcc_label(as, CC_NZ); + emit_rr(as, XO_TEST, dest, dest); + emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next)); + l_next = emit_label(as); + + /* Type and value comparison. */ + emit_sjcc(as, CC_E, l_end); + if (irt_isnum(kt)) { + if (isk) { + /* Assumes -0.0 is already canonicalized to +0.0. */ + emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo), + (int32_t)ir_knum(irkey)->u32.lo); + emit_sjcc(as, CC_NE, l_next); + emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi), + (int32_t)ir_knum(irkey)->u32.hi); + } else { + emit_sjcc(as, CC_P, l_next); + emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); + emit_sjcc(as, CC_A, l_next); + /* The type check avoids NaN penalties and complaints from Valgrind. */ + emit_i8(as, ~IRT_NUM); + emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); + } + } else { + if (!irt_ispri(kt)) { + lua_assert(irt_isaddr(kt)); + if (isk) + emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr), + ptr2addr(ir_kgc(irkey))); + else + emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr)); + emit_sjcc(as, CC_NE, l_next); + } + lua_assert(!irt_isnil(kt)); + emit_i8(as, ~irt_type(kt)); + emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); + } + emit_sfixup(as, l_loop); + checkmclim(as); + + /* Load main position relative to tab->node into dest. */ + khash = isk ? ir_khash(irkey) : 1; + if (khash == 0) { + emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node)); + } else { + emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node)); + if ((as->flags & JIT_F_PREFER_IMUL)) { + emit_i8(as, sizeof(Node)); + emit_rr(as, XO_IMULi8, dest, dest); + } else { + emit_shifti(as, XOg_SHL, dest, 3); + emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); + } + if (isk) { + emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); + emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); + } else if (irt_isstr(kt)) { + emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash)); + emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); + } else { /* Must match with hashrot() in lj_tab.c. 
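+** Code is emitted backwards: read bottom-up, this computes the same
+** rotate/xor/sub mix as ir_khash() above, then masks with t->hmask.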
*/ + emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask)); + emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp); + emit_shifti(as, XOg_ROL, tmp, 27); + emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp); + emit_shifti(as, XOg_ROL, dest, 5); + emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest); + emit_shifti(as, XOg_ROL, dest, 14); + emit_rr(as, XO_ARITH(XOg_XOR), tmp, dest); + if (irt_isnum(kt)) { + emit_rmro(as, XO_ARITH(XOg_AND), dest, RID_ESP, ra_spill(as, irkey)+4); + emit_loadi(as, dest, 0x7fffffff); + emit_rr(as, XO_MOVDto, key, tmp); + } else { + emit_rr(as, XO_MOV, tmp, key); + emit_rmro(as, XO_LEA, dest, key, -0x04c11db7); + } + } + } +} + +static void asm_hrefk(ASMState *as, IRIns *ir) +{ + IRIns *kslot = IR(ir->op2); + IRIns *irkey = IR(kslot->op1); + int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); + Reg dest = ra_used(ir) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; + Reg node = ra_alloc1(as, ir->op1, RSET_GPR); + MCLabel l_exit; + lua_assert(ofs % sizeof(Node) == 0); + if (ra_hasreg(dest)) { + if (ofs != 0) { + if (dest == node && !(as->flags & JIT_F_LEA_AGU)) + emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs); + else + emit_rmro(as, XO_LEA, dest, node, ofs); + } else if (dest != node) { + emit_rr(as, XO_MOV, dest, node); + } + } + asm_guardcc(as, CC_NE); + l_exit = emit_label(as); + if (irt_isnum(irkey->t)) { + /* Assumes -0.0 is already canonicalized to +0.0. */ + emit_gmroi(as, XG_ARITHi(XOg_CMP), node, + ofs + (int32_t)offsetof(Node, key.u32.lo), + (int32_t)ir_knum(irkey)->u32.lo); + emit_sjcc(as, CC_NE, l_exit); + emit_gmroi(as, XG_ARITHi(XOg_CMP), node, + ofs + (int32_t)offsetof(Node, key.u32.hi), + (int32_t)ir_knum(irkey)->u32.hi); + } else { + if (!irt_ispri(irkey->t)) { + lua_assert(irt_isgcv(irkey->t)); + emit_gmroi(as, XG_ARITHi(XOg_CMP), node, + ofs + (int32_t)offsetof(Node, key.gcr), + ptr2addr(ir_kgc(irkey))); + emit_sjcc(as, CC_NE, l_exit); + } + lua_assert(!irt_isnil(irkey->t)); + emit_i8(as, ~irt_type(irkey->t)); + emit_rmro(as, XO_ARITHi8, XOg_CMP, node, + ofs + (int32_t)offsetof(Node, key.it)); + } +} + +static void asm_newref(ASMState *as, IRIns *ir) +{ + IRRef keyref = ir->op2; + IRIns *irkey = IR(keyref); + RegSet allow = RSET_GPR; + Reg tab, tmp; + ra_destreg(as, ir, RID_RET); + ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); + tab = ra_alloc1(as, ir->op1, allow); + tmp = ra_scratch(as, rset_clear(allow, tab)); + /* TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) */ + emit_call(as, lj_tab_newkey); + emit_setargr(as, 1, tmp); + emit_setargr(as, 2, tab); + emit_getgl(as, tmp, jit_L); + if (irt_isnum(irkey->t)) { + /* For numbers use the constant itself or a spill slot as a TValue. */ + if (irref_isk(keyref)) { + emit_setargp(as, 3, ir_knum(irkey)); + } else { + emit_setargr(as, 3, tmp); + emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey)); + } + } else { + /* Otherwise use g->tmptv to hold the TValue. */ + lua_assert(irt_ispri(irkey->t) || irt_isaddr(irkey->t)); + emit_setargr(as, 3, tmp); + if (!irref_isk(keyref)) { + Reg src = ra_alloc1(as, keyref, rset_exclude(allow, tmp)); + emit_movtomro(as, src, tmp, 0); + } else if (!irt_ispri(irkey->t)) { + emit_movmroi(as, tmp, 0, irkey->i); + } + emit_movmroi(as, tmp, 4, irt_toitype(irkey->t)); + emit_loada(as, tmp, &J2G(as->J)->tmptv); + } +} + +static void asm_uref(ASMState *as, IRIns *ir) +{ + /* NYI: Check that UREFO is still open and not aliasing a slot. 
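+** An open upvalue points into a stack slot; if it were closed during
+** the trace, or aliased by a store to that slot, the cached pointer
+** would be stale. For now the check is simply omitted.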
*/ + if (ra_used(ir)) { + Reg dest = ra_dest(as, ir, RSET_GPR); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + TValue **v = &gcref(fn->l.uvptr[ir->op2])->uv.v; + emit_rma(as, XO_MOV, dest, v); + } else { + Reg uv = ra_scratch(as, RSET_GPR); + Reg func = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->o == IR_UREFC) { + emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv)); + asm_guardcc(as, CC_NE); + emit_i8(as, 1); + emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); + } else { + emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v)); + } + emit_rmro(as, XO_MOV, uv, func, + (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)ir->op2); + } + } +} + +static void asm_fref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + asm_fusefref(as, ir, RSET_GPR); + emit_mrm(as, XO_LEA, dest, RID_MRM); +} + +static void asm_strref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + asm_fusestrref(as, ir, RSET_GPR); + if (as->mrm.base == RID_NONE) + emit_loadi(as, dest, as->mrm.ofs); + else if (as->mrm.base == dest && as->mrm.idx == RID_NONE) + emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs); + else + emit_mrm(as, XO_LEA, dest, RID_MRM); +} + +/* -- Loads and stores ---------------------------------------------------- */ + +static void asm_fload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + x86Op xo; + asm_fusefref(as, ir, RSET_GPR); + switch (irt_type(ir->t)) { + case IRT_I8: xo = XO_MOVSXb; break; + case IRT_U8: xo = XO_MOVZXb; break; + case IRT_I16: xo = XO_MOVSXw; break; + case IRT_U16: xo = XO_MOVZXw; break; + default: + lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); + xo = XO_MOV; + break; + } + emit_mrm(as, xo, dest, RID_MRM); +} + +static void asm_fstore(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg src = RID_NONE; + /* The IRT_I16/IRT_U16 stores should never be simplified for constant + ** values since mov word [mem], imm16 has a length-changing prefix. + */ + if (!irref_isk(ir->op2) || irt_isi16(ir->t) || irt_isu16(ir->t)) { + RegSet allow8 = (irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR; + src = ra_alloc1(as, ir->op2, allow8); + rset_clear(allow, src); + } + asm_fusefref(as, IR(ir->op1), allow); + if (ra_hasreg(src)) { + x86Op xo; + switch (irt_type(ir->t)) { + case IRT_I8: case IRT_U8: xo = XO_MOVtob; src |= FORCE_REX; break; + case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; + default: + lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); + xo = XO_MOVto; + break; + } + emit_mrm(as, xo, src, RID_MRM); + } else { + if (irt_isi8(ir->t) || irt_isu8(ir->t)) { + emit_i8(as, IR(ir->op2)->i); + emit_mrm(as, XO_MOVmib, 0, RID_MRM); + } else { + lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); + emit_i32(as, IR(ir->op2)->i); + emit_mrm(as, XO_MOVmi, 0, RID_MRM); + } + } +} + +static void asm_ahuload(ASMState *as, IRIns *ir) +{ + RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; + lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t)); + if (ra_used(ir)) { + Reg dest = ra_dest(as, ir, allow); + asm_fuseahuref(as, ir->op1, RSET_GPR); + emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); + } else { + asm_fuseahuref(as, ir->op1, RSET_GPR); + } + /* Always do the type check, even if the load result is unused. */ + asm_guardcc(as, irt_isnum(ir->t) ? 
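/* Numbers exit on 'above': every non-number tag compares above ~IRT_NUM; exact tags exit on 'not equal'. */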
CC_A : CC_NE); + emit_i8(as, ~irt_type(ir->t)); + as->mrm.ofs += 4; + emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); +} + +static void asm_ahustore(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + Reg src = ra_alloc1(as, ir->op2, RSET_FPR); + asm_fuseahuref(as, ir->op1, RSET_GPR); + emit_mrm(as, XO_MOVSDto, src, RID_MRM); + } else { + IRIns *irr = IR(ir->op2); + RegSet allow = RSET_GPR; + Reg src = RID_NONE; + if (!irref_isk(ir->op2)) { + src = ra_alloc1(as, ir->op2, allow); + rset_clear(allow, src); + } + asm_fuseahuref(as, ir->op1, allow); + if (ra_hasreg(src)) { + emit_mrm(as, XO_MOVto, src, RID_MRM); + } else if (!irt_ispri(irr->t)) { + lua_assert(irt_isaddr(ir->t)); + emit_i32(as, irr->i); + emit_mrm(as, XO_MOVmi, 0, RID_MRM); + } + as->mrm.ofs += 4; + emit_i32(as, (int32_t)~irt_type(ir->t)); + emit_mrm(as, XO_MOVmi, 0, RID_MRM); + } +} + +static void asm_sload(ASMState *as, IRIns *ir) +{ + int32_t ofs = 8*((int32_t)ir->op1-1); + IRType1 t = ir->t; + Reg base; + lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ + if (irt_isint(t)) { + Reg left = ra_scratch(as, RSET_FPR); + asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ + base = ra_alloc1(as, REF_BASE, RSET_GPR); + emit_rmro(as, XMM_MOVRM(as), left, base, ofs); + t.irt = IRT_NUM; /* Continue with a regular number type check. */ + } else if (ra_used(ir)) { + RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; + Reg dest = ra_dest(as, ir, allow); + lua_assert(irt_isnum(ir->t) || irt_isaddr(ir->t)); + base = ra_alloc1(as, REF_BASE, RSET_GPR); + emit_movrmro(as, dest, base, ofs); + } else { + if (!irt_isguard(ir->t)) + return; /* No type check: avoid base alloc. */ + base = ra_alloc1(as, REF_BASE, RSET_GPR); + } + if (irt_isguard(ir->t)) { + /* Need type check, even if the load result is unused. */ + asm_guardcc(as, irt_isnum(t) ? CC_A : CC_NE); + emit_i8(as, ~irt_type(t)); + emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); + } +} + +static void asm_xload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + x86Op xo; + asm_fusestrref(as, IR(ir->op1), RSET_GPR); /* For now only support STRREF. */ + /* ir->op2 is ignored -- unaligned loads are ok on x86. */ + switch (irt_type(ir->t)) { + case IRT_I8: xo = XO_MOVSXb; break; + case IRT_U8: xo = XO_MOVZXb; break; + case IRT_I16: xo = XO_MOVSXw; break; + case IRT_U16: xo = XO_MOVZXw; break; + default: lua_assert(irt_isint(ir->t)); xo = XO_MOV; break; + } + emit_mrm(as, xo, dest, RID_MRM); +} + +/* -- String ops ---------------------------------------------------------- */ + +static void asm_snew(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg left, right; + IRIns *irl; + ra_destreg(as, ir, RID_RET); + ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); + irl = IR(ir->op1); + left = irl->r; + right = IR(ir->op2)->r; + if (ra_noreg(left)) { + lua_assert(irl->o == IR_STRREF); + /* Get register only for non-const STRREF. */ + if (!(irref_isk(irl->op1) && irref_isk(irl->op2))) { + if (ra_hasreg(right)) rset_clear(allow, right); + left = ra_allocref(as, ir->op1, allow); + } + } + if (ra_noreg(right) && !irref_isk(ir->op2)) { + if (ra_hasreg(left)) rset_clear(allow, left); + right = ra_allocref(as, ir->op2, allow); + } + /* GCstr *lj_str_new(lua_State *L, const char *str, size_t len) */ + emit_call(as, lj_str_new); + emit_setargr(as, 1, RID_RET); + emit_getgl(as, RID_RET, jit_L); + if (ra_noreg(left)) /* Use immediate for const STRREF. 
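With both STRREF operands constant the pointer folds into one immediate: string address + index + sizeof(GCstr), as computed below.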
*/ + emit_setargi(as, 2, IR(irl->op1)->i + IR(irl->op2)->i + + (int32_t)sizeof(GCstr)); + else + emit_setargr(as, 2, left); + if (ra_noreg(right)) + emit_setargi(as, 3, IR(ir->op2)->i); + else + emit_setargr(as, 3, right); + as->gcsteps++; +} + +/* -- Table ops ----------------------------------------------------------- */ + +static void asm_tnew(ASMState *as, IRIns *ir) +{ + ra_destreg(as, ir, RID_RET); + ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); + /* GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */ + emit_call(as, lj_tab_new); + emit_setargr(as, 1, RID_RET); + emit_setargi(as, 2, ir->op1); + emit_setargi(as, 3, ir->op2); + emit_getgl(as, RID_RET, jit_L); + as->gcsteps++; +} + +static void asm_tdup(ASMState *as, IRIns *ir) +{ + ra_destreg(as, ir, RID_RET); + ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); + /* GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) */ + emit_call(as, lj_tab_dup); + emit_setargr(as, 1, RID_RET); + emit_setargp(as, 2, ir_kgc(IR(ir->op1))); + emit_getgl(as, RID_RET, jit_L); + as->gcsteps++; +} + +static void asm_tlen(ASMState *as, IRIns *ir) +{ + ra_destreg(as, ir, RID_RET); + ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); + emit_call(as, lj_tab_len); /* MSize lj_tab_len(GCtab *t) */ + emit_setargr(as, 1, ra_alloc1(as, ir->op1, RSET_GPR)); +} + +static void asm_tbar(ASMState *as, IRIns *ir) +{ + Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab)); + MCLabel l_end = emit_label(as); + emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist)); + emit_setgl(as, tab, gc.grayagain); + emit_getgl(as, tmp, gc.grayagain); + emit_i8(as, ~LJ_GC_BLACK); + emit_rmro(as, XO_ARITHib, XOg_AND, tab, offsetof(GCtab, marked)); + emit_sjcc(as, CC_Z, l_end); + emit_i8(as, LJ_GC_BLACK); + emit_rmro(as, XO_GROUP3b, XOg_TEST, tab, offsetof(GCtab, marked)); +} + +static void asm_obar(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg obj, val; + GCobj *valp; + MCLabel l_end; + int32_t ofs; + ra_evictset(as, RSET_SCRATCH); + if (irref_isk(ir->op2)) { + valp = ir_kgc(IR(ir->op2)); + val = RID_NONE; + } else { + valp = NULL; + val = ra_alloc1(as, ir->op2, allow); + rset_clear(allow, val); + } + obj = ra_alloc1(as, ir->op1, allow); + l_end = emit_label(as); + /* No need for other object barriers (yet). */ + lua_assert(IR(ir->op1)->o == IR_UREFC); + ofs = -(int32_t)offsetof(GCupval, tv); + /* void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) */ + emit_call(as, lj_gc_barrieruv); + if (ofs == 0) { + emit_setargr(as, 2, obj); + } else if (rset_test(RSET_SCRATCH, obj) && !(as->flags & JIT_F_LEA_AGU)) { + emit_setargr(as, 2, obj); + emit_gri(as, XG_ARITHi(XOg_ADD), obj, ofs); + } else { + emit_setargr(as, 2, RID_RET); + emit_rmro(as, XO_LEA, RID_RET, obj, ofs); + } + emit_setargp(as, 1, J2G(as->J)); + if (valp) + emit_setargp(as, 3, valp); + else + emit_setargr(as, 3, val); + emit_sjcc(as, CC_Z, l_end); + emit_i8(as, LJ_GC_WHITES); + if (valp) + emit_rma(as, XO_GROUP3b, XOg_TEST, &valp->gch.marked); + else + emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked)); + emit_sjcc(as, CC_Z, l_end); + emit_i8(as, LJ_GC_BLACK); + emit_rmro(as, XO_GROUP3b, XOg_TEST, obj, + ofs + (int32_t)offsetof(GChead, marked)); +} + +/* -- FP/int arithmetic and logic operations ------------------------------ */ + +/* Load reference onto x87 stack. Force a spill to memory if needed. 
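Constants prefer fldz/fld1, a fusable TONUM reloads its integer operand via fild from the spill slot, and everything else goes through a plain fld qword.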
*/ +static void asm_x87load(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ir->o == IR_KNUM) { + cTValue *tv = ir_knum(ir); + if (tvispzero(tv)) /* Use fldz only for +0. */ + emit_x87op(as, XI_FLDZ); + else if (tvispone(tv)) + emit_x87op(as, XI_FLD1); + else + emit_rma(as, XO_FLDq, XOg_FLDq, tv); + } else if (ir->o == IR_TONUM && !ra_used(ir) && + !irref_isk(ir->op1) && mayfuse(as, ir->op1)) { + IRIns *iri = IR(ir->op1); + emit_rmro(as, XO_FILDd, XOg_FILDd, RID_ESP, ra_spill(as, iri)); + } else { + emit_mrm(as, XO_FLDq, XOg_FLDq, asm_fuseload(as, ref, RSET_EMPTY)); + } +} + +/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */ +static int fpmjoin_pow(ASMState *as, IRIns *ir) +{ + IRIns *irp = IR(ir->op1); + if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { + IRIns *irpp = IR(irp->op1); + if (irpp == ir-2 && irpp->o == IR_FPMATH && + irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { + emit_call(as, lj_vm_pow); /* st0 = lj_vm_pow(st1, st0) */ + asm_x87load(as, irp->op2); + asm_x87load(as, irpp->op1); + return 1; + } + } + return 0; +} + +static void asm_fpmath(ASMState *as, IRIns *ir) +{ + IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER; + if (fpm == IRFPM_SQRT) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = asm_fuseload(as, ir->op1, RSET_FPR); + emit_mrm(as, XO_SQRTSD, dest, left); + } else if ((as->flags & JIT_F_SSE4_1) && fpm <= IRFPM_TRUNC) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = asm_fuseload(as, ir->op1, RSET_FPR); + /* Round down/up/trunc == 1001/1010/1011. */ + emit_i8(as, 0x09 + fpm); + /* ROUNDSD has a 4-byte opcode which doesn't fit in x86Op. */ + emit_mrm(as, XO_ROUNDSD, dest, left); + /* Let's pretend it's a 3-byte opcode, and compensate afterwards. */ + /* This is atrocious, but the alternatives are much worse. */ + if (LJ_64 && as->mcp[1] != (MCode)(XO_ROUNDSD >> 16)) { + as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */ + } + *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */ + } else { + int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ + Reg dest = ir->r; + if (ra_hasreg(dest)) { + ra_free(as, dest); + ra_modified(as, dest); + emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); + } + emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); + switch (fpm) { /* st0 = lj_vm_*(st0) */ + case IRFPM_FLOOR: emit_call(as, lj_vm_floor); break; + case IRFPM_CEIL: emit_call(as, lj_vm_ceil); break; + case IRFPM_TRUNC: emit_call(as, lj_vm_trunc); break; + case IRFPM_EXP: emit_call(as, lj_vm_exp); break; + case IRFPM_EXP2: + if (fpmjoin_pow(as, ir)) return; + emit_call(as, lj_vm_exp2); /* st0 = lj_vm_exp2(st0) */ + break; + case IRFPM_SIN: emit_x87op(as, XI_FSIN); break; + case IRFPM_COS: emit_x87op(as, XI_FCOS); break; + case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break; + case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10: + /* Note: the use of fyl2xp1 would be pointless here. When computing + ** log(1.0+eps) the precision is already lost after 1.0 is added. + ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense. 
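+** (E.g. for eps = 1e-17 the sum 1.0+eps rounds to exactly 1.0 in a double,
+** so any downstream log computes log(1.0) = 0 and the information is gone.)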
+ */ + emit_x87op(as, XI_FYL2X); break; + case IRFPM_OTHER: + switch (ir->o) { + case IR_ATAN2: + emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break; + case IR_LDEXP: + emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break; + case IR_POWI: + emit_call(as, lj_vm_powi); /* st0 = lj_vm_powi(st0, [esp]) */ + emit_rmro(as, XO_MOVto, ra_alloc1(as, ir->op2, RSET_GPR), RID_ESP, 0); + break; + default: lua_assert(0); break; + } + break; + default: lua_assert(0); break; + } + asm_x87load(as, ir->op1); + switch (fpm) { + case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break; + case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break; + case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break; + case IRFPM_OTHER: + if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2); + break; + default: break; + } + } +} + +/* Find out whether swapping operands might be beneficial. */ +static int swapops(ASMState *as, IRIns *ir) +{ + IRIns *irl = IR(ir->op1); + IRIns *irr = IR(ir->op2); + lua_assert(ra_noreg(irr->r)); + if (!irm_iscomm(lj_ir_mode[ir->o])) + return 0; /* Can't swap non-commutative operations. */ + if (irref_isk(ir->op2)) + return 0; /* Don't swap constants to the left. */ + if (ra_hasreg(irl->r)) + return 1; /* Swap if left already has a register. */ + if (ra_samehint(ir->r, irr->r)) + return 1; /* Swap if dest and right have matching hints. */ + if (ir->op1 < as->loopref && !irt_isphi(irl->t) && + !(ir->op2 < as->loopref && !irt_isphi(irr->t))) + return 1; /* Swap invariants to the right. */ + if (opisfusableload(irl->o)) + return 1; /* Swap fusable loads to the right. */ + return 0; /* Otherwise don't swap. */ +} + +static void asm_fparith(ASMState *as, IRIns *ir, x86Op xo) +{ + IRRef lref = ir->op1; + IRRef rref = ir->op2; + RegSet allow = RSET_FPR; + Reg dest; + Reg right = IR(rref)->r; + if (ra_hasreg(right)) + rset_clear(allow, right); + dest = ra_dest(as, ir, allow); + if (lref == rref) { + right = dest; + } else if (ra_noreg(right)) { + if (swapops(as, ir)) { + IRRef tmp = lref; lref = rref; rref = tmp; + } + right = asm_fuseload(as, rref, rset_clear(allow, dest)); + } + emit_mrm(as, xo, dest, right); + ra_left(as, dest, lref); +} + +static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa) +{ + IRRef lref = ir->op1; + IRRef rref = ir->op2; + RegSet allow = RSET_GPR; + Reg dest, right; + if (as->testmcp == as->mcp) { /* Drop test r,r instruction. */ + as->testmcp = NULL; + as->mcp += (LJ_64 && *as->mcp != XI_TEST) ? 3 : 2; + } + right = IR(rref)->r; + if (ra_hasreg(right)) + rset_clear(allow, right); + dest = ra_dest(as, ir, allow); + if (lref == rref) { + right = dest; + } else if (ra_noreg(right) && !irref_isk(rref)) { + if (swapops(as, ir)) { + IRRef tmp = lref; lref = rref; rref = tmp; + } + right = asm_fuseload(as, rref, rset_clear(allow, dest)); + /* Note: fuses only with IR_FLOAD for now. */ + } + if (irt_isguard(ir->t)) /* For IR_ADDOV etc. */ + asm_guardcc(as, CC_O); + if (ra_hasreg(right)) + emit_mrm(as, XO_ARITH(xa), dest, right); + else + emit_gri(as, XG_ARITHi(xa), dest, IR(ir->op2)->i); + ra_left(as, dest, lref); +} + +/* LEA is really a 4-operand ADD with an independent destination register, +** up to two source registers and an immediate. One register can be scaled +** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several +** instructions. 
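+** For example, y = a+b+7 can be emitted as a single
+**   lea y, [a+b+7]
+** instead of a register move plus two adds.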
+**
+** Currently only a few common cases are supported:
+** - 3-operand ADD: y = a+b; y = a+k with a and b already allocated
+** - Left ADD fusion: y = (a+b)+k; y = (a+k)+b
+** - Right ADD fusion: y = a+(b+k)
+** The omitted variants have already been reduced by FOLD.
+**
+** There are more fusion opportunities, like gathering shifts or joining
+** common references. But these are probably not worth the trouble, since
+** array indexing is not decomposed and already makes use of all fields
+** of the ModRM operand.
+*/
+static int asm_lea(ASMState *as, IRIns *ir)
+{
+ IRIns *irl = IR(ir->op1);
+ IRIns *irr = IR(ir->op2);
+ RegSet allow = RSET_GPR;
+ Reg dest;
+ as->mrm.base = as->mrm.idx = RID_NONE;
+ as->mrm.scale = XM_SCALE1;
+ as->mrm.ofs = 0;
+ if (ra_hasreg(irl->r)) {
+ rset_clear(allow, irl->r);
+ as->mrm.base = irl->r;
+ if (irref_isk(ir->op2) || ra_hasreg(irr->r)) {
+ /* The PHI renaming logic does a better job in some cases. */
+ if (ra_hasreg(ir->r) &&
+ ((irt_isphi(irl->t) && as->phireg[ir->r] == ir->op1) ||
+ (irt_isphi(irr->t) && as->phireg[ir->r] == ir->op2)))
+ return 0;
+ if (irref_isk(ir->op2)) {
+ as->mrm.ofs = irr->i;
+ } else {
+ rset_clear(allow, irr->r);
+ as->mrm.idx = irr->r;
+ }
+ } else if (irr->o == IR_ADD && mayfuse(as, ir->op2) &&
+ irref_isk(irr->op2)) {
+ Reg idx = ra_alloc1(as, irr->op1, allow);
+ rset_clear(allow, idx);
+ as->mrm.idx = (uint8_t)idx;
+ as->mrm.ofs = IR(irr->op2)->i;
+ } else {
+ return 0;
+ }
+ } else if (ir->op1 != ir->op2 && irl->o == IR_ADD && mayfuse(as, ir->op1) &&
+ (irref_isk(ir->op2) || irref_isk(irl->op2))) {
+ Reg idx, base = ra_alloc1(as, irl->op1, allow);
+ rset_clear(allow, base);
+ as->mrm.base = (uint8_t)base;
+ if (irref_isk(ir->op2)) {
+ as->mrm.ofs = irr->i;
+ idx = ra_alloc1(as, irl->op2, allow);
+ } else {
+ as->mrm.ofs = IR(irl->op2)->i;
+ idx = ra_alloc1(as, ir->op2, allow);
+ }
+ rset_clear(allow, idx);
+ as->mrm.idx = (uint8_t)idx;
+ } else {
+ return 0;
+ }
+ dest = ra_dest(as, ir, allow);
+ emit_mrm(as, XO_LEA, dest, RID_MRM);
+ return 1; /* Success. */
+}
+
+static void asm_add(ASMState *as, IRIns *ir)
+{
+ if (irt_isnum(ir->t))
+ asm_fparith(as, ir, XO_ADDSD);
+ else if ((as->flags & JIT_F_LEA_AGU) || as->testmcp == as->mcp ||
+ !asm_lea(as, ir))
+ asm_intarith(as, ir, XOg_ADD);
+}
+
+static void asm_bitnot(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ emit_rr(as, XO_GROUP3, XOg_NOT, dest);
+ ra_left(as, dest, ir->op1);
+}
+
+static void asm_bitswap(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ MCode *p = as->mcp;
+ p[-1] = (MCode)(XI_BSWAP+(dest&7));
+ p[-2] = 0x0f;
+ p -= 2;
+ REXRB(p, 0, dest);
+ as->mcp = p;
+ ra_left(as, dest, ir->op1);
+}
+
+static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
+{
+ IRRef rref = ir->op2;
+ IRIns *irr = IR(rref);
+ Reg dest;
+ if (irref_isk(rref)) { /* Constant shifts. */
+ int shift;
+ dest = ra_dest(as, ir, RSET_GPR);
+ shift = irr->i & 31; /* Handle shifts of 0..31 bits. */
+ switch (shift) {
+ case 0: return;
+ case 1: emit_rr(as, XO_SHIFT1, (Reg)xs, dest); break;
+ default: emit_shifti(as, xs, dest, shift); break;
+ }
+ } else { /* Variable shifts implicitly use register cl (i.e. ecx). 
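The count is moved or allocated into ecx below and the destination is kept out of ecx.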
*/
+ RegSet allow = rset_exclude(RSET_GPR, RID_ECX);
+ Reg right = irr->r;
+ if (ra_noreg(right)) {
+ right = ra_allocref(as, rref, RID2RSET(RID_ECX));
+ } else if (right != RID_ECX) {
+ rset_clear(allow, right);
+ ra_scratch(as, RID2RSET(RID_ECX));
+ }
+ dest = ra_dest(as, ir, allow);
+ emit_rr(as, XO_SHIFTcl, (Reg)xs, dest);
+ if (right != RID_ECX)
+ emit_rr(as, XO_MOV, RID_ECX, right);
+ }
+ ra_left(as, dest, ir->op1);
+ /*
+ ** Note: avoid using the flags resulting from a shift or rotate!
+ ** All of them cause a partial flag stall, except for r,1 shifts
+ ** (but not rotates). And a shift count of 0 leaves the flags unmodified.
+ */
+}
+
+/* -- Comparisons --------------------------------------------------------- */
+
+/* Virtual flags for unordered FP comparisons. */
+#define VCC_U 0x100 /* Unordered. */
+#define VCC_P 0x200 /* Needs extra CC_P branch. */
+#define VCC_S 0x400 /* Swap avoids CC_P branch. */
+#define VCC_PS (VCC_P|VCC_S)
+
+static void asm_comp_(ASMState *as, IRIns *ir, int cc)
+{
+ if (irt_isnum(ir->t)) {
+ IRRef lref = ir->op1;
+ IRRef rref = ir->op2;
+ Reg left, right;
+ MCLabel l_around;
+ /*
+ ** An extra CC_P branch is required to preserve ordered/unordered
+ ** semantics for FP comparisons. This can be avoided by swapping
+ ** the operands and inverting the condition (except for EQ and UNE).
+ ** So always try to swap if possible.
+ **
+ ** Another option would be to swap operands to achieve better memory
+ ** operand fusion. But it's unlikely that this outweighs the cost
+ ** of the extra branches.
+ */
+ if (cc & VCC_S) { /* Swap? */
+ IRRef tmp = lref; lref = rref; rref = tmp;
+ cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */
+ }
+ left = ra_alloc1(as, lref, RSET_FPR);
+ right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
+ l_around = emit_label(as);
+ asm_guardcc(as, cc >> 4);
+ if (cc & VCC_P) { /* Extra CC_P branch required? */
+ if (!(cc & VCC_U)) {
+ asm_guardcc(as, CC_P); /* Branch to exit for ordered comparisons. */
+ } else if (l_around != as->invmcp) {
+ emit_sjcc(as, CC_P, l_around); /* Branch around for unordered. */
+ } else {
+ /* Patched to mcloop by asm_loop_fixup. */
+ as->loopinv = 2;
+ if (as->realign)
+ emit_sjcc(as, CC_P, as->mcp);
+ else
+ emit_jcc(as, CC_P, as->mcp);
+ }
+ }
+ emit_mrm(as, XO_UCOMISD, left, right);
+ } else if (!(irt_isstr(ir->t) && (cc & 0xe) != CC_E)) {
+ IRRef lref = ir->op1, rref = ir->op2;
+ IROp leftop = (IROp)(IR(lref)->o);
+ lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
+ /* Swap constants (only for ABC) and fusable loads to the right. */
+ if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
+ if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */
+ else if ((cc & 0xa) == 0x2) cc ^= 5; /* A <-> B, AE <-> BE */
+ lref = ir->op2; rref = ir->op1;
+ }
+ if (irref_isk(rref)) {
+ IRIns *irl = IR(lref);
+ int32_t imm = IR(rref)->i;
+ /* Check whether we can use test ins. Not for unsigned, since CF=0. */
+ int usetest = (imm == 0 && (cc & 0xa) != 0x2);
+ if (usetest && irl->o == IR_BAND && irl+1 == ir && !ra_used(irl)) {
+ /* Combine comp(BAND(ref, r/imm), 0) into test mrm, r/imm. 
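E.g. a guard on band(x, 7) == 0 becomes a single test instruction feeding the guard, instead of an and plus a cmp.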
*/ + Reg right, left = RID_NONE; + RegSet allow = RSET_GPR; + if (!irref_isk(irl->op2)) { + left = ra_alloc1(as, irl->op2, allow); + rset_clear(allow, left); + } + right = asm_fuseload(as, irl->op1, allow); + asm_guardcc(as, cc); + if (irref_isk(irl->op2)) { + emit_i32(as, IR(irl->op2)->i); + emit_mrm(as, XO_GROUP3, XOg_TEST, right); + } else { + emit_mrm(as, XO_TEST, left, right); + } + } else { + Reg left; + if (opisfusableload((IROp)irl->o) && + ((irt_isi8(irl->t) && checki8(imm)) || + (irt_isu8(irl->t) && checku8(imm)))) { + /* Only the IRT_INT case is fused by asm_fuseload. The IRT_I8/IRT_U8 + ** loads are handled here. The IRT_I16/IRT_U16 loads should never be + ** fused, since cmp word [mem], imm16 has a length-changing prefix. + */ + IRType1 origt = irl->t; /* Temporarily flip types. */ + irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT; + left = asm_fuseload(as, lref, RSET_GPR); + irl->t = origt; + if (left == RID_MRM) { /* Fusion succeeded? */ + asm_guardcc(as, cc); + emit_i8(as, imm); + emit_mrm(as, XO_ARITHib, XOg_CMP, RID_MRM); + return; + } /* Otherwise handle register case as usual. */ + } else { + left = asm_fuseload(as, lref, RSET_GPR); + } + asm_guardcc(as, cc); + if (usetest && left != RID_MRM) { + /* Use test r,r instead of cmp r,0. */ + if (irl+1 == ir) /* Referencing previous ins? */ + as->testmcp = as->mcp; /* Set flag to drop test r,r if possible. */ + emit_rr(as, XO_TEST, left, left); + } else { + x86Op xo; + if (checki8(imm)) { + emit_i8(as, imm); + xo = XO_ARITHi8; + } else { + emit_i32(as, imm); + xo = XO_ARITHi; + } + emit_mrm(as, xo, XOg_CMP, left); + } + } + } else { + Reg left = ra_alloc1(as, lref, RSET_GPR); + Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left)); + asm_guardcc(as, cc); + emit_mrm(as, XO_CMP, left, right); + } + } else { /* Handle ordered string compares. */ + RegSet allow = RSET_GPR; + /* This assumes lj_str_cmp never uses any SSE registers. */ + ra_evictset(as, (RSET_SCRATCH & RSET_GPR)); + asm_guardcc(as, cc); + emit_rr(as, XO_TEST, RID_RET, RID_RET); + emit_call(as, lj_str_cmp); /* int32_t lj_str_cmp(GCstr *a, GCstr *b) */ + if (irref_isk(ir->op1)) { + emit_setargi(as, 1, IR(ir->op1)->i); + } else { + Reg left = ra_alloc1(as, ir->op1, allow); + rset_clear(allow, left); + emit_setargr(as, 1, left); + } + if (irref_isk(ir->op2)) { + emit_setargi(as, 2, IR(ir->op2)->i); + } else { + Reg right = ra_alloc1(as, ir->op2, allow); + emit_setargr(as, 2, right); + } + } +} + +#define asm_comp(as, ir, ci, cf, cu) \ + asm_comp_(as, ir, (ci)+((cf)<<4)+(cu)) + +/* -- GC handling --------------------------------------------------------- */ + +/* Sync all live GC values to Lua stack slots. */ +static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow) +{ + IRRef2 *map = &as->T->snapmap[snap->mapofs]; + BCReg s, nslots = snap->nslots; + for (s = 0; s < nslots; s++) { + IRRef ref = snap_ref(map[s]); + if (!irref_isk(ref)) { + IRIns *ir = IR(ref); + if (ir->o == IR_FRAME) { + /* NYI: sync the frame, bump base, set topslot, clear new slots. */ + lj_trace_err(as->J, LJ_TRERR_NYIGCF); + } else if (irt_isgcv(ir->t) && + !(ir->o == IR_SLOAD && ir->op1 < nslots && map[ir->op1] == 0)) { + Reg src = ra_alloc1(as, ref, allow); + int32_t ofs = 8*(int32_t)(s-1); + emit_movtomro(as, src, base, ofs); + emit_movmroi(as, base, ofs+4, irt_toitype(ir->t)); + checkmclim(as); + } + } + } +} + +/* Check GC threshold and do one or more GC steps. 
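The fast path, reading the backwards-emitted code bottom-up, loads gc.total, compares it against gc.threshold and branches below (CC_B) around the lj_gc_step_jit call.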
*/ +static void asm_gc_check(ASMState *as, SnapShot *snap) +{ + MCLabel l_end; + const BCIns *pc; + Reg tmp, base; + RegSet drop = RSET_SCRATCH; + /* Must evict BASE because the stack may be reallocated by the GC. */ + if (ra_hasreg(IR(REF_BASE)->r)) + drop |= RID2RSET(IR(REF_BASE)->r); + ra_evictset(as, drop); + base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_GPR, RID_RET)); + l_end = emit_label(as); + /* void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) */ + emit_call(as, lj_gc_step_jit); + emit_movtomro(as, base, RID_RET, offsetof(lua_State, base)); + emit_setargr(as, 1, RID_RET); + emit_setargi(as, 3, (int32_t)as->gcsteps); + emit_getgl(as, RID_RET, jit_L); + pc = (const BCIns *)(uintptr_t)as->T->snapmap[snap->mapofs+snap->nslots]; + emit_setargp(as, 2, pc); + asm_gc_sync(as, snap, base, rset_exclude(RSET_SCRATCH & RSET_GPR, base)); + if (as->curins == as->loopref) /* BASE gets restored by LOOP anyway. */ + ra_restore(as, REF_BASE); /* Better do it inside the slow path. */ + /* Jump around GC step if GC total < GC threshold. */ + tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR); + emit_sjcc(as, CC_B, l_end); + emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold); + emit_getgl(as, tmp, gc.total); + as->gcsteps = 0; + checkmclim(as); +} + +/* -- PHI and loop handling ----------------------------------------------- */ + +/* Break a PHI cycle by renaming to a free register (evict if needed). */ +static void asm_phi_break(ASMState *as, RegSet blocked, RegSet blockedby, + RegSet allow) +{ + RegSet candidates = blocked & allow; + if (candidates) { /* If this register file has candidates. */ + /* Note: the set for ra_pick cannot be empty, since each register file + ** has some registers never allocated to PHIs. + */ + Reg down, up = ra_pick(as, ~blocked & allow); /* Get a free register. */ + if (candidates & ~blockedby) /* Optimize shifts, else it's a cycle. */ + candidates = candidates & ~blockedby; + down = rset_picktop(candidates); /* Pick candidate PHI register. */ + ra_rename(as, down, up); /* And rename it to the free register. */ + } +} + +/* PHI register shuffling. +** +** The allocator tries hard to preserve PHI register assignments across +** the loop body. Most of the time this loop does nothing, since there +** are no register mismatches. +** +** If a register mismatch is detected and ... +** - the register is currently free: rename it. +** - the register is blocked by an invariant: restore/remat and rename it. +** - Otherwise the register is used by another PHI, so mark it as blocked. +** +** The renames are order-sensitive, so just retry the loop if a register +** is marked as blocked, but has been freed in the meantime. A cycle is +** detected if all of the blocked registers are allocated. To break the +** cycle rename one of them to a free register and retry. +** +** Note that PHI spill slots are kept in sync and don't need to be shuffled. +*/ +static void asm_phi_shuffle(ASMState *as) +{ + RegSet work; + + /* Find and resolve PHI register mismatches. */ + for (;;) { + RegSet blocked = RSET_EMPTY; + RegSet blockedby = RSET_EMPTY; + RegSet phiset = as->phiset; + while (phiset) { /* Check all left PHI operand registers. */ + Reg r = rset_picktop(phiset); + IRIns *irl = IR(as->phireg[r]); + Reg left = irl->r; + if (r != left) { /* Mismatch? */ + if (!rset_test(as->freeset, r)) { /* PHI register blocked? */ + IRRef ref = regcost_ref(as->cost[r]); + if (irt_ismarked(IR(ref)->t)) { /* Blocked by other PHI (w/reg)? 
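Leave it for a later iteration and record which register blocks it, so a cycle can be broken if nothing frees up.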
*/ + rset_set(blocked, r); + if (ra_hasreg(left)) + rset_set(blockedby, left); + left = RID_NONE; + } else { /* Otherwise grab register from invariant. */ + ra_restore(as, ref); + checkmclim(as); + } + } + if (ra_hasreg(left)) { + ra_rename(as, left, r); + checkmclim(as); + } + } + rset_clear(phiset, r); + } + if (!blocked) break; /* Finished. */ + if (!(as->freeset & blocked)) { /* Break cycles if none are free. */ + asm_phi_break(as, blocked, blockedby, RSET_GPR); + asm_phi_break(as, blocked, blockedby, RSET_FPR); + checkmclim(as); + } /* Else retry some more renames. */ + } + + /* Restore/remat invariants whose registers are modified inside the loop. */ + work = as->modset & ~(as->freeset | as->phiset); + while (work) { + Reg r = rset_picktop(work); + ra_restore(as, regcost_ref(as->cost[r])); + rset_clear(work, r); + checkmclim(as); + } + + /* Allocate and save all unsaved PHI regs and clear marks. */ + work = as->phiset; + while (work) { + Reg r = rset_picktop(work); + IRRef lref = as->phireg[r]; + IRIns *ir = IR(lref); + if (ra_hasspill(ir->s)) { /* Left PHI gained a spill slot? */ + irt_clearmark(ir->t); /* Handled here, so clear marker now. */ + ra_alloc1(as, lref, RID2RSET(r)); + ra_save(as, ir, r); /* Save to spill slot inside the loop. */ + checkmclim(as); + } + rset_clear(work, r); + } +} + +/* Emit renames for left PHIs which are only spilled outside the loop. */ +static void asm_phi_fixup(ASMState *as) +{ + RegSet work = as->phiset; + while (work) { + Reg r = rset_picktop(work); + IRRef lref = as->phireg[r]; + IRIns *ir = IR(lref); + /* Left PHI gained a spill slot before the loop? */ + if (irt_ismarked(ir->t) && ra_hasspill(ir->s)) { + IRRef ren; + lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno); + ren = tref_ref(lj_ir_emit(as->J)); + as->ir = as->T->ir; /* The IR may have been reallocated. */ + IR(ren)->r = (uint8_t)r; + IR(ren)->s = SPS_NONE; + } + irt_clearmark(ir->t); /* Always clear marker. */ + rset_clear(work, r); + } +} + +/* Setup right PHI reference. */ +static void asm_phi(ASMState *as, IRIns *ir) +{ + RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; + RegSet afree = (as->freeset & allow); + IRIns *irl = IR(ir->op1); + IRIns *irr = IR(ir->op2); + /* Spill slot shuffling is not implemented yet (but rarely needed). */ + if (ra_hasspill(irl->s) || ra_hasspill(irr->s)) + lj_trace_err(as->J, LJ_TRERR_NYIPHI); + /* Leave at least one register free for non-PHIs (and PHI cycle breaking). */ + if ((afree & (afree-1))) { /* Two or more free registers? */ + Reg r; + if (ra_noreg(irr->r)) { /* Get a register for the right PHI. */ + r = ra_allocref(as, ir->op2, allow); + } else { /* Duplicate right PHI, need a copy (rare). */ + r = ra_scratch(as, allow); + emit_movrr(as, r, irr->r); + } + ir->r = (uint8_t)r; + rset_set(as->phiset, r); + as->phireg[r] = (IRRef1)ir->op1; + irt_setmark(irl->t); /* Marks left PHIs _with_ register. */ + if (ra_noreg(irl->r)) + ra_sethint(irl->r, r); /* Set register hint for left PHI. */ + } else { /* Otherwise allocate a spill slot. */ + /* This is overly restrictive, but it triggers only on synthetic code. */ + if (ra_hasreg(irl->r) || ra_hasreg(irr->r)) + lj_trace_err(as->J, LJ_TRERR_NYIPHI); + ra_spill(as, ir); + irl->s = irr->s = ir->s; /* Sync left/right PHI spill slots. */ + } +} + +/* Fixup the loop branch. */ +static void asm_loop_fixup(ASMState *as) +{ + MCode *p = as->mctop; + MCode *target = as->mcp; + if (as->realign) { /* Realigned loops use short jumps. */ + as->realign = NULL; /* Stop another retry. 
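This only runs on the retry pass, where the final alignment is already known.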
*/ + lua_assert(((intptr_t)target & 15) == 0); + if (as->loopinv) { /* Inverted loop branch? */ + p -= 5; + p[0] = XI_JMP; + lua_assert(target - p >= -128); + p[-1] = (MCode)(target - p); /* Patch sjcc. */ + if (as->loopinv == 2) + p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */ + } else { + lua_assert(target - p >= -128); + p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */ + p[-2] = XI_JMPs; + } + } else { + MCode *newloop; + p[-5] = XI_JMP; + if (as->loopinv) { /* Inverted loop branch? */ + /* asm_guardcc already inverted the jcc and patched the jmp. */ + p -= 5; + newloop = target+4; + *(int32_t *)(p-4) = (int32_t)(target - p); /* Patch jcc. */ + if (as->loopinv == 2) { + *(int32_t *)(p-10) = (int32_t)(target - p + 6); /* Patch opt. jp. */ + newloop = target+8; + } + } else { /* Otherwise just patch jmp. */ + *(int32_t *)(p-4) = (int32_t)(target - p); + newloop = target+3; + } + /* Realign small loops and shorten the loop branch. */ + if (newloop >= p - 128) { + as->realign = newloop; /* Force a retry and remember alignment. */ + as->curins = as->stopins; /* Abort asm_trace now. */ + as->T->nins = as->orignins; /* Remove any added renames. */ + } + } +} + +/* Middle part of a loop. */ +static void asm_loop(ASMState *as) +{ + /* LOOP is a guard, so the snapno is up to date. */ + as->loopsnapno = as->snapno; + if (as->gcsteps) + asm_gc_check(as, &as->T->snap[as->loopsnapno]); + /* LOOP marks the transition from the variant to the invariant part. */ + as->testmcp = as->invmcp = NULL; + as->sectref = 0; + if (!neverfuse(as)) as->fuseref = 0; + asm_phi_shuffle(as); + asm_loop_fixup(as); + as->mcloop = as->mcp; + RA_DBGX((as, "===== LOOP =====")); + if (!as->realign) RA_DBG_FLUSH(); +} + +/* -- Head of trace ------------------------------------------------------- */ + +/* Rematerialize all remaining constants in registers. */ +static void asm_const_remat(ASMState *as) +{ + RegSet work = ~as->freeset & RSET_ALL; + while (work) { + Reg r = rset_pickbot(work); + IRRef ref = regcost_ref(as->cost[r]); + if (irref_isk(ref) || ref == REF_BASE) { + ra_rematk(as, IR(ref)); + checkmclim(as); + } + rset_clear(work, r); + } +} + +/* Head of a root trace. */ +static void asm_head_root(ASMState *as) +{ + int32_t spadj; + emit_setgli(as, vmstate, (int32_t)as->J->curtrace); + spadj = sps_adjust(as); + as->T->spadjust = (uint16_t)spadj; + emit_addptr(as, RID_ESP, -spadj); +} + +/* Handle BASE coalescing for a root trace. */ +static void asm_head_base(ASMState *as) +{ + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; + lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); + ra_free(as, r); + if (r != RID_BASE) { + ra_scratch(as, RID2RSET(RID_BASE)); + emit_rr(as, XO_MOV, r, RID_BASE); + } +} + +/* Check Lua stack size for overflow at the start of a side trace. +** Stack overflow is rare, so let the regular exit handling fix this up. +** This is done in the context of the *parent* trace and parent exitno! +*/ +static void asm_checkstack(ASMState *as, RegSet allow) +{ + /* Try to get an unused temp. register, otherwise spill/restore eax. */ + Reg r = allow ? rset_pickbot(allow) : RID_EAX; + emit_jcc(as, CC_B, exitstub_addr(as->J, as->J->exitno)); + if (allow == RSET_EMPTY) /* Restore temp. register. 
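It was spilled to SPS_TEMP1 by the last emit below, which executes first since code is emitted backwards.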
*/ + emit_rmro(as, XO_MOV, r, RID_ESP, sps_scale(SPS_TEMP1)); + emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*as->topslot)); + emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, ptr2addr(&J2G(as->J)->jit_base)); + emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); + emit_getgl(as, r, jit_L); + if (allow == RSET_EMPTY) /* Spill temp. register. */ + emit_rmro(as, XO_MOVto, r, RID_ESP, sps_scale(SPS_TEMP1)); +} + +/* Head of a side trace. +** +** The current simplistic algorithm requires that all slots inherited +** from the parent are live in a register between pass 2 and pass 3. This +** avoids the complexity of stack slot shuffling. But of course this may +** overflow the register set in some cases and cause the dreaded error: +** "NYI: register coalescing too complex". A refined algorithm is needed. +*/ +static void asm_head_side(ASMState *as) +{ + IRRef1 sloadins[RID_MAX]; + RegSet allow = RSET_ALL; /* Inverse of all coalesced registers. */ + RegSet live = RSET_EMPTY; /* Live parent registers. */ + int32_t spadj, spdelta; + int pass2 = 0; + int pass3 = 0; + IRRef i; + + /* Scan all parent SLOADs and collect register dependencies. */ + for (i = as->curins; i > REF_BASE; i--) { + IRIns *ir = IR(i); + lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || + ir->o == IR_FRAME); + if (ir->o == IR_SLOAD) { + RegSP rs = as->parentmap[ir->op1]; + if (ra_hasreg(ir->r)) { + rset_clear(allow, ir->r); + if (ra_hasspill(ir->s)) + ra_save(as, ir, ir->r); + } else if (ra_hasspill(ir->s)) { + irt_setmark(ir->t); + pass2 = 1; + } + if (ir->r == rs) { /* Coalesce matching registers right now. */ + ra_free(as, ir->r); + } else if (ra_hasspill(regsp_spill(rs))) { + if (ra_hasreg(ir->r)) + pass3 = 1; + } else if (ra_used(ir)) { + sloadins[rs] = (IRRef1)i; + rset_set(live, rs); /* Block live parent register. */ + } + } + } + + /* Calculate stack frame adjustment. */ + spadj = sps_adjust(as); + spdelta = spadj - (int32_t)as->parent->spadjust; + if (spdelta < 0) { /* Don't shrink the stack frame. */ + spadj = (int32_t)as->parent->spadjust; + spdelta = 0; + } + as->T->spadjust = (uint16_t)spadj; + + /* Reload spilled target registers. */ + if (pass2) { + for (i = as->curins; i > REF_BASE; i--) { + IRIns *ir = IR(i); + if (irt_ismarked(ir->t)) { + RegSet mask; + Reg r; + RegSP rs; + irt_clearmark(ir->t); + rs = as->parentmap[ir->op1]; + if (!ra_hasspill(regsp_spill(rs))) + ra_sethint(ir->r, rs); /* Hint may be gone, set it again. */ + else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s)) + continue; /* Same spill slot, do nothing. */ + mask = (irt_isnum(ir->t) ? RSET_FPR : RSET_GPR) & allow; + if (mask == RSET_EMPTY) + lj_trace_err(as->J, LJ_TRERR_NYICOAL); + r = ra_allocref(as, i, mask); + ra_save(as, ir, r); + rset_clear(allow, r); + if (r == rs) { /* Coalesce matching registers right now. */ + ra_free(as, r); + rset_clear(live, r); + } else if (ra_hasspill(regsp_spill(rs))) { + pass3 = 1; + } + checkmclim(as); + } + } + } + + /* Store trace number and adjust stack frame relative to the parent. */ + emit_setgli(as, vmstate, (int32_t)as->J->curtrace); + emit_addptr(as, RID_ESP, -spdelta); + + /* Restore target registers from parent spill slots. 
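Pass 3 walks the allocated registers and reloads any whose parent value lives in a stack slot of the (possibly shifted) parent frame.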
*/ + if (pass3) { + RegSet work = ~as->freeset & RSET_ALL; + while (work) { + Reg r = rset_pickbot(work); + IRIns *ir = IR(regcost_ref(as->cost[r])); + RegSP rs = as->parentmap[ir->op1]; + rset_clear(work, r); + if (ra_hasspill(regsp_spill(rs))) { + int32_t ofs = sps_scale(regsp_spill(rs)); + ra_free(as, r); + emit_movrmro(as, r, RID_ESP, ofs); + checkmclim(as); + } + } + } + + /* Shuffle registers to match up target regs with parent regs. */ + for (;;) { + RegSet work; + + /* Repeatedly coalesce free live registers by moving to their target. */ + while ((work = as->freeset & live) != RSET_EMPTY) { + Reg rp = rset_pickbot(work); + IRIns *ir = IR(sloadins[rp]); + rset_clear(live, rp); + rset_clear(allow, rp); + ra_free(as, ir->r); + emit_movrr(as, ir->r, rp); + checkmclim(as); + } + + /* We're done if no live registers remain. */ + if (live == RSET_EMPTY) + break; + + /* Break cycles by renaming one target to a temp. register. */ + if (live & RSET_GPR) { + RegSet tmpset = as->freeset & ~live & allow & RSET_GPR; + if (tmpset == RSET_EMPTY) + lj_trace_err(as->J, LJ_TRERR_NYICOAL); + ra_rename(as, rset_pickbot(live & RSET_GPR), rset_pickbot(tmpset)); + } + if (live & RSET_FPR) { + RegSet tmpset = as->freeset & ~live & allow & RSET_FPR; + if (tmpset == RSET_EMPTY) + lj_trace_err(as->J, LJ_TRERR_NYICOAL); + ra_rename(as, rset_pickbot(live & RSET_FPR), rset_pickbot(tmpset)); + } + checkmclim(as); + /* Continue with coalescing to fix up the broken cycle(s). */ + } + + /* Check Lua stack size if frames have been added. */ + if (as->topslot) + asm_checkstack(as, allow & RSET_GPR); +} + +/* -- Tail of trace ------------------------------------------------------- */ + +/* Sync Lua stack slots to match the last snapshot. +** Note: code generation is backwards, so this is best read bottom-up. +*/ +static void asm_tail_sync(ASMState *as) +{ + SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */ + BCReg s, nslots = snap->nslots; + IRRef2 *map = &as->T->snapmap[snap->mapofs]; + IRRef2 *flinks = map + nslots + snap->nframelinks; + BCReg newbase = 0; + BCReg secondbase = ~(BCReg)0; + BCReg topslot = 0; + + checkmclim(as); + ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); + + /* Must check all frames to find topslot (outer can be larger than inner). */ + for (s = 0; s < nslots; s++) { + IRRef ref = snap_ref(map[s]); + if (!irref_isk(ref)) { + IRIns *ir = IR(ref); + if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { + GCfunc *fn = ir_kfunc(IR(ir->op2)); + if (isluafunc(fn)) { + BCReg fs = s + funcproto(fn)->framesize; + newbase = s; + if (secondbase == ~(BCReg)0) secondbase = s; + if (fs > topslot) topslot = fs; + } + } + } + } + as->topslot = topslot; /* Used in asm_head_side(). */ + + if (as->T->link == TRACE_INTERP) { + /* Setup fixed registers for exit to interpreter. */ + emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); + emit_loadi(as, RID_PC, (int32_t)map[nslots]); + } else if (newbase) { + /* Save modified BASE for linking to trace with higher start frame. */ + emit_setgl(as, RID_BASE, jit_base); + } + + emit_addptr(as, RID_BASE, 8*(int32_t)newbase); + + /* Clear stack slots of newly added frames. */ + if (nslots <= topslot) { + if (nslots < topslot) { + for (s = nslots; s <= topslot; s++) { + emit_movtomro(as, RID_EAX, RID_BASE, 8*(int32_t)s-4); + checkmclim(as); + } + emit_loadi(as, RID_EAX, LJ_TNIL); + } else { + emit_movmroi(as, RID_BASE, 8*(int32_t)nslots-4, LJ_TNIL); + } + } + + /* Store the value of all modified slots to the Lua stack. 
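Numbers are stored with movsd; other values write the 32-bit payload plus the type tag at ofs+4; frame slots store the function and the frame link instead.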
*/ + for (s = 0; s < nslots; s++) { + int32_t ofs = 8*((int32_t)s-1); + IRRef ref = snap_ref(map[s]); + if (ref) { + IRIns *ir = IR(ref); + /* No need to restore readonly slots and unmodified non-parent slots. */ + if (ir->o == IR_SLOAD && ir->op1 == s && + (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) + continue; + if (irt_isnum(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); + } else if (ir->o == IR_FRAME) { + emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2)))); + if (s != 0) /* Do not overwrite link to previous frame. */ + emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks)); + } else { + lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); + if (!irref_isk(ref)) { + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); + emit_movtomro(as, src, RID_BASE, ofs); + } else if (!irt_ispri(ir->t)) { + emit_movmroi(as, RID_BASE, ofs, ir->i); + } + emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); + } + } else if (s > secondbase) { + emit_movmroi(as, RID_BASE, ofs+4, LJ_TNIL); + } + checkmclim(as); + } + lua_assert(map + nslots == flinks-1); +} + +/* Fixup the tail code. */ +static void asm_tail_fixup(ASMState *as, TraceNo lnk) +{ + /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */ + MCode *p = as->mctop; + MCode *target, *q; + int32_t spadj = as->T->spadjust; + if (spadj == 0) { + p -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6; + } else { + MCode *p1; + /* Patch stack adjustment. */ + if (checki8(spadj)) { + p -= 3; + p1 = p-6; + *p1 = (MCode)spadj; + } else { + p1 = p-9; + *(int32_t *)p1 = spadj; + } + if ((as->flags & JIT_F_LEA_AGU)) { + p1[-3] = (MCode)XI_LEA; + p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP); + p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP); + } else { + p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); + p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); + } + } + /* Patch exit branch. */ + target = lnk == TRACE_INTERP ? (MCode *)lj_vm_exit_interp : + as->J->trace[lnk]->mcode; + *(int32_t *)(p-4) = (int32_t)(target - p); + p[-5] = XI_JMP; + /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ + for (q = as->mctop-1; q >= p; q--) + *q = XI_NOP; + as->mctop = p; +} + +/* -- Instruction dispatch ------------------------------------------------ */ + +/* Assemble a single instruction. */ +static void asm_ir(ASMState *as, IRIns *ir) +{ + switch ((IROp)ir->o) { + /* Miscellaneous ops. */ + case IR_LOOP: asm_loop(as); break; + case IR_NOP: break; + case IR_PHI: asm_phi(as, ir); break; + + /* Guarded assertions. */ + case IR_LT: asm_comp(as, ir, CC_GE, CC_AE, VCC_PS); break; + case IR_GE: asm_comp(as, ir, CC_L, CC_B, 0); break; + case IR_LE: asm_comp(as, ir, CC_G, CC_A, VCC_PS); break; + case IR_GT: asm_comp(as, ir, CC_LE, CC_BE, 0); break; + case IR_ULT: asm_comp(as, ir, CC_AE, CC_AE, VCC_U); break; + case IR_UGE: asm_comp(as, ir, CC_B, CC_B, VCC_U|VCC_PS); break; + case IR_ULE: asm_comp(as, ir, CC_A, CC_A, VCC_U); break; + case IR_ABC: + case IR_UGT: asm_comp(as, ir, CC_BE, CC_BE, VCC_U|VCC_PS); break; + + case IR_FRAME: + if (ir->op1 == ir->op2) break; /* No check needed for placeholder. */ + /* fallthrough */ + case IR_EQ: asm_comp(as, ir, CC_NE, CC_NE, VCC_P); break; + case IR_NE: asm_comp(as, ir, CC_E, CC_E, VCC_U|VCC_P); break; + + /* Bit ops. 
*/
+ case IR_BNOT: asm_bitnot(as, ir); break;
+ case IR_BSWAP: asm_bitswap(as, ir); break;
+
+ case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
+ case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
+ case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
+
+ case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
+ case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
+ case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
+ case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
+ case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
+
+ /* Arithmetic ops. */
+ case IR_ADD: asm_add(as, ir); break;
+ case IR_SUB:
+ if (irt_isnum(ir->t))
+ asm_fparith(as, ir, XO_SUBSD);
+ else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
+ asm_intarith(as, ir, XOg_SUB);
+ break;
+ case IR_MUL: asm_fparith(as, ir, XO_MULSD); break;
+ case IR_DIV: asm_fparith(as, ir, XO_DIVSD); break;
+
+ case IR_NEG: asm_fparith(as, ir, XO_XORPS); break;
+ case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
+
+ case IR_MIN: asm_fparith(as, ir, XO_MINSD); break;
+ case IR_MAX: asm_fparith(as, ir, XO_MAXSD); break;
+
+ case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: case IR_POWI:
+ asm_fpmath(as, ir);
+ break;
+
+ /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
+ case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
+ case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
+
+ /* Memory references. */
+ case IR_AREF: asm_aref(as, ir); break;
+ case IR_HREF: asm_href(as, ir); break;
+ case IR_HREFK: asm_hrefk(as, ir); break;
+ case IR_NEWREF: asm_newref(as, ir); break;
+ case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+ case IR_FREF: asm_fref(as, ir); break;
+ case IR_STRREF: asm_strref(as, ir); break;
+
+ /* Loads and stores. */
+ case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break;
+ case IR_FLOAD: asm_fload(as, ir); break;
+ case IR_SLOAD: asm_sload(as, ir); break;
+ case IR_XLOAD: asm_xload(as, ir); break;
+
+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+ case IR_FSTORE: asm_fstore(as, ir); break;
+
+ /* String ops. */
+ case IR_SNEW: asm_snew(as, ir); break;
+
+ /* Table ops. */
+ case IR_TNEW: asm_tnew(as, ir); break;
+ case IR_TDUP: asm_tdup(as, ir); break;
+ case IR_TLEN: asm_tlen(as, ir); break;
+ case IR_TBAR: asm_tbar(as, ir); break;
+ case IR_OBAR: asm_obar(as, ir); break;
+
+ /* Type conversions. */
+ case IR_TONUM: asm_tonum(as, ir); break;
+ case IR_TOINT:
+ if (irt_isguard(ir->t))
+ asm_tointg(as, ir, ra_alloc1(as, ir->op1, RSET_FPR));
+ else
+ asm_toint(as, ir);
+ break;
+ case IR_TOBIT: asm_tobit(as, ir); break;
+ case IR_TOSTR: asm_tostr(as, ir); break;
+ case IR_STRTO: asm_strto(as, ir); break;
+
+ default:
+ setintV(&as->J->errinfo, ir->o);
+ lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+ break;
+ }
+}
+
+/* Assemble a trace in linear backwards order. */
+static void asm_trace(ASMState *as)
+{
+ for (as->curins--; as->curins > as->stopins; as->curins--) {
+ IRIns *ir = IR(as->curins);
+ if (irt_isguard(ir->t))
+ asm_snap_prep(as);
+ else if (!ra_used(ir) && !irm_sideeff(lj_ir_mode[ir->o]) &&
+ (as->flags & JIT_F_OPT_DCE))
+ continue; /* Dead-code elimination can be soooo easy. */
+ RA_DBG_REF();
+ checkmclim(as);
+ asm_ir(as, ir);
+ }
+}
+
+/* -- Trace setup --------------------------------------------------------- */
+
+/* Clear reg/sp for all instructions and add register hints. */
+static void asm_setup_regsp(ASMState *as, Trace *T)
+{
+ IRRef i, nins;
+ int inloop;
+
+ /* Clear reg/sp for constants. 
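Constant references live below REF_BIAS, so this covers T->nk..REF_BIAS-1.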
*/
+ for (i = T->nk; i < REF_BIAS; i++)
+ IR(i)->prev = REGSP_INIT;
+
+ /* REF_BASE is used for implicit references to the BASE register. */
+ IR(REF_BASE)->prev = REGSP_HINT(RID_BASE);
+
+ nins = T->nins;
+ if (IR(nins-1)->o == IR_RENAME) {
+ do { nins--; } while (IR(nins-1)->o == IR_RENAME);
+ T->nins = nins; /* Remove any renames left over from ASM restart. */
+ }
+ as->snaprename = nins;
+ as->snapref = nins;
+ as->snapno = T->nsnap;
+
+ as->stopins = REF_BASE;
+ as->orignins = nins;
+ as->curins = nins;
+
+ inloop = 0;
+ for (i = REF_FIRST; i < nins; i++) {
+ IRIns *ir = IR(i);
+ switch (ir->o) {
+ case IR_LOOP:
+ inloop = 1;
+ break;
+ /* Set hints for slot loads from a parent trace. */
+ case IR_SLOAD:
+ if ((ir->op2 & IRSLOAD_PARENT)) {
+ RegSP rs = as->parentmap[ir->op1];
+ lua_assert(regsp_used(rs));
+ as->stopins = i;
+ if (!ra_hasspill(regsp_spill(rs)) && ra_hasreg(regsp_reg(rs))) {
+ ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs));
+ continue;
+ }
+ }
+ break;
+ case IR_FRAME:
+ if (i == as->stopins+1 && ir->op1 == ir->op2)
+ as->stopins++;
+ break;
+ /* C calls evict all scratch regs and return results in RID_RET. */
+ case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TLEN: case IR_TOSTR:
+ case IR_NEWREF:
+ ir->prev = REGSP_HINT(RID_RET);
+ if (inloop)
+ as->modset = RSET_SCRATCH;
+ continue;
+ case IR_STRTO: case IR_OBAR:
+ if (inloop)
+ as->modset = RSET_SCRATCH;
+ break;
+ /* Ordered string compares evict all integer scratch registers. */
+ case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+ if (irt_isstr(ir->t) && inloop)
+ as->modset |= (RSET_SCRATCH & RSET_GPR);
+ break;
+ /* Non-constant shift counts need to be in RID_ECX. */
+ case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+ if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r))
+ IR(ir->op2)->r = REGSP_HINT(RID_ECX);
+ break;
+ /* Do not propagate hints across type conversions. */
+ case IR_TONUM: case IR_TOINT: case IR_TOBIT:
+ break;
+ default:
+ /* Propagate hints across likely 'op reg, imm' or 'op reg'. */
+ if (irref_isk(ir->op2) && !irref_isk(ir->op1)) {
+ ir->prev = IR(ir->op1)->prev;
+ continue;
+ }
+ break;
+ }
+ ir->prev = REGSP_INIT;
+ }
+}
+
+/* -- Assembler core ------------------------------------------------------ */
+
+/* Define this if you want to run LuaJIT with Valgrind. */
+#ifdef LUAJIT_USE_VALGRIND
+#include <valgrind/valgrind.h>
+#define VG_INVALIDATE(p, sz) VALGRIND_DISCARD_TRANSLATIONS(p, sz)
+#else
+#define VG_INVALIDATE(p, sz) ((void)0)
+#endif
+
+/* Assemble a trace. */
+void lj_asm_trace(jit_State *J, Trace *T)
+{
+ ASMState as_;
+ ASMState *as = &as_;
+
+ /* Setup initial state. Copy some fields to reduce indirections. */
+ as->J = J;
+ as->T = T;
+ as->ir = T->ir;
+ as->flags = J->flags;
+ as->loopref = J->loopref;
+ as->realign = NULL;
+ as->loopinv = 0;
+ if (J->parent) {
+ as->parent = J->trace[J->parent];
+ lj_snap_regspmap(as->parentmap, as->parent, J->exitno);
+ } else {
+ as->parent = NULL;
+ }
+ as->mctop = lj_mcode_reserve(J, &as->mcbot); /* Reserve MCode memory. */
+ as->mcp = as->mctop;
+ as->mclim = as->mcbot + MCLIM_REDZONE;
+ asm_exitstub_setup(as, T->nsnap);
+
+ do {
+ as->mcp = as->mctop;
+ as->curins = T->nins;
+ RA_DBG_START();
+ RA_DBGX((as, "===== STOP ====="));
+ /* Realign and leave room for backwards loop branch or exit branch. */
+ if (as->realign) {
+ int i = ((int)(intptr_t)as->realign) & 15;
+ MCode *p = as->mctop;
+ /* Fill unused mcode tail with NOPs to make the prefetcher happy. 
*/ + while (i-- > 0) + *--p = XI_NOP; + as->mctop = p; + as->mcp = p - (as->loopinv ? 5 : 2); /* Space for short/near jmp. */ + } else { + as->mcp = as->mctop - 5; /* Space for exit branch (near jmp). */ + } + as->invmcp = as->mcp; + as->mcloop = NULL; + as->testmcp = NULL; + as->topslot = 0; + as->gcsteps = 0; + as->sectref = as->loopref; + as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; + + /* Setup register allocation. */ + ra_setup(as); + asm_setup_regsp(as, T); + + if (!as->loopref) { + /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ + as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6; + as->invmcp = NULL; + asm_tail_sync(as); + } + asm_trace(as); + } while (as->realign); /* Retry in case the MCode needs to be realigned. */ + + RA_DBG_REF(); + checkmclim(as); + if (as->gcsteps) + asm_gc_check(as, &as->T->snap[0]); + if (!J->parent) + asm_head_base(as); + asm_const_remat(as); + if (J->parent) + asm_head_side(as); + else + asm_head_root(as); + asm_phi_fixup(as); + + RA_DBGX((as, "===== START ====")); + RA_DBG_FLUSH(); + if (as->freeset != RSET_ALL) + lj_trace_err(as->J, LJ_TRERR_BADRA); /* Ouch! Should never happen. */ + + /* Set trace entry point before fixing up tail to allow link to self. */ + T->mcode = as->mcp; + T->mcloop = as->mcloop ? (MSize)(as->mcloop - as->mcp) : 0; + if (!as->loopref) + asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ + T->szmcode = (MSize)(as->mctop - as->mcp); + VG_INVALIDATE(T->mcode, T->szmcode); +} + +/* Patch exit jumps of existing machine code to a new target. */ +void lj_asm_patchexit(jit_State *J, Trace *T, ExitNo exitno, MCode *target) +{ + MCode *p = T->mcode; + MCode *mcarea = lj_mcode_patch(J, p, 0); + MSize len = T->szmcode; + MCode *px = exitstub_addr(J, exitno) - 6; + MCode *pe = p+len-6; + if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) + *(int32_t *)(p+len-4) = (int32_t)(target - (p+len)); + for (; p < pe; p++) { + if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) { + *(int32_t *)(p+2) = (int32_t)(target - (p+6)); + p += 5; + } + } + lj_mcode_patch(J, mcarea, 1); + VG_INVALIDATE(T->mcode, T->szmcode); +} + +#undef IR + +#endif diff --git a/src/lj_asm.h b/src/lj_asm.h new file mode 100644 index 0000000000..84122b435b --- /dev/null +++ b/src/lj_asm.h @@ -0,0 +1,17 @@ +/* +** IR assembler (SSA IR -> machine code). +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_ASM_H +#define _LJ_ASM_H + +#include "lj_jit.h" + +#if LJ_HASJIT +LJ_FUNC void lj_asm_trace(jit_State *J, Trace *T); +LJ_FUNC void lj_asm_patchexit(jit_State *J, Trace *T, ExitNo exitno, + MCode *target); +#endif + +#endif diff --git a/src/lj_bc.c b/src/lj_bc.c new file mode 100644 index 0000000000..79846325c8 --- /dev/null +++ b/src/lj_bc.c @@ -0,0 +1,17 @@ +/* +** Bytecode instruction modes. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_bc_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_bc.h" + +/* Bytecode instruction modes. */ +LJ_DATADEF const uint16_t lj_bc_mode[BC__MAX+1] = { +BCDEF(BCMODE) + 0 +}; + diff --git a/src/lj_bc.h b/src/lj_bc.h new file mode 100644 index 0000000000..d906011c4c --- /dev/null +++ b/src/lj_bc.h @@ -0,0 +1,235 @@ +/* +** Bytecode instruction format. +** Copyright (C) 2005-2009 Mike Pall. 
See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_BC_H +#define _LJ_BC_H + +#include "lj_def.h" +#include "lj_arch.h" + +/* Bytecode instruction format, 32 bit wide, fields of 8 or 16 bit: +** +** +----+----+----+----+ +** | B | C | A | OP | Format ABC +** +----+----+----+----+ +** | D | A | OP | Format AD +** +-------------------- +** MSB LSB +** +** In-memory instructions are always stored in host byte order. +*/ + +/* Operand ranges and related constants. */ +#define BCMAX_A 0xff +#define BCMAX_B 0xff +#define BCMAX_C 0xff +#define BCMAX_D 0xffff +#define BCBIAS_J 0x8000 +#define NO_REG BCMAX_A +#define NO_JMP (~(BCPos)0) + +/* Macros to get instruction fields. */ +#define bc_op(i) (cast(BCOp, (i)&0xff)) +#define bc_a(i) (cast(BCReg, ((i)>>8)&0xff)) +#define bc_b(i) (cast(BCReg, (i)>>24)) +#define bc_c(i) (cast(BCReg, ((i)>>16)&0xff)) +#define bc_d(i) (cast(BCReg, (i)>>16)) +#define bc_j(i) ((ptrdiff_t)bc_d(i)-BCBIAS_J) + +/* Macros to set instruction fields. */ +#define setbc_byte(p, x, ofs) \ + ((uint8_t *)(p))[LJ_ENDIAN_SELECT(ofs, 3-ofs)] = cast_byte(x) +#define setbc_op(p, x) setbc_byte(p, (x), 0) +#define setbc_a(p, x) setbc_byte(p, (x), 1) +#define setbc_b(p, x) setbc_byte(p, (x), 3) +#define setbc_c(p, x) setbc_byte(p, (x), 2) +#define setbc_d(p, x) \ + ((uint16_t *)(p))[LJ_ENDIAN_SELECT(1, 0)] = cast(uint16_t, (x)) +#define setbc_j(p, x) setbc_d(p, (BCPos)((int32_t)(x)+BCBIAS_J)) + +/* Macros to compose instructions. */ +#define BCINS_ABC(o, a, b, c) \ + (cast(BCIns, o)|(cast(BCIns, a)<<8)|\ + (cast(BCIns, b)<<24)|(cast(BCIns, c)<<16)) +#define BCINS_AD(o, a, d) \ + (cast(BCIns, o)|(cast(BCIns, a)<<8)|(cast(BCIns, d)<<16)) +#define BCINS_AJ(o, a, j) BCINS_AD(o, a, (BCPos)((int32_t)(j)+BCBIAS_J)) + +/* Bytecode instruction definition. Order matters, see below. +** +** (name, filler, Amode, Bmode, Cmode or Dmode, metamethod) +** +** The opcode name suffixes specify the type for RB/RC or RD: +** V = variable slot +** S = string const +** N = number const +** P = primitive type (~itype) +** B = unsigned byte literal +** M = multiple args/results +*/ +#define BCDEF(_) \ + /* Comparison ops. ORDER OPR. */ \ + _(ISLT, var, ___, var, lt) \ + _(ISGE, var, ___, var, lt) \ + _(ISLE, var, ___, var, le) \ + _(ISGT, var, ___, var, le) \ + \ + _(ISEQV, var, ___, var, eq) \ + _(ISNEV, var, ___, var, eq) \ + _(ISEQS, var, ___, str, eq) \ + _(ISNES, var, ___, str, eq) \ + _(ISEQN, var, ___, num, eq) \ + _(ISNEN, var, ___, num, eq) \ + _(ISEQP, var, ___, pri, eq) \ + _(ISNEP, var, ___, pri, eq) \ + \ + /* Unary test and copy ops. */ \ + _(ISTC, dst, ___, var, ___) \ + _(ISFC, dst, ___, var, ___) \ + _(IST, ___, ___, var, ___) \ + _(ISF, ___, ___, var, ___) \ + \ + /* Unary ops. */ \ + _(MOV, dst, ___, var, ___) \ + _(NOT, dst, ___, var, ___) \ + _(UNM, dst, ___, var, unm) \ + _(LEN, dst, ___, var, len) \ + \ + /* Binary ops. ORDER OPR. VV last, POW must be next. */ \ + _(ADDVN, dst, var, num, add) \ + _(SUBVN, dst, var, num, sub) \ + _(MULVN, dst, var, num, mul) \ + _(DIVVN, dst, var, num, div) \ + _(MODVN, dst, var, num, mod) \ + \ + _(ADDNV, dst, var, num, add) \ + _(SUBNV, dst, var, num, sub) \ + _(MULNV, dst, var, num, mul) \ + _(DIVNV, dst, var, num, div) \ + _(MODNV, dst, var, num, mod) \ + \ + _(ADDVV, dst, var, var, add) \ + _(SUBVV, dst, var, var, sub) \ + _(MULVV, dst, var, var, mul) \ + _(DIVVV, dst, var, var, div) \ + _(MODVV, dst, var, var, mod) \ + \ + _(POW, dst, var, var, pow) \ + _(CAT, dst, rbase, rbase, concat) \ + \ + /* Constant ops. 
*/ \ + _(KSTR, dst, ___, str, ___) \ + _(KSHORT, dst, ___, lits, ___) \ + _(KNUM, dst, ___, num, ___) \ + _(KPRI, dst, ___, pri, ___) \ + _(KNIL, base, ___, base, ___) \ + \ + /* Upvalue and function ops. */ \ + _(UGET, dst, ___, uv, ___) \ + _(USETV, uv, ___, var, ___) \ + _(USETS, uv, ___, str, ___) \ + _(USETN, uv, ___, num, ___) \ + _(USETP, uv, ___, pri, ___) \ + _(UCLO, rbase, ___, jump, ___) \ + _(FNEW, dst, ___, func, gc) \ + \ + /* Table ops. */ \ + _(TNEW, dst, ___, lit, gc) \ + _(TDUP, dst, ___, tab, gc) \ + _(GGET, dst, ___, str, index) \ + _(GSET, var, ___, str, newindex) \ + _(TGETV, dst, var, var, index) \ + _(TGETS, dst, var, str, index) \ + _(TGETB, dst, var, lit, index) \ + _(TSETV, var, var, var, newindex) \ + _(TSETS, var, var, str, newindex) \ + _(TSETB, var, var, lit, newindex) \ + _(TSETM, base, ___, num, newindex) \ + \ + /* Calls and vararg handling. T = tail call. */ \ + _(CALLM, base, lit, lit, call) \ + _(CALL, base, lit, lit, call) \ + _(CALLMT, base, ___, lit, call) \ + _(CALLT, base, ___, lit, call) \ + _(ITERC, base, lit, lit, call) \ + _(VARG, base, lit, lit, ___) \ + \ + /* Returns. */ \ + _(RETM, base, ___, lit, ___) \ + _(RET, rbase, ___, lit, ___) \ + _(RET0, rbase, ___, lit, ___) \ + _(RET1, rbase, ___, lit, ___) \ + \ + /* Loops and branches. I/J = interp/JIT, I/C/L = init/call/loop. */ \ + _(FORI, base, ___, jump, ___) \ + _(JFORI, base, ___, jump, ___) \ + \ + _(FORL, base, ___, jump, ___) \ + _(IFORL, base, ___, jump, ___) \ + _(JFORL, base, ___, lit, ___) \ + \ + _(ITERL, base, ___, jump, ___) \ + _(IITERL, base, ___, jump, ___) \ + _(JITERL, base, ___, lit, ___) \ + \ + _(LOOP, rbase, ___, jump, ___) \ + _(ILOOP, rbase, ___, jump, ___) \ + _(JLOOP, rbase, ___, lit, ___) \ + \ + _(JMP, rbase, ___, jump, ___) + +/* Bytecode opcode numbers. */ +typedef enum { +#define BCENUM(name, ma, mb, mc, mt) BC_##name, +BCDEF(BCENUM) +#undef BCENUM + BC__MAX +} BCOp; + +LJ_STATIC_ASSERT((int)BC_ISEQV+1 == (int)BC_ISNEV); +LJ_STATIC_ASSERT(((int)BC_ISEQV^1) == (int)BC_ISNEV); +LJ_STATIC_ASSERT(((int)BC_ISEQS^1) == (int)BC_ISNES); +LJ_STATIC_ASSERT(((int)BC_ISEQN^1) == (int)BC_ISNEN); +LJ_STATIC_ASSERT(((int)BC_ISEQP^1) == (int)BC_ISNEP); +LJ_STATIC_ASSERT(((int)BC_ISLT^1) == (int)BC_ISGE); +LJ_STATIC_ASSERT(((int)BC_ISLE^1) == (int)BC_ISGT); +LJ_STATIC_ASSERT(((int)BC_ISLT^3) == (int)BC_ISGT); +LJ_STATIC_ASSERT((int)BC_IST-(int)BC_ISTC == (int)BC_ISF-(int)BC_ISFC); +LJ_STATIC_ASSERT((int)BC_CALLT-(int)BC_CALL == (int)BC_CALLMT-(int)BC_CALLM); +LJ_STATIC_ASSERT((int)BC_CALLMT + 1 == (int)BC_CALLT); +LJ_STATIC_ASSERT((int)BC_RETM + 1 == (int)BC_RET); +LJ_STATIC_ASSERT((int)BC_FORL + 1 == (int)BC_IFORL); +LJ_STATIC_ASSERT((int)BC_FORL + 2 == (int)BC_JFORL); +LJ_STATIC_ASSERT((int)BC_ITERL + 1 == (int)BC_IITERL); +LJ_STATIC_ASSERT((int)BC_ITERL + 2 == (int)BC_JITERL); +LJ_STATIC_ASSERT((int)BC_LOOP + 1 == (int)BC_ILOOP); +LJ_STATIC_ASSERT((int)BC_LOOP + 2 == (int)BC_JLOOP); + +/* Stack slots used by FORI/FORL, relative to operand A. */ +enum { + FORL_IDX, FORL_STOP, FORL_STEP, FORL_EXT +}; + +/* Bytecode operand modes. 
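Editor's note (not part of the original patch): the 32-bit instruction layout described at the top of lj_bc.h is easiest to see in isolation. Below is a minimal, self-contained sketch of composing and decoding an ABC-format instruction. The opcode value 0x20 and the field values are invented for illustration, and ins_abc() is a hypothetical helper mirroring the BCINS_ABC and bc_* macros above.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t BCIns;

/* Compose an ABC-format instruction: OP in bits 0-7, A in 8-15, C in 16-23, B in 24-31. */
static BCIns ins_abc(uint32_t op, uint32_t a, uint32_t b, uint32_t c)
{
  return op | (a << 8) | (b << 24) | (c << 16);
}

int main(void)
{
  BCIns i = ins_abc(0x20, 3, 1, 2);  /* hypothetical opcode 0x20, A=3, B=1, C=2 */
  /* Decode with the same shifts the bc_op/bc_a/bc_b/bc_c macros use. */
  printf("op=%u a=%u b=%u c=%u\n",
         (unsigned)(i & 0xff), (unsigned)((i >> 8) & 0xff),
         (unsigned)(i >> 24), (unsigned)((i >> 16) & 0xff));
  return 0;  /* prints: op=32 a=3 b=1 c=2 */
}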
ORDER BCMode */ +typedef enum { + BCMnone, BCMdst, BCMbase, BCMvar, BCMrbase, BCMuv, /* Mode A must be <= 7 */ + BCMlit, BCMlits, BCMpri, BCMnum, BCMstr, BCMtab, BCMfunc, BCMjump, + BCM_max +} BCMode; +#define BCM___ BCMnone + +#define bcmode_a(op) (cast(BCMode, lj_bc_mode[op] & 7)) +#define bcmode_b(op) (cast(BCMode, (lj_bc_mode[op]>>3) & 15)) +#define bcmode_c(op) (cast(BCMode, (lj_bc_mode[op]>>7) & 15)) +#define bcmode_d(op) bcmode_c(op) +#define bcmode_hasd(op) ((lj_bc_mode[op] & (15<<3)) == (BCMnone<<3)) +#define bcmode_mm(op) (cast(MMS, lj_bc_mode[op]>>11)) + +#define BCMODE(name, ma, mb, mc, mm) \ + (BCM##ma|(BCM##mb<<3)|(BCM##mc<<7)|(MM_##mm<<11)), + +LJ_DATA const uint16_t lj_bc_mode[BC__MAX+1]; + +#endif diff --git a/src/lj_ctype.c b/src/lj_ctype.c new file mode 100644 index 0000000000..9f19b8790a --- /dev/null +++ b/src/lj_ctype.c @@ -0,0 +1,44 @@ +/* +** Internal CTYPE replacement. +** Donated to the public domain. +** +** This is intended to replace the problematic libc single-byte NLS functions. +** These just don't make sense anymore with UTF-8 locales becoming the norm +** on POSIX systems. It never worked too well on Windows systems since hardly +** anyone bothered to call setlocale(). +** +** Instead this table is hardcoded for ASCII, except for identifiers. These +** include the characters 128-255, too. This allows for the use of all +** non-ASCII chars as identifiers in the lexer. This is a broad definition, +** but works well in practice for both UTF-8 locales and most single-byte +** locales (such as ISO-8859-*). +** +** If you really need proper ctypes for UTF-8 strings, please use an add-on +** library such as slnunicode: http://luaforge.net/projects/sln/ +*/ + +#define lj_ctype_c +#define LUA_CORE + +#include "lj_ctype.h" + +LJ_DATADEF const uint8_t lj_ctype_bits[257] = { + 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 152,152,152,152,152,152,152,152,152,152, 4, 4, 4, 4, 4, 4, + 4,176,176,176,176,176,176,160,160,160,160,160,160,160,160,160, + 160,160,160,160,160,160,160,160,160,160,160, 4, 4, 4, 4,132, + 4,208,208,208,208,208,208,192,192,192,192,192,192,192,192,192, + 192,192,192,192,192,192,192,192,192,192,192, 4, 4, 4, 4, 1, + 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, + 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, + 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, + 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, + 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, + 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, + 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128, + 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128 +}; + diff --git a/src/lj_ctype.h b/src/lj_ctype.h new file mode 100644 index 0000000000..c4cdff8478 --- /dev/null +++ b/src/lj_ctype.h @@ -0,0 +1,40 @@ +/* +** Internal CTYPE replacement. +** Donated to the public domain. +*/ + +#ifndef _LJ_CTYPE_H +#define _LJ_CTYPE_H + +#include "lj_def.h" + +#define LJ_CTYPE_CNTRL 0x01 +#define LJ_CTYPE_SPACE 0x02 +#define LJ_CTYPE_PUNCT 0x04 +#define LJ_CTYPE_DIGIT 0x08 +#define LJ_CTYPE_XDIGIT 0x10 +#define LJ_CTYPE_UPPER 0x20 +#define LJ_CTYPE_LOWER 0x40 +#define LJ_CTYPE_IDENT 0x80 +#define LJ_CTYPE_ALPHA (LJ_CTYPE_LOWER|LJ_CTYPE_UPPER) +#define LJ_CTYPE_ALNUM (LJ_CTYPE_ALPHA|LJ_CTYPE_DIGIT) + +/* Only pass -1 or 0..255 to these macros. Never pass a signed char! 
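Editor's note (not part of the original patch): a standalone sketch of why the case-conversion macros just below work. It assumes only the bit values defined above: LJ_CTYPE_UPPER is 0x20 and LJ_CTYPE_LOWER is 0x40, so lj_ctype_isupper() yields exactly 32 for an upper-case character and lj_ctype_islower() >> 1 yields exactly 32 for a lower-case one, which is the ASCII distance between the cases. CT_UPPER/CT_LOWER are local stand-ins for the real defines.

#include <stdio.h>

#define CT_UPPER 0x20  /* value of LJ_CTYPE_UPPER above */
#define CT_LOWER 0x40  /* value of LJ_CTYPE_LOWER above */

int main(void)
{
  /* 'a' is lower-case, so the table lookup would return the 0x40 bit. */
  int up = 'a' - (CT_LOWER >> 1);  /* mirrors lj_ctype_toupper: 97 - 32 = 'A' */
  /* 'A' is upper-case, so the table lookup would return the 0x20 bit (== 32). */
  int lo = 'A' + CT_UPPER;         /* mirrors lj_ctype_tolower: 65 + 32 = 'a' */
  printf("%c %c\n", (char)up, (char)lo);  /* prints: A a */
  return 0;
}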
*/ +#define lj_ctype_isa(c, t) (lj_ctype_bits[(c)+1] & t) +#define lj_ctype_iscntrl(c) lj_ctype_isa((c), LJ_CTYPE_CNTRL) +#define lj_ctype_isspace(c) lj_ctype_isa((c), LJ_CTYPE_SPACE) +#define lj_ctype_ispunct(c) lj_ctype_isa((c), LJ_CTYPE_PUNCT) +#define lj_ctype_isdigit(c) lj_ctype_isa((c), LJ_CTYPE_DIGIT) +#define lj_ctype_isxdigit(c) lj_ctype_isa((c), LJ_CTYPE_XDIGIT) +#define lj_ctype_isupper(c) lj_ctype_isa((c), LJ_CTYPE_UPPER) +#define lj_ctype_islower(c) lj_ctype_isa((c), LJ_CTYPE_LOWER) +#define lj_ctype_isident(c) lj_ctype_isa((c), LJ_CTYPE_IDENT) +#define lj_ctype_isalpha(c) lj_ctype_isa((c), LJ_CTYPE_ALPHA) +#define lj_ctype_isalnum(c) lj_ctype_isa((c), LJ_CTYPE_ALNUM) + +#define lj_ctype_toupper(c) ((c) - (lj_ctype_islower(c) >> 1)) +#define lj_ctype_tolower(c) ((c) + lj_ctype_isupper(c)) + +LJ_DATA const uint8_t lj_ctype_bits[257]; + +#endif diff --git a/src/lj_def.h b/src/lj_def.h new file mode 100644 index 0000000000..dbfd5bf514 --- /dev/null +++ b/src/lj_def.h @@ -0,0 +1,226 @@ +/* +** LuaJIT common internal definitions. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_DEF_H +#define _LJ_DEF_H + +#include "lua.h" + +#ifdef _MSC_VER +/* MSVC is stuck in the last century and doesn't have C99's stdint.h. */ +typedef __int8 int8_t; +typedef __int16 int16_t; +typedef __int32 int32_t; +typedef __int64 int64_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; +#ifdef _WIN64 +typedef __int64 intptr_t; +typedef unsigned __int64 uintptr_t; +#else +typedef __int32 intptr_t; +typedef unsigned __int32 uintptr_t; +#endif +#else +#include <stdint.h> +#endif + +/* Needed everywhere. */ +#include <string.h> +#include <stdlib.h> + +/* Various VM limits. */ +#define LJ_MAX_MEM 0x7fffff00 /* Max. total memory allocation. */ +#define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */ +#define LJ_MAX_STR LJ_MAX_MEM /* Max. string length. */ +#define LJ_MAX_UDATA LJ_MAX_MEM /* Max. userdata length. */ + +#define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */ +#define LJ_MAX_HBITS 26 /* Max. hash bits. */ +#define LJ_MAX_ABITS 28 /* Max. bits of array key. */ +#define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */ +#define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */ + +#define LJ_MAX_LINE LJ_MAX_MEM /* Max. source code line number. */ +#define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */ +#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */ +#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */ +#define LJ_MAX_LOCVAR 200 /* Max. # of local variables. */ +#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */ + +#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ +#define LJ_STACK_EXTRA 5 /* Extra stack space (metamethods). */ + +/* Minimum table/buffer sizes. */ +#define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */ +#define LJ_MIN_REGISTRY 2 /* Min. registry size (hbits). */ +#define LJ_MIN_STRTAB 256 /* Min. string table size (pow2). */ +#define LJ_MIN_SBUF 32 /* Min. string buffer length. */ +#define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ +#define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ +#define LJ_MIN_KNUMSZ 16 /* Min. size for chained KNUM array. */ + +/* JIT compiler limits. */ +#define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ +#define LJ_MAX_PHI 32 /* Max. # of PHIs for a loop. */ +#define LJ_MAX_EXITSTUBGR 8 /* Max. # of exit stub groups.
*/ + +/* Various macros. */ +#ifndef UNUSED +#define UNUSED(x) ((void)(x)) /* to avoid warnings */ +#endif + +#ifndef cast +#define cast(t, exp) ((t)(exp)) +#endif + +#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) +#define cast_byte(i) cast(uint8_t, (i)) +#define cast_num(i) cast(lua_Number, (i)) +#define cast_int(i) cast(int, (i)) +#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) +#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) + +#define checki8(x) ((x) == (int32_t)(int8_t)(x)) +#define checku8(x) ((x) == (int32_t)(uint8_t)(x)) +#define checki16(x) ((x) == (int32_t)(int16_t)(x)) + +/* Every half-decent C compiler transforms this into a rotate instruction. */ +#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(32-(n)))) +#define lj_ror(x, n) (((x)<<(32-(n))) | ((x)>>(n))) + +/* A really naive Bloom filter. But sufficient for our needs. */ +typedef uintptr_t BloomFilter; +#define BLOOM_MASK (8*sizeof(BloomFilter) - 1) +#define bloombit(x) ((uintptr_t)1 << ((x) & BLOOM_MASK)) +#define bloomset(b, x) ((b) |= bloombit((x))) +#define bloomtest(b, x) ((b) & bloombit((x))) + +#if defined(__GNUC__) + +#if (__GNUC__ < 3) || ((__GNUC__ == 3) && __GNUC_MINOR__ < 4) +#error "sorry, need GCC 3.4 or newer" +#endif + +#define LJ_NORET __attribute__((noreturn)) +#define LJ_ALIGN(n) __attribute__((aligned(n))) +#define LJ_INLINE inline +#define LJ_AINLINE inline __attribute__((always_inline)) +#define LJ_NOINLINE __attribute__((noinline)) + +#if defined(__ELF__) || defined(__MACH__) +#define LJ_NOAPI extern __attribute__((visibility("hidden"))) +#endif + +/* Note: it's only beneficial to use fastcall on x86 and then only for up to +** two non-FP args. The amalgamated compile covers all LJ_FUNC cases. Only +** indirect calls and related tail-called C functions are marked as fastcall. +*/ +#if defined(__i386__) +#define LJ_FASTCALL __attribute__((fastcall)) +#endif + +#define LJ_LIKELY(x) __builtin_expect(!!(x), 1) +#define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0) + +#define lj_ffs(x) ((uint32_t)__builtin_ctz(x)) +/* Don't ask ... */ +#if defined(__INTEL_COMPILER) && (defined(__i386__) || defined(__x86_64__)) +static LJ_AINLINE uint32_t lj_fls(uint32_t x) +{ + uint32_t r; __asm__("bsrl %1, %0" : "=r" (r) : "rm" (x) : "cc"); return r; +} +#else +#define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31)) +#endif + +#if defined(__i386__) || defined(__x86_64__) +static LJ_AINLINE uint32_t lj_bswap(uint32_t x) +{ + uint32_t r; __asm__("bswap %0" : "=r" (r) : "0" (x)); return r; +} +#else +#error "missing define for lj_bswap()" +#endif + +#elif defined(_MSC_VER) + +#define LJ_NORET __declspec(noreturn) +#define LJ_ALIGN(n) __declspec(align(n)) +#define LJ_INLINE __inline +#define LJ_AINLINE __forceinline +#define LJ_NOINLINE __declspec(noinline) +#if defined(_M_IX86) +#define LJ_FASTCALL __fastcall +#endif + +static LJ_AINLINE uint32_t lj_ffs(uint32_t x) +{ + uint32_t r; _BitScanForward(&r, x); return r; +} + +static LJ_AINLINE uint32_t lj_fls(uint32_t x) +{ + uint32_t r; _BitScanReverse(&r, x); return r; +} + +#define lj_bswap(x) (_byteswap_ulong((x))) + +#else +#error "missing defines for your compiler" +#endif + +/* Optional defines. */ +#ifndef LJ_FASTCALL +#define LJ_FASTCALL +#endif +#ifndef LJ_NORET +#define LJ_NORET +#endif +#ifndef LJ_NOAPI +#define LJ_NOAPI extern +#endif +#ifndef LJ_LIKELY +#define LJ_LIKELY(x) (x) +#define LJ_UNLIKELY(x) (x) +#endif + +/* Attributes for internal functions. 
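Editor's note (not part of the original patch): a self-contained usage sketch for the naive Bloom filter macros defined earlier in this header. The macros are copied verbatim from above; the key values 17, 42 and 99 are arbitrary.

#include <stdint.h>
#include <stdio.h>

typedef uintptr_t BloomFilter;
#define BLOOM_MASK (8*sizeof(BloomFilter) - 1)
#define bloombit(x) ((uintptr_t)1 << ((x) & BLOOM_MASK))
#define bloomset(b, x) ((b) |= bloombit((x)))
#define bloomtest(b, x) ((b) & bloombit((x)))

int main(void)
{
  BloomFilter bf = 0;
  bloomset(bf, 17);
  bloomset(bf, 42);
  /* A set bit may be a false positive; a clear bit is a definite miss. */
  printf("17:%d 42:%d 99:%d\n",
         bloomtest(bf, 17) != 0, bloomtest(bf, 42) != 0, bloomtest(bf, 99) != 0);
  return 0;  /* prints: 17:1 42:1 99:0 */
}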
*/ +#if defined(ljamalg_c) +#define LJ_DATA static +#define LJ_DATADEF static +#define LJ_FUNC static +#define LJ_ASMF LJ_NOAPI +#define LJ_FUNCA LJ_NOAPI +#else +#define LJ_DATA LJ_NOAPI +#define LJ_DATADEF +#define LJ_FUNC LJ_NOAPI +#define LJ_ASMF LJ_NOAPI +#define LJ_FUNCA LJ_NOAPI +#endif +#define LJ_FUNC_NORET LJ_FUNC LJ_NORET +#define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET +#define LJ_ASMF_NORET LJ_ASMF LJ_NORET + +/* Runtime assertions. */ +#ifdef lua_assert +#define check_exp(c, e) (lua_assert(c), (e)) +#define api_check(l, e) lua_assert(e) +#else +#define lua_assert(c) ((void)0) +#define check_exp(c, e) (e) +#define api_check luai_apicheck +#endif + +/* Static assertions. */ +#define LJ_ASSERT_NAME2(name, line) name ## line +#define LJ_ASSERT_NAME(line) LJ_ASSERT_NAME2(lj_assert_, line) +#define LJ_STATIC_ASSERT(cond) \ + extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1]) + +#endif diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c new file mode 100644 index 0000000000..d2fce2e03d --- /dev/null +++ b/src/lj_dispatch.c @@ -0,0 +1,284 @@ +/* +** Instruction dispatch handling. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_dispatch_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_err.h" +#include "lj_state.h" +#include "lj_frame.h" +#include "lj_bc.h" +#if LJ_HASJIT +#include "lj_jit.h" +#endif +#include "lj_trace.h" +#include "lj_dispatch.h" +#include "lj_vm.h" +#include "luajit.h" + +/* -- Dispatch table management ------------------------------------------- */ + +/* Initialize instruction dispatch table and hot counters. */ +void lj_dispatch_init(GG_State *GG) +{ + uint32_t i; + ASMFunction *disp = GG->dispatch; + for (i = 0; i < BC__MAX; i++) + disp[GG_DISP_STATIC+i] = disp[i] = makeasmfunc(lj_vm_op_ofs[i]); + /* The JIT engine is off by default. luaopen_jit() turns it on. */ + disp[BC_FORL] = disp[BC_IFORL]; + disp[BC_ITERL] = disp[BC_IITERL]; + disp[BC_LOOP] = disp[BC_ILOOP]; +} + +/* Update dispatch table depending on various flags. */ +void lj_dispatch_update(global_State *g) +{ + uint8_t oldmode = g->dispatchmode; + uint8_t mode = 0; +#if LJ_HASJIT + mode |= (G2J(g)->flags & JIT_F_ON) ? 1 : 0; + mode |= G2J(g)->state != LJ_TRACE_IDLE ? 6 : 0; +#endif + mode |= (g->hookmask & HOOK_EVENTMASK) ? 2 : 0; + if (oldmode != mode) { /* Mode changed? */ + ASMFunction *disp = G2GG(g)->dispatch; + ASMFunction f_forl, f_iterl, f_loop; + g->dispatchmode = mode; + if ((mode & 5) == 1) { /* Hotcount if JIT is on, but not when recording. */ + f_forl = makeasmfunc(lj_vm_op_ofs[BC_FORL]); + f_iterl = makeasmfunc(lj_vm_op_ofs[BC_ITERL]); + f_loop = makeasmfunc(lj_vm_op_ofs[BC_LOOP]); + } else { /* Otherwise use the non-hotcounting instructions. */ + f_forl = disp[GG_DISP_STATIC+BC_IFORL]; + f_iterl = disp[GG_DISP_STATIC+BC_IITERL]; + f_loop = disp[GG_DISP_STATIC+BC_ILOOP]; + } + /* Set static loop ins first (may be copied below). */ + disp[GG_DISP_STATIC+BC_FORL] = f_forl; + disp[GG_DISP_STATIC+BC_ITERL] = f_iterl; + disp[GG_DISP_STATIC+BC_LOOP] = f_loop; + if ((oldmode & 6) != (mode & 6)) { /* Need to change whole table? */ + if ((mode & 6) == 0) { /* No hooks and no recording? */ + /* Copy static dispatch table to dynamic dispatch table. */ + memcpy(&disp[0], &disp[GG_DISP_STATIC], sizeof(ASMFunction)*BC__MAX); + } else { + /* The recording dispatch also checks for hooks. */ + ASMFunction f = (mode & 6) == 6 ? 
lj_vm_record : lj_vm_hook; + uint32_t i; + for (i = 0; i < BC__MAX; i++) + disp[i] = f; + } + } else if ((mode & 6) == 0) { /* Fix dynamic loop ins unless overridden. */ + disp[BC_FORL] = f_forl; + disp[BC_ITERL] = f_iterl; + disp[BC_LOOP] = f_loop; + } + } +} + +/* -- JIT mode setting ---------------------------------------------------- */ + +#if LJ_HASJIT +/* Set JIT mode for a single prototype. */ +static void setptmode(global_State *g, GCproto *pt, int mode) +{ + if ((mode & LUAJIT_MODE_ON)) { /* (Re-)enable JIT compilation. */ + pt->flags &= ~PROTO_NO_JIT; + lj_trace_reenableproto(pt); /* Unpatch all ILOOP etc. bytecodes. */ + } else { /* Flush and/or disable JIT compilation. */ + if (!(mode & LUAJIT_MODE_FLUSH)) + pt->flags |= PROTO_NO_JIT; + lj_trace_flushproto(g, pt); /* Flush all traces of prototype. */ + } +} + +/* Recursively set the JIT mode for all children of a prototype. */ +static void setptmode_all(global_State *g, GCproto *pt, int mode) +{ + ptrdiff_t i; + for (i = -(ptrdiff_t)pt->sizekgc; i < 0; i++) { + GCobj *o = gcref(pt->k.gc[i]); + if (o->gch.gct == ~LJ_TPROTO) { + setptmode(g, gco2pt(o), mode); + setptmode_all(g, gco2pt(o), mode); + } + } +} +#endif + +/* Public API function: control the JIT engine. */ +int luaJIT_setmode(lua_State *L, int idx, int mode) +{ + global_State *g = G(L); + int mm = mode & LUAJIT_MODE_MASK; + lj_trace_abort(g); /* Abort recording on any state change. */ + /* Avoid pulling the rug from under our own feet. */ + if ((g->hookmask & HOOK_GC)) + lj_err_caller(L, LJ_ERR_NOGCMM); + switch (mm) { +#if LJ_HASJIT + case LUAJIT_MODE_ENGINE: + if ((mode & LUAJIT_MODE_FLUSH)) { + lj_trace_flushall(L); + } else { + if ((mode & LUAJIT_MODE_ON)) + G2J(g)->flags |= (uint32_t)JIT_F_ON; + else + G2J(g)->flags &= ~(uint32_t)JIT_F_ON; + lj_dispatch_update(g); + } + break; + case LUAJIT_MODE_FUNC: + case LUAJIT_MODE_ALLFUNC: + case LUAJIT_MODE_ALLSUBFUNC: { + cTValue *tv = idx == 0 ? frame_prev(L->base-1) : + idx > 0 ? L->base + (idx-1) : L->top + idx; + GCproto *pt; + if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn)) + pt = funcproto(&gcval(tv)->fn); /* Cannot use funcV() for frame slot. */ + else if (tvisproto(tv)) + pt = protoV(tv); + else + return 0; /* Failed. */ + if (mm != LUAJIT_MODE_ALLSUBFUNC) + setptmode(g, pt, mode); + if (mm != LUAJIT_MODE_FUNC) + setptmode_all(g, pt, mode); + break; + } + case LUAJIT_MODE_TRACE: + if (!(mode & LUAJIT_MODE_FLUSH)) + return 0; /* Failed. */ + lj_trace_flush(G2J(g), idx); + break; +#else + case LUAJIT_MODE_ENGINE: + case LUAJIT_MODE_FUNC: + case LUAJIT_MODE_ALLFUNC: + case LUAJIT_MODE_ALLSUBFUNC: + UNUSED(idx); + if ((mode & LUAJIT_MODE_ON)) + return 0; /* Failed. */ + break; +#endif + default: + return 0; /* Failed. */ + } + return 1; /* OK. */ +} + +/* Enforce (dynamic) linker error for version mismatches. See luajit.c. */ +LUA_API void LUAJIT_VERSION_SYM(void) +{ +} + +/* -- Hooks --------------------------------------------------------------- */ + +/* This function can be called asynchronously (e.g. during a signal). */ +LUA_API int lua_sethook(lua_State *L, lua_Hook func, int mask, int count) +{ + global_State *g = G(L); + mask &= HOOK_EVENTMASK; + if (func == NULL || mask == 0) { mask = 0; func = NULL; } /* Consistency. */ + g->hookf = func; + g->hookcount = g->hookcstart = (int32_t)count; + g->hookmask = (uint8_t)((g->hookmask & ~HOOK_EVENTMASK) | mask); + lj_trace_abort(g); /* Abort recording on any hook change.
*/ + lj_dispatch_update(g); + return 1; +} + +LUA_API lua_Hook lua_gethook(lua_State *L) +{ + return G(L)->hookf; +} + +LUA_API int lua_gethookmask(lua_State *L) +{ + return G(L)->hookmask & HOOK_EVENTMASK; +} + +LUA_API int lua_gethookcount(lua_State *L) +{ + return (int)G(L)->hookcstart; +} + +/* Call a hook. */ +static void callhook(lua_State *L, int event, BCLine line) +{ + global_State *g = G(L); + lua_Hook hookf = g->hookf; + if (hookf && !hook_active(g)) { + lua_Debug ar; + lj_trace_abort(g); /* Abort recording on any hook call. */ + ar.event = event; + ar.currentline = line; + ar.i_ci = cast_int((L->base-1) - L->stack); /* Top frame, nextframe=NULL. */ + lj_state_checkstack(L, 1+LUA_MINSTACK); + hook_enter(g); + hookf(L, &ar); + lua_assert(hook_active(g)); + hook_leave(g); + } +} + +/* -- Instruction dispatch callbacks -------------------------------------- */ + +/* Calculate number of used stack slots in the current frame. */ +static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres) +{ + BCIns ins = pc[-1]; + for (;;) { + switch (bc_op(ins)) { + case BC_UCLO: ins = pc[bc_j(ins)]; break; + case BC_CALLM: + case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1; + case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1; + case BC_TSETM: return bc_a(ins) + nres-1; + default: return pt->framesize; + } + } +} + +/* Instruction dispatch callback for instr/line hooks or when recording. */ +void lj_dispatch_ins(lua_State *L, const BCIns *pc, uint32_t nres) +{ + GCfunc *fn = curr_func(L); + GCproto *pt = funcproto(fn); + BCReg slots = cur_topslot(pt, pc, nres); + global_State *g = G(L); + const BCIns *oldpc = cframe_Lpc(L); + cframe_Lpc(L) = pc; + L->top = L->base + slots; /* Fix top. */ +#if LJ_HASJIT + { + jit_State *J = G2J(g); + if (J->state != LJ_TRACE_IDLE) { + J->L = L; + J->pc = pc-1; + J->fn = fn; + J->pt = pt; + lj_trace_ins(J); + } + } +#endif + if ((g->hookmask & LUA_MASKCOUNT) && g->hookcount == 0) { + g->hookcount = g->hookcstart; + callhook(L, LUA_HOOKCOUNT, -1); + } + if ((g->hookmask & LUA_MASKLINE) && pt->lineinfo) { + BCPos npc = (BCPos)(pc - pt->bc)-1; + BCPos opc = (BCPos)(oldpc - pt->bc)-1; + BCLine line = pt->lineinfo[npc]; + if (npc == 0 || pc <= oldpc || + opc >= pt->sizebc || line != pt->lineinfo[opc]) { + L->top = L->base + slots; /* Fix top again after instruction hook. */ + callhook(L, LUA_HOOKLINE, line); + } + } +} + diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h new file mode 100644 index 0000000000..298aa166a6 --- /dev/null +++ b/src/lj_dispatch.h @@ -0,0 +1,64 @@ +/* +** Instruction dispatch handling. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_DISPATCH_H +#define _LJ_DISPATCH_H + +#include "lj_obj.h" +#include "lj_bc.h" +#if LJ_HASJIT +#include "lj_jit.h" +#endif + +/* Type of hot counter. Must match the code in the assembler VM. */ +/* 16 bits are sufficient. Only 0.0015% overhead with maximum slot penalty. */ +typedef uint16_t HotCount; + +/* Number of hot counter hash table entries (must be a power of two). */ +#define HOTCOUNT_SIZE 64 +#define HOTCOUNT_PCMASK ((HOTCOUNT_SIZE-1)*sizeof(HotCount)) +#define HOTCOUNT_MIN_PENALTY 103 +#define HOTCOUNT_MAX_PENALTY 60000 + +/* Global state, main thread and extra fields are allocated together. */ +typedef struct GG_State { + lua_State L; /* Main thread. */ + global_State g; /* Global state. */ +#if LJ_HASJIT + jit_State J; /* JIT state. */ + HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. 
*/ +#endif + ASMFunction dispatch[2*BC__MAX]; /* Instruction dispatch tables. */ +} GG_State; + +#define GG_DISP_STATIC BC__MAX + +#define GG_OFS(field) ((int)offsetof(GG_State, field)) +#define G2GG(gl) \ + ((GG_State *)(((char *)(gl))-((char *)(&((GG_State *)0)->g)))) +#define J2GG(j) \ + ((GG_State *)(((char *)(j))-((char *)(&((GG_State *)0)->J)))) +#define L2GG(L) G2GG(G(L)) +#define J2G(J) (&J2GG(J)->g) +#define G2J(gl) (&G2GG(gl)->J) +#define L2J(L) (&L2GG(L)->J) +#define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) +#define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) +#define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) +#define GG_DISP2HOT (GG_OFS(hotcount) - GG_OFS(dispatch)) + +#define hotcount_get(gg, pc) \ + (gg)->hotcount[(u32ptr(pc)>>2) & (HOTCOUNT_SIZE-1)] +#define hotcount_set(gg, pc, val) \ + (hotcount_get((gg), (pc)) = (HotCount)(val)) + +/* Dispatch table management. */ +LJ_FUNC void lj_dispatch_init(GG_State *GG); +LJ_FUNC void lj_dispatch_update(global_State *g); + +/* Instruction dispatch callback for instr/line hooks or when recording. */ +LJ_FUNCA void lj_dispatch_ins(lua_State *L, const BCIns *pc, uint32_t nres); + +#endif diff --git a/src/lj_err.c b/src/lj_err.c new file mode 100644 index 0000000000..a723af4875 --- /dev/null +++ b/src/lj_err.c @@ -0,0 +1,763 @@ +/* +** Error handling and debugging API. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +*/ + +#define lj_err_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_func.h" +#include "lj_state.h" +#include "lj_frame.h" +#include "lj_bc.h" +#include "lj_trace.h" +#include "lj_vm.h" + +/* -- Error messages ------------------------------------------------------ */ + +/* Error message strings. */ +static const char *lj_err_allmsg = +#define ERRDEF(name, msg) msg "\0" +#include "lj_errmsg.h" +; + +#define err2msg(em) (lj_err_allmsg+(int)(em)) + +/* -- Frame and function introspection ------------------------------------ */ + +static BCPos currentpc(lua_State *L, GCfunc *fn, cTValue *nextframe) +{ + const BCIns *ins; + lua_assert(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD); + if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */ + return ~(BCPos)0; + } else if (nextframe == NULL) { /* Lua function on top. */ + ins = cframe_Lpc(L); /* Only happens during error/hook handling. */ + } else { + if (frame_islua(nextframe)) { + ins = frame_pc(nextframe); + } else if (frame_iscont(nextframe)) { + ins = frame_contpc(nextframe); + } else { + /* Lua function below errfunc/gc/hook: find cframe to get the PC. */ + void *cf = cframe_raw(L->cframe); + TValue *f = L->base-1; + while (f > nextframe) { + if (frame_islua(f)) { + f = frame_prevl(f); + } else { + if (frame_isc(f)) + cf = cframe_raw(cframe_prev(cf)); + f = frame_prevd(f); + } + } + if (cframe_prev(cf)) + cf = cframe_raw(cframe_prev(cf)); + ins = cframe_pc(cf); + } + } + return (BCPos)((ins - funcproto(fn)->bc) - 1); +} + +static BCLine currentline(lua_State *L, GCfunc *fn, cTValue *nextframe) +{ + BCPos pc = currentpc(L, fn, nextframe); + if (pc != ~(BCPos)0) { + GCproto *pt = funcproto(fn); + lua_assert(pc < pt->sizebc); + return pt->lineinfo ? 
pt->lineinfo[pc] : 0; + } else { + return -1; + } +} + +static const char *getvarname(const GCproto *pt, BCPos pc, BCReg slot) +{ + MSize i; + for (i = 0; i < pt->sizevarinfo && pt->varinfo[i].startpc <= pc; i++) + if (pc < pt->varinfo[i].endpc && slot-- == 0) + return strdata(pt->varinfo[i].name); + return NULL; +} + +static const char *getobjname(GCproto *pt, const BCIns *ip, BCReg slot, + const char **name) +{ + const char *lname; +restart: + lname = getvarname(pt, (BCPos)(ip - pt->bc), slot); + if (lname != NULL) { *name = lname; return "local"; } + while (--ip >= pt->bc) { + BCIns ins = *ip; + BCOp op = bc_op(ins); + BCReg ra = bc_a(ins); + if (bcmode_a(op) == BCMbase) { + if (slot >= ra && (op != BC_KNIL || slot <= bc_d(ins))) + return NULL; + } else if (bcmode_a(op) == BCMdst && ra == slot) { + switch (bc_op(ins)) { + case BC_MOV: + if (ra == slot) { slot = bc_d(ins); goto restart; } + break; + case BC_GGET: + *name = strdata(gco2str(gcref(pt->k.gc[~bc_d(ins)]))); + return "global"; + case BC_TGETS: + *name = strdata(gco2str(gcref(pt->k.gc[~bc_c(ins)]))); + if (ip > pt->bc) { + BCIns insp = ip[-1]; + if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 && + bc_d(insp) == bc_b(ins)) + return "method"; + } + return "field"; + case BC_UGET: + *name = pt->uvname ? strdata(pt->uvname[bc_d(ins)]) : "?"; + return "upvalue"; + default: + return NULL; + } + } + } + return NULL; +} + +static const char *getfuncname(lua_State *L, TValue *frame, const char **name) +{ + MMS mm; + const BCIns *ip; + TValue *pframe; + GCfunc *fn; + BCPos pc; + if (frame_isvarg(frame)) + frame = frame_prevd(frame); + pframe = frame_prev(frame); + fn = frame_func(pframe); + pc = currentpc(L, fn, frame); + if (pc == ~(BCPos)0) + return NULL; + lua_assert(pc < funcproto(fn)->sizebc); + ip = &funcproto(fn)->bc[pc]; + mm = bcmode_mm(bc_op(*ip)); + if (mm == MM_call) { + BCReg slot = bc_a(*ip); + if (bc_op(*ip) == BC_ITERC) slot -= 3; + return getobjname(funcproto(fn), ip, slot, name); + } else if (mm != MM_MAX) { + *name = strdata(strref(G(L)->mmname[mm])); + return "metamethod"; + } else { + return NULL; + } +} + +void lj_err_pushloc(lua_State *L, GCproto *pt, BCPos pc) +{ + GCstr *name = pt->chunkname; + if (name) { + const char *s = strdata(name); + MSize i, len = name->len; + BCLine line; + if (pc) + line = pt->lineinfo ? pt->lineinfo[pc-1] : 0; + else + line = pt->linedefined; + if (*s == '@') { + s++; len--; + for (i = len; i > 0; i--) + if (s[i] == '/' || s[i] == '\\') { + s += i+1; + break; + } + lj_str_pushf(L, "%s:%d", s, line); + } else if (len > 40) { + lj_str_pushf(L, "%p:%d", pt, line); + } else if (*s == '=') { + lj_str_pushf(L, "%s:%d", s+1, line); + } else { + lj_str_pushf(L, "\"%s\":%d", s, line); + } + } else { + lj_str_pushf(L, "%p:%u", pt, pc); + } +} + +static void err_chunkid(char *out, const char *src) +{ + if (*src == '=') { + strncpy(out, src+1, LUA_IDSIZE); /* remove first char */ + out[LUA_IDSIZE-1] = '\0'; /* ensures null termination */ + } else if (*src == '@') { /* out = "source", or "...source" */ + size_t l = strlen(++src); /* skip the `@' */ + if (l >= LUA_IDSIZE) { + src += l-(LUA_IDSIZE-4); /* get last part of file name */ + strcpy(out, "..."); + out += 3; + } + strcpy(out, src); + } else { /* out = [string "string"] */ + size_t len; /* Length, up to first control char. */ + for (len = 0; len < LUA_IDSIZE-11; len++) + if (((const unsigned char *)src)[len] < ' ') break; + strcpy(out, "[string \""); out += 9; + if (src[len] != '\0') { /* must truncate? 
*/ + if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15; + strncpy(out, src, len); out += len; + strcpy(out, "..."); out += 3; + } else { + strcpy(out, src); out += len; + } + strcpy(out, "\"]"); + } +} + +/* -- Public debug API ---------------------------------------------------- */ + +static TValue *findlocal(lua_State *L, const lua_Debug *ar, + const char **name, BCReg slot) +{ + uint32_t offset = (uint32_t)ar->i_ci & 0xffff; + uint32_t size = (uint32_t)ar->i_ci >> 16; + TValue *frame = L->stack + offset; + TValue *nextframe = size ? frame + size : NULL; + GCfunc *fn = frame_func(frame); + BCPos pc = currentpc(L, fn, nextframe); + if (pc != ~(BCPos)0 && + (*name = getvarname(funcproto(fn), pc, slot-1)) != NULL) + ; + else if (slot > 0 && frame + slot < (nextframe ? nextframe : L->top)) + *name = "(*temporary)"; + else + *name = NULL; + return frame+slot; +} + +LUA_API const char *lua_getlocal(lua_State *L, const lua_Debug *ar, int n) +{ + const char *name; + TValue *o = findlocal(L, ar, &name, (BCReg)n); + if (name) { + copyTV(L, L->top, o); + incr_top(L); + } + return name; +} + + +LUA_API const char *lua_setlocal(lua_State *L, const lua_Debug *ar, int n) +{ + const char *name; + TValue *o = findlocal(L, ar, &name, (BCReg)n); + if (name) + copyTV(L, o, L->top-1); + L->top--; + return name; +} + +LUA_API int lua_getinfo(lua_State *L, const char *what, lua_Debug *ar) +{ + int status = 1; + TValue *frame = NULL; + TValue *nextframe = NULL; + GCfunc *fn; + if (*what == '>') { + TValue *func = L->top - 1; + api_check(L, tvisfunc(func)); + fn = funcV(func); + L->top--; + what++; + } else { + uint32_t offset = (uint32_t)ar->i_ci & 0xffff; + uint32_t size = (uint32_t)ar->i_ci >> 16; + lua_assert(offset != 0); + frame = L->stack + offset; + if (size) nextframe = frame + size; + lua_assert(frame<=L->maxstack && (!nextframe || nextframe<=L->maxstack)); + fn = frame_func(frame); + lua_assert(fn->c.gct == ~LJ_TFUNC); + } + for (; *what; what++) { + switch (*what) { + case 'S': + if (isluafunc(fn)) { + ar->source = strdata(funcproto(fn)->chunkname); + ar->linedefined = cast_int(funcproto(fn)->linedefined); + ar->lastlinedefined = cast_int(funcproto(fn)->lastlinedefined); + ar->what = (ar->linedefined == 0) ? "main" : "Lua"; + } else { + ar->source = "=[C]"; + ar->linedefined = -1; + ar->lastlinedefined = -1; + ar->what = "C"; + } + err_chunkid(ar->short_src, ar->source); + break; + case 'l': + ar->currentline = frame ? currentline(L, fn, nextframe) : -1; + break; + case 'u': + ar->nups = fn->c.nupvalues; + break; + case 'n': + ar->namewhat = frame ? getfuncname(L, frame, &ar->name) : NULL; + if (ar->namewhat == NULL) { + ar->namewhat = ""; + ar->name = NULL; + } + break; + case 'f': + setfuncV(L, L->top, fn); + incr_top(L); + break; + case 'L': + if (isluafunc(fn)) { + GCtab *t = lj_tab_new(L, 0, 0); + BCLine *lineinfo = funcproto(fn)->lineinfo; + uint32_t i, szl = funcproto(fn)->sizelineinfo; + for (i = 0; i < szl; i++) + setboolV(lj_tab_setint(L, t, lineinfo[i]), 1); + settabV(L, L->top, t); + } else { + setnilV(L->top); + } + incr_top(L); + break; + default: + status = 0; /* Bad option. */ + break; + } + } + return status; +} + +cTValue *lj_err_getframe(lua_State *L, int level, int *size) +{ + cTValue *frame, *nextframe; + /* Traverse frames backwards. */ + for (nextframe = frame = L->base-1; frame > L->stack; ) { + if (frame_gc(frame) == obj2gco(L)) + level++; /* Skip dummy frames. See lj_meta_call(). */ + if (level-- == 0) { + *size = cast_int(nextframe - frame); + return frame; /* Level found. 
*/ + } + nextframe = frame; + if (frame_islua(frame)) { + frame = frame_prevl(frame); + } else { + if (frame_isvarg(frame)) + level++; /* Skip vararg pseudo-frame. */ + frame = frame_prevd(frame); + } + } + *size = level; + return NULL; /* Level not found. */ +} + +LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar) +{ + int size; + cTValue *frame = lj_err_getframe(L, level, &size); + if (frame) { + ar->i_ci = (size << 16) + cast_int(frame - L->stack); + return 1; + } else { + ar->i_ci = level - size; + return 0; + } +} + +/* -- Error handling ------------------------------------------------------ */ + +/* Return string object for error message. */ +LJ_NOINLINE GCstr *lj_err_str(lua_State *L, ErrMsg em) +{ + return lj_str_newz(L, err2msg(em)); +} + +/* Unwind Lua stack and add error message on top. */ +LJ_NOINLINE static void unwindstack(lua_State *L, TValue *top, int errcode) +{ + lj_func_closeuv(L, top); + switch (errcode) { + case LUA_ERRMEM: + setstrV(L, top, lj_err_str(L, LJ_ERR_ERRMEM)); + break; + case LUA_ERRERR: + setstrV(L, top, lj_err_str(L, LJ_ERR_ERRERR)); + break; + case LUA_ERRSYNTAX: + case LUA_ERRRUN: + copyTV(L, top, L->top - 1); + break; + default: + lua_assert(0); + break; + } + L->top = top+1; + lj_state_relimitstack(L); +} + +/* Throw error. Find catch frame, unwind stack and continue. */ +LJ_NOINLINE void lj_err_throw(lua_State *L, int errcode) +{ + TValue *frame = L->base-1; + void *cf = L->cframe; + global_State *g = G(L); + if (L->status == LUA_ERRERR+1) { /* Don't touch the stack during lua_open. */ + lj_vm_unwind_c(cf, errcode); + goto uncaught; /* unreachable */ + } + lj_trace_abort(g); + setgcrefnull(g->jit_L); + L->status = 0; + while (cf) { + if (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */ + TValue *top = restorestack(L, -cframe_nres(cf)); + if (frame < top) { + L->cframe = cframe_prev(cf); + L->base = frame+1; + unwindstack(L, top, errcode); + lj_vm_unwind_c(cf, errcode); + goto uncaught; /* unreachable */ + } + } + if (frame <= L->stack) + break; + switch (frame_typep(frame)) { + case FRAME_LUA: + case FRAME_LUAP: + frame = frame_prevl(frame); + break; + case FRAME_C: + if (cframe_canyield(cf)) goto uncaught; + cf = cframe_prev(cf); + /* fallthrough */ + case FRAME_CONT: + case FRAME_VARG: + frame = frame_prevd(frame); + break; + case FRAME_CP: + L->cframe = cframe_prev(cf); + L->base = frame_prevd(frame) + 1; + unwindstack(L, frame, errcode); + lj_vm_unwind_c(cf, errcode); + goto uncaught; /* unreachable */ + case FRAME_PCALL: + hook_leave(g); + /* fallthrough */ + case FRAME_PCALLH: + L->cframe = cf; + L->base = frame_prevd(frame) + 1; + unwindstack(L, L->base, errcode); + lj_vm_unwind_ff(cf); + goto uncaught; /* unreachable */ + default: + lua_assert(0); + goto uncaught; + } + } + /* No catch frame found. Must be a resume or an unprotected error. */ +uncaught: + L->status = cast_byte(errcode); + L->cframe = NULL; + if (cframe_canyield(cf)) { /* Resume? */ + unwindstack(L, L->top, errcode); + lj_vm_unwind_c(cf, errcode); + } + /* Better rethrow on main thread than panic. */ + { + if (L != mainthread(g)) + lj_err_throw(mainthread(g), errcode); + if (g->panic) { + L->base = L->stack+1; + unwindstack(L, L->base, errcode); + g->panic(L); + } + } + exit(EXIT_FAILURE); +} + +/* Find error function for runtime errors. Requires an extra stack traversal. 
*/ +static ptrdiff_t finderrfunc(lua_State *L) +{ + TValue *frame = L->base-1; + void *cf = L->cframe; + while (frame > L->stack) { + lua_assert(cf != NULL); + while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */ + if (frame >= restorestack(L, -cframe_nres(cf))) + break; + if (cframe_errfunc(cf) >= 0) /* Error handler not inherited (-1)? */ + return cframe_errfunc(cf); + cf = cframe_prev(cf); /* Else unwind cframe and continue searching. */ + if (cf == NULL) + return 0; + } + switch (frame_typep(frame)) { + case FRAME_LUA: + case FRAME_LUAP: + frame = frame_prevl(frame); + break; + case FRAME_C: + if (cframe_canyield(cf)) return 0; + cf = cframe_prev(cf); + /* fallthrough */ + case FRAME_CONT: + case FRAME_VARG: + frame = frame_prevd(frame); + break; + case FRAME_CP: + if (cframe_errfunc(cf) >= 0) + return cframe_errfunc(cf); + frame = frame_prevd(frame); + break; + case FRAME_PCALL: + case FRAME_PCALLH: + if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue))) /* xpcall? */ + return savestack(L, frame-1); /* Point to xpcall's errorfunc. */ + return 0; + default: + lua_assert(0); + return 0; + } + } + return 0; +} + +/* Runtime error. */ +LJ_NOINLINE void lj_err_run(lua_State *L) +{ + ptrdiff_t ef = finderrfunc(L); + if (ef) { + TValue *errfunc = restorestack(L, ef); + TValue *top = L->top; + lj_trace_abort(G(L)); + if (!tvisfunc(errfunc) || L->status == LUA_ERRERR) + lj_err_throw(L, LUA_ERRERR); + L->status = LUA_ERRERR; + copyTV(L, top, top-1); + copyTV(L, top-1, errfunc); + L->top = top+1; + lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */ + } + lj_err_throw(L, LUA_ERRRUN); +} + +/* Add location to error message. */ +LJ_NOINLINE static void err_loc(lua_State *L, const char *msg, + cTValue *frame, cTValue *nextframe) +{ + if (frame) { + GCfunc *fn = frame_func(frame); + if (isluafunc(fn)) { + char buff[LUA_IDSIZE]; + BCLine line = currentline(L, fn, nextframe); + err_chunkid(buff, strdata(funcproto(fn)->chunkname)); + lj_str_pushf(L, "%s:%d: %s", buff, line, msg); + return; + } + } + lj_str_pushf(L, "%s", msg); +} + +/* Formatted runtime error message. */ +LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...) +{ + const char *msg; + va_list argp; + va_start(argp, em); + if (curr_funcisL(L)) L->top = curr_topL(L); + msg = lj_str_pushvf(L, err2msg(em), argp); + va_end(argp); + err_loc(L, msg, L->base-1, NULL); + lj_err_run(L); +} + +/* Non-vararg variant for better calling conventions. */ +LJ_NOINLINE void lj_err_msg(lua_State *L, ErrMsg em) +{ + err_msgv(L, em); +} + +/* Lexer error. */ +LJ_NOINLINE void lj_err_lex(lua_State *L, const char *src, const char *tok, + BCLine line, ErrMsg em, va_list argp) +{ + char buff[LUA_IDSIZE]; + const char *msg; + err_chunkid(buff, src); + msg = lj_str_pushvf(L, err2msg(em), argp); + msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg); + if (tok) + lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok); + lj_err_throw(L, LUA_ERRSYNTAX); +} + +/* Typecheck error for operands. */ +LJ_NOINLINE void lj_err_optype(lua_State *L, cTValue *o, ErrMsg opm) +{ + const char *tname = typename(o); + const char *oname = NULL; + const char *opname = err2msg(opm); + if (curr_funcisL(L)) { + GCproto *pt = curr_proto(L); + const BCIns *pc = cframe_Lpc(L) - 1; + const char *kind = getobjname(pt, pc, (BCReg)(o - L->base), &oname); + if (kind) + err_msgv(L, LJ_ERR_BADOPRT, opname, kind, oname, tname); + } + err_msgv(L, LJ_ERR_BADOPRV, opname, tname); +} + +/* Typecheck error for ordered comparisons. 
*/ +LJ_NOINLINE void lj_err_comp(lua_State *L, cTValue *o1, cTValue *o2) +{ + const char *t1 = typename(o1); + const char *t2 = typename(o2); + err_msgv(L, t1 == t2 ? LJ_ERR_BADCMPV : LJ_ERR_BADCMPT, t1, t2); + /* This assumes the two "boolean" entries are commoned by the C compiler. */ +} + +/* Typecheck error for __call. */ +LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o) +{ + /* Gross hack if lua_[p]call or pcall/xpcall fail for a non-callable object: + ** L->base still points to the caller. So add a dummy frame with L instead + ** of a function. See lua_getstack(). + */ + const BCIns *pc = cframe_Lpc(L); + if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) { + const char *tname = typename(o); + setframe_pc(o, pc); + setframe_gc(o, obj2gco(L)); + L->top = L->base = o+1; + err_msgv(L, LJ_ERR_BADCALL, tname); + } + lj_err_optype(L, o, LJ_ERR_OPCALL); +} + +/* Error in context of caller. */ +LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg) +{ + cTValue *frame = L->base-1; + cTValue *pframe = frame_islua(frame) ? frame_prevl(frame) : NULL; + err_loc(L, msg, pframe, frame); + lj_err_run(L); +} + +/* Formatted error in context of caller. */ +LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...) +{ + const char *msg; + va_list argp; + va_start(argp, em); + msg = lj_str_pushvf(L, err2msg(em), argp); + va_end(argp); + lj_err_callermsg(L, msg); +} + +/* Error in context of caller. */ +LJ_NOINLINE void lj_err_caller(lua_State *L, ErrMsg em) +{ + lj_err_callermsg(L, err2msg(em)); +} + +/* Argument error message. */ +LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg, + const char *msg) +{ + const char *fname = "?"; + const char *ftype = getfuncname(L, L->base - 1, &fname); + if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */ + msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg); + else + msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg); + lj_err_callermsg(L, msg); +} + +/* Formatted argument error. */ +LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...) +{ + const char *msg; + va_list argp; + va_start(argp, em); + msg = lj_str_pushvf(L, err2msg(em), argp); + va_end(argp); + err_argmsg(L, narg, msg); +} + +/* Argument error. */ +LJ_NOINLINE void lj_err_arg(lua_State *L, int narg, ErrMsg em) +{ + err_argmsg(L, narg, err2msg(em)); +} + +/* Typecheck error for arguments. */ +LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname) +{ + TValue *o = L->base + narg-1; + const char *tname = o < L->top ? typename(o) : lj_obj_typename[0]; + const char *msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname); + err_argmsg(L, narg, msg); +} + +/* Typecheck error for arguments. */ +LJ_NOINLINE void lj_err_argt(lua_State *L, int narg, int tt) +{ + lj_err_argtype(L, narg, lj_obj_typename[tt+1]); +} + +/* -- Public error handling API ------------------------------------------- */ + +LUA_API lua_CFunction lua_atpanic(lua_State *L, lua_CFunction panicf) +{ + lua_CFunction old = G(L)->panic; + G(L)->panic = panicf; + return old; +} + +/* Forwarders for the public API (C calling convention and no LJ_NORET). 
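Editor's note (not part of the original patch): the ErrMsg enum in lj_err.h below, together with the ERRDEF list in lj_errmsg.h and the lj_err_allmsg string at the top of this file, encodes each message's byte offset into one '\0'-separated string, so err2msg() is a single pointer addition. Here is a minimal standalone sketch of the same X-macro trick, using a hypothetical two-message list (MYERRDEF, ERR_FOO, ERR_BAR are invented names):

#include <stdio.h>

/* Hypothetical message list in the style of lj_errmsg.h. */
#define MYERRDEF(_) \
  _(FOO, "foo failed") \
  _(BAR, "bar broke")

/* Concatenate all messages into one string, separated by '\0'. */
static const char *allmsg =
#define ERRDEF(name, msg) msg "\0"
MYERRDEF(ERRDEF)
#undef ERRDEF
;

/* Each ERR_* value is the byte offset of its message within allmsg:
** the extra ERR_*_ enumerator advances the counter past the message body,
** and the implicit +1 for the next enumerator skips the '\0'. */
typedef enum {
#define ERRDEF(name, msg) \
  ERR_##name, ERR_##name##_ = ERR_##name + sizeof(msg)-1,
MYERRDEF(ERRDEF)
#undef ERRDEF
  ERR__MAX
} MyErrMsg;

int main(void)
{
  printf("%s / %s\n", allmsg + ERR_FOO, allmsg + ERR_BAR);
  return 0;  /* prints: foo failed / bar broke */
}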
*/ +LUA_API int lua_error(lua_State *L) +{ + lj_err_run(L); + return 0; /* unreachable */ +} + +LUALIB_API int luaL_argerror(lua_State *L, int narg, const char *msg) +{ + err_argmsg(L, narg, msg); + return 0; /* unreachable */ +} + +LUALIB_API int luaL_typerror(lua_State *L, int narg, const char *xname) +{ + lj_err_argtype(L, narg, xname); + return 0; /* unreachable */ +} + +LUALIB_API void luaL_where(lua_State *L, int level) +{ + int size; + cTValue *frame = lj_err_getframe(L, level, &size); + err_loc(L, "", frame, size ? frame+size : NULL); +} + +LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...) +{ + const char *msg; + va_list argp; + va_start(argp, fmt); + msg = lj_str_pushvf(L, fmt, argp); + va_end(argp); + lj_err_callermsg(L, msg); + return 0; /* unreachable */ +} + diff --git a/src/lj_err.h b/src/lj_err.h new file mode 100644 index 0000000000..e794d44cd5 --- /dev/null +++ b/src/lj_err.h @@ -0,0 +1,40 @@ +/* +** Error handling and debugging support. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_ERR_H +#define _LJ_ERR_H + +#include <stdarg.h> + +#include "lj_obj.h" + +typedef enum { +#define ERRDEF(name, msg) \ + LJ_ERR_##name, LJ_ERR_##name##_ = LJ_ERR_##name + sizeof(msg)-1, +#include "lj_errmsg.h" + LJ_ERR__MAX +} ErrMsg; + +LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em); +LJ_FUNC_NORET void lj_err_throw(lua_State *L, int errcode); +LJ_FUNC_NORET void lj_err_run(lua_State *L); +LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em); +LJ_FUNC_NORET void lj_err_lex(lua_State *L, const char *src, const char *tok, + BCLine line, ErrMsg em, va_list argp); +LJ_FUNC_NORET void lj_err_optype(lua_State *L, cTValue *o, ErrMsg opm); +LJ_FUNC_NORET void lj_err_comp(lua_State *L, cTValue *o1, cTValue *o2); +LJ_FUNC_NORET void lj_err_optype_call(lua_State *L, TValue *o); +LJ_FUNC_NORET void lj_err_callermsg(lua_State *L, const char *msg); +LJ_FUNC_NORET void lj_err_callerv(lua_State *L, ErrMsg em, ...); +LJ_FUNC_NORET void lj_err_caller(lua_State *L, ErrMsg em); +LJ_FUNC_NORET void lj_err_arg(lua_State *L, int narg, ErrMsg em); +LJ_FUNC_NORET void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...); +LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname); +LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt); + +LJ_FUNC void lj_err_pushloc(lua_State *L, GCproto *pt, BCPos pc); +LJ_FUNC cTValue *lj_err_getframe(lua_State *L, int level, int *size); + +#endif diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h new file mode 100644 index 0000000000..03abd59ba8 --- /dev/null +++ b/src/lj_errmsg.h @@ -0,0 +1,134 @@ +/* +** VM error messages. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +/* This file may be included multiple times with different ERRDEF macros. */ + +/* Basic error handling. */ +ERRDEF(ERRMEM, "not enough memory") +ERRDEF(ERRERR, "error in error handling") + +/* Allocations. */ +ERRDEF(STROV, "string length overflow") +ERRDEF(UDATAOV, "userdata length overflow") +ERRDEF(STKOV, "stack overflow") +ERRDEF(STKOVM, "stack overflow (%s)") +ERRDEF(TABOV, "table overflow") + +/* Table indexing. */ +ERRDEF(NANIDX, "table index is NaN") +ERRDEF(NILIDX, "table index is nil") +ERRDEF(NEXTIDX, "invalid key to " LUA_QL("next")) + +/* Metamethod resolving.
*/ +ERRDEF(BADCALL, "attempt to call a %s value") +ERRDEF(BADOPRT, "attempt to %s %s " LUA_QS " (a %s value)") +ERRDEF(BADOPRV, "attempt to %s a %s value") +ERRDEF(BADCMPT, "attempt to compare %s with %s") +ERRDEF(BADCMPV, "attempt to compare two %s values") +ERRDEF(GETLOOP, "loop in gettable") +ERRDEF(SETLOOP, "loop in settable") +ERRDEF(OPCALL, "call") +ERRDEF(OPINDEX, "index") +ERRDEF(OPARITH, "perform arithmetic on") +ERRDEF(OPCAT, "concatenate") +ERRDEF(OPLEN, "get length of") + +/* Type checks. */ +ERRDEF(BADSELF, "calling " LUA_QS " on bad self (%s)") +ERRDEF(BADARG, "bad argument #%d to " LUA_QS " (%s)") +ERRDEF(BADTYPE, "%s expected, got %s") +ERRDEF(BADVAL, "invalid value") +ERRDEF(NOVAL, "value expected") +ERRDEF(NOCORO, "coroutine expected") +ERRDEF(NOTABN, "nil or table expected") +ERRDEF(NOLFUNC, "Lua function expected") +ERRDEF(NOFUNCL, "function or level expected") +ERRDEF(NOSFT, "string/function/table expected") +ERRDEF(NOPROXY, "boolean or proxy expected") +ERRDEF(FORINIT, LUA_QL("for") " initial value must be a number") +ERRDEF(FORLIM, LUA_QL("for") " limit must be a number") +ERRDEF(FORSTEP, LUA_QL("for") " step must be a number") + +/* C API checks. */ +ERRDEF(NOENV, "no calling environment") +ERRDEF(CYIELD, "attempt to yield across C-call boundary") +ERRDEF(BADLU, "bad light userdata pointer") +ERRDEF(NOGCMM, "bad action while in __gc metamethod") + +/* Standard library function errors. */ +ERRDEF(ASSERT, "assertion failed!") +ERRDEF(PROTMT, "cannot change a protected metatable") +ERRDEF(UNPACK, "too many results to unpack") +ERRDEF(RDRSTR, "reader function must return a string") +ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print")) +ERRDEF(IDXRNG, "index out of range") +ERRDEF(BASERNG, "base out of range") +ERRDEF(LVLRNG, "level out of range") +ERRDEF(INVLVL, "invalid level") +ERRDEF(INVOPT, "invalid option") +ERRDEF(INVOPTM, "invalid option " LUA_QS) +ERRDEF(INVFMT, "invalid format") +ERRDEF(SETFENV, LUA_QL("setfenv") " cannot change environment of given object") +ERRDEF(CORUN, "cannot resume running coroutine") +ERRDEF(CODEAD, "cannot resume dead coroutine") +ERRDEF(COSUSP, "cannot resume non-suspended coroutine") +ERRDEF(TABINS, "wrong number of arguments to " LUA_QL("insert")) +ERRDEF(TABCAT, "invalid value (%s) at index %d in table for " LUA_QL("concat")) +ERRDEF(TABSORT, "invalid order function for sorting") +ERRDEF(IOCLFL, "attempt to use a closed file") +ERRDEF(IOSTDCL, "standard file is closed") +ERRDEF(OSUNIQF, "unable to generate a unique filename") +ERRDEF(OSDATEF, "field " LUA_QS " missing in date table") +ERRDEF(STRDUMP, "cannot dump functions") +ERRDEF(STRSLC, "string slice too long") +ERRDEF(STRPATB, "missing " LUA_QL("[") " after " LUA_QL("%f") " in pattern") +ERRDEF(STRPATC, "invalid pattern capture") +ERRDEF(STRPATE, "malformed pattern (ends with " LUA_QL("%") ")") +ERRDEF(STRPATM, "malformed pattern (missing " LUA_QL("]") ")") +ERRDEF(STRPATU, "unbalanced pattern") +ERRDEF(STRCAPI, "invalid capture index") +ERRDEF(STRCAPN, "too many captures") +ERRDEF(STRCAPU, "unfinished capture") +ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format")) +ERRDEF(STRFMTR, "invalid format (repeated flags)") +ERRDEF(STRFMTW, "invalid format (width or precision too long)") +ERRDEF(STRGSRV, "invalid replacement value (a %s)") +ERRDEF(BADMODN, "name conflict for module " LUA_QS) +ERRDEF(NOJIT, "JIT compiler permanently disabled") +ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS) + +/* Lexer/parser errors. 
*/ +ERRDEF(XNEAR, "%s near " LUA_QS) +ERRDEF(XELEM, "lexical element too long") +ERRDEF(XLINES, "chunk has too many lines") +ERRDEF(XLEVELS, "chunk has too many syntax levels") +ERRDEF(XNUMBER, "malformed number") +ERRDEF(XLSTR, "unfinished long string") +ERRDEF(XLCOM, "unfinished long comment") +ERRDEF(XSTR, "unfinished string") +ERRDEF(XESC, "escape sequence too large") +ERRDEF(XLDELIM, "invalid long string delimiter") +ERRDEF(XBCLOAD, "cannot load Lua bytecode") +ERRDEF(XTOKEN, LUA_QS " expected") +ERRDEF(XJUMP, "control structure too long") +ERRDEF(XSLOTS, "function or expression too complex") +ERRDEF(XLIMM, "main function has more than %d %s") +ERRDEF(XLIMF, "function at line %d has more than %d %s") +ERRDEF(XMATCH, LUA_QS " expected (to close " LUA_QS " at line %d)") +ERRDEF(XFIXUP, "function too long for return fixup") +ERRDEF(XPARAM, " or " LUA_QL("...") " expected") +ERRDEF(XAMBIG, "ambiguous syntax (function call x new statement)") +ERRDEF(XFUNARG, "function arguments expected") +ERRDEF(XSYMBOL, "unexpected symbol") +ERRDEF(XDOTS, "cannot use " LUA_QL("...") " outside a vararg function") +ERRDEF(XSYNTAX, "syntax error") +ERRDEF(XBREAK, "no loop to break") +ERRDEF(XFOR, LUA_QL("=") " or " LUA_QL("in") " expected") + +#undef ERRDEF + +/* Detecting unused error messages: + awk -F, '/^ERRDEF/ { gsub(/ERRDEF./, ""); printf "grep -q LJ_ERR_%s *.[ch] || echo %s\n", $1, $1}' lj_errmsg.h | sh +*/ diff --git a/src/lj_ff.h b/src/lj_ff.h new file mode 100644 index 0000000000..6dfd73a7b7 --- /dev/null +++ b/src/lj_ff.h @@ -0,0 +1,18 @@ +/* +** Fast function IDs. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_FF_H +#define _LJ_FF_H + +/* Fast function ID. */ +typedef enum { + FF_LUA_ = FF_LUA, /* Lua function (must be 0). */ + FF_C_ = FF_C, /* Regular C function (must be 1). */ +#define FFDEF(name) FF_##name, +#include "lj_ffdef.h" + FF__MAX +} FastFunc; + +#endif diff --git a/src/lj_frame.h b/src/lj_frame.h new file mode 100644 index 0000000000..1c03e3e111 --- /dev/null +++ b/src/lj_frame.h @@ -0,0 +1,84 @@ +/* +** Stack frames. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_FRAME_H +#define _LJ_FRAME_H + +#include "lj_obj.h" +#include "lj_bc.h" + +/* -- Lua stack frame ----------------------------------------------------- */ + +/* Frame type markers in callee function slot (callee base-1). */ +enum { + FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG, + FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH +}; +#define FRAME_TYPE 3 +#define FRAME_P 4 +#define FRAME_TYPEP (FRAME_TYPE|FRAME_P) + +/* Macros to access and modify Lua frames. 
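Editor's note (not part of the original patch): a standalone sketch of the ftsz encoding that the frame macros just below decode. It assumes 8-byte TValues, so a frame's byte size always has its low three bits clear and the type tag can live there; the 48-byte FRAME_C frame is an invented example.

#include <stdint.h>
#include <stdio.h>

enum { FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG,
       FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH };
#define FRAME_TYPE 3
#define FRAME_P 4
#define FRAME_TYPEP (FRAME_TYPE|FRAME_P)

int main(void)
{
  /* A C frame spanning 48 bytes (6 slots): size in the upper bits, type tag below. */
  intptr_t ftsz = 48 | FRAME_C;
  printf("type=%d size=%d delta=%d\n",
         (int)(ftsz & FRAME_TYPE),    /* frame_type  -> 1 (FRAME_C) */
         (int)(ftsz & ~FRAME_TYPEP),  /* frame_sized -> 48 bytes */
         (int)(ftsz >> 3));           /* frame_delta -> 6 slots of 8 bytes */
  return 0;
}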
*/ +#define frame_gc(f) (gcref((f)->fr.func)) +#define frame_func(f) (&frame_gc(f)->fn) +#define frame_ftsz(f) ((f)->fr.tp.ftsz) + +#define frame_type(f) (frame_ftsz(f) & FRAME_TYPE) +#define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP) +#define frame_islua(f) (frame_type(f) == FRAME_LUA) +#define frame_isc(f) (frame_type(f) == FRAME_C) +#define frame_iscont(f) (frame_typep(f) == FRAME_CONT) +#define frame_isvarg(f) (frame_typep(f) == FRAME_VARG) +#define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL) + +#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) +#define frame_contpc(f) (frame_pc((f)-1)) +#if LJ_64 +#define frame_contf(f) \ + ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin+(((f)-1)->u64 & 0xffffffff))) +#else +#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void)) +#endif +#define frame_delta(f) (frame_ftsz(f) >> 3) +#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP) + +#define frame_prevl(f) ((f) - (1+bc_a(frame_pc(f)[-1]))) +#define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f))) +#define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f)) +/* Note: this macro does not skip over FRAME_VARG. */ + +#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc))) +#define setframe_gc(f, p) (setgcref((f)->fr.func, (p))) + +/* -- C stack frame ------------------------------------------------------- */ + +/* Macros to access and modify the C stack frame chain. */ + +/* These definitions must match with the arch-specific *.dasc files. */ +#if LJ_TARGET_X86 +#define CFRAME_OFS_ERRF (15*sizeof(void *)) +#define CFRAME_OFS_NRES (14*sizeof(void *)) +#define CFRAME_OFS_PREV (13*sizeof(void *)) +#define CFRAME_OFS_L (12*sizeof(void *)) +#define CFRAME_OFS_PC (6*sizeof(void *)) +#define CFRAME_SIZE (12*sizeof(void *)) +#else +#error "Missing CFRAME_* definitions for this architecture" +#endif + +#define CFRAME_RESUME 1 +#define CFRAME_CANYIELD ((intptr_t)(CFRAME_RESUME)) +#define CFRAME_RAWMASK (~CFRAME_CANYIELD) + +#define cframe_errfunc(cf) (*(ptrdiff_t *)(((char *)cf)+CFRAME_OFS_ERRF)) +#define cframe_nres(cf) (*(ptrdiff_t *)(((char *)cf)+CFRAME_OFS_NRES)) +#define cframe_prev(cf) (*(void **)(((char *)cf)+CFRAME_OFS_PREV)) +#define cframe_L(cf) (*(lua_State **)(((char *)cf)+CFRAME_OFS_L)) +#define cframe_pc(cf) (*(const BCIns **)(((char *)cf)+CFRAME_OFS_PC)) +#define cframe_canyield(cf) ((intptr_t)(cf) & CFRAME_CANYIELD) +#define cframe_raw(cf) ((void *)((intptr_t)(cf) & CFRAME_RAWMASK)) +#define cframe_Lpc(L) cframe_pc(cframe_raw(L->cframe)) + +#endif diff --git a/src/lj_func.c b/src/lj_func.c new file mode 100644 index 0000000000..92cdeda27e --- /dev/null +++ b/src/lj_func.c @@ -0,0 +1,185 @@ +/* +** Function handling (prototypes, functions and upvalues). +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. 
See Copyright Notice in lua.h +*/ + +#define lj_func_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_func.h" +#include "lj_trace.h" +#include "lj_vm.h" + +/* -- Prototypes ---------------------------------------------------------- */ + +GCproto *lj_func_newproto(lua_State *L) +{ + GCproto *pt = lj_mem_newobj(L, GCproto); + pt->gct = ~LJ_TPROTO; + pt->numparams = 0; + pt->framesize = 0; + pt->sizeuv = 0; + pt->flags = 0; + pt->trace = 0; + pt->k.n = NULL; + pt->bc = NULL; + pt->uv = NULL; + pt->sizebc = 0; + pt->sizekgc = 0; + pt->sizekn = 0; + pt->sizelineinfo = 0; + pt->sizevarinfo = 0; + pt->sizeuvname = 0; + pt->linedefined = 0; + pt->lastlinedefined = 0; + pt->lineinfo = NULL; + pt->varinfo = NULL; + pt->uvname = NULL; + pt->chunkname = NULL; + return pt; +} + +void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt) +{ + MSize nkgc = round_nkgc(pt->sizekgc); + MSize sizek = nkgc*(MSize)sizeof(GCobj *) + + pt->sizekn*(MSize)sizeof(lua_Number); + lj_mem_free(g, pt->k.gc - nkgc, sizek); + lj_mem_freevec(g, pt->bc, pt->sizebc, BCIns); + lj_mem_freevec(g, pt->uv, pt->sizeuv, int16_t); + lj_mem_freevec(g, pt->lineinfo, pt->sizelineinfo, int32_t); + lj_mem_freevec(g, pt->varinfo, pt->sizevarinfo, struct VarInfo); + lj_mem_freevec(g, pt->uvname, pt->sizeuvname, GCstr *); + lj_trace_freeproto(g, pt); + lj_mem_freet(g, pt); +} + +/* -- Upvalues ------------------------------------------------------------ */ + +static void unlinkuv(GCupval *uv) +{ + lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); + setgcrefr(uvnext(uv)->prev, uv->prev); + setgcrefr(uvprev(uv)->next, uv->next); +} + +/* Find existing open upvalue for a stack slot or create a new one. */ +static GCupval *func_finduv(lua_State *L, TValue *slot) +{ + global_State *g = G(L); + GCRef *pp = &L->openupval; + GCupval *p; + GCupval *uv; + /* Search the sorted list of open upvalues. */ + while (gcref(*pp) != NULL && (p = gco2uv(gcref(*pp)))->v >= slot) { + lua_assert(!p->closed && p->v != &p->tv); + if (p->v == slot) { /* Found open upvalue pointing to same slot? */ + if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. */ + flipwhite(obj2gco(p)); + return p; + } + pp = &p->nextgc; + } + /* No matching upvalue found. Create a new one. */ + uv = lj_mem_newt(L, sizeof(GCupval), GCupval); + newwhite(g, uv); + uv->gct = ~LJ_TUPVAL; + uv->closed = 0; /* Still open. */ + uv->v = slot; /* Pointing to the stack slot. */ + /* NOBARRIER: The GCupval is new (marked white) and open. */ + setgcrefr(uv->nextgc, *pp); /* Insert into sorted list of open upvalues. */ + setgcref(*pp, obj2gco(uv)); + setgcref(uv->prev, obj2gco(&g->uvhead)); /* Insert into GC list, too. */ + setgcrefr(uv->next, g->uvhead.next); + setgcref(uvnext(uv)->prev, obj2gco(uv)); + setgcref(g->uvhead.next, obj2gco(uv)); + lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); + return uv; +} + +/* Close all open upvalues pointing to some stack level or above. */ +void lj_func_closeuv(lua_State *L, TValue *level) +{ + GCupval *uv; + global_State *g = G(L); + while (gcref(L->openupval) != NULL && + (uv = gco2uv(gcref(L->openupval)))->v >= level) { + GCobj *o = obj2gco(uv); + lua_assert(!isblack(o) && !uv->closed && uv->v != &uv->tv); + setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. 
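+** A dead upvalue can be freed outright; a live one is closed instead:
+** lj_gc_closeuv() copies the stack slot into uv->tv and points uv->v at
+** that copy, so the value outlives the vanishing stack frame.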
*/ + if (isdead(g, o)) { + lj_func_freeuv(g, uv); + } else { + unlinkuv(uv); + lj_gc_closeuv(g, uv); + } + } +} + +void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv) +{ + if (!uv->closed) + unlinkuv(uv); + lj_mem_freet(g, uv); +} + +/* -- Functions (closures) ------------------------------------------------ */ + +GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env) +{ + GCfunc *fn = cast(GCfunc *, lj_mem_newgco(L, sizeCfunc(nelems))); + fn->c.gct = ~LJ_TFUNC; + fn->c.ffid = FF_C; + fn->c.nupvalues = cast_byte(nelems); + /* NOBARRIER: The GCfunc is new (marked white). */ + setgcref(fn->c.env, obj2gco(env)); + fn->c.gate = lj_gate_c; + return fn; +} + +GCfunc *lj_func_newL(lua_State *L, GCproto *pt, GCtab *env) +{ + GCfunc *fn = cast(GCfunc *, lj_mem_newgco(L, sizeLfunc((MSize)pt->sizeuv))); + fn->l.gct = ~LJ_TFUNC; + fn->l.ffid = FF_LUA; + fn->l.nupvalues = cast_byte(pt->sizeuv); + /* NOBARRIER: The GCfunc is new (marked white). */ + setgcref(fn->l.pt, obj2gco(pt)); + setgcref(fn->l.env, obj2gco(env)); + fn->l.gate = (pt->flags & PROTO_IS_VARARG) ? lj_gate_lv : lj_gate_lf; + return fn; +} + +/* Do a GC check and create a new Lua function with inherited upvalues. */ +GCfunc *lj_func_newL_gc(lua_State *L, GCproto *pt, GCfuncL *parent) +{ + GCfunc *fn; + GCRef *puv; + uint32_t i, nuv; + TValue *base; + lj_gc_check_fixtop(L); + fn = lj_func_newL(L, pt, tabref(parent->env)); + /* NOBARRIER: The GCfunc is new (marked white). */ + puv = parent->uvptr; + nuv = fn->l.nupvalues; + base = L->base; + for (i = 0; i < nuv; i++) { + int v = pt->uv[i]; + GCupval *uv = v < 0 ? &gcref(puv[~v])->uv : func_finduv(L, base + v); + setgcref(fn->l.uvptr[i], obj2gco(uv)); + } + return fn; +} + +void LJ_FASTCALL lj_func_free(global_State *g, GCfunc *fn) +{ + MSize size = isluafunc(fn) ? sizeLfunc((MSize)fn->l.nupvalues) : + sizeCfunc((MSize)fn->c.nupvalues); + lj_mem_free(g, fn, size); +} + diff --git a/src/lj_func.h b/src/lj_func.h new file mode 100644 index 0000000000..ee7942eac3 --- /dev/null +++ b/src/lj_func.h @@ -0,0 +1,25 @@ +/* +** Function handling (prototypes, functions and upvalues). +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_FUNC_H +#define _LJ_FUNC_H + +#include "lj_obj.h" + +/* Prototypes. */ +LJ_FUNC GCproto *lj_func_newproto(lua_State *L); +LJ_FUNC void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt); + +/* Upvalues. */ +LJ_FUNCA void lj_func_closeuv(lua_State *L, TValue *level); +LJ_FUNC void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv); + +/* Functions (closures). */ +LJ_FUNC GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env); +LJ_FUNC GCfunc *lj_func_newL(lua_State *L, GCproto *pt, GCtab *env); +LJ_FUNCA GCfunc *lj_func_newL_gc(lua_State *L, GCproto *pt, GCfuncL *parent); +LJ_FUNC void LJ_FASTCALL lj_func_free(global_State *g, GCfunc *c); + +#endif diff --git a/src/lj_gc.c b/src/lj_gc.c new file mode 100644 index 0000000000..e479b567ef --- /dev/null +++ b/src/lj_gc.c @@ -0,0 +1,800 @@ +/* +** Garbage collector. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Major portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. 
See Copyright Notice in lua.h +*/ + +#define lj_gc_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_func.h" +#include "lj_udata.h" +#include "lj_meta.h" +#include "lj_state.h" +#include "lj_frame.h" +#include "lj_trace.h" +#include "lj_vm.h" + +#define GCSTEPSIZE 1024u +#define GCSWEEPMAX 40 +#define GCSWEEPCOST 10 +#define GCFINALIZECOST 100 + +/* Macros to set GCobj colors and flags. */ +#define white2gray(x) ((x)->gch.marked &= cast_byte(~LJ_GC_WHITES)) +#define black2gray(x) ((x)->gch.marked &= cast_byte(~LJ_GC_BLACK)) +#define gray2black(x) ((x)->gch.marked |= LJ_GC_BLACK) +#define makewhite(g, x) \ + ((x)->gch.marked = ((x)->gch.marked & cast_byte(~LJ_GC_COLORS)) | curwhite(g)) +#define isfinalized(u) ((u)->marked & LJ_GC_FINALIZED) +#define markfinalized(u) ((u)->marked |= LJ_GC_FINALIZED) + +/* -- Mark phase ---------------------------------------------------------- */ + +/* Mark a TValue (if needed). */ +#define gc_marktv(g, tv) \ + { lua_assert(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct)); \ + if (tviswhite(tv)) gc_mark(g, gcV(tv)); } + +/* Mark a GCobj (if needed). */ +#define gc_markobj(g, o) \ + { if (iswhite(obj2gco(o))) gc_mark(g, obj2gco(o)); } + +/* Mark a string object. */ +#define gc_mark_str(s) ((s)->marked &= cast_byte(~LJ_GC_WHITES)) + +/* Mark a white GCobj. */ +static void gc_mark(global_State *g, GCobj *o) +{ + lua_assert(iswhite(o) && !isdead(g, o)); + white2gray(o); + if (LJ_UNLIKELY(o->gch.gct == ~LJ_TUDATA)) { + GCtab *mt = tabref(gco2ud(o)->metatable); + gray2black(o); /* Userdata are never gray. */ + if (mt) gc_markobj(g, mt); + gc_markobj(g, tabref(gco2ud(o)->env)); + } else if (LJ_UNLIKELY(o->gch.gct == ~LJ_TUPVAL)) { + GCupval *uv = gco2uv(o); + gc_marktv(g, uv->v); + if (uv->closed) + gray2black(o); /* Closed upvalues are never gray. */ + } else if (o->gch.gct != ~LJ_TSTR) { + lua_assert(o->gch.gct == ~LJ_TFUNC || o->gch.gct == ~LJ_TTAB || + o->gch.gct == ~LJ_TTHREAD || o->gch.gct == ~LJ_TPROTO); + setgcrefr(o->gch.gclist, g->gc.gray); + setgcref(g->gc.gray, o); + } +} + +/* Mark the base metatables. */ +static void gc_mark_basemt(global_State *g) +{ + int i; + for (i = 0; i < BASEMT_MAX; i++) + if (tabref(g->basemt[i]) != NULL) + gc_markobj(g, tabref(g->basemt[i])); +} + +/* Start a GC cycle and mark the root set. */ +static void gc_mark_start(global_State *g) +{ + setgcrefnull(g->gc.gray); + setgcrefnull(g->gc.grayagain); + setgcrefnull(g->gc.weak); + gc_markobj(g, mainthread(g)); + gc_markobj(g, tabref(mainthread(g)->env)); + gc_marktv(g, &g->registrytv); + gc_mark_basemt(g); + g->gc.state = GCSpropagate; +} + +/* Mark open upvalues. */ +static void gc_mark_uv(global_State *g) +{ + GCupval *uv; + for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) { + lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); + if (isgray(obj2gco(uv))) + gc_marktv(g, uv->v); + } +} + +/* Mark userdata in mmudata list. */ +static void gc_mark_mmudata(global_State *g) +{ + GCobj *root = gcref(g->gc.mmudata); + GCobj *u = root; + if (u) { + do { + u = gcnext(u); + makewhite(g, u); /* Could be from previous GC. */ + gc_mark(g, u); + } while (u != root); + } +} + +/* Separate userdata which which needs finalization to mmudata list. 
*/ +size_t lj_gc_separateudata(global_State *g, int all) +{ + size_t m = 0; + GCRef *p = &mainthread(g)->nextgc; + GCobj *o; + while ((o = gcref(*p)) != NULL) { + if (!(iswhite(o) || all) || isfinalized(gco2ud(o))) { + p = &o->gch.nextgc; /* Nothing to do. */ + } else if (!lj_meta_fastg(g, tabref(gco2ud(o)->metatable), MM_gc)) { + markfinalized(gco2ud(o)); /* Done, as there's no __gc metamethod. */ + p = &o->gch.nextgc; + } else { /* Otherwise move userdata to be finalized to mmudata list. */ + m += sizeudata(gco2ud(o)); + markfinalized(gco2ud(o)); + *p = o->gch.nextgc; + if (gcref(g->gc.mmudata)) { /* Link to end of mmudata list. */ + GCobj *root = gcref(g->gc.mmudata); + setgcrefr(o->gch.nextgc, root->gch.nextgc); + setgcref(root->gch.nextgc, o); + setgcref(g->gc.mmudata, o); + } else { /* Create circular list. */ + setgcref(o->gch.nextgc, o); + setgcref(g->gc.mmudata, o); + } + } + } + return m; +} + +/* -- Propagation phase --------------------------------------------------- */ + +/* Traverse a table. */ +static int gc_traverse_tab(global_State *g, GCtab *t) +{ + int weak = 0; + cTValue *mode; + GCtab *mt = tabref(t->metatable); + if (mt) + gc_markobj(g, mt); + mode = lj_meta_fastg(g, mt, MM_mode); + if (mode && tvisstr(mode)) { /* Valid __mode field? */ + const char *modestr = strVdata(mode); + int c; + while ((c = *modestr++)) { + if (c == 'k') weak |= LJ_GC_WEAKKEY; + else if (c == 'v') weak |= LJ_GC_WEAKVAL; + } + if (weak) { /* Weak tables are cleared in the atomic phase. */ + t->marked = cast_byte((t->marked & ~LJ_GC_WEAK) | weak); + setgcrefr(t->gclist, g->gc.weak); + setgcref(g->gc.weak, obj2gco(t)); + } + } + if (weak == LJ_GC_WEAK) /* Nothing to mark if both keys/values are weak. */ + return 1; + if (!(weak & LJ_GC_WEAKVAL)) { /* Mark array part. */ + MSize i, asize = t->asize; + for (i = 0; i < asize; i++) + gc_marktv(g, arrayslot(t, i)); + } + if (t->hmask > 0) { /* Mark hash part. */ + Node *node = noderef(t->node); + MSize i, hmask = t->hmask; + for (i = 0; i <= hmask; i++) { + Node *n = &node[i]; + lua_assert(itype(&n->key) != LJ_TDEADKEY || tvisnil(&n->val)); + if (!tvisnil(&n->val)) { /* Mark non-empty slot. */ + lua_assert(!tvisnil(&n->key)); + if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key); + if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val); + } else if (tvisgcv(&n->key)) { /* Leave GC key in, but mark as dead. */ + setitype(&n->key, LJ_TDEADKEY); + } + } + } + return weak; +} + +/* Traverse a function. */ +static void gc_traverse_func(global_State *g, GCfunc *fn) +{ + gc_markobj(g, tabref(fn->c.env)); + if (isluafunc(fn)) { + uint32_t i; + lua_assert(fn->l.nupvalues == funcproto(fn)->sizeuv); + gc_markobj(g, funcproto(fn)); + for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. */ + gc_markobj(g, &gcref(fn->l.uvptr[i])->uv); + } else { + uint32_t i; + for (i = 0; i < fn->c.nupvalues; i++) /* Mark C function upvalues. */ + gc_marktv(g, &fn->c.upvalue[i]); + } +} + +#if LJ_HASJIT +/* Traverse a trace. */ +static void gc_traverse_trace(global_State *g, Trace *T) +{ + IRRef ref; + for (ref = T->nk; ref < REF_TRUE; ref++) { + IRIns *ir = &T->ir[ref]; + if (ir->o == IR_KGC) + gc_markobj(g, ir_kgc(ir)); + } +} + +/* The current trace is a GC root while not anchored in the prototype (yet). */ +#define gc_mark_curtrace(g) \ + { if (G2J(g)->state != LJ_TRACE_IDLE && G2J(g)->curtrace != 0) \ + gc_traverse_trace(g, &G2J(g)->cur); } +#else +#define gc_mark_curtrace(g) UNUSED(g) +#endif + +/* Traverse a prototype. 
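+** Collectable constants sit below pt->k and are indexed downwards
+** (k.gc[-1], k.gc[-2], ...); numeric constants grow upwards from the
+** same pointer. Hence the negative index loop below, and the base
+** address k.gc - nkgc used when the block is freed in lj_func.c.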
*/ +static void gc_traverse_proto(global_State *g, GCproto *pt) +{ + ptrdiff_t i; +#if LJ_HASJIT + jit_State *J = G2J(g); + TraceNo root, side; + /* Mark all root traces and attached side traces. */ + for (root = pt->trace; root != 0; root = J->trace[root]->nextroot) { + for (side = J->trace[root]->nextside; side != 0; + side = J->trace[side]->nextside) + gc_traverse_trace(g, J->trace[side]); + gc_traverse_trace(g, J->trace[root]); + } +#endif + /* GC during prototype creation could cause NULL fields. */ + if (pt->chunkname) + gc_mark_str(pt->chunkname); + for (i = -(ptrdiff_t)pt->sizekgc; i < 0; i++) /* Mark collectable consts. */ + gc_markobj(g, gcref(pt->k.gc[i])); + for (i = 0; i < (ptrdiff_t)pt->sizeuvname; i++) /* Mark upvalue names. */ + if (pt->uvname[i]) + gc_mark_str(pt->uvname[i]); + for (i = 0; i < (ptrdiff_t)pt->sizevarinfo; i++) /* Mark names of locals. */ + if (pt->varinfo[i].name) + gc_mark_str(pt->varinfo[i].name); +} + +/* Traverse the frame structure of a stack. */ +static TValue *gc_traverse_frames(global_State *g, lua_State *th) +{ + TValue *frame, *top = th->top-1; + /* Note: extra vararg frame not skipped, marks function twice (harmless). */ + for (frame = th->base-1; frame > th->stack; frame = frame_prev(frame)) { + GCfunc *fn = frame_func(frame); + TValue *ftop = frame; + if (isluafunc(fn)) ftop += funcproto(fn)->framesize; + if (ftop > top) top = ftop; + gc_markobj(g, frame_gc(frame)); /* Need to mark hidden function (or L). */ + } + top++; /* Correct bias of -1 (frame == base-1). */ + if (top > th->maxstack) top = th->maxstack; + return top; +} + +/* Traverse a thread object. */ +static void gc_traverse_thread(global_State *g, lua_State *th) +{ + TValue *o, *lim; + gc_markobj(g, tabref(th->env)); + for (o = th->stack+1; o < th->top; o++) + gc_marktv(g, o); + lim = gc_traverse_frames(g, th); + /* Extra cleanup required to avoid this marking problem: + ** + ** [aa[bb.X| X created. + ** [aa[cc| GC called from (small) inner frame, X destroyed. + ** [aa....X.| GC called again in (larger) outer frame, X resurrected (ouch). + ** + ** During GC in step 2 the stack must be cleaned up to the max. frame extent: + ** + ** ***| Slots cleaned + ** [cc| from top of last frame + ** [aa......| to max. frame extent. + */ + for (; o <= lim; o++) + setnilV(o); + lj_state_shrinkstack(th, (MSize)(lim - th->stack)); +} + +/* Propagate one gray object. Traverse it and turn it black. */ +static size_t propagatemark(global_State *g) +{ + GCobj *o = gcref(g->gc.gray); + lua_assert(isgray(o)); + gray2black(o); + setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */ + if (LJ_LIKELY(o->gch.gct == ~LJ_TTAB)) { + GCtab *t = gco2tab(o); + if (gc_traverse_tab(g, t)) + black2gray(o); /* Keep weak tables gray. */ + return sizeof(GCtab) + sizeof(TValue) * t->asize + + sizeof(Node) * (t->hmask + 1); + } else if (LJ_LIKELY(o->gch.gct == ~LJ_TFUNC)) { + GCfunc *fn = gco2func(o); + gc_traverse_func(g, fn); + return isluafunc(fn) ? 
sizeLfunc((MSize)fn->l.nupvalues) : + sizeCfunc((MSize)fn->c.nupvalues); + } else if (LJ_LIKELY(o->gch.gct == ~LJ_TPROTO)) { + GCproto *pt = gco2pt(o); + gc_traverse_proto(g, pt); + return sizeof(GCproto) + sizeof(BCIns) * pt->sizebc + + sizeof(GCobj *) * pt->sizekgc + + sizeof(lua_Number) * pt->sizekn + + sizeof(int16_t) * pt->sizeuv + + sizeof(int32_t) * pt->sizelineinfo + + sizeof(VarInfo) * pt->sizevarinfo + + sizeof(GCstr *) * pt->sizeuvname; + } else { + lua_State *th = gco2th(o); + setgcrefr(th->gclist, g->gc.grayagain); + setgcref(g->gc.grayagain, o); + black2gray(o); /* Threads are never black. */ + gc_traverse_thread(g, th); + return sizeof(lua_State) + sizeof(TValue) * th->stacksize; + } +} + +/* Propagate all gray objects. */ +static size_t gc_propagate_gray(global_State *g) +{ + size_t m = 0; + while (gcref(g->gc.gray) != NULL) + m += propagatemark(g); + return m; +} + +/* -- Sweep phase --------------------------------------------------------- */ + +/* Try to shrink some common data structures. */ +static void gc_shrink(global_State *g, lua_State *L) +{ + if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1) + lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */ + if (g->tmpbuf.sz > LJ_MIN_SBUF*2) + lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */ +} + +/* Type of GC free functions. */ +typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o); + +/* GC free functions for LJ_TSTR .. LJ_TUDATA. ORDER LJ_T */ +static const GCFreeFunc gc_freefunc[] = { + (GCFreeFunc)lj_str_free, + (GCFreeFunc)lj_func_freeuv, + (GCFreeFunc)lj_state_free, + (GCFreeFunc)lj_func_freeproto, + (GCFreeFunc)lj_func_free, + (GCFreeFunc)0, + (GCFreeFunc)lj_tab_free, + (GCFreeFunc)lj_udata_free +}; + +/* Full sweep of a GC list. */ +#define gc_fullsweep(g, p) gc_sweep(g, (p), LJ_MAX_MEM) + +/* Partial sweep of a GC list. */ +static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) +{ + /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */ + int ow = otherwhite(g); + GCobj *o; + while ((o = gcref(*p)) != NULL && lim-- > 0) { + if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */ + gc_fullsweep(g, &gco2th(o)->openupval); + if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ + lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED)); + makewhite(g, o); /* Value is alive, change to the current white. */ + p = &o->gch.nextgc; + } else { /* Otherwise value is dead, free it. */ + lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED); + setgcrefr(*p, o->gch.nextgc); + if (o == gcref(g->gc.root)) + setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */ + gc_freefunc[o->gch.gct - ~LJ_TSTR](g, o); + } + } + return p; +} + +/* Check whether we can clear a key or a value slot from a table. */ +static int gc_mayclear(cTValue *o, int val) +{ + if (tvisgcv(o)) { /* Only collectable objects can be weak references. */ + if (tvisstr(o)) { /* But strings cannot be used as weak references. */ + gc_mark_str(strV(o)); /* And need to be marked. */ + return 0; + } + if (iswhite(gcV(o))) + return 1; /* Object is about to be collected. */ + if (tvisudata(o) && val && isfinalized(udataV(o))) + return 1; /* Finalized userdata is dropped only from values. */ + } + return 0; /* Cannot clear. */ +} + +/* Clear collected entries from weak tables. 
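+** Runs over the gc.weak list rebuilt during the atomic phase. Array
+** slots are cleared when the value is about to be collected; hash slots
+** when either key or value is, with dead GC keys retagged LJ_TDEADKEY
+** so the hash chain stays walkable. This is what makes a table with
+** e.g. __mode = "v" drop entries whose values become unreachable.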
*/ +static void gc_clearweak(GCobj *o) +{ + while (o) { + GCtab *t = gco2tab(o); + lua_assert((t->marked & LJ_GC_WEAK)); + if ((t->marked & LJ_GC_WEAKVAL)) { + MSize i, asize = t->asize; + for (i = 0; i < asize; i++) { + /* Clear array slot when value is about to be collected. */ + TValue *tv = arrayslot(t, i); + if (gc_mayclear(tv, 1)) + setnilV(tv); + } + } + if (t->hmask > 0) { + Node *node = noderef(t->node); + MSize i, hmask = t->hmask; + for (i = 0; i <= hmask; i++) { + Node *n = &node[i]; + /* Clear hash slot when key or value is about to be collected. */ + if (!tvisnil(&n->val) && (gc_mayclear(&n->key, 0) || + gc_mayclear(&n->val, 1))) { + setnilV(&n->val); + if (tvisgcv(&n->key)) /* Leave GC key in, but mark as dead. */ + setitype(&n->key, LJ_TDEADKEY); + } + } + } + o = gcref(t->gclist); + } +} + +/* Finalize one userdata object from mmudata list. */ +static void gc_finalize(lua_State *L) +{ + global_State *g = G(L); + GCobj *o = gcnext(gcref(g->gc.mmudata)); + GCudata *ud = gco2ud(o); + cTValue *mo; + /* Unchain from list of userdata to be finalized. */ + if (o == gcref(g->gc.mmudata)) + setgcrefnull(g->gc.mmudata); + else + setgcrefr(gcref(g->gc.mmudata)->gch.nextgc, ud->nextgc); + /* Add it back to the main userdata list and make it white. */ + setgcrefr(ud->nextgc, mainthread(g)->nextgc); + setgcref(mainthread(g)->nextgc, o); + makewhite(g, o); + /* Resolve the __gc metamethod. */ + mo = lj_meta_fastg(g, tabref(ud->metatable), MM_gc); + if (mo) { + /* Save and restore lots of state around the __gc callback. */ + uint8_t oldh = hook_save(g); + MSize oldt = g->gc.threshold; + GCobj *oldjl = gcref(g->jit_L); + MSize oldjs = 0; + ptrdiff_t oldjb = 0; + int errcode; + TValue *top; + if (oldjl) { + oldjs = gco2th(oldjl)->stacksize; + oldjb = savestack(gco2th(oldjl), mref(g->jit_base, TValue )); + setgcrefnull(g->jit_L); + } + lj_trace_abort(g); + top = L->top; + L->top = top+2; + hook_entergc(g); /* Disable hooks and new traces during __gc. */ + g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */ + copyTV(L, top, mo); + setudataV(L, top+1, ud); + errcode = lj_vm_pcall(L, top+1, 1+0, -1); /* Stack: |mo|ud| -> | */ + hook_restore(g, oldh); + g->gc.threshold = oldt; /* Restore GC threshold. */ + if (oldjl) { + if (gco2th(oldjl)->stacksize < oldjs) + lj_state_growstack(gco2th(oldjl), oldjs - gco2th(oldjl)->stacksize); + setgcref(g->jit_L, oldjl); + setmref(g->jit_base, restorestack(gco2th(oldjl), oldjb)); + } + if (errcode) + lj_err_throw(L, errcode); /* Propagate errors. */ + } +} + +/* Finalize all userdata objects from mmudata list. */ +void lj_gc_finalizeudata(lua_State *L) +{ + while (gcref(G(L)->gc.mmudata) != NULL) + gc_finalize(L); +} + +/* Free all remaining GC objects. */ +void lj_gc_freeall(global_State *g) +{ + MSize i, strmask; + /* Free everything, except super-fixed objects (the main thread). */ + g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED; + gc_fullsweep(g, &g->gc.root); + strmask = g->strmask; + for (i = 0; i <= strmask; i++) /* Free all string hash chains. */ + gc_fullsweep(g, &g->strhash[i]); +} + +/* -- Collector ----------------------------------------------------------- */ + +/* Atomic part of the GC cycle, transitioning from mark to sweep phase. */ +static void atomic(global_State *g, lua_State *L) +{ + size_t udsize; + + gc_mark_uv(g); /* Need to remark open upvalues (the thread may be dead). */ + gc_propagate_gray(g); /* Propagate any left-overs. */ + + setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. 
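+** Weak tables were kept gray during propagation; traversing them again
+** here, after all strong references are marked, settles the liveness of
+** their keys/values and rebuilds gc.weak for the clearing pass below.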
*/
+ setgcrefnull(g->gc.weak);
+ lua_assert(!iswhite(obj2gco(mainthread(g))));
+ gc_markobj(g, L); /* Mark running thread. */
+ gc_mark_curtrace(g); /* Mark current trace. */
+ gc_mark_basemt(g); /* Mark base metatables (again). */
+ gc_propagate_gray(g); /* Propagate all of the above. */
+
+ setgcrefr(g->gc.gray, g->gc.grayagain); /* Empty the 2nd chance list. */
+ setgcrefnull(g->gc.grayagain);
+ gc_propagate_gray(g); /* Propagate it. */
+
+ udsize = lj_gc_separateudata(g, 0); /* Separate userdata to be finalized. */
+ gc_mark_mmudata(g); /* Mark them. */
+ udsize += gc_propagate_gray(g); /* And propagate the marks. */
+
+ /* All marking done, clear weak tables. */
+ gc_clearweak(gcref(g->gc.weak));
+
+ /* Prepare for sweep phase. */
+ g->gc.currentwhite = cast_byte(otherwhite(g)); /* Flip current white. */
+ g->gc.sweepstr = 0;
+ g->gc.sweep = &g->gc.root;
+ g->gc.state = GCSsweepstring;
+ g->gc.estimate = g->gc.total - (MSize)udsize; /* Initial estimate. */
+}
+
+/* GC state machine. Returns a cost estimate for each step performed. */
+static size_t gc_onestep(lua_State *L)
+{
+ global_State *g = G(L);
+ switch (g->gc.state) {
+ case GCSpause:
+ gc_mark_start(g); /* Start a new GC cycle by marking all GC roots. */
+ return 0;
+ case GCSpropagate:
+ if (gcref(g->gc.gray) != NULL)
+ return propagatemark(g); /* Propagate one gray object. */
+ atomic(g, L); /* End of mark phase. */
+ return 0;
+ case GCSsweepstring: {
+ MSize old = g->gc.total;
+ gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */
+ if (g->gc.sweepstr > g->strmask)
+ g->gc.state = GCSsweep; /* All string hash chains swept. */
+ lua_assert(old >= g->gc.total);
+ g->gc.estimate -= old - g->gc.total;
+ return GCSWEEPCOST;
+ }
+ case GCSsweep: {
+ MSize old = g->gc.total;
+ g->gc.sweep = gc_sweep(g, g->gc.sweep, GCSWEEPMAX); /* Partial sweep. */
+ if (gcref(*g->gc.sweep) == NULL) {
+ gc_shrink(g, L);
+ g->gc.state = GCSfinalize; /* End of sweep phase. */
+ }
+ lua_assert(old >= g->gc.total);
+ g->gc.estimate -= old - g->gc.total;
+ return GCSWEEPMAX*GCSWEEPCOST;
+ }
+ case GCSfinalize:
+ if (gcref(g->gc.mmudata) != NULL) {
+ gc_finalize(L); /* Finalize one userdata object. */
+ if (g->gc.estimate > GCFINALIZECOST)
+ g->gc.estimate -= GCFINALIZECOST;
+ return GCFINALIZECOST;
+ }
+ g->gc.state = GCSpause; /* End of GC cycle. */
+ g->gc.debt = 0;
+ return 0;
+ default:
+ lua_assert(0);
+ return 0;
+ }
+}
+
+/* Perform a limited amount of incremental GC steps. */
+int lj_gc_step(lua_State *L)
+{
+ global_State *g = G(L);
+ MSize lim;
+ int32_t ostate = g->vmstate;
+ setvmstate(g, GC);
+ lim = (GCSTEPSIZE/100) * g->gc.stepmul;
+ if (lim == 0)
+ lim = LJ_MAX_MEM;
+ g->gc.debt += g->gc.total - g->gc.threshold;
+ do {
+ lim -= (MSize)gc_onestep(L);
+ if (g->gc.state == GCSpause) {
+ lua_assert(g->gc.total >= g->gc.estimate);
+ g->gc.threshold = (g->gc.estimate/100) * g->gc.pause;
+ g->vmstate = ostate;
+ return 1; /* Finished a GC cycle. */
+ }
+ } while ((int32_t)lim > 0);
+ if (g->gc.debt < GCSTEPSIZE) {
+ g->gc.threshold = g->gc.total + GCSTEPSIZE;
+ } else {
+ g->gc.debt -= GCSTEPSIZE;
+ g->gc.threshold = g->gc.total;
+ }
+ g->vmstate = ostate;
+ return 0;
+}
+
+/* Ditto, but fix the stack top first. */
+void lj_gc_step_fixtop(lua_State *L)
+{
+ if (curr_funcisL(L)) L->top = curr_topL(L);
+ lj_gc_step(L);
+}
+
+/* Perform multiple GC steps. Called from JIT-compiled code.
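+** The compiled code only passes the current PC and a step count; the
+** saved PC and the stack top are fixed up first so the collector sees a
+** consistent frame. The loop stops early once lj_gc_step() reports a
+** finished cycle.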
*/ +void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) +{ + cframe_pc(cframe_raw(L->cframe)) = pc; + L->top = curr_topL(L); + while (steps-- > 0 && lj_gc_step(L) == 0) + ; +} + +/* Perform a full GC cycle. */ +void lj_gc_fullgc(lua_State *L) +{ + global_State *g = G(L); + int32_t ostate = g->vmstate; + setvmstate(g, GC); + if (g->gc.state <= GCSpropagate) { /* Caught somewhere in the middle. */ + g->gc.sweepstr = 0; + g->gc.sweep = &g->gc.root; /* Sweep everything (preserving it). */ + setgcrefnull(g->gc.gray); /* Reset lists from partial propagation. */ + setgcrefnull(g->gc.grayagain); + setgcrefnull(g->gc.weak); + g->gc.state = GCSsweepstring; /* Fast forward to the sweep phase. */ + } + lua_assert(g->gc.state != GCSpause && g->gc.state != GCSpropagate); + while (g->gc.state != GCSfinalize) { /* Finish sweep. */ + lua_assert(g->gc.state == GCSsweepstring || g->gc.state == GCSsweep); + gc_onestep(L); + } + /* Now perform a full GC. */ + gc_mark_start(g); + while (g->gc.state != GCSpause) + gc_onestep(L); + g->gc.threshold = (g->gc.estimate/100) * g->gc.pause; + g->vmstate = ostate; +} + +/* -- Write barriers ------------------------------------------------------ */ + +/* Move the GC propagation frontier back for tables (make it gray again). */ +void lj_gc_barrierback(global_State *g, GCtab *t) +{ + GCobj *o = obj2gco(t); + lua_assert(isblack(o) && !isdead(g, o)); + lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); + black2gray(o); + setgcrefr(t->gclist, g->gc.grayagain); + setgcref(g->gc.grayagain, o); +} + +/* Move the GC propagation frontier forward. */ +void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v) +{ + lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); + lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); + lua_assert(o->gch.gct != ~LJ_TTAB); + /* Preserve invariant during propagation. Otherwise it doesn't matter. */ + if (g->gc.state == GCSpropagate) + gc_mark(g, v); /* Move frontier forward. */ + else + makewhite(g, o); /* Make it white to avoid the following barrier. */ +} + +/* The reason for duplicating this is that it needs to be visible from ASM. */ +void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) +{ + lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); + lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); + lua_assert(o->gch.gct == ~LJ_TUPVAL); + /* Preserve invariant during propagation. Otherwise it doesn't matter. */ + if (g->gc.state == GCSpropagate) + gc_mark(g, v); /* Move frontier forward. */ + else + makewhite(g, o); /* Make it white to avoid the following barrier. */ +} + +/* Close upvalue. Also needs a write barrier. */ +void lj_gc_closeuv(global_State *g, GCupval *uv) +{ + GCobj *o = obj2gco(uv); + /* Copy stack slot to upvalue itself and point to the copy. */ + copyTV(mainthread(g), &uv->tv, uv->v); + uv->v = &uv->tv; + uv->closed = 1; + setgcrefr(o->gch.nextgc, g->gc.root); + setgcref(g->gc.root, o); + if (isgray(o)) { /* A closed upvalue is never gray, so fix this. */ + if (g->gc.state == GCSpropagate) { + gray2black(o); /* Make it black and preserve invariant. */ + if (tviswhite(uv->v)) + lj_gc_barrierf(g, o, gcV(uv->v)); + } else { + makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */ + lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); + } + } +} + +#if LJ_HASJIT +/* Mark a trace if it's saved during the propagation phase. 
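+** Traces are not GC objects themselves; their GC constants are normally
+** reached through the prototype's trace list. A trace saved after its
+** prototype has already been traversed would be missed in this cycle,
+** so its constants are marked here explicitly.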
*/ +void lj_gc_barriertrace(global_State *g, void *T) +{ + if (g->gc.state == GCSpropagate) + gc_traverse_trace(g, (Trace *)T); +} +#endif + +/* -- Allocator ----------------------------------------------------------- */ + +/* Call pluggable memory allocator to allocate or resize a fragment. */ +void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz) +{ + global_State *g = G(L); + lua_assert((osz == 0) == (p == NULL)); + p = g->allocf(g->allocd, p, osz, nsz); + if (p == NULL && nsz > 0) + lj_err_throw(L, LUA_ERRMEM); + lua_assert((nsz == 0) == (p == NULL)); + g->gc.total = (g->gc.total - osz) + nsz; + return p; +} + +/* Allocate new GC object and link it to the root set. */ +void *lj_mem_newgco(lua_State *L, MSize size) +{ + global_State *g = G(L); + GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); + if (o == NULL) + lj_err_throw(L, LUA_ERRMEM); + g->gc.total += size; + setgcrefr(o->gch.nextgc, g->gc.root); + setgcref(g->gc.root, o); + newwhite(g, o); + return o; +} + +/* Resize growable vector. */ +void *lj_mem_grow(lua_State *L, void *p, MSize *szp, MSize lim, MSize esz) +{ + MSize sz = (*szp) << 1; + if (sz < LJ_MIN_VECSZ) + sz = LJ_MIN_VECSZ; + if (sz > lim) + sz = lim; + p = lj_mem_realloc(L, p, (*szp)*esz, sz*esz); + *szp = sz; + return p; +} + diff --git a/src/lj_gc.h b/src/lj_gc.h new file mode 100644 index 0000000000..192066d3dc --- /dev/null +++ b/src/lj_gc.h @@ -0,0 +1,102 @@ +/* +** Garbage collector. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_GC_H +#define _LJ_GC_H + +#include "lj_obj.h" + +/* Garbage collector states. Order matters. */ +enum { GCSpause, GCSpropagate, GCSsweepstring, GCSsweep, GCSfinalize }; + +/* Bitmasks for marked field of GCobj. */ +#define LJ_GC_WHITE0 0x01 +#define LJ_GC_WHITE1 0x02 +#define LJ_GC_BLACK 0x04 +#define LJ_GC_FINALIZED 0x08 +#define LJ_GC_WEAKKEY 0x08 +#define LJ_GC_WEAKVAL 0x10 +#define LJ_GC_FIXED 0x20 +#define LJ_GC_SFIXED 0x40 + +#define LJ_GC_WHITES (LJ_GC_WHITE0 | LJ_GC_WHITE1) +#define LJ_GC_COLORS (LJ_GC_WHITES | LJ_GC_BLACK) +#define LJ_GC_WEAK (LJ_GC_WEAKKEY | LJ_GC_WEAKVAL) + +/* Macros to test and set GCobj colors. */ +#define iswhite(x) ((x)->gch.marked & LJ_GC_WHITES) +#define isblack(x) ((x)->gch.marked & LJ_GC_BLACK) +#define isgray(x) (!((x)->gch.marked & (LJ_GC_BLACK|LJ_GC_WHITES))) +#define tviswhite(x) (tvisgcv(x) && iswhite(gcV(x))) +#define otherwhite(g) (g->gc.currentwhite ^ LJ_GC_WHITES) +#define isdead(g, v) ((v)->gch.marked & otherwhite(g) & LJ_GC_WHITES) + +#define curwhite(g) ((g)->gc.currentwhite & LJ_GC_WHITES) +#define newwhite(g, x) (obj2gco(x)->gch.marked = (uint8_t)curwhite(g)) +#define flipwhite(x) ((x)->gch.marked ^= LJ_GC_WHITES) +#define fixstring(s) ((s)->marked |= LJ_GC_FIXED) + +/* Collector. */ +LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all); +LJ_FUNC void lj_gc_finalizeudata(lua_State *L); +LJ_FUNC void lj_gc_freeall(global_State *g); +LJ_FUNCA int lj_gc_step(lua_State *L); +LJ_FUNCA void lj_gc_step_fixtop(lua_State *L); +LJ_FUNCA void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps); +LJ_FUNC void lj_gc_fullgc(lua_State *L); + +/* GC check: drive collector forward if the GC threshold has been reached. */ +#define lj_gc_check(L) \ + { if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) \ + lj_gc_step(L); } +#define lj_gc_check_fixtop(L) \ + { if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) \ + lj_gc_step_fixtop(L); } + +/* Write barriers. 
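+** The incremental invariant: a black object never references a white
+** one. A store that would break it either marks the white value right
+** away (forward barrier, lj_gc_barrierf) or turns the holder gray again
+** (backward barrier, lj_gc_barrierback, used for tables since they
+** often take many stores). The macros below wrap the color checks so
+** the call is only made when the invariant is actually at risk.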
*/ +LJ_FUNC void lj_gc_barrierback(global_State *g, GCtab *t); +LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v); +LJ_FUNCA void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v); +LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv); +LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T); + +/* Barrier for stores to table objects. TValue and GCobj variant. */ +#define lj_gc_barriert(L, t, tv) \ + { if (tviswhite(tv) && isblack(obj2gco(t))) \ + lj_gc_barrierback(G(L), (t)); } +#define lj_gc_objbarriert(L, t, o) \ + { if (iswhite(obj2gco(o)) && isblack(obj2gco(t))) \ + lj_gc_barrierback(G(L), (t)); } + +/* Barrier for stores to any other object. TValue and GCobj variant. */ +#define lj_gc_barrier(L, p, tv) \ + { if (tviswhite(tv) && isblack(obj2gco(p))) \ + lj_gc_barrierf(G(L), obj2gco(p), gcV(tv)); } +#define lj_gc_objbarrier(L, p, o) \ + { if (iswhite(obj2gco(o)) && isblack(obj2gco(p))) \ + lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); } + +/* Allocator. */ +LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz); +LJ_FUNC void *lj_mem_newgco(lua_State *L, MSize size); +LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, + MSize *szp, MSize lim, MSize esz); + +#define lj_mem_new(L, s) lj_mem_realloc(L, NULL, 0, (s)) +#define lj_mem_free(g, p, osize) \ + (g->gc.total -= (MSize)(osize), g->allocf(g->allocd, (p), (osize), 0)) + +#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (MSize)((n)*sizeof(t)))) +#define lj_mem_reallocvec(L, p, on, n, t) \ + ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (MSize)((n)*sizeof(t)))) +#define lj_mem_growvec(L, p, n, m, t) \ + ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t))) +#define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t)) + +#define lj_mem_newobj(L, t) ((t *)lj_mem_newgco(L, sizeof(t))) +#define lj_mem_newt(L, s, t) ((t *)lj_mem_new(L, (s))) +#define lj_mem_freet(g, p) lj_mem_free(g, (p), sizeof(*(p))) + +#endif diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c new file mode 100644 index 0000000000..dfec188ae8 --- /dev/null +++ b/src/lj_gdbjit.c @@ -0,0 +1,739 @@ +/* +** Client for the GDB JIT API. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_gdbjit_c +#define LUA_CORE + +#include "lj_obj.h" + +#if LJ_HASJIT + +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_frame.h" +#include "lj_jit.h" +#include "lj_dispatch.h" + +/* This is not compiled in by default. +** Enable with -DLUAJIT_USE_GDBJIT in the Makefile and recompile everything. +*/ +#ifdef LUAJIT_USE_GDBJIT + +/* The GDB JIT API allows JIT compilers to pass debug information about +** JIT-compiled code back to GDB. You need at least GDB 7.0 or higher +** to see it in action. +** +** This is a passive API, so it works even when not running under GDB +** or when attaching to an already running process. Alas, this implies +** enabling it always has a non-negligible overhead -- do not use in +** release mode! +** +** The LuaJIT GDB JIT client is rather minimal at the moment. It gives +** each trace a symbol name and adds a source location and frame unwind +** information. Obviously LuaJIT itself and any embedding C application +** should be compiled with debug symbols, too (see the Makefile). +** +** Traces are named TRACE_1, TRACE_2, ... these correspond to the trace +** numbers from -jv or -jdump. Use "break TRACE_1" or "tbreak TRACE_1" etc. +** to set breakpoints on specific traces (even ahead of their creation). 
+** +** The source location for each trace allows listing the corresponding +** source lines with the GDB command "list" (but only if the Lua source +** has been loaded from a file). Currently this is always set to the +** location where the trace has been started. +** +** Frame unwind information can be inspected with the GDB command +** "info frame". This also allows proper backtraces across JIT-compiled +** code with the GDB command "bt". +** +** You probably want to add the following settings to a .gdbinit file +** (or add them to ~/.gdbinit): +** set disassembly-flavor intel +** set breakpoint pending on +** +** Here's a sample GDB session: +** ------------------------------------------------------------------------ + +$ cat >x.lua +for outer=1,100 do + for inner=1,100 do end +end +^D + +$ luajit -jv x.lua +[TRACE 1 x.lua:2] +[TRACE 2 (1/3) x.lua:1 -> 1] + +$ gdb --quiet --args luajit x.lua +(gdb) tbreak TRACE_1 +Function "TRACE_1" not defined. +Temporary breakpoint 1 (TRACE_1) pending. +(gdb) run +Starting program: luajit x.lua + +Temporary breakpoint 1, TRACE_1 () at x.lua:2 +2 for inner=1,100 do end +(gdb) list +1 for outer=1,100 do +2 for inner=1,100 do end +3 end +(gdb) bt +#0 TRACE_1 () at x.lua:2 +#1 0x08053690 in lua_pcall [...] +[...] +#7 0x0806ff90 in main [...] +(gdb) disass TRACE_1 +Dump of assembler code for function TRACE_1: +0xf7fd9fba : mov DWORD PTR ds:0xf7e0e2a0,0x1 +0xf7fd9fc4 : movsd xmm7,QWORD PTR [edx+0x20] +[...] +0xf7fd9ff8 : jmp 0xf7fd2014 +End of assembler dump. +(gdb) tbreak TRACE_2 +Function "TRACE_2" not defined. +Temporary breakpoint 2 (TRACE_2) pending. +(gdb) cont +Continuing. + +Temporary breakpoint 2, TRACE_2 () at x.lua:1 +1 for outer=1,100 do +(gdb) info frame +Stack level 0, frame at 0xffffd7c0: + eip = 0xf7fd9f60 in TRACE_2 (x.lua:1); saved eip 0x8053690 + called by frame at 0xffffd7e0 + source language unknown. + Arglist at 0xffffd78c, args: + Locals at 0xffffd78c, Previous frame's sp is 0xffffd7c0 + Saved registers: + ebx at 0xffffd7ac, ebp at 0xffffd7b8, esi at 0xffffd7b0, edi at 0xffffd7b4, + eip at 0xffffd7bc +(gdb) + +** ------------------------------------------------------------------------ +*/ + +/* -- GDB JIT API --------------------------------------------------------- */ + +/* GDB JIT actions. */ +enum { + GDBJIT_NOACTION = 0, + GDBJIT_REGISTER, + GDBJIT_UNREGISTER +}; + +/* GDB JIT entry. */ +typedef struct GDBJITentry { + struct GDBJITentry *next_entry; + struct GDBJITentry *prev_entry; + const char *symfile_addr; + uint64_t symfile_size; +} GDBJITentry; + +/* GDB JIT descriptor. */ +typedef struct GDBJITdesc { + uint32_t version; + uint32_t action_flag; + GDBJITentry *relevant_entry; + GDBJITentry *first_entry; +} GDBJITdesc; + +GDBJITdesc __jit_debug_descriptor = { + 1, GDBJIT_NOACTION, NULL, NULL +}; + +/* GDB sets a breakpoint at this function. */ +void LJ_NOINLINE __jit_debug_register_code() +{ + __asm__ __volatile__(""); +}; + +/* -- In-memory ELF object definitions ------------------------------------ */ + +/* ELF definitions. 
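+** These mirror the on-disk Elf32/Elf64 structures, using uintptr_t for
+** the fields whose width differs between the two classes, so a single
+** definition serves both 32 and 64 bit targets.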
*/ +typedef struct ELFheader { + uint8_t emagic[4]; + uint8_t eclass; + uint8_t eendian; + uint8_t eversion; + uint8_t eosabi; + uint8_t eabiversion; + uint8_t epad[7]; + uint16_t type; + uint16_t machine; + uint32_t version; + uintptr_t entry; + uintptr_t phofs; + uintptr_t shofs; + uint32_t flags; + uint16_t ehsize; + uint16_t phentsize; + uint16_t phnum; + uint16_t shentsize; + uint16_t shnum; + uint16_t shstridx; +} ELFheader; + +typedef struct ELFsectheader { + uint32_t name; + uint32_t type; + uintptr_t flags; + uintptr_t addr; + uintptr_t ofs; + uintptr_t size; + uint32_t link; + uint32_t info; + uintptr_t align; + uintptr_t entsize; +} ELFsectheader; + +#define ELFSECT_IDX_ABS 0xfff1 + +enum { + ELFSECT_TYPE_PROGBITS = 1, + ELFSECT_TYPE_SYMTAB = 2, + ELFSECT_TYPE_STRTAB = 3, + ELFSECT_TYPE_NOBITS = 8 +}; + +#define ELFSECT_FLAGS_WRITE 1 +#define ELFSECT_FLAGS_ALLOC 2 +#define ELFSECT_FLAGS_EXEC 4 + +typedef struct ELFsymbol { +#if LJ_64 + uint32_t name; + uint8_t info; + uint8_t other; + uint16_t sectidx; + uintptr_t value; + uint64_t size; +#else + uint32_t name; + uintptr_t value; + uint32_t size; + uint8_t info; + uint8_t other; + uint16_t sectidx; +#endif +} ELFsymbol; + +enum { + ELFSYM_TYPE_FUNC = 2, + ELFSYM_TYPE_FILE = 4, + ELFSYM_BIND_LOCAL = 0 << 4, + ELFSYM_BIND_GLOBAL = 1 << 4, +}; + +/* DWARF definitions. */ +#define DW_CIE_VERSION 1 + +enum { + DW_CFA_nop = 0x0, + DW_CFA_def_cfa = 0xc, + DW_CFA_def_cfa_offset = 0xe, + DW_CFA_advance_loc = 0x40, + DW_CFA_offset = 0x80 +}; + +enum { + DW_EH_PE_udata4 = 3, + DW_EH_PE_textrel = 0x20 +}; + +enum { + DW_TAG_compile_unit = 0x11 +}; + +enum { + DW_children_no = 0, + DW_children_yes = 1 +}; + +enum { + DW_AT_name = 0x03, + DW_AT_stmt_list = 0x10, + DW_AT_low_pc = 0x11, + DW_AT_high_pc = 0x12 +}; + +enum { + DW_FORM_addr = 0x01, + DW_FORM_data4 = 0x06, + DW_FORM_string = 0x08 +}; + +enum { + DW_LNS_extended_op = 0, + DW_LNS_copy = 1, + DW_LNS_advance_pc = 2, + DW_LNS_advance_line = 3 +}; + +enum { + DW_LNE_end_sequence = 1, + DW_LNE_set_address = 2 +}; + +enum { +#if LJ_TARGET_X86 + DW_REG_AX, DW_REG_CX, DW_REG_DX, DW_REG_BX, + DW_REG_SP, DW_REG_BP, DW_REG_SI, DW_REG_DI, + DW_REG_RA, +#elif LJ_TARGET_X64 + /* Yes, the order is strange, but correct. */ + DW_REG_AX, DW_REG_DX, DW_REG_CX, DW_REG_BX, + DW_REG_SI, DW_REG_DI, DW_REG_BP, DW_REG_SP, + DW_REG_8, DW_REG_9, DW_REG_10, DW_REG_11, + DW_REG_12, DW_REG_13, DW_REG_14, DW_REG_15, + DW_REG_RA, +#else +#error "Unsupported target architecture" +#endif +}; + +/* Minimal list of sections for the in-memory ELF object. */ +enum { + GDBJIT_SECT_NULL, + GDBJIT_SECT_text, + GDBJIT_SECT_eh_frame, + GDBJIT_SECT_shstrtab, + GDBJIT_SECT_strtab, + GDBJIT_SECT_symtab, + GDBJIT_SECT_debug_info, + GDBJIT_SECT_debug_abbrev, + GDBJIT_SECT_debug_line, + GDBJIT_SECT__MAX +}; + +enum { + GDBJIT_SYM_UNDEF, + GDBJIT_SYM_FILE, + GDBJIT_SYM_FUNC, + GDBJIT_SYM__MAX +}; + +/* In-memory ELF object. */ +typedef struct GDBJITobj { + ELFheader hdr; /* ELF header. */ + ELFsectheader sect[GDBJIT_SECT__MAX]; /* ELF sections. */ + ELFsymbol sym[GDBJIT_SYM__MAX]; /* ELF symbol table. */ + uint8_t space[4096]; /* Space for various section data. */ +} GDBJITobj; + +/* Combined structure for GDB JIT entry and ELF object. */ +typedef struct GDBJITentryobj { + GDBJITentry entry; + size_t sz; + GDBJITobj obj; +} GDBJITentryobj; + +/* Template for in-memory ELF header. */ +static const ELFheader elfhdr_template = { + .emagic = { 0x7f, 'E', 'L', 'F' }, + .eclass = LJ_64 ? 
2 : 1, + .eendian = LJ_ENDIAN_SELECT(1, 2), + .eversion = 1, +#if defined(__linux__) + .eosabi = 0, /* Nope, it's not 3. */ +#elif defined(__FreeBSD__) + .eosabi = 9, +#elif defined(__NetBSD__) + .eosabi = 2, +#elif defined(__OpenBSD__) + .eosabi = 12, +#elif defined(__solaris__) + .eosabi = 6, +#else + .eosabi = 0, +#endif + .eabiversion = 0, + .epad = { 0, 0, 0, 0, 0, 0, 0 }, + .type = 1, +#if LJ_TARGET_X86 + .machine = 3, +#elif LJ_TARGET_X64 + .machine = 62, +#else +#error "Unsupported target architecture" +#endif + .version = 1, + .entry = 0, + .phofs = 0, + .shofs = offsetof(GDBJITobj, sect), + .flags = 0, + .ehsize = sizeof(ELFheader), + .phentsize = 0, + .phnum = 0, + .shentsize = sizeof(ELFsectheader), + .shnum = GDBJIT_SECT__MAX, + .shstridx = GDBJIT_SECT_shstrtab +}; + +/* -- In-memory ELF object generation ------------------------------------- */ + +/* Context for generating the ELF object for the GDB JIT API. */ +typedef struct GDBJITctx { + uint8_t *p; /* Pointer to next address in obj.space. */ + uint8_t *startp; /* Pointer to start address in obj.space. */ + Trace *T; /* Generate symbols for this trace. */ + uintptr_t mcaddr; /* Machine code address. */ + MSize szmcode; /* Size of machine code. */ + MSize spadjp; /* Stack adjustment for parent trace or interpreter. */ + MSize spadj; /* Stack adjustment for trace itself. */ + BCLine lineno; /* Starting line number. */ + const char *filename; /* Starting file name. */ + const char *trname; /* Name of trace. */ + size_t objsize; /* Final size of ELF object. */ + GDBJITobj obj; /* In-memory ELF object. */ +} GDBJITctx; + +/* Add a zero-terminated string. */ +static uint32_t gdbjit_strz(GDBJITctx *ctx, const char *str) +{ + uint8_t *p = ctx->p; + uint32_t ofs = (uint32_t)(p - ctx->startp); + do { + *p++ = (uint8_t)*str; + } while (*str++); + ctx->p = p; + return ofs; +} + +/* Add a ULEB128 value. */ +static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v) +{ + uint8_t *p = ctx->p; + for (; v >= 0x80; v >>= 7) + *p++ = (uint8_t)((v & 0x7f) | 0x80); + *p++ = (uint8_t)v; + ctx->p = p; +} + +/* Add a SLEB128 value. */ +static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) +{ + uint8_t *p = ctx->p; + for (; (uint32_t)(v+0x40) >= 0x80; v >>= 7) + *p++ = (uint8_t)((v & 0x7f) | 0x80); + *p++ = (uint8_t)(v & 0x7f); + ctx->p = p; +} + +/* Shortcuts to generate DWARF structures. */ +#define DB(x) (*p++ = (x)) +#define DI8(x) (*(int8_t *)p = (x), p++) +#define DU16(x) (*(uint16_t *)p = (x), p += 2) +#define DU32(x) (*(uint32_t *)p = (x), p += 4) +#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) +#define DUV(x) (ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p) +#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) +#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) +#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop +#define DSECT(name, stmt) \ + { uint32_t *szp_##name = (uint32_t *)p; p += 4; stmt \ + *szp_##name = (uint32_t)((p-(uint8_t *)szp_##name)-4); } \ + +/* Initialize ELF section headers. */ +static void LJ_FASTCALL gdbjit_secthdr(GDBJITctx *ctx) +{ + ELFsectheader *sect; + + *ctx->p++ = '\0'; /* Empty string at start of string table. */ + +#define SECTDEF(id, tp, al) \ + sect = &ctx->obj.sect[GDBJIT_SECT_##id]; \ + sect->name = gdbjit_strz(ctx, "." 
#id); \ + sect->type = ELFSECT_TYPE_##tp; \ + sect->align = (al) + + SECTDEF(text, NOBITS, 16); + sect->flags = ELFSECT_FLAGS_ALLOC|ELFSECT_FLAGS_EXEC; + sect->addr = ctx->mcaddr; + sect->ofs = 0; + sect->size = ctx->szmcode; + + SECTDEF(eh_frame, PROGBITS, sizeof(uintptr_t)); + sect->flags = ELFSECT_FLAGS_ALLOC; + + SECTDEF(shstrtab, STRTAB, 1); + SECTDEF(strtab, STRTAB, 1); + + SECTDEF(symtab, SYMTAB, sizeof(uintptr_t)); + sect->ofs = offsetof(GDBJITobj, sym); + sect->size = sizeof(ctx->obj.sym); + sect->link = GDBJIT_SECT_strtab; + sect->entsize = sizeof(ELFsymbol); + sect->info = GDBJIT_SYM_FUNC; + + SECTDEF(debug_info, PROGBITS, 1); + SECTDEF(debug_abbrev, PROGBITS, 1); + SECTDEF(debug_line, PROGBITS, 1); + +#undef SECTDEF +} + +/* Initialize symbol table. */ +static void LJ_FASTCALL gdbjit_symtab(GDBJITctx *ctx) +{ + ELFsymbol *sym; + + *ctx->p++ = '\0'; /* Empty string at start of string table. */ + + sym = &ctx->obj.sym[GDBJIT_SYM_FILE]; + sym->name = gdbjit_strz(ctx, "JIT mcode"); + sym->sectidx = ELFSECT_IDX_ABS; + sym->info = ELFSYM_TYPE_FILE|ELFSYM_BIND_LOCAL; + + sym = &ctx->obj.sym[GDBJIT_SYM_FUNC]; + sym->name = gdbjit_strz(ctx, ctx->trname); + sym->sectidx = GDBJIT_SECT_text; + sym->value = 0; + sym->size = ctx->szmcode; + sym->info = ELFSYM_TYPE_FUNC|ELFSYM_BIND_GLOBAL; +} + +/* Initialize .eh_frame section. */ +static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) +{ + uint8_t *p = ctx->p; + uint8_t *framep = p; + + /* Emit DWARF EH CIE. */ + DSECT(CIE, + DU32(0); /* Offset to CIE itself. */ + DB(DW_CIE_VERSION); + DSTR("zR"); /* Augmentation. */ + DUV(1); /* Code alignment factor. */ + DSV(-(int32_t)sizeof(uintptr_t)); /* Data alignment factor. */ + DB(DW_REG_RA); /* Return address register. */ + DB(1); DB(DW_EH_PE_textrel|DW_EH_PE_udata4); /* Augmentation data. */ + DB(DW_CFA_def_cfa); DUV(DW_REG_SP); DUV(sizeof(uintptr_t)); + DB(DW_CFA_offset|DW_REG_RA); DUV(1); + DALIGNNOP(sizeof(uintptr_t)); + ) + + /* Emit DWARF EH FDE. */ + DSECT(FDE, + DU32((uint32_t)(p-framep)); /* Offset to CIE. */ + DU32(0); /* Machine code offset relative to .text. */ + DU32(ctx->szmcode); /* Machine code length. */ + DB(0); /* Augmentation data. */ + /* Registers saved in CFRAME. */ +#if LJ_TARGET_X86 + DB(DW_CFA_offset|DW_REG_BP); DUV(2); + DB(DW_CFA_offset|DW_REG_DI); DUV(3); + DB(DW_CFA_offset|DW_REG_SI); DUV(4); + DB(DW_CFA_offset|DW_REG_BX); DUV(5); +#elif LJ_TARGET_X64 + /* Add saved registers for x64 CFRAME. */ +#else +#error "Unsupported target architecture" +#endif + if (ctx->spadjp != ctx->spadj) { /* Parent/interpreter stack frame size. */ + DB(DW_CFA_def_cfa_offset); DUV(ctx->spadjp); + DB(DW_CFA_advance_loc|1); /* Only an approximation. */ + } + DB(DW_CFA_def_cfa_offset); DUV(ctx->spadj); /* Trace stack frame size. */ + DALIGNNOP(sizeof(uintptr_t)); + ) + + ctx->p = p; +} + +/* Initialize .debug_info section. */ +static void LJ_FASTCALL gdbjit_debuginfo(GDBJITctx *ctx) +{ + uint8_t *p = ctx->p; + + DSECT(info, + DU16(2); /* DWARF version. */ + DU32(0); /* Abbrev offset. */ + DB(sizeof(uintptr_t)); /* Pointer size. */ + + DUV(1); /* Abbrev #1: DW_TAG_compile_unit. */ + DSTR(ctx->filename); /* DW_AT_name. */ + DADDR(ctx->mcaddr); /* DW_AT_low_pc. */ + DADDR(ctx->mcaddr + ctx->szmcode); /* DW_AT_high_pc. */ + DU32(0); /* DW_AT_stmt_list. */ + ) + + ctx->p = p; +} + +/* Initialize .debug_abbrev section. */ +static void LJ_FASTCALL gdbjit_debugabbrev(GDBJITctx *ctx) +{ + uint8_t *p = ctx->p; + + /* Abbrev #1: DW_TAG_compile_unit. 
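+** DWARF abbreviation format: abbrev code, tag, children flag, then
+** (attribute, form) pairs terminated by a (0, 0) pair. The DIE emitted
+** by gdbjit_debuginfo() above supplies the values in this exact order.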
*/ + DUV(1); DUV(DW_TAG_compile_unit); + DB(DW_children_no); + DUV(DW_AT_name); DUV(DW_FORM_string); + DUV(DW_AT_low_pc); DUV(DW_FORM_addr); + DUV(DW_AT_high_pc); DUV(DW_FORM_addr); + DUV(DW_AT_stmt_list); DUV(DW_FORM_data4); + DB(0); DB(0); + + ctx->p = p; +} + +#define DLNE(op, s) (DB(DW_LNS_extended_op), DUV(1+(s)), DB((op))) + +/* Initialize .debug_line section. */ +static void LJ_FASTCALL gdbjit_debugline(GDBJITctx *ctx) +{ + uint8_t *p = ctx->p; + + DSECT(line, + DU16(2); /* DWARF version. */ + DSECT(header, + DB(1); /* Minimum instruction length. */ + DB(1); /* is_stmt. */ + DI8(0); /* Line base for special opcodes. */ + DB(2); /* Line range for special opcodes. */ + DB(3+1); /* Opcode base at DW_LNS_advance_line+1. */ + DB(0); DB(1); DB(1); /* Standard opcode lengths. */ + /* Directory table. */ + DB(0); + /* File name table. */ + DSTR(ctx->filename); DUV(0); DUV(0); DUV(0); + DB(0); + ) + + DLNE(DW_LNE_set_address, sizeof(uintptr_t)); DADDR(ctx->mcaddr); + if (ctx->lineno) { + DB(DW_LNS_advance_line); DSV(ctx->lineno-1); + } + DB(DW_LNS_copy); + DB(DW_LNS_advance_pc); DUV(ctx->szmcode); + DLNE(DW_LNE_end_sequence, 0); + ) + + ctx->p = p; +} + +#undef DLNE + +/* Undef shortcuts. */ +#undef DB +#undef DI8 +#undef DU16 +#undef DU32 +#undef DADDR +#undef DUV +#undef DSV +#undef DSTR +#undef DALIGNNOP +#undef DSECT + +/* Type of a section initializer callback. */ +typedef void (LJ_FASTCALL *GDBJITinitf)(GDBJITctx *ctx); + +/* Call section initializer and set the section offset and size. */ +static void gdbjit_initsect(GDBJITctx *ctx, int sect, GDBJITinitf initf) +{ + ctx->startp = ctx->p; + ctx->obj.sect[sect].ofs = (uintptr_t)((char *)ctx->p - (char *)&ctx->obj); + initf(ctx); + ctx->obj.sect[sect].size = (uintptr_t)(ctx->p - ctx->startp); +} + +#define SECTALIGN(p, a) \ + ((p) = (uint8_t *)(((uintptr_t)(p) + ((a)-1)) & ~(uintptr_t)((a)-1))) + +/* Build in-memory ELF object. */ +static void gdbjit_buildobj(GDBJITctx *ctx) +{ + GDBJITobj *obj = &ctx->obj; + /* Fill in ELF header and clear structures. */ + memcpy(&obj->hdr, &elfhdr_template, sizeof(ELFheader)); + memset(&obj->sect, 0, sizeof(ELFsectheader)*GDBJIT_SECT__MAX); + memset(&obj->sym, 0, sizeof(ELFsymbol)*GDBJIT_SYM__MAX); + /* Initialize sections. */ + ctx->p = obj->space; + gdbjit_initsect(ctx, GDBJIT_SECT_shstrtab, gdbjit_secthdr); + gdbjit_initsect(ctx, GDBJIT_SECT_strtab, gdbjit_symtab); + gdbjit_initsect(ctx, GDBJIT_SECT_debug_info, gdbjit_debuginfo); + gdbjit_initsect(ctx, GDBJIT_SECT_debug_abbrev, gdbjit_debugabbrev); + gdbjit_initsect(ctx, GDBJIT_SECT_debug_line, gdbjit_debugline); + SECTALIGN(ctx->p, sizeof(uintptr_t)); + gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe); + ctx->objsize = (size_t)((char *)ctx->p - (char *)obj); + lua_assert(ctx->objsize < sizeof(GDBJITobj)); +} + +#undef SECTALIGN + +/* -- Interface to GDB JIT API -------------------------------------------- */ + +/* Add new entry to GDB JIT symbol chain. */ +static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) +{ + /* Allocate memory for GDB JIT entry and ELF object. */ + MSize sz = (MSize)(sizeof(GDBJITentryobj) - sizeof(GDBJITobj) + ctx->objsize); + GDBJITentryobj *eo = lj_mem_newt(L, sz, GDBJITentryobj); + memcpy(&eo->obj, &ctx->obj, ctx->objsize); /* Copy ELF object. */ + eo->sz = sz; + ctx->T->gdbjit_entry = (void *)eo; + /* Link new entry to chain and register it. 
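+** GDB JIT API protocol: push the entry onto the doubly-linked list,
+** point relevant_entry at it, set action_flag to GDBJIT_REGISTER and
+** call __jit_debug_register_code(), the empty function GDB traps on.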
*/ + eo->entry.prev_entry = NULL; + eo->entry.next_entry = __jit_debug_descriptor.first_entry; + if (eo->entry.next_entry) + eo->entry.next_entry->prev_entry = &eo->entry; + eo->entry.symfile_addr = (const char *)&eo->obj; + eo->entry.symfile_size = ctx->objsize; + __jit_debug_descriptor.first_entry = &eo->entry; + __jit_debug_descriptor.relevant_entry = &eo->entry; + __jit_debug_descriptor.action_flag = GDBJIT_REGISTER; + __jit_debug_register_code(); +} + +/* Add debug info for newly compiled trace and notify GDB. */ +void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno) +{ + GDBJITctx ctx; + lua_State *L = J->L; + GCproto *pt = &gcref(T->startpt)->pt; + TraceNo parent = T->ir[REF_BASE].op1; + uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nslots); + const BCIns *startpc = (const BCIns *)(uintptr_t)T->snapmap[pcofs]; + ctx.T = T; + ctx.mcaddr = (uintptr_t)T->mcode; + ctx.szmcode = T->szmcode; + ctx.spadjp = CFRAME_SIZE + (MSize)(parent ? J->trace[parent]->spadjust : 0); + ctx.spadj = CFRAME_SIZE + T->spadjust; + ctx.lineno = pt->lineinfo ? pt->lineinfo[startpc - pt->bc] : 0; + ctx.filename = strdata(pt->chunkname); + if (*ctx.filename == '@' || *ctx.filename == '=') + ctx.filename++; + else + ctx.filename = "(string)"; + ctx.trname = lj_str_pushf(L, "TRACE_%d", traceno); + L->top--; + gdbjit_buildobj(&ctx); + gdbjit_newentry(L, &ctx); +} + +/* Delete debug info for trace and notify GDB. */ +void lj_gdbjit_deltrace(jit_State *J, Trace *T) +{ + GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; + if (eo) { + if (eo->entry.prev_entry) + eo->entry.prev_entry->next_entry = eo->entry.next_entry; + else + __jit_debug_descriptor.first_entry = eo->entry.next_entry; + if (eo->entry.next_entry) + eo->entry.next_entry->prev_entry = eo->entry.prev_entry; + __jit_debug_descriptor.relevant_entry = &eo->entry; + __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; + __jit_debug_register_code(); + lj_mem_free(J2G(J), eo, eo->sz); + } +} + +#endif +#endif diff --git a/src/lj_gdbjit.h b/src/lj_gdbjit.h new file mode 100644 index 0000000000..2221948f3f --- /dev/null +++ b/src/lj_gdbjit.h @@ -0,0 +1,22 @@ +/* +** Client for the GDB JIT API. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_GDBJIT_H +#define _LJ_GDBJIT_H + +#include "lj_obj.h" +#include "lj_jit.h" + +#if LJ_HASJIT && defined(LUAJIT_USE_GDBJIT) + +LJ_FUNC void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno); +LJ_FUNC void lj_gdbjit_deltrace(jit_State *J, Trace *T); + +#else +#define lj_gdbjit_addtrace(J, T, tn) UNUSED(T) +#define lj_gdbjit_deltrace(J, T) UNUSED(T) +#endif + +#endif diff --git a/src/lj_ir.c b/src/lj_ir.c new file mode 100644 index 0000000000..2ff5482185 --- /dev/null +++ b/src/lj_ir.c @@ -0,0 +1,461 @@ +/* +** SSA IR (Intermediate Representation) emitter. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_ir_c +#define LUA_CORE + +#include "lj_obj.h" + +#if LJ_HASJIT + +#include "lj_gc.h" +#include "lj_str.h" +#include "lj_ir.h" +#include "lj_jit.h" +#include "lj_iropt.h" +#include "lj_trace.h" + +/* Some local macros to save typing. Undef'd at the end. */ +#define IR(ref) (&J->cur.ir[(ref)]) +#define fins (&J->fold.ins) + +/* Pass IR on to next optimization in chain (FOLD). */ +#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) + +/* -- IR tables ----------------------------------------------------------- */ + +/* IR instruction modes. 
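+** The mode byte per opcode is generated by expanding IRDEF with the
+** IRMODE macro from lj_ir.h, e.g. _(ADD, C , ref, ref) yields
+** ((IRMref)|((IRMref)<<2)|(IRM_C)): two reference operands plus the
+** commutative flag.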
*/
+LJ_DATADEF const uint8_t lj_ir_mode[IR__MAX+1] = {
+IRDEF(IRMODE)
+  0
+};
+
+/* -- IR emitter ---------------------------------------------------------- */
+
+/* Grow IR buffer at the top. */
+void LJ_FASTCALL lj_ir_growtop(jit_State *J)
+{
+  IRIns *baseir = J->irbuf + J->irbotlim;
+  MSize szins = J->irtoplim - J->irbotlim;
+  if (szins) {
+    baseir = (IRIns *)lj_mem_realloc(J->L, baseir, szins*sizeof(IRIns),
+				     2*szins*sizeof(IRIns));
+    J->irtoplim = J->irbotlim + 2*szins;
+  } else {
+    baseir = (IRIns *)lj_mem_realloc(J->L, NULL, 0, LJ_MIN_IRSZ*sizeof(IRIns));
+    J->irbotlim = REF_BASE - LJ_MIN_IRSZ/4;
+    J->irtoplim = J->irbotlim + LJ_MIN_IRSZ;
+  }
+  J->cur.ir = J->irbuf = baseir - J->irbotlim;
+}
+
+/* Grow IR buffer at the bottom or shift it up. */
+static void lj_ir_growbot(jit_State *J)
+{
+  IRIns *baseir = J->irbuf + J->irbotlim;
+  MSize szins = J->irtoplim - J->irbotlim;
+  lua_assert(szins != 0);
+  lua_assert(J->cur.nk == J->irbotlim);
+  if (J->cur.nins + (szins >> 1) < J->irtoplim) {
+    /* More than half of the buffer is free on top: shift up by a quarter. */
+    MSize ofs = szins >> 2;
+    memmove(baseir + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns));
+    J->irbotlim -= ofs;
+    J->irtoplim -= ofs;
+    J->cur.ir = J->irbuf = baseir - J->irbotlim;
+  } else {
+    /* Double the buffer size, but split the growth amongst top/bottom. */
+    IRIns *newbase = lj_mem_newt(J->L, 2*szins*sizeof(IRIns), IRIns);
+    MSize ofs = szins >= 256 ? 128 : (szins >> 1);  /* Limit bottom growth. */
+    memcpy(newbase + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns));
+    lj_mem_free(G(J->L), baseir, szins*sizeof(IRIns));
+    J->irbotlim -= ofs;
+    J->irtoplim = J->irbotlim + 2*szins;
+    J->cur.ir = J->irbuf = newbase - J->irbotlim;
+  }
+}
+
+/* Emit IR without any optimizations. */
+TRef LJ_FASTCALL lj_ir_emit(jit_State *J)
+{
+  IRRef ref = lj_ir_nextins(J);
+  IRIns *ir = IR(ref);
+  IROp op = fins->o;
+  ir->prev = J->chain[op];
+  J->chain[op] = (IRRef1)ref;
+  ir->o = op;
+  ir->op1 = fins->op1;
+  ir->op2 = fins->op2;
+  J->guardemit.irt |= fins->t.irt;
+  return TREF(ref, irt_t((ir->t = fins->t)));
+}
+
+/* -- Interning of constants ---------------------------------------------- */
+
+/*
+** IR instructions for constants are kept between J->cur.nk >= ref < REF_BIAS.
+** They are chained like all other instructions, but grow downwards.
+** They are interned (like strings in the VM) to facilitate reference
+** comparisons. The same constant must get the same reference.
+*/
+
+/* Get ref of next IR constant and optionally grow IR.
+** Note: this may invalidate all IRIns *!
+*/
+static LJ_AINLINE IRRef ir_nextk(jit_State *J)
+{
+  IRRef ref = J->cur.nk;
+  if (LJ_UNLIKELY(ref <= J->irbotlim)) lj_ir_growbot(J);
+  J->cur.nk = --ref;
+  return ref;
+}
+
+/* Intern int32_t constant. */
+TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k)
+{
+  IRIns *ir, *cir = J->cur.ir;
+  IRRef ref;
+  for (ref = J->chain[IR_KINT]; ref; ref = cir[ref].prev)
+    if (cir[ref].i == k)
+      goto found;
+  ref = ir_nextk(J);
+  ir = IR(ref);
+  ir->i = k;
+  ir->t.irt = IRT_INT;
+  ir->o = IR_KINT;
+  ir->prev = J->chain[IR_KINT];
+  J->chain[IR_KINT] = (IRRef1)ref;
+found:
+  return TREF(ref, IRT_INT);
+}
+
+/* The MRef inside the KNUM IR instruction holds the address of the constant
+** (an aligned double or a special 64 bit pattern). The KNUM constants
+** themselves are stored in a chained array and shared across traces.
+** +** Rationale for choosing this data structure: +** - The address of the constants is embedded in the generated machine code +** and must never move. A resizable array or hash table wouldn't work. +** - Most apps need very few non-integer constants (less than a dozen). +** - Linear search is hard to beat in terms of speed and low complexity. +*/ +typedef struct KNumArray { + MRef next; /* Pointer to next list. */ + MSize numk; /* Number of used elements in this array. */ + TValue k[LJ_MIN_KNUMSZ]; /* Array of constants. */ +} KNumArray; + +/* Free all chained arrays. */ +void lj_ir_knum_freeall(jit_State *J) +{ + KNumArray *kn; + for (kn = mref(J->knum, KNumArray); kn; ) { + KNumArray *next = mref(kn->next, KNumArray); + lj_mem_free(J2G(J), kn, sizeof(KNumArray)); + kn = next; + } +} + +/* Find KNUM constant in chained array or add it. */ +static cTValue *ir_knum_find(jit_State *J, uint64_t nn) +{ + KNumArray *kn, *knp = NULL; + TValue *ntv; + MSize idx; + /* Search for the constant in the whole chain of arrays. */ + for (kn = mref(J->knum, KNumArray); kn; kn = mref(kn->next, KNumArray)) { + knp = kn; /* Remember previous element in list. */ + for (idx = 0; idx < kn->numk; idx++) { /* Search one array. */ + TValue *tv = &kn->k[idx]; + if (tv->u64 == nn) /* Needed for +-0/NaN/absmask. */ + return tv; + } + } + /* Constant was not found, need to add it. */ + if (!(knp && knp->numk < LJ_MIN_KNUMSZ)) { /* Allocate a new array. */ + KNumArray *nkn = lj_mem_newt(J->L, sizeof(KNumArray), KNumArray); + setmref(nkn->next, NULL); + nkn->numk = 0; + if (knp) + setmref(knp->next, nkn); /* Chain to the end of the list. */ + else + setmref(J->knum, nkn); /* Link first array. */ + knp = nkn; + } + ntv = &knp->k[knp->numk++]; /* Add to current array. */ + ntv->u64 = nn; + return ntv; +} + +/* Intern FP constant, given by its address. */ +TRef lj_ir_knum_addr(jit_State *J, cTValue *tv) +{ + IRIns *ir, *cir = J->cur.ir; + IRRef ref; + for (ref = J->chain[IR_KNUM]; ref; ref = cir[ref].prev) + if (ir_knum(&cir[ref]) == tv) + goto found; + ref = ir_nextk(J); + ir = IR(ref); + setmref(ir->ptr, tv); + ir->t.irt = IRT_NUM; + ir->o = IR_KNUM; + ir->prev = J->chain[IR_KNUM]; + J->chain[IR_KNUM] = (IRRef1)ref; +found: + return TREF(ref, IRT_NUM); +} + +/* Intern FP constant, given by its 64 bit pattern. */ +TRef lj_ir_knum_nn(jit_State *J, uint64_t nn) +{ + return lj_ir_knum_addr(J, ir_knum_find(J, nn)); +} + +/* Special 16 byte aligned SIMD constants. */ +LJ_DATADEF LJ_ALIGN(16) cTValue lj_ir_knum_tv[4] = { + { U64x(7fffffff,ffffffff) }, { U64x(7fffffff,ffffffff) }, + { U64x(80000000,00000000) }, { U64x(80000000,00000000) } +}; + +/* Check whether a number is int and return it. -0 is NOT considered an int. */ +static int numistrueint(lua_Number n, int32_t *kp) +{ + int32_t k = lj_num2int(n); + if (n == cast_num(k)) { + if (kp) *kp = k; + if (k == 0) { /* Special check for -0. */ + TValue tv; + setnumV(&tv, n); + if (tv.u32.hi != 0) + return 0; + } + return 1; + } + return 0; +} + +/* Intern number as int32_t constant if possible, otherwise as FP constant. */ +TRef lj_ir_knumint(jit_State *J, lua_Number n) +{ + int32_t k; + if (numistrueint(n, &k)) + return lj_ir_kint(J, k); + else + return lj_ir_knum(J, n); +} + +/* Intern GC object "constant". 
*/ +TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t) +{ + IRIns *ir, *cir = J->cur.ir; + IRRef ref; + for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) + if (ir_kgc(&cir[ref]) == o) + goto found; + ref = ir_nextk(J); + ir = IR(ref); + /* NOBARRIER: Current trace is a GC root. */ + setgcref(ir->gcr, o); + ir->t.irt = (uint8_t)t; + ir->o = IR_KGC; + ir->prev = J->chain[IR_KGC]; + J->chain[IR_KGC] = (IRRef1)ref; +found: + return TREF(ref, t); +} + +/* Intern 32 bit pointer constant. */ +TRef lj_ir_kptr(jit_State *J, void *ptr) +{ + IRIns *ir, *cir = J->cur.ir; + IRRef ref; + lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr); + for (ref = J->chain[IR_KPTR]; ref; ref = cir[ref].prev) + if (mref(cir[ref].ptr, void) == ptr) + goto found; + ref = ir_nextk(J); + ir = IR(ref); + setmref(ir->ptr, ptr); + ir->t.irt = IRT_PTR; + ir->o = IR_KPTR; + ir->prev = J->chain[IR_KPTR]; + J->chain[IR_KPTR] = (IRRef1)ref; +found: + return TREF(ref, IRT_PTR); +} + +/* Intern typed NULL constant. */ +TRef lj_ir_knull(jit_State *J, IRType t) +{ + IRIns *ir, *cir = J->cur.ir; + IRRef ref; + for (ref = J->chain[IR_KNULL]; ref; ref = cir[ref].prev) + if (irt_t(cir[ref].t) == t) + goto found; + ref = ir_nextk(J); + ir = IR(ref); + ir->i = 0; + ir->t.irt = (uint8_t)t; + ir->o = IR_KNULL; + ir->prev = J->chain[IR_KNULL]; + J->chain[IR_KNULL] = (IRRef1)ref; +found: + return TREF(ref, t); +} + +/* Intern key slot. */ +TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot) +{ + IRIns *ir, *cir = J->cur.ir; + IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot); + IRRef ref; + /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */ + lua_assert(tref_isk(key) && slot == (IRRef)(IRRef1)slot); + for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev) + if (cir[ref].op12 == op12) + goto found; + ref = ir_nextk(J); + ir = IR(ref); + ir->op12 = op12; + ir->t.irt = IRT_PTR; + ir->o = IR_KSLOT; + ir->prev = J->chain[IR_KSLOT]; + J->chain[IR_KSLOT] = (IRRef1)ref; +found: + return TREF(ref, IRT_PTR); +} + +/* -- Access to IR constants ---------------------------------------------- */ + +/* Copy value of IR constant. */ +void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) +{ + UNUSED(L); + lua_assert(ir->o != IR_KSLOT); /* Common mistake. */ + if (irt_isint(ir->t)) { + lua_assert(ir->o == IR_KINT); + setintV(tv, ir->i); + } else if (irt_isnum(ir->t)) { + lua_assert(ir->o == IR_KNUM); + setnumV(tv, ir_knum(ir)->n); + } else if (irt_ispri(ir->t)) { + lua_assert(ir->o == IR_KPRI); + setitype(tv, irt_toitype(ir->t)); + } else { + if (ir->o == IR_KGC) { + lua_assert(irt_isgcv(ir->t)); + setgcV(L, tv, &ir_kgc(ir)->gch, irt_toitype(ir->t)); + } else { + lua_assert(ir->o == IR_KPTR || ir->o == IR_KNULL); + setlightudV(tv, mref(ir->ptr, void)); + } + } +} + +/* -- Convert IR operand types -------------------------------------------- */ + +/* Convert from integer or string to number. */ +TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr) +{ + if (!tref_isnum(tr)) { + if (tref_isinteger(tr)) + tr = emitir(IRTN(IR_TONUM), tr, 0); + else if (tref_isstr(tr)) + tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); + else + lj_trace_err(J, LJ_TRERR_BADTYPE); + } + return tr; +} + +/* Convert from integer or number to string. */ +TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr) +{ + if (!tref_isstr(tr)) { + if (!tref_isnumber(tr)) + lj_trace_err(J, LJ_TRERR_BADTYPE); + tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); + } + return tr; +} + +/* Convert from number or string to bitop operand (overflow wrapped). 
*/ +TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr) +{ + if (!tref_isinteger(tr)) { + if (tref_isstr(tr)) + tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); + else if (!tref_isnum(tr)) + lj_trace_err(J, LJ_TRERR_BADTYPE); + tr = emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J)); + } + return tr; +} + +/* Convert from number or string to integer (overflow undefined). */ +TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr) +{ + if (!tref_isinteger(tr)) { + if (tref_isstr(tr)) + tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); + else if (!tref_isnum(tr)) + lj_trace_err(J, LJ_TRERR_BADTYPE); + tr = emitir(IRTI(IR_TOINT), tr, IRTOINT_ANY); + } + return tr; +} + +/* -- Miscellaneous IR ops ------------------------------------------------ */ + +/* Evaluate numeric comparison. */ +int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op) +{ + switch (op) { + case IR_EQ: return (a == b); + case IR_NE: return (a != b); + case IR_LT: return (a < b); + case IR_GE: return (a >= b); + case IR_LE: return (a <= b); + case IR_GT: return (a > b); + case IR_ULT: return !(a >= b); + case IR_UGE: return !(a < b); + case IR_ULE: return !(a > b); + case IR_UGT: return !(a <= b); + default: lua_assert(0); return 0; + } +} + +/* Evaluate string comparison. */ +int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op) +{ + int res = lj_str_cmp(a, b); + switch (op) { + case IR_LT: return (res < 0); + case IR_GE: return (res >= 0); + case IR_LE: return (res <= 0); + case IR_GT: return (res > 0); + default: lua_assert(0); return 0; + } +} + +/* Rollback IR to previous state. */ +void lj_ir_rollback(jit_State *J, IRRef ref) +{ + IRRef nins = J->cur.nins; + while (nins > ref) { + IRIns *ir; + nins--; + ir = IR(nins); + J->chain[ir->o] = ir->prev; + } + J->cur.nins = nins; +} + +#undef IR +#undef fins +#undef emitir + +#endif diff --git a/src/lj_ir.h b/src/lj_ir.h new file mode 100644 index 0000000000..a6973a8199 --- /dev/null +++ b/src/lj_ir.h @@ -0,0 +1,429 @@ +/* +** SSA IR (Intermediate Representation) format. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_IR_H +#define _LJ_IR_H + +#include "lj_obj.h" + +/* IR instruction definition. Order matters, see below. */ +#define IRDEF(_) \ + /* Miscellaneous ops. */ \ + _(NOP, N , ___, ___) \ + _(BASE, N , lit, lit) \ + _(LOOP, G , ___, ___) \ + _(PHI, S , ref, ref) \ + _(RENAME, S , ref, lit) \ + \ + /* Constants. */ \ + _(KPRI, N , ___, ___) \ + _(KINT, N , cst, ___) \ + _(KGC, N , cst, ___) \ + _(KPTR, N , cst, ___) \ + _(KNULL, N , cst, ___) \ + _(KNUM, N , cst, ___) \ + _(KSLOT, N , ref, lit) \ + \ + /* Guarded assertions. */ \ + /* Must be properly aligned to flip opposites (^1) and (un)ordered (^4). */ \ + _(EQ, GC, ref, ref) \ + _(NE, GC, ref, ref) \ + \ + _(ABC, G , ref, ref) \ + _(FRAME, G , ref, ref) \ + \ + _(LT, G , ref, ref) \ + _(GE, G , ref, ref) \ + _(LE, G , ref, ref) \ + _(GT, G , ref, ref) \ + \ + _(ULT, G , ref, ref) \ + _(UGE, G , ref, ref) \ + _(ULE, G , ref, ref) \ + _(UGT, G , ref, ref) \ + \ + /* Bit ops. */ \ + _(BNOT, N , ref, ___) \ + _(BSWAP, N , ref, ___) \ + _(BAND, C , ref, ref) \ + _(BOR, C , ref, ref) \ + _(BXOR, C , ref, ref) \ + _(BSHL, N , ref, ref) \ + _(BSHR, N , ref, ref) \ + _(BSAR, N , ref, ref) \ + _(BROL, N , ref, ref) \ + _(BROR, N , ref, ref) \ + \ + /* Arithmetic ops. ORDER ARITH (FPMATH/POWI take the space for MOD/POW). 
*/ \ + _(ADD, C , ref, ref) \ + _(SUB, N , ref, ref) \ + _(MUL, C , ref, ref) \ + _(DIV, N , ref, ref) \ + \ + _(FPMATH, N , ref, lit) \ + _(POWI, N , ref, ref) \ + \ + _(NEG, N , ref, ref) \ + _(ABS, N , ref, ref) \ + _(ATAN2, N , ref, ref) \ + _(LDEXP, N , ref, ref) \ + _(MIN, C , ref, ref) \ + _(MAX, C , ref, ref) \ + \ + /* Overflow-checking arithmetic ops. */ \ + _(ADDOV, GC, ref, ref) \ + _(SUBOV, G , ref, ref) \ + \ + /* Memory ops. A = array, H = hash, U = upvalue, F = field, S = stack. */ \ + \ + /* Memory references. */ \ + _(AREF, R , ref, ref) \ + _(HREFK, RG, ref, ref) \ + _(HREF, L , ref, ref) \ + _(NEWREF, S , ref, ref) \ + _(UREFO, LG, ref, lit) \ + _(UREFC, LG, ref, lit) \ + _(FREF, R , ref, lit) \ + _(STRREF, N , ref, ref) \ + \ + /* Loads and Stores. These must be in the same order. */ \ + _(ALOAD, LG, ref, ___) \ + _(HLOAD, LG, ref, ___) \ + _(ULOAD, LG, ref, ___) \ + _(FLOAD, L , ref, lit) \ + _(SLOAD, LG, lit, lit) \ + _(XLOAD, L , ref, lit) \ + \ + _(ASTORE, S , ref, ref) \ + _(HSTORE, S , ref, ref) \ + _(USTORE, S , ref, ref) \ + _(FSTORE, S , ref, ref) \ + \ + /* String ops. */ \ + _(SNEW, N , ref, ref) \ + \ + /* Table ops. */ \ + _(TNEW, A , lit, lit) \ + _(TDUP, A , ref, ___) \ + _(TLEN, L , ref, ___) \ + _(TBAR, S , ref, ___) \ + _(OBAR, S , ref, ref) \ + \ + /* Type conversions. */ \ + _(TONUM, N , ref, ___) \ + _(TOINT, N , ref, lit) \ + _(TOBIT, N , ref, ref) \ + _(TOSTR, N , ref, ___) \ + _(STRTO, G , ref, ___) \ + \ + /* End of list. */ + +/* IR opcodes (max. 256). */ +typedef enum { +#define IRENUM(name, m, m1, m2) IR_##name, +IRDEF(IRENUM) +#undef IRENUM + IR__MAX +} IROp; + +/* Stored opcode. */ +typedef uint8_t IROp1; + +LJ_STATIC_ASSERT(((int)IR_EQ^1) == (int)IR_NE); +LJ_STATIC_ASSERT(((int)IR_LT^1) == (int)IR_GE); +LJ_STATIC_ASSERT(((int)IR_LE^1) == (int)IR_GT); +LJ_STATIC_ASSERT(((int)IR_LT^3) == (int)IR_GT); +LJ_STATIC_ASSERT(((int)IR_LT^4) == (int)IR_ULT); + +/* Delta between xLOAD and xSTORE. */ +#define IRDELTA_L2S ((int)IR_ASTORE - (int)IR_ALOAD) + +LJ_STATIC_ASSERT((int)IR_HLOAD + IRDELTA_L2S == (int)IR_HSTORE); +LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE); +LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE); + +/* FPMATH sub-functions. ORDER FPM. */ +#define IRFPMDEF(_) \ + _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \ + _(SQRT) _(EXP) _(EXP2) _(LOG) _(LOG2) _(LOG10) \ + _(SIN) _(COS) _(TAN) \ + _(OTHER) + +typedef enum { +#define FPMENUM(name) IRFPM_##name, +IRFPMDEF(FPMENUM) +#undef FPMENUM + IRFPM__MAX +} IRFPMathOp; + +/* FLOAD field IDs. */ +#define IRFLDEF(_) \ + _(STR_LEN, GCstr, len) \ + _(FUNC_ENV, GCfunc, l.env) \ + _(TAB_META, GCtab, metatable) \ + _(TAB_ARRAY, GCtab, array) \ + _(TAB_NODE, GCtab, node) \ + _(TAB_ASIZE, GCtab, asize) \ + _(TAB_HMASK, GCtab, hmask) \ + _(TAB_NOMM, GCtab, nomm) \ + _(UDATA_META, GCudata, metatable) + +typedef enum { +#define FLENUM(name, type, field) IRFL_##name, +IRFLDEF(FLENUM) +#undef FLENUM + IRFL__MAX +} IRFieldID; + +/* SLOAD mode bits, stored in op2. */ +#define IRSLOAD_INHERIT 1 /* Inherited by exits/side traces. */ +#define IRSLOAD_READONLY 2 /* Read-only, omit slot store. */ +#define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */ + +/* XLOAD mode, stored in op2. */ +#define IRXLOAD_UNALIGNED 1 + +/* TOINT mode, stored in op2. Ordered by strength of the checks. */ +#define IRTOINT_CHECK 0 /* Number checked for integerness. */ +#define IRTOINT_INDEX 1 /* Checked + special backprop rules. 
*/ +#define IRTOINT_ANY 2 /* Any FP number is ok. */ +#define IRTOINT_TOBIT 3 /* Cache only: TOBIT conversion. */ + +/* IR operand mode (2 bit). */ +typedef enum { + IRMref, /* IR reference. */ + IRMlit, /* 16 bit unsigned literal. */ + IRMcst, /* Constant literal: i, gcr or ptr. */ + IRMnone /* Unused operand. */ +} IRMode; +#define IRM___ IRMnone + +/* Mode bits: Commutative, {Normal/Ref, Alloc, Load, Store}, Guard. */ +#define IRM_C 0x10 + +#define IRM_N 0x00 +#define IRM_R IRM_N +#define IRM_A 0x20 +#define IRM_L 0x40 +#define IRM_S 0x60 + +#define IRM_G 0x80 + +#define IRM_GC (IRM_G|IRM_C) +#define IRM_RG (IRM_R|IRM_G) +#define IRM_LG (IRM_L|IRM_G) + +#define irm_op1(m) (cast(IRMode, (m)&3)) +#define irm_op2(m) (cast(IRMode, ((m)>>2)&3)) +#define irm_iscomm(m) ((m) & IRM_C) +#define irm_kind(m) ((m) & IRM_S) +#define irm_isguard(m) ((m) & IRM_G) +/* Stores or any other op with a guard has a side-effect. */ +#define irm_sideeff(m) ((m) >= IRM_S) + +#define IRMODE(name, m, m1, m2) ((IRM##m1)|((IRM##m2)<<2)|(IRM_##m)), + +LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; + +/* IR result type and flags (8 bit). */ +typedef enum { + /* Map of itypes to non-negative numbers. ORDER LJ_T */ + IRT_NIL, + IRT_FALSE, + IRT_TRUE, + IRT_LIGHTUD, + /* GCobj types are from here ... */ + IRT_STR, + IRT_PTR, /* IRT_PTR never escapes the IR (map of LJ_TUPVAL). */ + IRT_THREAD, + IRT_PROTO, + IRT_FUNC, + IRT_9, /* LJ_TDEADKEY is never used in the IR. */ + IRT_TAB, + IRT_UDATA, + /* ... until here. */ + IRT_NUM, + /* The various integers are only used in the IR and can only escape to + ** a TValue after implicit or explicit conversion (TONUM). Their types + ** must be contiguous and next to IRT_NUM (see the typerange macros below). + */ + IRT_INT, + IRT_I8, + IRT_U8, + IRT_I16, + IRT_U16, + /* There is room for 14 more types. */ + + /* Additional flags. */ + IRT_MARK = 0x20, /* Marker for misc. purposes. */ + IRT_GUARD = 0x40, /* Instruction is a guard. */ + IRT_ISPHI = 0x80, /* Instruction is left or right PHI operand. */ + + /* Masks. */ + IRT_TYPE = 0x1f, + IRT_T = 0xff +} IRType; + +#define irtype_ispri(irt) ((uint32_t)(irt) <= IRT_TRUE) + +/* Stored IRType. */ +typedef struct IRType1 { uint8_t irt; } IRType1; + +#define IRT(o, t) ((uint32_t)(((o)<<8) | (t))) +#define IRTI(o) (IRT((o), IRT_INT)) +#define IRTN(o) (IRT((o), IRT_NUM)) +#define IRTG(o, t) (IRT((o), IRT_GUARD|(t))) +#define IRTGI(o) (IRT((o), IRT_GUARD|IRT_INT)) + +#define irt_t(t) (cast(IRType, (t).irt)) +#define irt_type(t) (cast(IRType, (t).irt & IRT_TYPE)) +#define irt_sametype(t1, t2) ((((t1).irt ^ (t2).irt) & IRT_TYPE) == 0) +#define irt_typerange(t, first, last) \ + ((uint32_t)((t).irt & IRT_TYPE) - (uint32_t)(first) <= (uint32_t)(last-first)) + +#define irt_isnil(t) (irt_type(t) == IRT_NIL) +#define irt_ispri(t) ((uint32_t)irt_type(t) <= IRT_TRUE) +#define irt_isstr(t) (irt_type(t) == IRT_STR) +#define irt_isfunc(t) (irt_type(t) == IRT_FUNC) +#define irt_istab(t) (irt_type(t) == IRT_TAB) +#define irt_isnum(t) (irt_type(t) == IRT_NUM) +#define irt_isint(t) (irt_type(t) == IRT_INT) +#define irt_isi8(t) (irt_type(t) == IRT_I8) +#define irt_isu8(t) (irt_type(t) == IRT_U8) +#define irt_isi16(t) (irt_type(t) == IRT_I16) +#define irt_isu16(t) (irt_type(t) == IRT_U16) + +#define irt_isinteger(t) (irt_typerange((t), IRT_INT, IRT_U16)) +#define irt_isgcv(t) (irt_typerange((t), IRT_STR, IRT_UDATA)) +#define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) + +#define itype2irt(tv) \ + (~uitype(tv) < IRT_NUM ? 
cast(IRType, ~uitype(tv)) : IRT_NUM) +#define irt_toitype(t) ((int32_t)~(uint32_t)irt_type(t)) + +#define irt_isguard(t) ((t).irt & IRT_GUARD) +#define irt_ismarked(t) ((t).irt & IRT_MARK) +#define irt_setmark(t) ((t).irt |= IRT_MARK) +#define irt_clearmark(t) ((t).irt &= ~IRT_MARK) +#define irt_isphi(t) ((t).irt & IRT_ISPHI) +#define irt_setphi(t) ((t).irt |= IRT_ISPHI) +#define irt_clearphi(t) ((t).irt &= ~IRT_ISPHI) + +/* Stored combined IR opcode and type. */ +typedef uint16_t IROpT; + +/* IR references. */ +typedef uint16_t IRRef1; /* One stored reference. */ +typedef uint32_t IRRef2; /* Two stored references. */ +typedef uint32_t IRRef; /* Used to pass around references. */ + +/* Fixed references. */ +enum { + REF_BIAS = 0x8000, + REF_TRUE = REF_BIAS-3, + REF_FALSE = REF_BIAS-2, + REF_NIL = REF_BIAS-1, /* \--- Constants grow downwards. */ + REF_BASE = REF_BIAS, /* /--- IR grows upwards. */ + REF_FIRST = REF_BIAS+1, + REF_DROP = 0xffff +}; + +/* Note: IRMlit operands must be < REF_BIAS, too! +** This allows for fast and uniform manipulation of all operands +** without looking up the operand mode in lj_ir_mode: +** - CSE calculates the maximum reference of two operands. +** This must work with mixed reference/literal operands, too. +** - DCE marking only checks for operand >= REF_BIAS. +** - LOOP needs to substitute reference operands. +** Constant references and literals must not be modified. +*/ + +#define IRREF2(lo, hi) ((IRRef2)(lo) | ((IRRef2)(hi) << 16)) + +#define irref_isk(ref) ((ref) < REF_BIAS) + +/* Tagged IR references. */ +typedef uint32_t TRef; + +#define TREF(ref, t) (cast(TRef, (ref) + ((t)<<16))) + +#define tref_ref(tr) (cast(IRRef1, (tr))) +#define tref_t(tr) (cast(IRType, (tr)>>16)) +#define tref_type(tr) (cast(IRType, ((tr)>>16) & IRT_TYPE)) +#define tref_typerange(tr, first, last) \ + ((((tr)>>16) & IRT_TYPE) - (TRef)(first) <= (TRef)(last-first)) + +#define tref_istype(tr, t) (((tr) & (IRT_TYPE<<16)) == ((t)<<16)) +#define tref_isnil(tr) (tref_istype((tr), IRT_NIL)) +#define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE)) +#define tref_istrue(tr) (tref_istype((tr), IRT_TRUE)) +#define tref_isstr(tr) (tref_istype((tr), IRT_STR)) +#define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC)) +#define tref_istab(tr) (tref_istype((tr), IRT_TAB)) +#define tref_isudata(tr) (tref_istype((tr), IRT_UDATA)) +#define tref_isnum(tr) (tref_istype((tr), IRT_NUM)) +#define tref_isint(tr) (tref_istype((tr), IRT_INT)) + +#define tref_isbool(tr) (tref_typerange((tr), IRT_FALSE, IRT_TRUE)) +#define tref_ispri(tr) (tref_typerange((tr), IRT_NIL, IRT_TRUE)) +#define tref_istruecond(tr) (!tref_typerange((tr), IRT_NIL, IRT_FALSE)) +#define tref_isinteger(tr) (tref_typerange((tr), IRT_INT, IRT_U16)) +#define tref_isnumber(tr) (tref_typerange((tr), IRT_NUM, IRT_U16)) +#define tref_isnumber_str(tr) (tref_isnumber((tr)) || tref_isstr((tr))) +#define tref_isgcv(tr) (tref_typerange((tr), IRT_STR, IRT_UDATA)) + +#define tref_isk(tr) (irref_isk(tref_ref((tr)))) +#define tref_isk2(tr1, tr2) (irref_isk(tref_ref((tr1) | (tr2)))) + +#define TREF_PRI(t) (TREF(REF_NIL-(t), (t))) +#define TREF_NIL (TREF_PRI(IRT_NIL)) +#define TREF_FALSE (TREF_PRI(IRT_FALSE)) +#define TREF_TRUE (TREF_PRI(IRT_TRUE)) + +/* IR instruction format (64 bit). 
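+** Constants keep their payload in the op12/i/gcr/ptr union fields and
+** always live below REF_BIAS (cf. irref_isk() above).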
+** +** 16 16 8 8 8 8 +** +-------+-------+---+---+---+---+ +** | op1 | op2 | t | o | r | s | +** +-------+-------+---+---+---+---+ +** | op12/i/gco | ot | prev | (alternative fields in union) +** +---------------+-------+-------+ +** 32 16 16 +** +** prev is only valid prior to register allocation and then reused for r + s. +*/ + +typedef union IRIns { + struct { + LJ_ENDIAN_LOHI( + IRRef1 op1; /* IR operand 1. */ + , IRRef1 op2; /* IR operand 2. */ + ) + IROpT ot; /* IR opcode and type (overlaps t and o). */ + IRRef1 prev; /* Previous ins in same chain (overlaps r and s). */ + }; + struct { + IRRef2 op12; /* IR operand 1 and 2 (overlaps op1 and op2). */ + LJ_ENDIAN_LOHI( + IRType1 t; /* IR type. */ + , IROp1 o; /* IR opcode. */ + ) + LJ_ENDIAN_LOHI( + uint8_t r; /* Register allocation (overlaps prev). */ + , uint8_t s; /* Spill slot allocation (overlaps prev). */ + ) + }; + int32_t i; /* 32 bit signed integer literal (overlaps op12). */ + GCRef gcr; /* GCobj constant (overlaps op12). */ + MRef ptr; /* Pointer constant (overlaps op12). */ +} IRIns; + +#define ir_kgc(ir) (gcref((ir)->gcr)) +#define ir_kstr(ir) (gco2str(ir_kgc((ir)))) +#define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) +#define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) +#define ir_knum(ir) (mref((ir)->ptr, cTValue)) + +#endif diff --git a/src/lj_iropt.h b/src/lj_iropt.h new file mode 100644 index 0000000000..69b0a955e3 --- /dev/null +++ b/src/lj_iropt.h @@ -0,0 +1,128 @@ +/* +** Common header for IR emitter and optimizations. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_IROPT_H +#define _LJ_IROPT_H + +#include "lj_obj.h" +#include "lj_jit.h" + +#if LJ_HASJIT +/* IR emitter. */ +LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J); + +/* Save current IR in J->fold.ins, but do not emit it (yet). */ +static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b) +{ + J->fold.ins.ot = ot; J->fold.ins.op1 = a; J->fold.ins.op2 = b; +} + +#define lj_ir_set(J, ot, a, b) \ + lj_ir_set_(J, (uint16_t)(ot), (IRRef1)(a), (IRRef1)(b)) + +/* Get ref of next IR instruction and optionally grow IR. +** Note: this may invalidate all IRIns*! +*/ +static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J) +{ + IRRef ref = J->cur.nins; + if (LJ_UNLIKELY(ref >= J->irtoplim)) lj_ir_growtop(J); + J->cur.nins = ref + 1; + return ref; +} + +/* Interning of constants. */ +LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); +LJ_FUNC void lj_ir_knum_freeall(jit_State *J); +LJ_FUNC TRef lj_ir_knum_addr(jit_State *J, cTValue *tv); +LJ_FUNC TRef lj_ir_knum_nn(jit_State *J, uint64_t nn); +LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); +LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t); +LJ_FUNC TRef lj_ir_kptr(jit_State *J, void *ptr); +LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); +LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); + +static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) +{ + TValue tv; + tv.n = n; + return lj_ir_knum_nn(J, tv.u64); +} + +#define lj_ir_kstr(J, str) lj_ir_kgc(J, obj2gco((str)), IRT_STR) +#define lj_ir_ktab(J, tab) lj_ir_kgc(J, obj2gco((tab)), IRT_TAB) +#define lj_ir_kfunc(J, func) lj_ir_kgc(J, obj2gco((func)), IRT_FUNC) + +/* Special FP constants. 
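+** Decoded: U64x(3ff00000,00000000) is 1.0 and U64x(43380000,00000000) is
+** 2^52+2^51, the bias pattern consumed by the TOBIT conversion.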
*/ +#define lj_ir_knum_zero(J) lj_ir_knum_nn(J, U64x(00000000,00000000)) +#define lj_ir_knum_one(J) lj_ir_knum_nn(J, U64x(3ff00000,00000000)) +#define lj_ir_knum_tobit(J) lj_ir_knum_nn(J, U64x(43380000,00000000)) + +/* Special 16 byte aligned SIMD constants. */ +LJ_DATA LJ_ALIGN(16) cTValue lj_ir_knum_tv[4]; +#define lj_ir_knum_abs(J) lj_ir_knum_addr(J, &lj_ir_knum_tv[0]) +#define lj_ir_knum_neg(J) lj_ir_knum_addr(J, &lj_ir_knum_tv[2]) + +/* Access to constants. */ +LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); + +/* Convert IR operand types. */ +LJ_FUNC TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr); +LJ_FUNC TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr); +LJ_FUNC TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr); +LJ_FUNC TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr); + +/* Miscellaneous IR ops. */ +LJ_FUNC int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op); +LJ_FUNC int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op); +LJ_FUNC void lj_ir_rollback(jit_State *J, IRRef ref); + +/* Emit IR instructions with on-the-fly optimizations. */ +LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J); + +/* Special return values for the fold functions. */ +enum { + NEXTFOLD, /* Couldn't fold, pass on. */ + RETRYFOLD, /* Retry fold with modified fins. */ + KINTFOLD, /* Return ref for int constant in fins->i. */ + FAILFOLD, /* Guard would always fail. */ + DROPFOLD, /* Guard eliminated. */ + MAX_FOLD +}; + +#define INTFOLD(k) ((J->fold.ins.i = (k)), (TRef)KINTFOLD) +#define CONDFOLD(cond) ((TRef)FAILFOLD + (TRef)(cond)) +#define LEFTFOLD (J->fold.ins.op1) +#define RIGHTFOLD (J->fold.ins.op2) +#define CSEFOLD (lj_opt_cse(J)) +#define EMITFOLD (lj_ir_emit(J)) + +/* Load/store forwarding. */ +LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J); +LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); + +/* Dead-store elimination. */ +LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J); + +/* Narrowing. */ +LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J); +LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc); +LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc); +LJ_FUNC IRType lj_opt_narrow_forl(cTValue *forbase); + +/* Optimization passes. */ +LJ_FUNC void lj_opt_dce(jit_State *J); +LJ_FUNC int lj_opt_loop(jit_State *J); +#endif + +#endif diff --git a/src/lj_jit.h b/src/lj_jit.h new file mode 100644 index 0000000000..280eff41ec --- /dev/null +++ b/src/lj_jit.h @@ -0,0 +1,279 @@ +/* +** Common definitions for the JIT compiler. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_JIT_H +#define _LJ_JIT_H + +#include "lj_obj.h" +#include "lj_ir.h" + +/* JIT engine flags. */ +#define JIT_F_ON 0x00000001 + +/* CPU-specific JIT engine flags. */ +#if LJ_TARGET_X86ORX64 +#define JIT_F_CMOV 0x00000100 +#define JIT_F_SSE2 0x00000200 +#define JIT_F_SSE4_1 0x00000400 +#define JIT_F_P4 0x00000800 +#define JIT_F_PREFER_IMUL 0x00001000 +#define JIT_F_SPLIT_XMM 0x00002000 +#define JIT_F_LEA_AGU 0x00004000 + +/* Names for the CPU-specific flags. Must match the order above. 
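+** Each name is prefixed with its length as one char: "\4CMOV" is the
+** 4 char name of the first flag (JIT_F_CMOV), "\6SSE4.1" the 6 char name
+** of the third, and AMD/K8/ATOM label the three tuning flags at the end.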
*/
+#define JIT_F_CPU_FIRST		JIT_F_CMOV
+#define JIT_F_CPUSTRING		"\4CMOV\4SSE2\6SSE4.1\2P4\3AMD\2K8\4ATOM"
+#else
+#error "Missing CPU-specific JIT engine flags"
+#endif
+
+/* Optimization flags. */
+#define JIT_F_OPT_MASK		0x00ff0000
+
+#define JIT_F_OPT_FOLD		0x00010000
+#define JIT_F_OPT_CSE		0x00020000
+#define JIT_F_OPT_DCE		0x00040000
+#define JIT_F_OPT_FWD		0x00080000
+#define JIT_F_OPT_DSE		0x00100000
+#define JIT_F_OPT_NARROW	0x00200000
+#define JIT_F_OPT_LOOP		0x00400000
+#define JIT_F_OPT_FUSE		0x00800000
+
+/* Optimization names for -O. Must match the order above. */
+#define JIT_F_OPT_FIRST		JIT_F_OPT_FOLD
+#define JIT_F_OPTSTRING	\
+  "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\4fuse"
+
+/* Optimization levels set a fixed combination of flags. */
+#define JIT_F_OPT_0	0
+#define JIT_F_OPT_1	(JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE)
+#define JIT_F_OPT_2	(JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP)
+#define JIT_F_OPT_3	(JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_FUSE)
+#define JIT_F_OPT_DEFAULT	JIT_F_OPT_3
+
+#ifdef LUA_USE_WIN
+/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
+#define JIT_P_sizemcode_DEFAULT	64
+#else
+/* Could go as low as 4K, but the mmap() overhead would be rather high. */
+#define JIT_P_sizemcode_DEFAULT	32
+#endif
+
+/* Optimization parameters and their defaults. Length is a char in octal! */
+#define JIT_PARAMDEF(_) \
+  _(\010, maxtrace,	1000)	/* Max. # of traces in cache. */ \
+  _(\011, maxrecord,	2000)	/* Max. # of recorded IR instructions. */ \
+  _(\012, maxirconst,	500)	/* Max. # of IR constants of a trace. */ \
+  _(\007, maxside,	100)	/* Max. # of side traces of a root trace. */ \
+  _(\007, maxsnap,	100)	/* Max. # of snapshots for a trace. */ \
+  \
+  _(\007, hotloop,	57)	/* # of iterations to detect a hot loop. */ \
+  _(\007, hotexit,	10)	/* # of taken exits to start a side trace. */ \
+  _(\007, tryside,	4)	/* # of attempts to compile a side trace. */ \
+  \
+  _(\012, instunroll,	4)	/* Max. unroll for instable loops. */ \
+  _(\012, loopunroll,	7)	/* Max. unroll for loop ops in side traces. */ \
+  _(\012, callunroll,	3)	/* Max. unroll for recursive calls. */ \
+  _(\011, recunroll,	0)	/* Max. unroll for true recursion. */ \
+  \
+  /* Size of each machine code area (in KBytes). */ \
+  _(\011, sizemcode,	JIT_P_sizemcode_DEFAULT) \
+  /* Max. total size of all machine code areas (in KBytes). */ \
+  _(\010, maxmcode,	512) \
+  /* End of list. */
+
+enum {
+#define JIT_PARAMENUM(len, name, value)	JIT_P_##name,
+JIT_PARAMDEF(JIT_PARAMENUM)
+#undef JIT_PARAMENUM
+  JIT_P__MAX
+};
+
+#define JIT_PARAMSTR(len, name, value)	#len #name
+#define JIT_P_STRING	JIT_PARAMDEF(JIT_PARAMSTR)
+
+/* Trace compiler state. */
+typedef enum {
+  LJ_TRACE_IDLE,	/* Trace compiler idle. */
+  LJ_TRACE_ACTIVE = 0x10,
+  LJ_TRACE_RECORD,	/* Bytecode recording active. */
+  LJ_TRACE_START,	/* New trace started. */
+  LJ_TRACE_END,		/* End of trace. */
+  LJ_TRACE_ASM,		/* Assemble trace. */
+  LJ_TRACE_ERR,		/* Trace aborted with error. */
+} TraceState;
+
+/* Machine code type. */
+typedef uint8_t MCode;
+
+/* Stack snapshot header. */
+typedef struct SnapShot {
+  uint16_t mapofs;	/* Offset into snapshot map. */
+  IRRef1 ref;		/* First IR ref for this snapshot. */
+  uint8_t nslots;	/* Number of stack slots. */
+  uint8_t nframelinks;	/* Number of frame links. */
+  uint8_t count;	/* Count of taken exits for this snapshot. */
+  uint8_t unused1;
+} SnapShot;
+
+#define SNAPCOUNT_DONE	255	/* Already compiled and linked a side trace.
*/ +#define snap_ref(sn) ((IRRef)(IRRef1)(sn)) +#define snap_ridsp(sn) ((sn) >> 16) + +/* Snapshot and exit numbers. */ +typedef uint32_t SnapNo; +typedef uint32_t ExitNo; + +/* Trace number. */ +typedef uint32_t TraceNo; /* Used to pass around trace numbers. */ +typedef uint16_t TraceNo1; /* Stored trace number. */ + +#define TRACE_INTERP 0 /* Fallback to interpreter. */ + +/* Trace anchor. */ +typedef struct Trace { + IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ + IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ + IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ + SnapShot *snap; /* Snapshot array. */ + IRRef2 *snapmap; /* Snapshot map. */ + uint16_t nsnap; /* Number of snapshots. */ + uint16_t nsnapmap; /* Number of snapshot map elements. */ + GCRef startpt; /* Starting prototype. */ + BCIns startins; /* Original bytecode of starting instruction. */ + MCode *mcode; /* Start of machine code. */ + MSize szmcode; /* Size of machine code. */ + MSize mcloop; /* Offset of loop start in machine code. */ + TraceNo1 link; /* Linked trace (or self for loops). */ + TraceNo1 root; /* Root trace of side trace (or 0 for root traces). */ + TraceNo1 nextroot; /* Next root trace for same prototype. */ + TraceNo1 nextside; /* Next side trace of same root trace. */ + uint16_t nchild; /* Number of child traces (root trace only). */ + uint16_t spadjust; /* Stack pointer adjustment (offset in bytes). */ +#ifdef LUAJIT_USE_GDBJIT + void *gdbjit_entry; /* GDB JIT entry. */ +#endif +} Trace; + +/* Round-robin penalty cache for bytecodes leading to aborted traces. */ +typedef struct HotPenalty { + const BCIns *pc; /* Starting bytecode PC. */ + uint16_t val; /* Penalty value, i.e. hotcount start. */ + uint16_t reason; /* Abort reason (really TraceErr). */ +} HotPenalty; + +/* Number of slots for the penalty cache. Must be a power of 2. */ +#define PENALTY_SLOTS 16 + +/* Round-robin backpropagation cache for narrowing conversions. */ +typedef struct BPropEntry { + IRRef1 key; /* Key: original reference. */ + IRRef1 val; /* Value: reference after conversion. */ + IRRef mode; /* Mode for this entry (currently IRTOINT_*). */ +} BPropEntry; + +/* Number of slots for the backpropagation cache. Must be a power of 2. */ +#define BPROP_SLOTS 16 + +/* Fold state is used to fold instructions on-the-fly. */ +typedef struct FoldState { + IRIns ins; /* Currently emitted instruction. */ + IRIns left; /* Instruction referenced by left operand. */ + IRIns right; /* Instruction referenced by right operand. */ +} FoldState; + +/* JIT compiler state. */ +typedef struct jit_State { + Trace cur; /* Current trace. */ + + lua_State *L; /* Current Lua state. */ + const BCIns *pc; /* Current PC. */ + BCReg maxslot; /* Relative to baseslot. */ + + uint32_t flags; /* JIT engine flags. */ + TRef *base; /* Current frame base, points into J->slots. */ + BCReg baseslot; /* Current frame base, offset into J->slots. */ + GCfunc *fn; /* Current function. */ + GCproto *pt; /* Current prototype. */ + + FoldState fold; /* Fold state. */ + + uint8_t mergesnap; /* Allowed to merge with next snapshot. */ + uint8_t needsnap; /* Need snapshot before recording next bytecode. */ + IRType1 guardemit; /* Accumulated IRT_GUARD for emitted instructions. */ + uint8_t unused1; + + const BCIns *bc_min; /* Start of allowed bytecode range for root trace. */ + MSize bc_extent; /* Extent of the range. */ + + TraceState state; /* Trace compiler state. */ + + int32_t instunroll; /* Unroll counter for instable loops. 
*/
+  int32_t loopunroll;	/* Unroll counter for loop ops in side traces. */
+  int32_t tailcalled;	/* Number of successive tailcalls. */
+  int32_t framedepth;	/* Current frame depth. */
+
+  MRef knum;		/* Pointer to chained array of KNUM constants. */
+
+  IRIns *irbuf;		/* Temp. IR instruction buffer. Biased with REF_BIAS. */
+  IRRef irtoplim;	/* Upper limit of instruction buffer (biased). */
+  IRRef irbotlim;	/* Lower limit of instruction buffer (biased). */
+  IRRef loopref;	/* Last loop reference or ref of final LOOP (or 0). */
+
+  SnapShot *snapbuf;	/* Temp. snapshot buffer. */
+  IRRef2 *snapmapbuf;	/* Temp. snapshot map buffer. */
+  MSize sizesnap;	/* Size of temp. snapshot buffer. */
+  MSize sizesnapmap;	/* Size of temp. snapshot map buffer. */
+
+  Trace **trace;	/* Array of traces. */
+  TraceNo curtrace;	/* Current trace number (if not 0). Kept in J->cur. */
+  TraceNo freetrace;	/* Start of scan for next free trace. */
+  MSize sizetrace;	/* Size of trace array. */
+
+  IRRef1 chain[IR__MAX];  /* IR instruction skip-list chain anchors. */
+  TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA];  /* Stack slot map. */
+
+  int32_t param[JIT_P__MAX];  /* JIT engine parameters. */
+
+  MCode *exitstubgroup[LJ_MAX_EXITSTUBGR];  /* Exit stub group addresses. */
+
+  HotPenalty penalty[PENALTY_SLOTS];  /* Penalty slots. */
+  uint32_t penaltyslot;	/* Round-robin index into penalty slots. */
+
+  BPropEntry bpropcache[BPROP_SLOTS];  /* Backpropagation cache slots. */
+  uint32_t bpropslot;	/* Round-robin index into bpropcache slots. */
+
+  const BCIns *startpc;	/* Bytecode PC of starting instruction. */
+  TraceNo parent;	/* Parent of current side trace (0 for root traces). */
+  ExitNo exitno;	/* Exit number in parent of current side trace. */
+
+  TValue errinfo;	/* Additional info element for trace errors. */
+
+  MCode *mcarea;	/* Base of current mcode area. */
+  MCode *mctop;		/* Top of current mcode area. */
+  MCode *mcbot;		/* Bottom of current mcode area. */
+  size_t szmcarea;	/* Size of current mcode area. */
+  size_t szallmcarea;	/* Total size of all allocated mcode areas. */
+  int mcprot;		/* Protection of current mcode area. */
+} jit_State;
+
+/* Exit stubs. */
+#if LJ_TARGET_X86ORX64
+/* Limited by the range of a short fwd jump (127): (2+2)*(32-1)-2 = 122. */
+#define EXITSTUB_SPACING	(2+2)
+#define EXITSTUBS_PER_GROUP	32
+#else
+#error "Missing CPU-specific exit stub definitions"
+#endif
+
+/* Return the address of an exit stub. */
+static LJ_AINLINE MCode *exitstub_addr(jit_State *J, ExitNo exitno)
+{
+  lua_assert(J->exitstubgroup[exitno / EXITSTUBS_PER_GROUP] != NULL);
+  return J->exitstubgroup[exitno / EXITSTUBS_PER_GROUP] +
+	 EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP);
+}
+
+#endif
diff --git a/src/lj_lex.c b/src/lj_lex.c
new file mode 100644
index 0000000000..38b0a7d4b7
--- /dev/null
+++ b/src/lj_lex.c
@@ -0,0 +1,393 @@
+/*
+** Lexical analyzer.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lj_lex_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_lex.h"
+#include "lj_parse.h"
+#include "lj_ctype.h"
+
+/* Lua lexer token names.
*/ +static const char *const tokennames[] = { +#define TKSTR1(name) #name, +#define TKSTR2(name, sym) #sym, +TKDEF(TKSTR1, TKSTR2) +#undef TKSTR1 +#undef TKSTR2 + NULL +}; + +/* -- Buffer handling ----------------------------------------------------- */ + +#define char2int(c) cast(int, cast(uint8_t, (c))) +#define next(ls) \ + (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls)) +#define save_and_next(ls) (save(ls, ls->current), next(ls)) +#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') +#define END_OF_STREAM (-1) + +static int fillbuf(LexState *ls) +{ + size_t sz; + const char *buf = ls->rfunc(ls->L, ls->rdata, &sz); + if (buf == NULL || sz == 0) return END_OF_STREAM; + ls->n = (MSize)sz - 1; + ls->p = buf; + return char2int(*(ls->p++)); +} + +static void save(LexState *ls, int c) +{ + if (ls->sb.n + 1 > ls->sb.sz) { + MSize newsize; + if (ls->sb.sz >= LJ_MAX_STR/2) + lj_lex_error(ls, 0, LJ_ERR_XELEM); + newsize = ls->sb.sz * 2; + lj_str_resizebuf(ls->L, &ls->sb, newsize); + } + ls->sb.buf[ls->sb.n++] = cast(char, c); +} + +static int check_next(LexState *ls, const char *set) +{ + if (!strchr(set, ls->current)) + return 0; + save_and_next(ls); + return 1; +} + +static void inclinenumber(LexState *ls) +{ + int old = ls->current; + lua_assert(currIsNewline(ls)); + next(ls); /* skip `\n' or `\r' */ + if (currIsNewline(ls) && ls->current != old) + next(ls); /* skip `\n\r' or `\r\n' */ + if (++ls->linenumber >= LJ_MAX_LINE) + lj_lex_error(ls, ls->token, LJ_ERR_XLINES); +} + +/* -- Scanner for terminals ----------------------------------------------- */ + +static void read_numeral(LexState *ls, TValue *tv) +{ + lua_assert(lj_ctype_isdigit(ls->current)); + do { + save_and_next(ls); + } while (lj_ctype_isdigit(ls->current) || ls->current == '.'); + if (check_next(ls, "Ee")) /* `E'? */ + check_next(ls, "+-"); /* optional exponent sign */ + while (lj_ctype_isident(ls->current)) + save_and_next(ls); + save(ls, '\0'); + if (!lj_str_numconv(ls->sb.buf, tv)) + lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); +} + +static int skip_sep(LexState *ls) +{ + int count = 0; + int s = ls->current; + lua_assert(s == '[' || s == ']'); + save_and_next(ls); + while (ls->current == '=') { + save_and_next(ls); + count++; + } + return (ls->current == s) ? count : (-count) - 1; +} + +static void read_long_string(LexState *ls, TValue *tv, int sep) +{ + save_and_next(ls); /* skip 2nd `[' */ + if (currIsNewline(ls)) /* string starts with a newline? */ + inclinenumber(ls); /* skip it */ + for (;;) { + switch (ls->current) { + case END_OF_STREAM: + lj_lex_error(ls, TK_eof, tv ? 
LJ_ERR_XLSTR : LJ_ERR_XLCOM); + break; + case ']': + if (skip_sep(ls) == sep) { + save_and_next(ls); /* skip 2nd `]' */ + goto endloop; + } + break; + case '\n': + case '\r': + save(ls, '\n'); + inclinenumber(ls); + if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */ + break; + default: + if (tv) save_and_next(ls); + else next(ls); + break; + } + } endloop: + if (tv) { + GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep), + ls->sb.n - 2*(2 + (MSize)sep)); + setstrV(ls->L, tv, str); + } +} + +static void read_string(LexState *ls, int delim, TValue *tv) +{ + save_and_next(ls); + while (ls->current != delim) { + switch (ls->current) { + case END_OF_STREAM: + lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); + continue; + case '\n': + case '\r': + lj_lex_error(ls, TK_string, LJ_ERR_XSTR); + continue; + case '\\': { + int c; + next(ls); /* do not save the `\' */ + switch (ls->current) { + case 'a': c = '\a'; break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue; + case END_OF_STREAM: continue; /* will raise an error next loop */ + default: + if (!lj_ctype_isdigit(ls->current)) { + save_and_next(ls); /* handles \\, \", \', and \? */ + } else { /* \xxx */ + int i = 0; + c = 0; + do { + c = 10*c + (ls->current-'0'); + next(ls); + } while (++i<3 && lj_ctype_isdigit(ls->current)); + if (c > UCHAR_MAX) + lj_lex_error(ls, TK_string, LJ_ERR_XESC); + save(ls, c); + } + continue; + } + save(ls, c); + next(ls); + continue; + } + default: + save_and_next(ls); + break; + } + } + save_and_next(ls); /* skip delimiter */ + setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2)); +} + +/* -- Main lexical scanner ------------------------------------------------ */ + +static int llex(LexState *ls, TValue *tv) +{ + lj_str_resetbuf(&ls->sb); + for (;;) { + if (lj_ctype_isident(ls->current)) { + GCstr *s; + if (lj_ctype_isdigit(ls->current)) { /* Numeric literal. */ + read_numeral(ls, tv); + return TK_number; + } + /* Identifier or reserved word. */ + do { + save_and_next(ls); + } while (lj_ctype_isident(ls->current)); + s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n); + if (s->reserved > 0) /* Reserved word? 
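+      ** (lj_lex_init() below tags reserved words with reserved = 1 to
+      ** TK_RESERVED, so this maps them directly to TK_and ... TK_while.)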
*/ + return TK_OFS + s->reserved; + setstrV(ls->L, tv, s); + return TK_name; + } + switch (ls->current) { + case '\n': + case '\r': + inclinenumber(ls); + continue; + case ' ': + case '\t': + case '\v': + case '\f': + next(ls); + continue; + case '-': + next(ls); + if (ls->current != '-') return '-'; + /* else is a comment */ + next(ls); + if (ls->current == '[') { + int sep = skip_sep(ls); + lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */ + if (sep >= 0) { + read_long_string(ls, NULL, sep); /* long comment */ + lj_str_resetbuf(&ls->sb); + continue; + } + } + /* else short comment */ + while (!currIsNewline(ls) && ls->current != END_OF_STREAM) + next(ls); + continue; + case '[': { + int sep = skip_sep(ls); + if (sep >= 0) { + read_long_string(ls, tv, sep); + return TK_string; + } else if (sep == -1) { + return '['; + } else { + lj_lex_error(ls, TK_string, LJ_ERR_XLDELIM); + continue; + } + } + case '=': + next(ls); + if (ls->current != '=') return '='; else { next(ls); return TK_eq; } + case '<': + next(ls); + if (ls->current != '=') return '<'; else { next(ls); return TK_le; } + case '>': + next(ls); + if (ls->current != '=') return '>'; else { next(ls); return TK_ge; } + case '~': + next(ls); + if (ls->current != '=') return '~'; else { next(ls); return TK_ne; } + case '"': + case '\'': + read_string(ls, ls->current, tv); + return TK_string; + case '.': + save_and_next(ls); + if (check_next(ls, ".")) { + if (check_next(ls, ".")) + return TK_dots; /* ... */ + else + return TK_concat; /* .. */ + } else if (!lj_ctype_isdigit(ls->current)) { + return '.'; + } else { + read_numeral(ls, tv); + return TK_number; + } + case END_OF_STREAM: + return TK_eof; + default: { + int c = ls->current; + next(ls); + return c; /* Single-char tokens (+ - / ...). */ + } + } + } +} + +/* -- Lexer API ----------------------------------------------------------- */ + +void lj_lex_start(lua_State *L, LexState *ls) +{ + ls->L = L; + ls->fs = NULL; + ls->n = 0; + ls->p = NULL; + ls->lookahead = TK_eof; /* No look-ahead token. */ + ls->linenumber = 1; + ls->lastline = 1; + lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF); + next(ls); /* Read-ahead first char. */ + if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb && + char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ + ls->n -= 2; + ls->p += 2; + next(ls); + } + if (ls->current == '#') { /* Skip POSIX #! header line. */ + do { + next(ls); + if (ls->current == END_OF_STREAM) return; + } while (!currIsNewline(ls)); + inclinenumber(ls); + } + if (ls->current == LUA_SIGNATURE[0]) { + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_XBCLOAD)); + lj_err_throw(L, LUA_ERRSYNTAX); + } + /* This is an unanchored GCstr before it's stored in the prototype. + ** Do this last since next() calls the reader which may call the GC. + */ + ls->chunkname = lj_str_newz(L, ls->chunkarg); +} + +void lj_lex_next(LexState *ls) +{ + ls->lastline = ls->linenumber; + if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */ + ls->token = llex(ls, &ls->tokenval); /* Get next token. */ + } else { /* Otherwise return lookahead token. 
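+    ** (Consume it and reset the lookahead slot to TK_eof, which doubles
+    ** as the 'no lookahead token' marker, cf. lj_lex_start().)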
*/
+    ls->token = ls->lookahead;
+    ls->lookahead = TK_eof;
+    ls->tokenval = ls->lookaheadval;
+  }
+}
+
+LexToken lj_lex_lookahead(LexState *ls)
+{
+  lua_assert(ls->lookahead == TK_eof);
+  ls->lookahead = llex(ls, &ls->lookaheadval);
+  return ls->lookahead;
+}
+
+const char *lj_lex_token2str(LexState *ls, LexToken token)
+{
+  if (token > TK_OFS)
+    return tokennames[token-TK_OFS-1];
+  else if (!lj_ctype_iscntrl(token))
+    return lj_str_pushf(ls->L, "%c", token);
+  else
+    return lj_str_pushf(ls->L, "char(%d)", token);
+}
+
+void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...)
+{
+  const char *tok;
+  va_list argp;
+  if (token == 0) {
+    tok = NULL;
+  } else if (token == TK_name || token == TK_string || token == TK_number) {
+    save(ls, '\0');
+    tok = ls->sb.buf;
+  } else {
+    tok = lj_lex_token2str(ls, token);
+  }
+  va_start(argp, em);
+  lj_err_lex(ls->L, strdata(ls->chunkname), tok, ls->linenumber, em, argp);
+  va_end(argp);
+}
+
+void lj_lex_init(lua_State *L)
+{
+  uint32_t i;
+  for (i = 0; i < TK_RESERVED; i++) {
+    GCstr *s = lj_str_newz(L, tokennames[i]);
+    fixstring(s);  /* Reserved words are never collected. */
+    s->reserved = cast_byte(i+1);
+  }
+}
+
diff --git a/src/lj_lex.h b/src/lj_lex.h
new file mode 100644
index 0000000000..cc5d5a9ff3
--- /dev/null
+++ b/src/lj_lex.h
@@ -0,0 +1,63 @@
+/*
+** Lexical analyzer.
+** Major parts taken verbatim from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#ifndef _LJ_LEX_H
+#define _LJ_LEX_H
+
+#include <stdarg.h>
+
+#include "lj_obj.h"
+#include "lj_err.h"
+
+/* Lua lexer tokens. */
+#define TKDEF(_, __) \
+  _(and) _(break) _(do) _(else) _(elseif) _(end) _(false) \
+  _(for) _(function) _(if) _(in) _(local) _(nil) _(not) _(or) \
+  _(repeat) _(return) _(then) _(true) _(until) _(while) \
+  __(concat, ..) __(dots, ...) __(eq, ==) __(ge, >=) __(le, <=) __(ne, ~=) \
+  __(number, <number>) __(name, <name>) __(string, <string>) __(eof, <eof>)
+
+enum {
+  TK_OFS = 256,
+#define TKENUM1(name)		TK_##name,
+#define TKENUM2(name, sym)	TK_##name,
+TKDEF(TKENUM1, TKENUM2)
+#undef TKENUM1
+#undef TKENUM2
+  TK_RESERVED = TK_while - TK_OFS
+};
+
+typedef int LexToken;
+
+/* Lua lexer state. */
+typedef struct LexState {
+  struct FuncState *fs;	/* Current FuncState. Defined in lj_parse.c. */
+  struct lua_State *L;	/* Lua state. */
+  TValue tokenval;	/* Current token value. */
+  TValue lookaheadval;	/* Lookahead token value. */
+  int current;		/* Current character (charint). */
+  LexToken token;	/* Current token. */
+  LexToken lookahead;	/* Lookahead token. */
+  SBuf sb;		/* String buffer for tokens. */
+  const char *p;	/* Current position in input buffer. */
+  MSize n;		/* Bytes left in input buffer. */
+  lua_Reader rfunc;	/* Reader callback. */
+  void *rdata;		/* Reader callback data. */
+  BCLine linenumber;	/* Input line counter. */
+  BCLine lastline;	/* Line of last token. */
+  GCstr *chunkname;	/* Current chunk name (interned string). */
+  const char *chunkarg;	/* Chunk name argument. */
+  uint32_t level;	/* Syntactical nesting level.
*/ +} LexState; + +LJ_FUNC void lj_lex_start(lua_State *L, LexState *ls); +LJ_FUNC void lj_lex_next(LexState *ls); +LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); +LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token); +LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...); +LJ_FUNC void lj_lex_init(lua_State *L); + +#endif diff --git a/src/lj_lib.c b/src/lj_lib.c new file mode 100644 index 0000000000..683c66d6ed --- /dev/null +++ b/src/lj_lib.c @@ -0,0 +1,216 @@ +/* +** Library function support. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_lib_c +#define LUA_CORE + +#include "lauxlib.h" + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_func.h" +#include "lj_vm.h" +#include "lj_lib.h" + +/* -- Library initialization ---------------------------------------------- */ + +static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize) +{ + if (libname) { + luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); + lua_getfield(L, -1, libname); + if (!tvistab(L->top-1)) { + L->top--; + if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, hsize) != NULL) + lj_err_callerv(L, LJ_ERR_BADMODN, libname); + settabV(L, L->top, tabV(L->top-1)); + L->top++; + lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */ + } + L->top--; + settabV(L, L->top-1, tabV(L->top)); + } else { + lua_createtable(L, 0, hsize); + } + return tabV(L->top-1); +} + +void lj_lib_register(lua_State *L, const char *libname, + const uint8_t *p, const lua_CFunction *cf) +{ + GCtab *env = tabref(L->env); + GCfunc *ofn = NULL; + int ffid = *p++; + GCtab *tab = lib_create_table(L, libname, *p++); + ptrdiff_t tpos = L->top - L->base; + + /* Avoid barriers further down. */ + if (isblack(obj2gco(tab))) lj_gc_barrierback(G(L), tab); + tab->nomm = 0; + + for (;;) { + uint32_t tag = *p++; + MSize len = tag & LIBINIT_LENMASK; + tag &= LIBINIT_TAGMASK; + if (tag != LIBINIT_STRING) { + const char *name; + MSize nuv = (MSize)(L->top - L->base - tpos); + GCfunc *fn = lj_func_newC(L, nuv, env); + if (nuv) { + L->top = L->base + tpos; + memcpy(fn->c.upvalue, L->top, sizeof(TValue)*nuv); + } + fn->c.ffid = (uint8_t)(ffid++); + name = (const char *)p; + p += len; + if (tag != LIBINIT_CF) { + fn->c.gate = makeasmfunc(p[0] + (p[1] << 8)); + p += 2; + } + if (tag == LIBINIT_ASM_) + fn->c.f = ofn->c.f; /* Copy handler from previous function. */ + else + fn->c.f = *cf++; /* Get cf or handler from C function table. */ + if (len) { + /* NOBARRIER: See above for common barrier. */ + setfuncV(L, lj_tab_setstr(L, tab, lj_str_new(L, name, len)), fn); + } + ofn = fn; + } else { + switch (tag | len) { + case LIBINIT_SET: + L->top -= 2; + if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) + env = tabV(L->top); + else /* NOBARRIER: See above for common barrier. 
*/ + copyTV(L, lj_tab_set(L, tab, L->top+1), L->top); + break; + case LIBINIT_NUMBER: + memcpy(&L->top->n, p, sizeof(double)); + L->top++; + p += sizeof(double); + break; + case LIBINIT_COPY: + copyTV(L, L->top, L->top - *p++); + L->top++; + break; + case LIBINIT_LASTCL: + setfuncV(L, L->top++, ofn); + break; + case LIBINIT_FFID: + ffid++; + break; + case LIBINIT_END: + return; + default: + setstrV(L, L->top++, lj_str_new(L, (const char *)p, len)); + p += len; + break; + } + } + } +} + +/* -- Type checks --------------------------------------------------------- */ + +TValue *lj_lib_checkany(lua_State *L, int narg) +{ + TValue *o = L->base + narg-1; + if (o >= L->top) + lj_err_arg(L, narg, LJ_ERR_NOVAL); + return o; +} + +GCstr *lj_lib_checkstr(lua_State *L, int narg) +{ + TValue *o = L->base + narg-1; + if (o < L->top) { + if (LJ_LIKELY(tvisstr(o))) { + return strV(o); + } else if (tvisnum(o)) { + GCstr *s = lj_str_fromnum(L, &o->n); + setstrV(L, o, s); + return s; + } + } + lj_err_argt(L, narg, LUA_TSTRING); + return NULL; /* unreachable */ +} + +GCstr *lj_lib_optstr(lua_State *L, int narg) +{ + TValue *o = L->base + narg-1; + return (o < L->top && !tvisnil(o)) ? lj_lib_checkstr(L, narg) : NULL; +} + +lua_Number lj_lib_checknum(lua_State *L, int narg) +{ + TValue *o = L->base + narg-1; + if (!(o < L->top && + (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))))) + lj_err_argt(L, narg, LUA_TNUMBER); + return numV(o); +} + +int32_t lj_lib_checkint(lua_State *L, int narg) +{ + return lj_num2int(lj_lib_checknum(L, narg)); +} + +int32_t lj_lib_optint(lua_State *L, int narg, int32_t def) +{ + TValue *o = L->base + narg-1; + return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def; +} + +GCfunc *lj_lib_checkfunc(lua_State *L, int narg) +{ + TValue *o = L->base + narg-1; + if (!(o < L->top && tvisfunc(o))) + lj_err_argt(L, narg, LUA_TFUNCTION); + return funcV(o); +} + +GCtab *lj_lib_checktab(lua_State *L, int narg) +{ + TValue *o = L->base + narg-1; + if (!(o < L->top && tvistab(o))) + lj_err_argt(L, narg, LUA_TTABLE); + return tabV(o); +} + +GCtab *lj_lib_checktabornil(lua_State *L, int narg) +{ + TValue *o = L->base + narg-1; + if (o < L->top) { + if (tvistab(o)) + return tabV(o); + else if (tvisnil(o)) + return NULL; + } + lj_err_arg(L, narg, LJ_ERR_NOTABN); + return NULL; /* unreachable */ +} + +int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst) +{ + GCstr *s = def >= 0 ? lj_lib_optstr(L, narg) : lj_lib_checkstr(L, narg); + if (s) { + const char *opt = strdata(s); + MSize len = s->len; + int i; + for (i = 0; *(const uint8_t *)lst; i++) { + if (*(const uint8_t *)lst == len && memcmp(opt, lst+1, len) == 0) + return i; + lst += 1+*(const uint8_t *)lst; + } + lj_err_argv(L, narg, LJ_ERR_INVOPTM, opt); + } + return def; +} + diff --git a/src/lj_lib.h b/src/lj_lib.h new file mode 100644 index 0000000000..1cba377860 --- /dev/null +++ b/src/lj_lib.h @@ -0,0 +1,84 @@ +/* +** Library function support. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_LIB_H +#define _LJ_LIB_H + +#include "lj_obj.h" + +/* +** A fallback handler is called by the assembler VM if the fast path fails: +** +** - too few arguments: unrecoverable. +** - wrong argument type: recoverable, if coercion succeeds. +** - bad argument value: unrecoverable. +** - stack overflow: recoverable, if stack reallocation succeeds. +** - extra handling: recoverable. 
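+**
+** A minimal sketch of this contract (hypothetical handler, shown for
+** illustration only -- lj_ffh_example is not part of this file):
+**
+**   static int lj_ffh_example(lua_State *L)
+**   {
+**     lj_lib_checknum(L, 1);  /* Coerces a string arg in place or throws. */
+**     return FFH_RETRY;       /* Recovered: VM retries the fast path,
+**                             ** which now succeeds (see below). */
+**   }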
+** +** The unrecoverable cases throw an error with lj_err_arg(), lj_err_argtype(), +** lj_err_caller() or lj_err_callermsg(). +** The recoverable cases return 0 or the number of results + 1. +** The assembler VM retries the fast path only if 0 is returned. +** This time the fallback must not be called again or it gets stuck in a loop. +*/ + +/* Return values from fallback handler. */ +#define FFH_RETRY 0 +#define FFH_UNREACHABLE FFH_RETRY +#define FFH_RES(n) ((n)+1) + +LJ_FUNC TValue *lj_lib_checkany(lua_State *L, int narg); +LJ_FUNC GCstr *lj_lib_checkstr(lua_State *L, int narg); +LJ_FUNC GCstr *lj_lib_optstr(lua_State *L, int narg); +LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); +LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); +LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); +LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); +LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); +LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); +LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); + +#define lj_lib_opt(L, narg, gotarg, noarg) \ + { TValue *_o = L->base + (narg)-1; \ + if (_o < L->top && !tvisnil(_o)) { gotarg } else { noarg } } + +/* Avoid including lj_frame.h. */ +#define lj_lib_upvalue(L, n) \ + (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1]) + +/* Library function declarations. Scanned by buildvm. */ +#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) +#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) +#define LJLIB_ASM_(name) +#define LJLIB_SET(name) +#define LJLIB_PUSH(arg) +#define LJLIB_REC(handler) +#define LJLIB_NOREGUV +#define LJLIB_NOREG + +#define LJ_LIB_REG(L, name) \ + lj_lib_register(L, #name, lj_lib_init_##name, lj_lib_cf_##name) +#define LJ_LIB_REG_(L, regname, name) \ + lj_lib_register(L, regname, lj_lib_init_##name, lj_lib_cf_##name) + +LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, + const uint8_t *init, const lua_CFunction *cf); + +/* Library init data tags. */ +#define LIBINIT_LENMASK 0x3f +#define LIBINIT_TAGMASK 0xc0 +#define LIBINIT_CF 0x00 +#define LIBINIT_ASM 0x40 +#define LIBINIT_ASM_ 0x80 +#define LIBINIT_STRING 0xc0 +#define LIBINIT_MAXSTR 0x39 +#define LIBINIT_SET 0xfa +#define LIBINIT_NUMBER 0xfb +#define LIBINIT_COPY 0xfc +#define LIBINIT_LASTCL 0xfd +#define LIBINIT_FFID 0xfe +#define LIBINIT_END 0xff + +#endif diff --git a/src/lj_mcode.c b/src/lj_mcode.c new file mode 100644 index 0000000000..e5791e9f8f --- /dev/null +++ b/src/lj_mcode.c @@ -0,0 +1,260 @@ +/* +** Machine code management. +** Copyright (C) 2005-2009 Mike Pall. 
See Copyright Notice in luajit.h +*/ + +#define lj_mcode_c +#define LUA_CORE + +#include "lj_obj.h" + +#if LJ_HASJIT + +#include "lj_gc.h" +#include "lj_jit.h" +#include "lj_mcode.h" +#include "lj_trace.h" +#include "lj_dispatch.h" + +/* -- OS-specific functions ----------------------------------------------- */ + +#if defined(LUA_USE_WIN) + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +#define MCPROT_RW PAGE_READWRITE +#define MCPROT_RX PAGE_EXECUTE_READ +#define MCPROT_RWX PAGE_EXECUTE_READWRITE + +static LJ_AINLINE void *mcode_alloc(jit_State *J, size_t sz, DWORD prot) +{ + void *p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); + if (!p) + lj_trace_err(J, LJ_TRERR_MCODEAL); + return p; +} + +static LJ_AINLINE void mcode_free(jit_State *J, void *p, size_t sz) +{ + UNUSED(J); UNUSED(sz); + VirtualFree(p, 0, MEM_RELEASE); +} + +static LJ_AINLINE void mcode_setprot(void *p, size_t sz, DWORD prot) +{ + DWORD oprot; + VirtualProtect(p, sz, prot, &oprot); +} + +#elif defined(LUA_USE_POSIX) + +#include <sys/mman.h> + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +#define MCPROT_RW (PROT_READ|PROT_WRITE) +#define MCPROT_RX (PROT_READ|PROT_EXEC) +#define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC) + +static LJ_AINLINE void *mcode_alloc(jit_State *J, size_t sz, int prot) +{ + void *p = mmap(NULL, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (p == MAP_FAILED) + lj_trace_err(J, LJ_TRERR_MCODEAL); + return p; +} + +static LJ_AINLINE void mcode_free(jit_State *J, void *p, size_t sz) +{ + UNUSED(J); + munmap(p, sz); +} + +static LJ_AINLINE void mcode_setprot(void *p, size_t sz, int prot) +{ + mprotect(p, sz, prot); +} + +#else + +/* Fallback allocator. This will fail if memory is not executable by default. */ +#define LUAJIT_UNPROTECT_MCODE +#define MCPROT_RW 0 +#define MCPROT_RX 0 +#define MCPROT_RWX 0 + +static LJ_AINLINE void *mcode_alloc(jit_State *J, size_t sz, int prot) +{ + UNUSED(prot); + return lj_mem_new(J->L, sz); +} + +static LJ_AINLINE void mcode_free(jit_State *J, void *p, size_t sz) +{ + lj_mem_free(J2G(J), p, sz); +} + +#define mcode_setprot(p, sz, prot) UNUSED(p) + +#endif + +/* -- MCode area management ----------------------------------------------- */ + +/* Define this ONLY if the page protection twiddling becomes a bottleneck. */ +#ifdef LUAJIT_UNPROTECT_MCODE + +/* It's generally considered to be a potential security risk to have +** pages with simultaneous write *and* execute access in a process. +** +** Do not even think about using this mode for server processes or +** apps handling untrusted external data (such as a browser). +** +** The security risk is not in LuaJIT itself -- but if an adversary finds +** any *other* flaw in your C application logic, then any RWX memory page +** simplifies writing an exploit considerably. +*/ +#define MCPROT_GEN MCPROT_RWX +#define MCPROT_RUN MCPROT_RWX + +#else + +/* This is the default behaviour and much safer: +** +** Most of the time the memory pages holding machine code are executable, +** but NONE of them is writable. +** +** The current memory area is marked read-write (but NOT executable) only +** during the short time window while the assembler generates machine code. +*/ +#define MCPROT_GEN MCPROT_RW +#define MCPROT_RUN MCPROT_RX + +#endif + +/* Change protection of MCode area. 
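The current protection is cached in J->mcprot, so redundant mprotect/VirtualProtect calls are avoided.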
*/ +static void mcode_protect(jit_State *J, int prot) +{ +#ifdef LUAJIT_UNPROTECT_MCODE + UNUSED(J); UNUSED(prot); +#else + if (J->mcprot != prot) { + mcode_setprot(J->mcarea, J->szmcarea, prot); + J->mcprot = prot; + } +#endif +} + +/* Linked list of MCode areas. */ +typedef struct MCLink { + MCode *next; /* Next area. */ + size_t size; /* Size of current area. */ +} MCLink; + +/* Allocate a new MCode area. */ +static void mcode_allocarea(jit_State *J) +{ + MCode *oldarea = J->mcarea; + size_t sz = (size_t)J->param[JIT_P_sizemcode] << 10; + sz = (sz + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); + J->mcarea = (MCode *)mcode_alloc(J, sz, MCPROT_GEN); + J->szmcarea = sz; + J->mcprot = MCPROT_GEN; + J->mctop = (MCode *)((char *)J->mcarea + J->szmcarea); + J->mcbot = (MCode *)((char *)J->mcarea + sizeof(MCLink)); + ((MCLink *)J->mcarea)->next = oldarea; + ((MCLink *)J->mcarea)->size = sz; + J->szallmcarea += sz; +} + +/* Free all MCode areas. */ +void lj_mcode_free(jit_State *J) +{ + MCode *mc = J->mcarea; + J->mcarea = NULL; + J->szallmcarea = 0; + while (mc) { + MCode *next = ((MCLink *)mc)->next; + mcode_free(J, mc, ((MCLink *)mc)->size); + mc = next; + } +} + +/* -- MCode transactions -------------------------------------------------- */ + +/* Reserve the remainder of the current MCode area. */ +MCode *lj_mcode_reserve(jit_State *J, MCode **lim) +{ + if (!J->mcarea) + mcode_allocarea(J); + else + mcode_protect(J, MCPROT_GEN); + *lim = J->mcbot; + return J->mctop; +} + +/* Commit the top part of the current MCode area. */ +void lj_mcode_commit(jit_State *J, MCode *top) +{ + J->mctop = top; + mcode_protect(J, MCPROT_RUN); +} + +/* Abort the reservation. */ +void lj_mcode_abort(jit_State *J) +{ + mcode_protect(J, MCPROT_RUN); +} + +/* Set/reset protection to allow patching of MCode areas. */ +MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) +{ +#ifdef LUAJIT_UNPROTECT_MCODE + UNUSED(J); UNUSED(ptr); UNUSED(finish); + return NULL; +#else + if (finish) { + if (J->mcarea == ptr) + mcode_protect(J, MCPROT_RUN); + else + mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN); + return NULL; + } else { + MCode *mc = J->mcarea; + /* Try current area first to use the protection cache. */ + if (ptr >= mc && ptr < mc + J->szmcarea) { + mcode_protect(J, MCPROT_GEN); + return mc; + } + /* Otherwise search through the list of MCode areas. */ + for (;;) { + mc = ((MCLink *)mc)->next; + lua_assert(mc != NULL); + if (ptr >= mc && ptr < mc + ((MCLink *)mc)->size) { + mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN); + return mc; + } + } + } +#endif +} + +/* Limit of MCode reservation reached. */ +void lj_mcode_limiterr(jit_State *J, size_t need) +{ + size_t sizemcode, maxmcode; + lj_mcode_abort(J); + sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10; + sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); + maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10; + if ((size_t)need > sizemcode) + lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */ + if (J->szallmcarea + sizemcode > maxmcode) + lj_trace_err(J, LJ_TRERR_MCODEAL); + mcode_allocarea(J); + lj_trace_err(J, LJ_TRERR_MCODELM); /* Retry with new area. */ +} + +#endif diff --git a/src/lj_mcode.h b/src/lj_mcode.h new file mode 100644 index 0000000000..d4573bf4c2 --- /dev/null +++ b/src/lj_mcode.h @@ -0,0 +1,23 @@ +/* +** Machine code management. +** Copyright (C) 2005-2009 Mike Pall. 
See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_MCODE_H +#define _LJ_MCODE_H + +#include "lj_jit.h" + +#if LJ_HASJIT +LJ_FUNC void lj_mcode_free(jit_State *J); +LJ_FUNC MCode *lj_mcode_reserve(jit_State *J, MCode **lim); +LJ_FUNC void lj_mcode_commit(jit_State *J, MCode *m); +LJ_FUNC void lj_mcode_abort(jit_State *J); +LJ_FUNC MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish); +LJ_FUNC_NORET void lj_mcode_limiterr(jit_State *J, size_t need); + +#define lj_mcode_commitbot(J, m) (J->mcbot = (m)) + +#endif + +#endif diff --git a/src/lj_meta.c b/src/lj_meta.c new file mode 100644 index 0000000000..dff01f8554 --- /dev/null +++ b/src/lj_meta.c @@ -0,0 +1,358 @@ +/* +** Metamethod handling. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +*/ + +#define lj_meta_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_meta.h" +#include "lj_bc.h" +#include "lj_vm.h" + +/* -- Metamethod handling ------------------------------------------------- */ + +/* String interning of metamethod names for fast indexing. */ +void lj_meta_init(lua_State *L) +{ +#define MMNAME(name) "__" #name + const char *metanames = MMDEF(MMNAME); +#undef MMNAME + global_State *g = G(L); + const char *p, *q; + uint32_t i; + for (i = 0, p = metanames; *p; i++, p = q) { + GCstr *s; + for (q = p+2; *q && *q != '_'; q++) ; + s = lj_str_new(L, p, (size_t)(q-p)); + fixstring(s); /* Never collect these names. */ + /* NOBARRIER: g->mmname[] is a GC root. */ + setgcref(g->mmname[i], obj2gco(s)); + } +} + +/* Negative caching of a few fast metamethods. See the lj_meta_fast() macro. */ +cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name) +{ + cTValue *mo = lj_tab_getstr(mt, name); + lua_assert(mm <= MM_FAST); + if (!mo || tvisnil(mo)) { /* No metamethod? */ + mt->nomm |= cast_byte(1u<<mm); /* Set negative cache flag. */ + return NULL; + } + return mo; +} + +/* Lookup metamethod for object. */ +cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm) +{ + GCtab *mt; + if (tvistab(o)) + mt = tabref(tabV(o)->metatable); + else if (tvisudata(o)) + mt = tabref(udataV(o)->metatable); + else + mt = tabref(G(L)->basemt[itypemap(o)]); + if (mt) { + cTValue *mo = lj_tab_getstr(mt, strref(G(L)->mmname[mm])); + if (mo) + return mo; + } + return niltv(L); +} + +/* Setup call to metamethod to be run by Assembler VM. */ +static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo, + cTValue *a, cTValue *b) +{ + /* + ** |-- framesize -> top top+1 top+2 top+3 + ** before: [func slots ...] + ** mm setup: [func slots ...] [cont|?] [mo|tmtype] [a] [b] + ** in asm: [func slots ...] [cont|PC] [mo|delta] [a] [b] + ** ^-- func base ^-- mm base + ** after mm: [func slots ...] [result] + ** ^-- copy to base[PC_RA] --/ for lj_cont_ra + ** istruecond + branch for lj_cont_cond* + ** ignore for lj_cont_nop + ** next PC: [func slots ...] + */ + TValue *top = L->top; + if (curr_funcisL(L)) top = curr_topL(L); + setcont(top, cont); /* Assembler VM stores PC in upper word. */ + copyTV(L, top+1, mo); /* Store metamethod and two arguments. */ + copyTV(L, top+2, a); + copyTV(L, top+3, b); + return top+2; /* Return new base. */ +} + +/* -- C helpers for some instructions, called from assembler VM ----------- */ + +/* Helper for TGET*. __index chain and metamethod. 
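Returns the value, or NULL to signal a metamethod call that the VM must perform.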
*/ +cTValue *lj_meta_tget(lua_State *L, cTValue *o, cTValue *k) +{ + int loop; + for (loop = 0; loop < LJ_MAX_IDXCHAIN; loop++) { + cTValue *mo; + if (tvistab(o)) { + GCtab *t = tabV(o); + cTValue *tv = lj_tab_get(L, t, k); + if (!tvisnil(tv) || + !(mo = lj_meta_fast(L, tabref(t->metatable), MM_index))) + return tv; + } else if (tvisnil(mo = lj_meta_lookup(L, o, MM_index))) { + lj_err_optype(L, o, LJ_ERR_OPINDEX); + return NULL; /* unreachable */ + } + if (tvisfunc(mo)) { + L->top = mmcall(L, lj_cont_ra, mo, o, k); + return NULL; /* Trigger metamethod call. */ + } + o = mo; + } + lj_err_msg(L, LJ_ERR_GETLOOP); + return NULL; /* unreachable */ +} + +/* Helper for TSET*. __newindex chain and metamethod. */ +TValue *lj_meta_tset(lua_State *L, cTValue *o, cTValue *k) +{ + TValue tmp; + int loop; + for (loop = 0; loop < LJ_MAX_IDXCHAIN; loop++) { + cTValue *mo; + if (tvistab(o)) { + GCtab *t = tabV(o); + TValue *tv = lj_tab_set(L, t, k); + if (!tvisnil(tv) || + !(mo = lj_meta_fast(L, tabref(t->metatable), MM_newindex))) { + if (isblack(obj2gco(t))) lj_gc_barrierback(G(L), t); + return tv; + } + } else if (tvisnil(mo = lj_meta_lookup(L, o, MM_newindex))) { + lj_err_optype(L, o, LJ_ERR_OPINDEX); + return NULL; /* unreachable */ + } + if (tvisfunc(mo)) { + L->top = mmcall(L, lj_cont_nop, mo, o, k); + /* L->top+2 = v filled in by caller. */ + return NULL; /* Trigger metamethod call. */ + } + copyTV(L, &tmp, mo); + o = &tmp; + } + lj_err_msg(L, LJ_ERR_SETLOOP); + return NULL; /* unreachable */ +} + +static cTValue *str2num(cTValue *o, TValue *n) +{ + if (tvisnum(o)) + return o; + else if (tvisstr(o) && lj_str_numconv(strVdata(o), n)) + return n; + else + return NULL; +} + +/* Helper for arithmetic instructions. Coercion, metamethod. */ +TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc, + BCReg op) +{ + MMS mm = bcmode_mm(op); + TValue tempb, tempc; + cTValue *b, *c; + if ((b = str2num(rb, &tempb)) != NULL && + (c = str2num(rc, &tempc)) != NULL) { /* Try coercion first. */ + setnumV(ra, lj_vm_foldarith(numV(b), numV(c), (int)mm-MM_add)); + return NULL; + } else { + cTValue *mo = lj_meta_lookup(L, rb, mm); + if (tvisnil(mo)) { + mo = lj_meta_lookup(L, rc, mm); + if (tvisnil(mo)) { + if (str2num(rb, &tempb) == NULL) rc = rb; + lj_err_optype(L, rc, LJ_ERR_OPARITH); + return NULL; /* unreachable */ + } + } + return mmcall(L, lj_cont_ra, mo, rb, rc); + } +} + +/* In-place coercion of a number to a string. */ +static LJ_AINLINE int tostring(lua_State *L, TValue *o) +{ + if (tvisstr(o)) { + return 1; + } else if (tvisnum(o)) { + setstrV(L, o, lj_str_fromnum(L, &o->n)); + return 1; + } else { + return 0; + } +} + +/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */ +TValue *lj_meta_cat(lua_State *L, TValue *top, int left) +{ + do { + int n = 1; + if (!(tvisstr(top-1) || tvisnum(top-1)) || !tostring(L, top)) { + cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); + if (tvisnil(mo)) { + mo = lj_meta_lookup(L, top, MM_concat); + if (tvisnil(mo)) { + if (tvisstr(top-1) || tvisnum(top-1)) top++; + lj_err_optype(L, top-1, LJ_ERR_OPCAT); + return NULL; /* unreachable */ + } + } + /* One of the top two elements is not a string, call __cat metamethod: + ** + ** before: [...][CAT stack .........................] + ** top-1 top top+1 top+2 + ** pick two: [...][CAT stack ...] [o1] [o2] + ** setup mm: [...][CAT stack ...] [cont|?] [mo|tmtype] [o1] [o2] + ** in asm: [...][CAT stack ...] 
[cont|PC] [mo|delta] [o1] [o2] + ** ^-- func base ^-- mm base + ** after mm: [...][CAT stack ...] <--push-- [result] + ** next step: [...][CAT stack .............] + */ + copyTV(L, top+2, top) /* Careful with the order of stack copies! */ + copyTV(L, top+1, top-1) + copyTV(L, top, mo) + setcont(top-1, lj_cont_cat); + return top+1; /* Trigger metamethod call. */ + } else if (strV(top)->len == 0) { /* Shortcut. */ + (void)tostring(L, top-1); + } else { + /* Pick as many strings as possible from the top and concatenate them: + ** + ** before: [...][CAT stack ...........................] + ** pick str: [...][CAT stack ...] [...... strings ......] + ** concat: [...][CAT stack ...] [result] + ** next step: [...][CAT stack ............] + */ + MSize tlen = strV(top)->len; + char *buffer; + int i; + for (n = 1; n <= left && tostring(L, top-n); n++) { + MSize len = strV(top-n)->len; + if (len >= LJ_MAX_STR - tlen) + lj_err_msg(L, LJ_ERR_STROV); + tlen += len; + } + buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen); + n--; + tlen = 0; + for (i = n; i >= 0; i--) { + MSize len = strV(top-i)->len; + memcpy(buffer + tlen, strVdata(top-i), len); + tlen += len; + } + setstrV(L, top-n, lj_str_new(L, buffer, tlen)); + } + left -= n; + top -= n; + } while (left >= 1); + lj_gc_check_fixtop(L); + return NULL; +} + +/* Helper for LEN. __len metamethod. */ +TValue *lj_meta_len(lua_State *L, cTValue *o) +{ + cTValue *mo = lj_meta_lookup(L, o, MM_len); + if (tvisnil(mo)) { + lj_err_optype(L, o, LJ_ERR_OPLEN); + return NULL; /* unreachable */ + } + return mmcall(L, lj_cont_ra, mo, o, niltv(L)); +} + +/* Helper for equality comparisons. __eq metamethod. */ +TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne) +{ + /* Field metatable must be at same offset for GCtab and GCudata! */ + cTValue *mo = lj_meta_fast(L, tabref(o1->gch.metatable), MM_eq); + if (mo) { + TValue *top; + int it; + if (tabref(o1->gch.metatable) != tabref(o2->gch.metatable)) { + cTValue *mo2 = lj_meta_fast(L, tabref(o2->gch.metatable), MM_eq); + if (mo2 == NULL || !lj_obj_equal(mo, mo2)) + return cast(TValue *, (intptr_t)ne); + } + top = curr_top(L); + setcont(top, ne ? lj_cont_condf : lj_cont_condt); + copyTV(L, top+1, mo); + it = o1->gch.gct == ~LJ_TTAB ? LJ_TTAB : LJ_TUDATA; + setgcV(L, top+2, &o1->gch, it); + setgcV(L, top+3, &o2->gch, it); + return top+2; /* Trigger metamethod call. */ + } + return cast(TValue *, (intptr_t)ne); +} + +/* Helper for ordered comparisons. String compare, __lt/__le metamethods. */ +TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op) +{ + if (itype(o1) == itype(o2)) { /* Never called with two numbers. */ + if (tvisstr(o1) && tvisstr(o2)) { + int32_t res = lj_str_cmp(strV(o1), strV(o2)); + return cast(TValue *, (intptr_t)(((op&2) ? res <= 0 : res < 0) ^ (op&1))); + } else { + trymt: + while (1) { + ASMFunction cont = (op & 1) ? lj_cont_condf : lj_cont_condt; + MMS mm = (op & 2) ? MM_le : MM_lt; + cTValue *mo = lj_meta_lookup(L, o1, mm); + cTValue *mo2 = lj_meta_lookup(L, o2, mm); + if (tvisnil(mo) || !lj_obj_equal(mo, mo2)) { + if (op & 2) { /* MM_le not found: retry with MM_lt. */ + cTValue *ot = o1; o1 = o2; o2 = ot; /* Swap operands. */ + op ^= 3; /* Use LT and flip condition. */ + continue; + } + goto err; + } + return mmcall(L, cont, mo, o1, o2); + } + } + } else if (tvisbool(o1) && tvisbool(o2)) { + goto trymt; + } else { + err: + lj_err_comp(L, o1, o2); + return NULL; + } +} + +/* Helper for calls. __call metamethod. 
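Shifts the function and all arguments up by one slot and stores the handler in the function slot.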
*/ +void lj_meta_call(lua_State *L, TValue *func, TValue *top) +{ + cTValue *mo = lj_meta_lookup(L, func, MM_call); + TValue *p; + if (!tvisfunc(mo)) + lj_err_optype_call(L, func); + for (p = top; p > func; p--) copyTV(L, p, p-1); + copyTV(L, func, mo); +} + +/* Helper for FORI. Coercion. */ +void lj_meta_for(lua_State *L, TValue *base) +{ + if (!str2num(base, base)) lj_err_msg(L, LJ_ERR_FORINIT); + if (!str2num(base+1, base+1)) lj_err_msg(L, LJ_ERR_FORLIM); + if (!str2num(base+2, base+2)) lj_err_msg(L, LJ_ERR_FORSTEP); +} + diff --git a/src/lj_meta.h b/src/lj_meta.h new file mode 100644 index 0000000000..60d1e79e0e --- /dev/null +++ b/src/lj_meta.h @@ -0,0 +1,33 @@ +/* +** Metamethod handling. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_META_H +#define _LJ_META_H + +#include "lj_obj.h" + +/* Metamethod handling */ +LJ_FUNC void lj_meta_init(lua_State *L); +LJ_FUNC cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name); +LJ_FUNC cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm); + +#define lj_meta_fastg(g, mt, mm) \ + ((mt) == NULL ? NULL : ((mt)->nomm & (1u<<(mm))) ? NULL : \ + lj_meta_cache(mt, mm, strref((g)->mmname[mm]))) +#define lj_meta_fast(L, mt, mm) lj_meta_fastg(G(L), mt, mm) + +/* C helpers for some instructions, called from assembler VM. */ +LJ_FUNCA cTValue *lj_meta_tget(lua_State *L, cTValue *o, cTValue *k); +LJ_FUNCA TValue *lj_meta_tset(lua_State *L, cTValue *o, cTValue *k); +LJ_FUNCA TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, + cTValue *rc, BCReg op); +LJ_FUNCA TValue *lj_meta_cat(lua_State *L, TValue *top, int left); +LJ_FUNCA TValue *lj_meta_len(lua_State *L, cTValue *o); +LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); +LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); +LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); +LJ_FUNCA void lj_meta_for(lua_State *L, TValue *base); + +#endif diff --git a/src/lj_obj.c b/src/lj_obj.c new file mode 100644 index 0000000000..d26a6b383d --- /dev/null +++ b/src/lj_obj.c @@ -0,0 +1,41 @@ +/* +** Miscellaneous object handling. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_obj_c +#define LUA_CORE + +#include "lj_obj.h" + +/* Object type names. */ +LJ_DATADEF const char *const lj_obj_typename[] = { /* ORDER LUA_T */ + "no value", "nil", "boolean", "userdata", "number", "string", + "table", "function", "userdata", "thread", "proto", "upval" +}; + +LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */ + "nil", "boolean", "boolean", "userdata", "string", "upval", "thread", + "proto", "function", "deadkey", "table", "userdata", "number" +}; + +/* Compare two objects without calling metamethods. */ +int lj_obj_equal(cTValue *o1, cTValue *o2) +{ + if (itype(o1) == itype(o2)) { + if (tvispri(o1)) + return 1; + if (!tvisnum(o1)) { +#if LJ_64 + if (tvislightud(o1)) + return o1->u64 == o2->u64; + else +#endif + return gcrefeq(o1->gcr, o2->gcr); + } + } else if (!tvisnum(o1) || !tvisnum(o2)) { + return 0; + } + return numV(o1) == numV(o2); +} + diff --git a/src/lj_obj.h b/src/lj_obj.h new file mode 100644 index 0000000000..e5ea713d74 --- /dev/null +++ b/src/lj_obj.h @@ -0,0 +1,676 @@ +/* +** LuaJIT VM tags, values and objects. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. 
See Copyright Notice in lua.h +*/ + +#ifndef _LJ_OBJ_H +#define _LJ_OBJ_H + +#include "lua.h" +#include "lj_def.h" +#include "lj_arch.h" + +/* -- Memory references (32 bit address space) ---------------------------- */ + +/* Memory size. */ +typedef uint32_t MSize; + +/* Memory reference */ +typedef struct MRef { + uint32_t ptr32; /* Pseudo 32 bit pointer. */ +} MRef; + +#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) + +#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) +#define setmrefr(r, v) ((r).ptr32 = (v).ptr32) + +/* -- GC object references (32 bit address space) ------------------------- */ + +/* GCobj reference */ +typedef struct GCRef { + uint32_t gcptr32; /* Pseudo 32 bit pointer. */ +} GCRef; + +/* Common GC header for all collectable objects. */ +#define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct +/* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */ + +#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32) +#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32) +#define gcrefu(r) ((r).gcptr32) +#define gcrefi(r) ((int32_t)(r).gcptr32) +#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32) +#define gcnext(gc) (gcref((gc)->gch.nextgc)) + +#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch) +#define setgcrefi(r, i) ((r).gcptr32 = (uint32_t)(i)) +#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p)) +#define setgcrefnull(r) ((r).gcptr32 = 0) +#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32) + +/* IMPORTANT NOTE: +** +** All uses of the setgcref* macros MUST be accompanied with a write barrier. +** +** This is to ensure the integrity of the incremental GC. The invariant +** to preserve is that a black object never points to a white object. +** I.e. never store a white object into a field of a black object. +** +** It's ok to LEAVE OUT the write barrier ONLY in the following cases: +** - The source is not a GC object (NULL). +** - The target is a GC root. I.e. everything in global_State. +** - The target is a lua_State field (threads are never black). +** - The target is a stack slot, see setgcV et al. +** - The target is an open upvalue, i.e. pointing to a stack slot. +** - The target is a newly created object (i.e. marked white). But make +** sure nothing invokes the GC inbetween. +** - The target and the source are the same object (self-reference). +** - The target already contains the object (e.g. moving elements around). +** +** The most common case is a store to a stack slot. All other cases where +** a barrier has been omitted are annotated with a NOBARRIER comment. +** +** The same logic applies for stores to table slots (array part or hash +** part). ALL uses of lj_tab_set* require a barrier for the stored *value* +** (if it's a GC object). The barrier for the *key* is already handled +** internally by lj_tab_newkey. +*/ + +/* -- Common type definitions --------------------------------------------- */ + +/* Types for handling bytecodes. Need this here, details in lj_bc.h. */ +typedef uint32_t BCIns; /* Bytecode instruction. */ +typedef uint32_t BCPos; /* Bytecode position. */ +typedef uint32_t BCReg; /* Bytecode register. */ +typedef int32_t BCLine; /* Bytecode line number. */ + +/* Internal assembler functions. Never call these directly from C. */ +typedef void (*ASMFunction)(void); + +/* Resizable string buffer. Need this here, details in lj_str.h. */ +typedef struct SBuf { + char *buf; /* String buffer base. */ + MSize n; /* String buffer length. 
*/ + MSize sz; /* String buffer size. */ +} SBuf; + +/* -- Tags and values ----------------------------------------------------- */ + +/* Frame link. */ +typedef union { + int32_t ftsz; /* Frame type and size of previous frame. */ + MRef pcr; /* Overlaps PC for Lua frames. */ +} FrameLink; + +/* Tagged value. */ +typedef LJ_ALIGN(8) union TValue { + uint64_t u64; /* 64 bit pattern overlaps number. */ + lua_Number n; /* Number object overlaps split tag/value object. */ + struct { + LJ_ENDIAN_LOHI( + GCRef gcr; /* GCobj reference (if any). */ + , int32_t it; /* Internal object tag. Must overlap MSW of number. */ + ) + }; + struct { + LJ_ENDIAN_LOHI( + GCRef func; /* Function for next frame (or dummy L). */ + , FrameLink tp; /* Link to previous frame. */ + ) + } fr; + struct { + LJ_ENDIAN_LOHI( + uint32_t lo; /* Lower 32 bits of number. */ + , uint32_t hi; /* Upper 32 bits of number. */ + ) + } u32; +} TValue; + +typedef const TValue cTValue; + +#define tvref(r) (mref(r, TValue)) + +/* More external and GCobj tags for internal objects. */ +#define LAST_TT LUA_TTHREAD + +#define LUA_TPROTO (LAST_TT+1) +#define LUA_TUPVAL (LAST_TT+2) +#define LUA_TDEADKEY (LAST_TT+3) + +/* Internal object tags. +** +** Internal tags overlap the MSW of a number object (must be a double). +** Interpreted as a double these are special NaNs. The FPU only generates +** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available +** for use as internal tags. Small negative numbers are used to shorten the +** encoding of type comparisons (reg/mem against sign-ext. 8 bit immediate). +** +** ---MSW---.---LSW--- +** primitive types | itype | | +** lightuserdata | itype | void * | (32 bit platforms) +** lightuserdata |fffc| void * | (64 bit platforms, 48 bit pointers) +** GC objects | itype | GCRef | +** number -------double------ +** +** ORDER LJ_T +** Primitive types nil/false/true must be first, lightuserdata next. +** GC objects are at the end, table/userdata must be lowest. +** Also check lj_ir.h for similar ordering constraints. +*/ +#define LJ_TNIL (-1) +#define LJ_TFALSE (-2) +#define LJ_TTRUE (-3) +#define LJ_TLIGHTUD (-4) +#define LJ_TSTR (-5) +#define LJ_TUPVAL (-6) +#define LJ_TTHREAD (-7) +#define LJ_TPROTO (-8) +#define LJ_TFUNC (-9) +#define LJ_TDEADKEY (-10) +#define LJ_TTAB (-11) +#define LJ_TUDATA (-12) +/* This is just the canonical number type used in some places. */ +#define LJ_TNUMX (-13) + +#if LJ_64 +#define LJ_TISNUM ((uint32_t)0xfff80000) +#else +#define LJ_TISNUM ((uint32_t)LJ_TNUMX) +#endif +#define LJ_TISTRUECOND ((uint32_t)LJ_TFALSE) +#define LJ_TISPRI ((uint32_t)LJ_TTRUE) +#define LJ_TISGCV ((uint32_t)(LJ_TSTR+1)) +#define LJ_TISTABUD ((uint32_t)LJ_TTAB) + +/* -- TValue getters/setters ---------------------------------------------- */ + +/* Macros to test types. 
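Note that tvisnum() gets by with a single unsigned comparison, since every non-number tag is a special NaN pattern above LJ_TISNUM.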
*/ +#define itype(o) ((o)->it) +#define uitype(o) ((uint32_t)itype(o)) +#define tvisnil(o) (itype(o) == LJ_TNIL) +#define tvisfalse(o) (itype(o) == LJ_TFALSE) +#define tvistrue(o) (itype(o) == LJ_TTRUE) +#define tvisbool(o) (tvisfalse(o) || tvistrue(o)) +#if LJ_64 +#define tvislightud(o) ((itype(o) >> 16) == LJ_TLIGHTUD) +#else +#define tvislightud(o) (itype(o) == LJ_TLIGHTUD) +#endif +#define tvisstr(o) (itype(o) == LJ_TSTR) +#define tvisfunc(o) (itype(o) == LJ_TFUNC) +#define tvisthread(o) (itype(o) == LJ_TTHREAD) +#define tvisproto(o) (itype(o) == LJ_TPROTO) +#define tvistab(o) (itype(o) == LJ_TTAB) +#define tvisudata(o) (itype(o) == LJ_TUDATA) +#define tvisnum(o) (uitype(o) <= LJ_TISNUM) + +#define tvistruecond(o) (uitype(o) < LJ_TISTRUECOND) +#define tvispri(o) (uitype(o) >= LJ_TISPRI) +#define tvistabud(o) (uitype(o) <= LJ_TISTABUD) /* && !tvisnum() */ +#define tvisgcv(o) \ + ((uitype(o) - LJ_TISGCV) > ((uint32_t)LJ_TNUMX - LJ_TISGCV)) + +/* Special macros to test numbers for NaN, +0, -0, +1 and raw equality. */ +#define tvisnan(o) ((o)->n != (o)->n) +#define tvispzero(o) ((o)->u64 == 0) +#define tvismzero(o) ((o)->u64 == U64x(80000000,00000000)) +#define tvispone(o) ((o)->u64 == U64x(3ff00000,00000000)) +#define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64) + +/* Macros to convert type ids. */ +#if LJ_64 +#define itypemap(o) \ + (tvisnum(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o)) +#else +#define itypemap(o) (tvisnum(o) ? ~LJ_TNUMX : ~itype(o)) +#endif + +/* Macros to get tagged values. */ +#define gcval(o) (gcref((o)->gcr)) +#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - (o)->it)) +#if LJ_64 +#define lightudV(o) check_exp(tvislightud(o), \ + (void *)((o)->u64 & U64x(0000ffff,ffffffff))) +#else +#define lightudV(o) check_exp(tvislightud(o), gcrefp((o)->gcr, void)) +#endif +#define gcV(o) check_exp(tvisgcv(o), gcval(o)) +#define strV(o) check_exp(tvisstr(o), &gcval(o)->str) +#define funcV(o) check_exp(tvisfunc(o), &gcval(o)->fn) +#define threadV(o) check_exp(tvisthread(o), &gcval(o)->th) +#define protoV(o) check_exp(tvisproto(o), &gcval(o)->pt) +#define tabV(o) check_exp(tvistab(o), &gcval(o)->tab) +#define udataV(o) check_exp(tvisudata(o), &gcval(o)->ud) +#define numV(o) check_exp(tvisnum(o), (o)->n) + +/* Macros to set tagged values. */ +#define setitype(o, i) ((o)->it = (i)) +#define setnilV(o) ((o)->it = LJ_TNIL) +#define setboolV(o, x) ((o)->it = LJ_TFALSE-(x)) + +#if LJ_64 +#define checklightudptr(L, p) \ + (((uint64_t)(p) >> 48) ? 
(lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p)) +#define setlightudV(o, x) \ + ((o)->u64 = (uint64_t)(x) | (((uint64_t)LJ_TLIGHTUD) << 48)) +#define setcont(o, x) \ + ((o)->u64 = (uint64_t)(x) - (uint64_t)lj_vm_asm_begin) +#else +#define checklightudptr(L, p) (p) +#define setlightudV(o, x) \ + { TValue *i_o = (o); \ + setgcrefp(i_o->gcr, (x)); i_o->it = LJ_TLIGHTUD; } +#define setcont(o, x) \ + { TValue *i_o = (o); \ + setgcrefp(i_o->gcr, (x)); i_o->it = LJ_TLIGHTUD; } +#endif + +#define tvchecklive(g, o) \ + lua_assert(!tvisgcv(o) || \ + ((~itype(o) == gcval(o)->gch.gct) && !isdead(g, gcval(o)))) + +#define setgcV(L, o, x, itype) \ + { TValue *i_o = (o); \ + setgcrefp(i_o->gcr, &(x)->nextgc); i_o->it = itype; \ + tvchecklive(G(L), i_o); } +#define setstrV(L, o, x) setgcV(L, o, x, LJ_TSTR) +#define setthreadV(L, o, x) setgcV(L, o, x, LJ_TTHREAD) +#define setprotoV(L, o, x) setgcV(L, o, x, LJ_TPROTO) +#define setfuncV(L, o, x) setgcV(L, o, &(x)->l, LJ_TFUNC) +#define settabV(L, o, x) setgcV(L, o, x, LJ_TTAB) +#define setudataV(L, o, x) setgcV(L, o, x, LJ_TUDATA) + +#define setnumV(o, x) ((o)->n = (x)) +#define setnanV(o) ((o)->u64 = U64x(fff80000,00000000)) +#define setintV(o, i) ((o)->n = cast_num((int32_t)(i))) + +/* Copy tagged values. */ +#define copyTV(L, o1, o2) \ + { cTValue *i_o2 = (o2); TValue *i_o1 = (o1); \ + *i_o1 = *i_o2; tvchecklive(G(L), i_o1); } + +/* -- String object ------------------------------------------------------- */ + +/* String object header. String payload follows. */ +typedef struct GCstr { + GCHeader; + uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */ + uint8_t unused; + MSize hash; /* Hash of string. */ + MSize len; /* Size of string. */ +} GCstr; + +#define strref(r) (&gcref((r))->str) +#define strdata(s) ((const char *)((s)+1)) +#define strdatawr(s) ((char *)((s)+1)) +#define strVdata(o) strdata(strV(o)) +#define sizestring(s) (sizeof(struct GCstr)+(s)->len+1) + +/* -- Userdata object ----------------------------------------------------- */ + +/* Userdata object. Payload follows. */ +typedef struct GCudata { + GCHeader; + uint8_t unused1; + uint8_t unused2; + GCRef env; /* Should be at same offset in GCfunc. */ + MSize len; /* Size of payload. */ + GCRef metatable; /* Must be at same offset in GCtab. */ + uint32_t align1; /* To force 8 byte alignment of the payload. */ +} GCudata; + +#define uddata(u) ((void *)((u)+1)) +#define sizeudata(u) (sizeof(struct GCudata)+(u)->len) + +/* -- Prototype object ---------------------------------------------------- */ + +/* Split constant array. Collectables are below, numbers above pointer. */ +typedef union ProtoK { + lua_Number *n; /* Numbers. */ + GCRef *gc; /* Collectable objects (strings/table/proto). */ +} ProtoK; + +#define SCALE_NUM_GCO ((int32_t)sizeof(lua_Number)/sizeof(GCRef)) +#define round_nkgc(n) (((n) + SCALE_NUM_GCO-1) & ~(SCALE_NUM_GCO-1)) + +typedef struct VarInfo { + GCstr *name; /* Local variable name. */ + BCPos startpc; /* First point where the local variable is active. */ + BCPos endpc; /* First point where the local variable is dead. */ +} VarInfo; + +typedef struct GCproto { + GCHeader; + uint8_t numparams; /* Number of parameters. */ + uint8_t framesize; /* Fixed frame size. */ + MSize sizebc; /* Number of bytecode instructions. */ + GCRef gclist; + ProtoK k; /* Split constant array (points to the middle). */ + BCIns *bc; /* Array of bytecode instructions. */ + int16_t *uv; /* Upvalue list. local >= 0. parent uv < 0. */ + MSize sizekgc; /* Number of collectable constants. 
*/ + MSize sizekn; /* Number of lua_Number constants. */ + uint8_t sizeuv; /* Number of upvalues. */ + uint8_t flags; /* Miscellaneous flags (see below). */ + uint16_t trace; /* Anchor for chain of root traces. */ + /* ------ The following fields are for debugging/tracebacks only ------ */ + MSize sizelineinfo; /* Size of lineinfo array (may be 0). */ + MSize sizevarinfo; /* Size of local var info array (may be 0). */ + MSize sizeuvname; /* Size of upvalue names array (may be 0). */ + BCLine linedefined; /* First line of the function definition. */ + BCLine lastlinedefined; /* Last line of the function definition. */ + BCLine *lineinfo; /* Map from bytecode instructions to source lines. */ + struct VarInfo *varinfo; /* Names and extents of local variables. */ + GCstr **uvname; /* Upvalue names. */ + GCstr *chunkname; /* Name of the chunk this function was defined in. */ +} GCproto; + +#define PROTO_IS_VARARG 0x01 +#define PROTO_HAS_FNEW 0x02 +#define PROTO_HAS_RETURN 0x04 +#define PROTO_FIXUP_RETURN 0x08 +#define PROTO_NO_JIT 0x10 +#define PROTO_HAS_ILOOP 0x20 + +/* -- Upvalue object ------------------------------------------------------ */ + +typedef struct GCupval { + GCHeader; + uint8_t closed; /* Set if closed (i.e. uv->v == &uv->u.value). */ + uint8_t unused; + union { + TValue tv; /* If closed: the value itself. */ + struct { /* If open: double linked list, anchored at thread. */ + GCRef prev; + GCRef next; + }; + }; + TValue *v; /* Points to stack slot (open) or above (closed). */ +#if LJ_32 + int32_t unusedv; /* For consistent alignment (32 bit only). */ +#endif +} GCupval; + +#define uvprev(uv_) (&gcref((uv_)->prev)->uv) +#define uvnext(uv_) (&gcref((uv_)->next)->uv) + +/* -- Function object (closures) ------------------------------------------ */ + +/* Common header for functions. env should be at same offset in GCudata. */ +#define GCfuncHeader \ + GCHeader; uint8_t ffid; uint8_t nupvalues; \ + GCRef env; GCRef gclist; ASMFunction gate + +typedef struct GCfuncC { + GCfuncHeader; + lua_CFunction f; /* C function to be called. */ + TValue upvalue[1]; /* Array of upvalues (TValue). */ +} GCfuncC; + +typedef struct GCfuncL { + GCfuncHeader; + GCRef pt; /* Link to prototype this function is based on. */ + GCRef uvptr[1]; /* Array of _pointers_ to upvalue objects (GCupval). */ +} GCfuncL; + +typedef union GCfunc { + GCfuncC c; + GCfuncL l; +} GCfunc; + +#define FF_LUA 0 +#define FF_C 1 +#define isluafunc(fn) ((fn)->c.ffid == FF_LUA) +#define iscfunc(fn) ((fn)->c.ffid == FF_C) +#define isffunc(fn) ((fn)->c.ffid > FF_C) +#define funcproto(fn) check_exp(isluafunc(fn), &gcref((fn)->l.pt)->pt) +#define sizeCfunc(n) (sizeof(GCfuncC) + sizeof(TValue)*((n)-1)) +#define sizeLfunc(n) (sizeof(GCfuncL) + sizeof(TValue *)*((n)-1)) + +/* -- Table object -------------------------------------------------------- */ + +/* Hash node. */ +typedef struct Node { + TValue val; /* Value object. Must be first field. */ + TValue key; /* Key object. */ + MRef next; /* Hash chain. */ + int32_t unused; /* For consistent alignment. */ +} Node; + +LJ_STATIC_ASSERT(offsetof(Node, val) == 0); + +typedef struct GCtab { + GCHeader; + uint8_t nomm; /* Negative cache for fast metamethods. */ + int8_t colo; /* Array colocation. */ + MRef array; /* Array part. */ + GCRef gclist; + GCRef metatable; /* Must be at same offset in GCudata. */ + MRef node; /* Hash part. */ + uint32_t asize; /* Size of array part (keys [0, asize-1]). */ + uint32_t hmask; /* Hash part mask (size of hash part - 1). 
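The hash part size is always a power of two.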
*/ + MRef lastfree; /* Any free position is before this position. */ +} GCtab; + +#define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab)) +#define tabref(r) (&gcref((r))->tab) +#define noderef(r) (mref((r), Node)) +#define nextnode(n) (mref((n)->next, Node)) + +/* -- State objects ------------------------------------------------------- */ + +/* VM states. */ +enum { + LJ_VMST_INTERP, /* Interpreter. */ + LJ_VMST_C, /* C function. */ + LJ_VMST_GC, /* Garbage collector. */ + LJ_VMST_EXIT, /* Trace exit handler. */ + LJ_VMST_RECORD, /* Trace recorder. */ + LJ_VMST_OPT, /* Optimizer. */ + LJ_VMST_ASM, /* Assembler. */ + LJ_VMST__MAX +}; + +#define setvmstate(g, st) ((g)->vmstate = ~LJ_VMST_##st) + +/* Metamethods. */ +#define MMDEF(_) \ + _(index) _(newindex) _(gc) _(mode) _(eq) \ + /* Only the above (fast) metamethods are negative cached (max. 8). */ \ + _(len) _(lt) _(le) _(concat) _(call) \ + /* The following must be in ORDER ARITH. */ \ + _(add) _(sub) _(mul) _(div) _(mod) _(pow) _(unm) \ + /* The following are used in the standard libraries. */ \ + _(metatable) _(tostring) + +typedef enum { +#define MMENUM(name) MM_##name, +MMDEF(MMENUM) +#undef MMENUM + MM_MAX, + MM____ = MM_MAX, + MM_FAST = MM_eq +} MMS; + +#define BASEMT_MAX ((~LJ_TNUMX)+1) + +typedef struct GCState { + MSize total; /* Memory currently allocated. */ + MSize threshold; /* Memory threshold. */ + uint8_t currentwhite; /* Current white color. */ + uint8_t state; /* GC state. */ + uint8_t unused1; + uint8_t unused2; + MSize sweepstr; /* Sweep position in string table. */ + GCRef root; /* List of all collectable objects. */ + GCRef *sweep; /* Sweep position in root list. */ + GCRef gray; /* List of gray objects. */ + GCRef grayagain; /* List of objects for atomic traversal. */ + GCRef weak; /* List of weak tables (to be cleared). */ + GCRef mmudata; /* List of userdata (to be finalized). */ + MSize stepmul; /* Incremental GC step granularity. */ + MSize debt; /* Debt (how much GC is behind schedule). */ + MSize estimate; /* Estimate of memory actually in use. */ + MSize pause; /* Pause between successive GC cycles. */ +} GCState; + +/* Global state, shared by all threads of a Lua universe. */ +typedef struct global_State { + GCRef *strhash; /* String hash table (hash chain anchors). */ + MSize strmask; /* String hash mask (size of hash table - 1). */ + MSize strnum; /* Number of strings in hash table. */ + lua_Alloc allocf; /* Memory allocator. */ + void *allocd; /* Memory allocator data. */ + GCState gc; /* Garbage collector. */ + SBuf tmpbuf; /* Temporary buffer for string concatenation. */ + Node nilnode; /* Fallback 1-element hash part (nil key and value). */ + uint8_t hookmask; /* Hook mask. */ + uint8_t dispatchmode; /* Dispatch mode. */ + uint8_t vmevmask; /* VM event mask. */ + uint8_t unused1; + GCRef mainthref; /* Link to main thread. */ + TValue registrytv; /* Anchor for registry. */ + TValue tmptv; /* Temporary TValue. */ + GCupval uvhead; /* Head of double-linked list of all open upvalues. */ + int32_t hookcount; /* Instruction hook countdown. */ + int32_t hookcstart; /* Start count for instruction hook counter. */ + lua_Hook hookf; /* Hook function. */ + lua_CFunction panic; /* Called as a last resort for errors. */ + volatile int32_t vmstate; /* VM state or current JIT code trace number. */ + GCRef jit_L; /* Current JIT code lua_State or NULL. */ + MRef jit_base; /* Current JIT code L->base. */ + GCRef basemt[BASEMT_MAX]; /* Metatables for base types. 
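Indexed by itypemap(), cf. lj_meta_lookup().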
*/ + GCRef mmname[MM_MAX]; /* Array holding metamethod names. */ +} global_State; + +#define mainthread(g) (&gcref(g->mainthref)->th) +#define niltv(L) \ + check_exp(tvisnil(&G(L)->nilnode.val), &G(L)->nilnode.val) +#define niltvg(g) \ + check_exp(tvisnil(&(g)->nilnode.val), &(g)->nilnode.val) + +/* Hook management. Hook event masks are defined in lua.h. */ +#define HOOK_EVENTMASK 0x0f +#define HOOK_ACTIVE 0x10 +#define HOOK_VMEVENT 0x20 +#define HOOK_GC 0x40 +#define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) +#define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) +#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC)) +#define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT)) +#define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE) +#define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK) +#define hook_restore(g, h) \ + ((g)->hookmask = ((g)->hookmask & HOOK_EVENTMASK) | (h)) + +/* Per-thread state object. */ +struct lua_State { + GCHeader; + uint8_t dummy_ffid; /* Fake FF_C for curr_funcisL() on dummy frames. */ + uint8_t status; /* Thread status. */ + MRef glref; /* Link to global state. */ + GCRef gclist; /* GC chain. */ + TValue *base; /* Base of currently executing function. */ + TValue *top; /* First free slot in the stack. */ + TValue *maxstack; /* Last free slot in the stack. */ + TValue *stack; /* Stack base. */ + GCRef openupval; /* List of open upvalues in the stack. */ + GCRef env; /* Thread environment (table of globals). */ + void *cframe; /* End of C stack frame chain. */ + MSize stacksize; /* True stack size (incl. LJ_STACK_EXTRA). */ +}; + +#define G(L) (mref(L->glref, global_State)) +#define registry(L) (&G(L)->registrytv) + +/* Macros to access the currently executing (Lua) function. */ +#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn) +#define curr_funcisL(L) (isluafunc(curr_func(L))) +#define curr_proto(L) (funcproto(curr_func(L))) +#define curr_topL(L) (L->base + curr_proto(L)->framesize) +#define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top) + +/* -- GC object definition and conversions -------------------------------- */ + +/* GC header for generic access to common fields of GC objects. */ +typedef struct GChead { + GCHeader; + uint8_t unused1; + uint8_t unused2; + GCRef env; + GCRef gclist; + GCRef metatable; +} GChead; + +/* The env field SHOULD be at the same offset for all GC objects. */ +LJ_STATIC_ASSERT(offsetof(GChead, env) == offsetof(GCfuncL, env)); +LJ_STATIC_ASSERT(offsetof(GChead, env) == offsetof(GCudata, env)); + +/* The metatable field MUST be at the same offset for all GC objects. */ +LJ_STATIC_ASSERT(offsetof(GChead, metatable) == offsetof(GCtab, metatable)); +LJ_STATIC_ASSERT(offsetof(GChead, metatable) == offsetof(GCudata, metatable)); + +/* The gclist field MUST be at the same offset for all GC objects. */ +LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(lua_State, gclist)); +LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCproto, gclist)); +LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCfuncL, gclist)); +LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCtab, gclist)); + +typedef union GCobj { + GChead gch; + GCstr str; + GCupval uv; + lua_State th; + GCproto pt; + GCfunc fn; + GCtab tab; + GCudata ud; +} GCobj; + +/* Macros to convert a GCobj pointer into a specific value. 
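Each conversion asserts that the GC type tag matches.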
*/ +#define gco2str(o) check_exp((o)->gch.gct == ~LJ_TSTR, &(o)->str) +#define gco2uv(o) check_exp((o)->gch.gct == ~LJ_TUPVAL, &(o)->uv) +#define gco2th(o) check_exp((o)->gch.gct == ~LJ_TTHREAD, &(o)->th) +#define gco2pt(o) check_exp((o)->gch.gct == ~LJ_TPROTO, &(o)->pt) +#define gco2func(o) check_exp((o)->gch.gct == ~LJ_TFUNC, &(o)->fn) +#define gco2tab(o) check_exp((o)->gch.gct == ~LJ_TTAB, &(o)->tab) +#define gco2ud(o) check_exp((o)->gch.gct == ~LJ_TUDATA, &(o)->ud) + +/* Macro to convert any collectable object into a GCobj pointer. */ +#define obj2gco(v) (cast(GCobj *, (v))) + +/* -- Number to integer conversion ---------------------------------------- */ + +static LJ_AINLINE int32_t lj_num2bit(lua_Number n) +{ + TValue o; + o.n = n + 6755399441055744.0; /* 2^52 + 2^51 */ + return (int32_t)o.u32.lo; +} + +#if (defined(__i386__) || defined(_M_IX86)) && !defined(__SSE2__) +#define lj_num2int(n) lj_num2bit((n)) +#else +#define lj_num2int(n) ((int32_t)(n)) +#endif + +/* -- Miscellaneous object handling --------------------------------------- */ + +/* Names and maps for internal and external object tags. */ +LJ_DATA const char *const lj_obj_typename[1+LUA_TUPVAL+1]; +LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1]; + +#define typename(o) (lj_obj_itypename[itypemap(o)]) + +/* Compare two objects without calling metamethods. */ +LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2); + +#ifdef LUA_USE_ASSERT +#include "lj_gc.h" +#endif + +#endif diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c new file mode 100644 index 0000000000..0cd608301d --- /dev/null +++ b/src/lj_opt_dce.c @@ -0,0 +1,79 @@ +/* +** DCE: Dead Code Elimination. Pre-LOOP only -- ASM already performs DCE. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_opt_dce_c +#define LUA_CORE + +#include "lj_obj.h" + +#if LJ_HASJIT + +#include "lj_ir.h" +#include "lj_jit.h" +#include "lj_iropt.h" + +/* Some local macros to save typing. Undef'd at the end. */ +#define IR(ref) (&J->cur.ir[(ref)]) + +/* Scan through all snapshots and mark all referenced instructions. */ +static void dce_marksnap(jit_State *J) +{ + SnapNo i, nsnap = J->cur.nsnap; + for (i = 0; i < nsnap; i++) { + SnapShot *snap = &J->cur.snap[i]; + IRRef2 *map = &J->cur.snapmap[snap->mapofs]; + BCReg s, nslots = snap->nslots; + for (s = 0; s < nslots; s++) { + IRRef ref = snap_ref(map[s]); + if (!irref_isk(ref)) + irt_setmark(IR(ref)->t); + } + } +} + +/* Backwards propagate marks. Replace unused instructions with NOPs. */ +static void dce_propagate(jit_State *J) +{ + IRRef1 *pchain[IR__MAX]; + IRRef ins; + uint32_t i; + for (i = 0; i < IR__MAX; i++) pchain[i] = &J->chain[i]; + for (ins = J->cur.nins-1; ins >= REF_FIRST; ins--) { + IRIns *ir = IR(ins); + if (irt_ismarked(ir->t)) { + irt_clearmark(ir->t); + pchain[ir->o] = &ir->prev; + } else if (!(irt_isguard(ir->t) || irm_sideeff(lj_ir_mode[ir->o]))) { + *pchain[ir->o] = ir->prev; /* Reroute original instruction chain. */ + *pchain[IR_NOP] = (IRRef1)ins; + ir->t.irt = IRT_NIL; + ir->o = IR_NOP; /* Replace instruction with NOP. */ + ir->op1 = ir->op2 = 0; + pchain[IR_NOP] = &ir->prev; + continue; + } + if (!irref_isk(ir->op1)) irt_setmark(IR(ir->op1)->t); + if (!irref_isk(ir->op2)) irt_setmark(IR(ir->op2)->t); + } + *pchain[IR_NOP] = 0; /* Terminate NOP chain. */ +} + +/* Dead Code Elimination. +** +** First backpropagate marks for all used instructions. Then replace +** the unused ones with a NOP. Note that compressing the IR to eliminate +** the NOPs does not pay off. 
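+**
+** Illustrative example (not actual dump output): an instruction
+** 0007 ADD 0005 0006 that is referenced neither from a snapshot nor
+** from any surviving instruction is turned into 0007 NOP and moved
+** from the ADD chain to the NOP chain.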
+*/ +void lj_opt_dce(jit_State *J) +{ + if ((J->flags & JIT_F_OPT_DCE)) { + dce_marksnap(J); + dce_propagate(J); + } +} + +#undef IR + +#endif diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c new file mode 100644 index 0000000000..e5d98162ab --- /dev/null +++ b/src/lj_opt_fold.c @@ -0,0 +1,1415 @@ +/* +** FOLD: Constant Folding, Algebraic Simplifications and Reassociation. +** CSE: Common-Subexpression Elimination. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_opt_fold_c +#define LUA_CORE + +#include "lj_obj.h" + +#if LJ_HASJIT + +#include "lj_str.h" +#include "lj_ir.h" +#include "lj_jit.h" +#include "lj_iropt.h" +#include "lj_trace.h" +#include "lj_vm.h" + +/* Here's a short description how the FOLD engine processes instructions: +** +** The FOLD engine receives a single instruction stored in fins (J->fold.ins). +** The instruction and its operands are used to select matching fold rules. +** These are applied iteratively until a fixed point is reached. +** +** The 8 bit opcode of the instruction itself plus the opcodes of the +** two instructions referenced by its operands form a 24 bit key +** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits). +** +** This key is used for partial matching against the fold rules. The +** left/right operand fields of the key are successively masked with +** the 'any' wildcard, from most specific to least specific: +** +** ins left right +** ins any right +** ins left any +** ins any any +** +** The masked key is used to lookup a matching fold rule in a semi-perfect +** hash table. If a matching rule is found, the related fold function is run. +** Multiple rules can share the same fold function. A fold rule may return +** one of several special values: +** +** - NEXTFOLD means no folding was applied, because an additional test +** inside the fold function failed. Matching continues against less +** specific fold rules. Finally the instruction is passed on to CSE. +** +** - RETRYFOLD means the instruction was modified in-place. Folding is +** retried as if this instruction had just been received. +** +** All other return values are terminal actions -- no further folding is +** applied: +** +** - INTFOLD(i) returns a reference to the integer constant i. +** +** - LEFTFOLD and RIGHTFOLD return the left/right operand reference +** without emitting an instruction. +** +** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit +** it without passing through any further optimizations. +** +** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have +** no result (e.g. guarded assertions): FAILFOLD means the guard would +** always fail, i.e. the current trace is pointless. DROPFOLD means +** the guard is always true and has been eliminated. CONDFOLD is a +** shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail). +** +** - Any other return value is interpreted as an IRRef or TRef. This +** can be a reference to an existing or a newly created instruction. +** Only the least-significant 16 bits (IRRef1) are used to form a TRef +** which is finally returned to the caller. +** +** The FOLD engine receives instructions both from the trace recorder and +** substituted instructions from LOOP unrolling. This means all types +** of instructions may end up here, even though the recorder bypasses +** FOLD in some cases. Thus all loads, stores and allocations must have +** an any/any rule to avoid being passed on to CSE. 
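+**
+** Example (illustrative): for an ADD whose left operand is a SUB and
+** whose right operand is a KINT, the lookup keys are tried in the order
+** 'ADD SUB KINT', 'ADD any KINT', 'ADD SUB any' and finally 'ADD any any'.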
+** +** Carefully read the following requirements before adding or modifying +** any fold rules: +** +** Requirement #1: All fold rules must preserve their destination type. +** +** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result). +** Never use lj_ir_knumint() which can have either a KINT or KNUM result. +** +** Requirement #2: Fold rules should not create *new* instructions which +** reference operands *across* PHIs. +** +** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the +** left operand is a PHI. Then fleft->op1 would point across the PHI +** frontier to an invariant instruction. Adding a PHI for this instruction +** would be counterproductive. The solution is to add a barrier which +** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case. +** The only exception is for recurrences with high latencies like +** repeated int->num->int conversions. +** +** One could relax this condition a bit if the referenced instruction is +** a PHI, too. But this often leads to worse code due to excessive +** register shuffling. +** +** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though. +** Even returning fleft->op1 would be ok, because a new PHI will added, +** if needed. But again, this leads to excessive register shuffling and +** should be avoided. +** +** Requirement #3: The set of all fold rules must be monotonic to guarantee +** termination. +** +** The goal is optimization, so one primarily wants to add strength-reducing +** rules. This means eliminating an instruction or replacing an instruction +** with one or more simpler instructions. Don't add fold rules which point +** into the other direction. +** +** Some rules (like commutativity) do not directly reduce the strength of +** an instruction, but enable other fold rules (e.g. by moving constants +** to the right operand). These rules must be made unidirectional to avoid +** cycles. +** +** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it. +*/ + +/* Some local macros to save typing. Undef'd at the end. */ +#define IR(ref) (&J->cur.ir[(ref)]) +#define fins (&J->fold.ins) +#define fleft (&J->fold.left) +#define fright (&J->fold.right) +#define knumleft (ir_knum(fleft)->n) +#define knumright (ir_knum(fright)->n) + +/* Pass IR on to next optimization in chain (FOLD). */ +#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) + +/* Fold function type. Fastcall on x86 significantly reduces their size. */ +typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J); + +/* Macros for the fold specs, so buildvm can recognize them. */ +#define LJFOLD(x) +#define LJFOLDX(x) +#define LJFOLDF(name) static TRef LJ_FASTCALL name(jit_State *J) +/* Note: They must be at the start of a line or buildvm ignores them! */ + +/* Barrier to prevent using operands across PHIs. */ +#define PHIBARRIER(ir) if (irt_isphi((ir)->t)) return NEXTFOLD + +/* Barrier to prevent folding across a GC step. +** GC steps can only happen at the head of a trace and at LOOP. +** And the GC is only driven forward if there is at least one allocation. 
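+** In other words: a reference from before the LOOP instruction must not
+** be reused if any allocating instruction (TNEW, TDUP, SNEW or TOSTR)
+** has been emitted, since a GC step may run in between.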
+*/ +#define gcstep_barrier(J, ref) \ + ((ref) < J->chain[IR_LOOP] && \ + (J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ + J->chain[IR_SNEW] || J->chain[IR_TOSTR])) + +/* -- Constant folding ---------------------------------------------------- */ + +LJFOLD(ADD KNUM KNUM) +LJFOLD(SUB KNUM KNUM) +LJFOLD(MUL KNUM KNUM) +LJFOLD(DIV KNUM KNUM) +LJFOLD(NEG KNUM KNUM) +LJFOLD(ABS KNUM KNUM) +LJFOLD(ATAN2 KNUM KNUM) +LJFOLD(LDEXP KNUM KNUM) +LJFOLD(MIN KNUM KNUM) +LJFOLD(MAX KNUM KNUM) +LJFOLDF(kfold_numarith) +{ + lua_Number a = knumleft; + lua_Number b = knumright; + lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD); + return lj_ir_knum(J, y); +} + +LJFOLD(FPMATH KNUM any) +LJFOLDF(kfold_fpmath) +{ + lua_Number a = knumleft; + lua_Number y = lj_vm_foldfpm(a, fins->op2); + return lj_ir_knum(J, y); +} + +LJFOLD(POWI KNUM KINT) +LJFOLDF(kfold_powi) +{ + lua_Number a = knumleft; + lua_Number b = cast_num(fright->i); + lua_Number y = lj_vm_foldarith(a, b, IR_POWI - IR_ADD); + return lj_ir_knum(J, y); +} + +static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) +{ + switch (op) { + case IR_ADD: k1 += k2; break; + case IR_SUB: k1 -= k2; break; + case IR_BAND: k1 &= k2; break; + case IR_BOR: k1 |= k2; break; + case IR_BXOR: k1 ^= k2; break; + case IR_BSHL: k1 <<= (k2 & 31); break; + case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break; + case IR_BSAR: k1 >>= (k2 & 31); break; + case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break; + case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; + default: lua_assert(0); break; + } + return k1; +} + +LJFOLD(ADD KINT KINT) +LJFOLD(SUB KINT KINT) +LJFOLD(BAND KINT KINT) +LJFOLD(BOR KINT KINT) +LJFOLD(BXOR KINT KINT) +LJFOLD(BSHL KINT KINT) +LJFOLD(BSHR KINT KINT) +LJFOLD(BSAR KINT KINT) +LJFOLD(BROL KINT KINT) +LJFOLD(BROR KINT KINT) +LJFOLDF(kfold_intarith) +{ + return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o)); +} + +LJFOLD(BNOT KINT) +LJFOLDF(kfold_bnot) +{ + return INTFOLD(~fleft->i); +} + +LJFOLD(BSWAP KINT) +LJFOLDF(kfold_bswap) +{ + return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i)); +} + +LJFOLD(TONUM KINT) +LJFOLDF(kfold_tonum) +{ + return lj_ir_knum(J, cast_num(fleft->i)); +} + +LJFOLD(TOBIT KNUM KNUM) +LJFOLDF(kfold_tobit) +{ + TValue tv; + tv.n = knumleft + knumright; + return INTFOLD((int32_t)tv.u32.lo); +} + +LJFOLD(TOINT KNUM any) +LJFOLDF(kfold_toint) +{ + lua_Number n = knumleft; + int32_t k = lj_num2int(n); + if (irt_isguard(fins->t) && n != cast_num(k)) { + /* We're about to create a guard which always fails, like TOINT +1.5. 
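+  ** (That is, the number constant cannot be converted to an int32
+  ** without loss, so the guarded conversion could never succeed.)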
+ ** Some pathological loops cause this during LICM, e.g.: + ** local x,k,t = 0,1.5,{1,[1.5]=2} + ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end + ** assert(x == 300) + */ + return FAILFOLD; + } + return INTFOLD(k); +} + +LJFOLD(TOSTR KNUM) +LJFOLDF(kfold_tostr_knum) +{ + return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); +} + +LJFOLD(TOSTR KINT) +LJFOLDF(kfold_tostr_kint) +{ + return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); +} + +LJFOLD(STRTO KGC) +LJFOLDF(kfold_strto) +{ + TValue n; + if (lj_str_numconv(strdata(ir_kstr(fleft)), &n)) + return lj_ir_knum(J, numV(&n)); + return FAILFOLD; +} + +LJFOLD(SNEW STRREF KINT) +LJFOLDF(kfold_snew) +{ + if (fright->i == 0) + return lj_ir_kstr(J, lj_str_new(J->L, "", 0)); + PHIBARRIER(fleft); + if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { + const char *s = strdata(ir_kstr(IR(fleft->op1))); + int32_t ofs = IR(fleft->op2)->i; + return lj_ir_kstr(J, lj_str_new(J->L, s+ofs, (size_t)fright->i)); + } + return NEXTFOLD; +} + +/* Must not use kfold_kref for numbers (could be NaN). */ +LJFOLD(EQ KNUM KNUM) +LJFOLD(NE KNUM KNUM) +LJFOLD(LT KNUM KNUM) +LJFOLD(GE KNUM KNUM) +LJFOLD(LE KNUM KNUM) +LJFOLD(GT KNUM KNUM) +LJFOLD(ULT KNUM KNUM) +LJFOLD(UGE KNUM KNUM) +LJFOLD(ULE KNUM KNUM) +LJFOLD(UGT KNUM KNUM) +LJFOLDF(kfold_numcomp) +{ + return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o)); +} + +LJFOLD(LT KINT KINT) +LJFOLD(GE KINT KINT) +LJFOLD(LE KINT KINT) +LJFOLD(GT KINT KINT) +LJFOLD(ULT KINT KINT) +LJFOLD(UGE KINT KINT) +LJFOLD(ULE KINT KINT) +LJFOLD(UGT KINT KINT) +LJFOLD(ABC KINT KINT) +LJFOLDF(kfold_intcomp) +{ + int32_t a = fleft->i, b = fright->i; + switch ((IROp)fins->o) { + case IR_LT: return CONDFOLD(a < b); + case IR_GE: return CONDFOLD(a >= b); + case IR_LE: return CONDFOLD(a <= b); + case IR_GT: return CONDFOLD(a > b); + case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b); + case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b); + case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); + case IR_ABC: + case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); + default: lua_assert(0); return FAILFOLD; + } +} + +LJFOLD(LT KGC KGC) +LJFOLD(GE KGC KGC) +LJFOLD(LE KGC KGC) +LJFOLD(GT KGC KGC) +LJFOLDF(kfold_strcomp) +{ + if (irt_isstr(fins->t)) { + GCstr *a = ir_kstr(fleft); + GCstr *b = ir_kstr(fright); + return CONDFOLD(lj_ir_strcmp(a, b, (IROp)fins->o)); + } + return NEXTFOLD; +} + +/* Don't constant-fold away FLOAD checks against KNULL. */ +LJFOLD(EQ FLOAD KNULL) +LJFOLD(NE FLOAD KNULL) +LJFOLDX(lj_opt_cse) + +/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */ +LJFOLD(EQ any KNULL) +LJFOLD(NE any KNULL) +LJFOLD(EQ KNULL any) +LJFOLD(NE KNULL any) +LJFOLD(EQ KINT KINT) /* Constants are unique, so same refs <==> same value. 
*/ +LJFOLD(NE KINT KINT) +LJFOLD(EQ KGC KGC) +LJFOLD(NE KGC KGC) +LJFOLDF(kfold_kref) +{ + return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE)); +} + +/* -- Algebraic shortcuts ------------------------------------------------- */ + +LJFOLD(FPMATH FPMATH IRFPM_FLOOR) +LJFOLD(FPMATH FPMATH IRFPM_CEIL) +LJFOLD(FPMATH FPMATH IRFPM_TRUNC) +LJFOLDF(shortcut_round) +{ + IRFPMathOp op = (IRFPMathOp)fleft->op2; + if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC) + return LEFTFOLD; /* round(round_left(x)) = round_left(x) */ + return NEXTFOLD; +} + +LJFOLD(FPMATH TONUM IRFPM_FLOOR) +LJFOLD(FPMATH TONUM IRFPM_CEIL) +LJFOLD(FPMATH TONUM IRFPM_TRUNC) +LJFOLD(ABS ABS KNUM) +LJFOLDF(shortcut_left) +{ + return LEFTFOLD; /* f(g(x)) ==> g(x) */ +} + +LJFOLD(ABS NEG KNUM) +LJFOLDF(shortcut_dropleft) +{ + PHIBARRIER(fleft); + fins->op1 = fleft->op1; /* abs(neg(x)) ==> abs(x) */ + return RETRYFOLD; +} + +/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */ +LJFOLD(NEG NEG KNUM) +LJFOLD(BNOT BNOT) +LJFOLD(BSWAP BSWAP) +LJFOLDF(shortcut_leftleft) +{ + PHIBARRIER(fleft); /* See above. Fold would be ok, but not beneficial. */ + return fleft->op1; /* f(g(x)) ==> x */ +} + +LJFOLD(TONUM TOINT) +LJFOLDF(shortcut_leftleft_toint) +{ + PHIBARRIER(fleft); + if (irt_isguard(fleft->t)) /* Only safe with a guarded TOINT. */ + return fleft->op1; /* f(g(x)) ==> x */ + return NEXTFOLD; +} + +LJFOLD(TOINT TONUM any) +LJFOLD(TOBIT TONUM KNUM) /* The inverse must NOT be shortcut! */ +LJFOLDF(shortcut_leftleft_across_phi) +{ + /* Fold even across PHI to avoid expensive int->num->int conversions. */ + return fleft->op1; /* f(g(x)) ==> x */ +} + +/* -- FP algebraic simplifications ---------------------------------------- */ + +/* FP arithmetic is tricky -- there's not much to simplify. 
+** Please note the following common pitfalls before sending "improvements": +** x+0 ==> x is INVALID for x=-0 +** 0-x ==> -x is INVALID for x=+0 +** x*0 ==> 0 is INVALID for x=-0, x=+-Inf or x=NaN +*/ + +LJFOLD(ADD NEG any) +LJFOLDF(simplify_numadd_negx) +{ + PHIBARRIER(fleft); + fins->o = IR_SUB; /* (-a) + b ==> b - a */ + fins->op1 = fins->op2; + fins->op2 = fleft->op1; + return RETRYFOLD; +} + +LJFOLD(ADD any NEG) +LJFOLDF(simplify_numadd_xneg) +{ + PHIBARRIER(fright); + fins->o = IR_SUB; /* a + (-b) ==> a - b */ + fins->op2 = fright->op1; + return RETRYFOLD; +} + +LJFOLD(SUB any KNUM) +LJFOLDF(simplify_numsub_k) +{ + lua_Number n = knumright; + if (n == 0.0) /* x - (+-0) ==> x */ + return LEFTFOLD; + return NEXTFOLD; +} + +LJFOLD(SUB NEG KNUM) +LJFOLDF(simplify_numsub_negk) +{ + PHIBARRIER(fleft); + fins->op2 = fleft->op1; /* (-x) - k ==> (-k) - x */ + fins->op1 = (IRRef1)lj_ir_knum(J, -knumright); + return RETRYFOLD; +} + +LJFOLD(SUB any NEG) +LJFOLDF(simplify_numsub_xneg) +{ + PHIBARRIER(fright); + fins->o = IR_ADD; /* a - (-b) ==> a + b */ + fins->op2 = fright->op1; + return RETRYFOLD; +} + +LJFOLD(MUL any KNUM) +LJFOLD(DIV any KNUM) +LJFOLDF(simplify_nummuldiv_k) +{ + lua_Number n = knumright; + if (n == 1.0) { /* x o 1 ==> x */ + return LEFTFOLD; + } else if (n == -1.0) { /* x o -1 ==> -x */ + fins->o = IR_NEG; + fins->op2 = (IRRef1)lj_ir_knum_neg(J); + return RETRYFOLD; + } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ + fins->o = IR_ADD; + fins->op2 = fins->op1; + return RETRYFOLD; + } + return NEXTFOLD; +} + +LJFOLD(MUL NEG KNUM) +LJFOLD(DIV NEG KNUM) +LJFOLDF(simplify_nummuldiv_negk) +{ + PHIBARRIER(fleft); + fins->op1 = fleft->op1; /* (-a) o k ==> a o (-k) */ + fins->op2 = (IRRef1)lj_ir_knum(J, -knumright); + return RETRYFOLD; +} + +LJFOLD(MUL NEG NEG) +LJFOLD(DIV NEG NEG) +LJFOLDF(simplify_nummuldiv_negneg) +{ + PHIBARRIER(fleft); + PHIBARRIER(fright); + fins->op1 = fleft->op1; /* (-a) o (-b) ==> a o b */ + fins->op2 = fright->op1; + return RETRYFOLD; +} + +LJFOLD(POWI any KINT) +LJFOLDF(simplify_powi_xk) +{ + int32_t k = fright->i; + TRef ref = fins->op1; + if (k == 0) /* x ^ 0 ==> 1 */ + return lj_ir_knum_one(J); /* Result must be a number, not an int. */ + if (k == 1) /* x ^ 1 ==> x */ + return LEFTFOLD; + if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */ + return NEXTFOLD; + if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. */ + ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref); + k = -k; + } + /* Unroll x^k for 1 <= k <= 65536. */ + for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */ + ref = emitir(IRTN(IR_MUL), ref, ref); + if ((k >>= 1) != 0) { /* Handle trailing bits. */ + TRef tmp = emitir(IRTN(IR_MUL), ref, ref); + for (; k != 1; k >>= 1) { + if (k & 1) + ref = emitir(IRTN(IR_MUL), ref, tmp); + tmp = emitir(IRTN(IR_MUL), tmp, tmp); + } + ref = emitir(IRTN(IR_MUL), ref, tmp); + } + return ref; +} + +LJFOLD(POWI KNUM any) +LJFOLDF(simplify_powi_kx) +{ + lua_Number n = knumleft; + if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */ + fins->o = IR_TONUM; + fins->op1 = fins->op2; + fins->op2 = 0; + fins->op2 = (IRRef1)lj_opt_fold(J); + fins->op1 = (IRRef1)lj_ir_knum_one(J); + fins->o = IR_LDEXP; + return RETRYFOLD; + } + return NEXTFOLD; +} + +/* -- FP conversion narrowing --------------------------------------------- */ + +LJFOLD(TOINT ADD any) +LJFOLD(TOINT SUB any) +LJFOLD(TOBIT ADD KNUM) +LJFOLD(TOBIT SUB KNUM) +LJFOLDF(narrow_convert) +{ + PHIBARRIER(fleft); + /* Narrowing ignores PHIs and repeating it inside the loop is not useful. 
*/ + if (J->chain[IR_LOOP]) + return NEXTFOLD; + return lj_opt_narrow_convert(J); +} + +/* Relaxed CSE rule for TOINT allows commoning with stronger checks, too. */ +LJFOLD(TOINT any any) +LJFOLDF(cse_toint) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { + IRRef ref, op1 = fins->op1; + uint8_t guard = irt_isguard(fins->t); + for (ref = J->chain[IR_TOINT]; ref > op1; ref = IR(ref)->prev) + if (IR(ref)->op1 == op1 && irt_isguard(IR(ref)->t) >= guard) + return ref; + } + return EMITFOLD; /* No fallthrough to regular CSE. */ +} + +/* -- Integer algebraic simplifications ----------------------------------- */ + +LJFOLD(ADD any KINT) +LJFOLD(ADDOV any KINT) +LJFOLD(SUBOV any KINT) +LJFOLDF(simplify_intadd_k) +{ + if (fright->i == 0) /* i o 0 ==> i */ + return LEFTFOLD; + return NEXTFOLD; +} + +LJFOLD(SUB any KINT) +LJFOLDF(simplify_intsub_k) +{ + if (fright->i == 0) /* i - 0 ==> i */ + return LEFTFOLD; + fins->o = IR_ADD; /* i - k ==> i + (-k) */ + fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i); /* Overflow for -2^31 ok. */ + return RETRYFOLD; +} + +LJFOLD(SUB any any) +LJFOLD(SUBOV any any) +LJFOLDF(simplify_intsub) +{ + if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) /* i - i ==> 0 */ + return INTFOLD(0); + return NEXTFOLD; +} + +LJFOLD(SUB ADD any) +LJFOLDF(simplify_intsubadd_leftcancel) +{ + if (!irt_isnum(fins->t)) { + PHIBARRIER(fleft); + if (fins->op2 == fleft->op1) /* (i + j) - i ==> j */ + return fleft->op2; + if (fins->op2 == fleft->op2) /* (i + j) - j ==> i */ + return fleft->op1; + } + return NEXTFOLD; +} + +LJFOLD(SUB SUB any) +LJFOLDF(simplify_intsubsub_leftcancel) +{ + if (!irt_isnum(fins->t)) { + PHIBARRIER(fleft); + if (fins->op1 == fleft->op1) { /* (i - j) - i ==> 0 - j */ + fins->op1 = (IRRef1)lj_ir_kint(J, 0); + fins->op2 = fleft->op2; + return RETRYFOLD; + } + } + return NEXTFOLD; +} + +LJFOLD(SUB any SUB) +LJFOLDF(simplify_intsubsub_rightcancel) +{ + if (!irt_isnum(fins->t)) { + PHIBARRIER(fright); + if (fins->op1 == fright->op1) /* i - (i - j) ==> j */ + return fright->op2; + } + return NEXTFOLD; +} + +LJFOLD(SUB any ADD) +LJFOLDF(simplify_intsubadd_rightcancel) +{ + if (!irt_isnum(fins->t)) { + PHIBARRIER(fright); + if (fins->op1 == fright->op1) { /* i - (i + j) ==> 0 - j */ + fins->op2 = fright->op2; + fins->op1 = (IRRef1)lj_ir_kint(J, 0); + return RETRYFOLD; + } + if (fins->op1 == fright->op2) { /* i - (j + i) ==> 0 - j */ + fins->op2 = fright->op1; + fins->op1 = (IRRef1)lj_ir_kint(J, 0); + return RETRYFOLD; + } + } + return NEXTFOLD; +} + +LJFOLD(SUB ADD ADD) +LJFOLDF(simplify_intsubaddadd_cancel) +{ + if (!irt_isnum(fins->t)) { + PHIBARRIER(fleft); + PHIBARRIER(fright); + if (fleft->op1 == fright->op1) { /* (i + j1) - (i + j2) ==> j1 - j2 */ + fins->op1 = fleft->op2; + fins->op2 = fright->op2; + return RETRYFOLD; + } + if (fleft->op1 == fright->op2) { /* (i + j1) - (j2 + i) ==> j1 - j2 */ + fins->op1 = fleft->op2; + fins->op2 = fright->op1; + return RETRYFOLD; + } + if (fleft->op2 == fright->op1) { /* (j1 + i) - (i + j2) ==> j1 - j2 */ + fins->op1 = fleft->op1; + fins->op2 = fright->op2; + return RETRYFOLD; + } + if (fleft->op2 == fright->op2) { /* (j1 + i) - (j2 + i) ==> j1 - j2 */ + fins->op1 = fleft->op1; + fins->op2 = fright->op1; + return RETRYFOLD; + } + } + return NEXTFOLD; +} + +LJFOLD(BAND any KINT) +LJFOLDF(simplify_band_k) +{ + if (fright->i == 0) /* i & 0 ==> 0 */ + return RIGHTFOLD; + if (fright->i == -1) /* i & -1 ==> i */ + return LEFTFOLD; + return NEXTFOLD; +} + +LJFOLD(BOR any KINT) +LJFOLDF(simplify_bor_k) +{ + if (fright->i == 0) /* i | 0 ==> i 
*/ + return LEFTFOLD; + if (fright->i == -1) /* i | -1 ==> -1 */ + return RIGHTFOLD; + return NEXTFOLD; +} + +LJFOLD(BXOR any KINT) +LJFOLDF(simplify_bxor_k) +{ + if (fright->i == 0) /* i xor 0 ==> i */ + return LEFTFOLD; + if (fright->i == -1) { /* i xor -1 ==> ~i */ + fins->o = IR_BNOT; + fins->op2 = 0; + return RETRYFOLD; + } + return NEXTFOLD; +} + +LJFOLD(BSHL any KINT) +LJFOLD(BSHR any KINT) +LJFOLD(BSAR any KINT) +LJFOLD(BROL any KINT) +LJFOLD(BROR any KINT) +LJFOLDF(simplify_shift_ik) +{ + int32_t k = (fright->i & 31); + if (k == 0) /* i o 0 ==> i */ + return LEFTFOLD; + if (k != fright->i) { /* i o k ==> i o (k & 31) */ + fins->op2 = (IRRef1)lj_ir_kint(J, k); + return RETRYFOLD; + } + if (fins->o == IR_BROR) { /* bror(i, k) ==> brol(i, (-k)&31) */ + fins->o = IR_BROL; + fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&31); + return RETRYFOLD; + } + return NEXTFOLD; +} + +LJFOLD(BSHL any BAND) +LJFOLD(BSHR any BAND) +LJFOLD(BSAR any BAND) +LJFOLD(BROL any BAND) +LJFOLD(BROR any BAND) +LJFOLDF(simplify_shift_andk) +{ +#if LJ_TARGET_MASKEDSHIFT + IRIns *irk = IR(fright->op2); + PHIBARRIER(fright); + if (irk->o == IR_KINT) { /* i o (j & 31) ==> i o j */ + int32_t k = irk->i & 31; + if (k == 31) { + fins->op2 = fright->op1; + return RETRYFOLD; + } + } +#endif + return NEXTFOLD; +} + +LJFOLD(BSHL KINT any) +LJFOLD(BSHR KINT any) +LJFOLDF(simplify_shift1_ki) +{ + if (fleft->i == 0) /* 0 o i ==> 0 */ + return LEFTFOLD; + return NEXTFOLD; +} + +LJFOLD(BSAR KINT any) +LJFOLD(BROL KINT any) +LJFOLD(BROR KINT any) +LJFOLDF(simplify_shift2_ki) +{ + if (fleft->i == 0 || fleft->i == -1) /* 0 o i ==> 0; -1 o i ==> -1 */ + return LEFTFOLD; + return NEXTFOLD; +} + +/* -- Reassociation ------------------------------------------------------- */ + +LJFOLD(ADD ADD KINT) +LJFOLD(BAND BAND KINT) +LJFOLD(BOR BOR KINT) +LJFOLD(BXOR BXOR KINT) +LJFOLDF(reassoc_intarith_k) +{ + IRIns *irk = IR(fleft->op2); + if (irk->o == IR_KINT) { + int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o); + if (k == irk->i) /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */ + return LEFTFOLD; + PHIBARRIER(fleft); + fins->op1 = fleft->op1; + fins->op2 = (IRRef1)lj_ir_kint(J, k); + return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */ + } + return NEXTFOLD; +} + +LJFOLD(MIN MIN any) +LJFOLD(MAX MAX any) +LJFOLD(BAND BAND any) +LJFOLD(BOR BOR any) +LJFOLDF(reassoc_dup) +{ + PHIBARRIER(fleft); + if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2) + return LEFTFOLD; /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */ + return NEXTFOLD; +} + +LJFOLD(BXOR BXOR any) +LJFOLDF(reassoc_bxor) +{ + PHIBARRIER(fleft); + if (fins->op2 == fleft->op1) /* (a xor b) xor a ==> b */ + return fleft->op2; + if (fins->op2 == fleft->op2) /* (a xor b) xor b ==> a */ + return fleft->op1; + return NEXTFOLD; +} + +LJFOLD(BSHL BSHL KINT) +LJFOLD(BSHR BSHR KINT) +LJFOLD(BSAR BSAR KINT) +LJFOLD(BROL BROL KINT) +LJFOLD(BROR BROR KINT) +LJFOLDF(reassoc_shift) +{ + IRIns *irk = IR(fleft->op2); + PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */ + if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */ + int32_t k = (irk->i & 31) + (fright->i & 31); + if (k > 31) { /* Combined shift too wide? 
*/ + if (fins->o == IR_BSHL || fins->o == IR_BSHR) + return INTFOLD(0); + else if (fins->o == IR_BSAR) + k = 31; + else + k &= 31; + } + fins->op1 = fleft->op1; + fins->op2 = (IRRef1)lj_ir_kint(J, k); + return RETRYFOLD; + } + return NEXTFOLD; +} + +LJFOLD(MIN MIN KNUM) +LJFOLD(MAX MAX KNUM) +LJFOLDF(reassoc_minmax_k) +{ + IRIns *irk = IR(fleft->op2); + if (irk->o == IR_KNUM) { + lua_Number a = ir_knum(irk)->n; + lua_Number b = knumright; + lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD); + if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ + return LEFTFOLD; + PHIBARRIER(fleft); + fins->op1 = fleft->op1; + fins->op2 = (IRRef1)lj_ir_knum(J, y); + return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */ + } + return NEXTFOLD; +} + +LJFOLD(MIN MAX any) +LJFOLD(MAX MIN any) +LJFOLDF(reassoc_minmax_left) +{ + if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2) + return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */ + return NEXTFOLD; +} + +LJFOLD(MIN any MAX) +LJFOLD(MAX any MIN) +LJFOLDF(reassoc_minmax_right) +{ + if (fins->op1 == fright->op1 || fins->op1 == fright->op2) + return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */ + return NEXTFOLD; +} + +/* Eliminate ABC across PHIs to handle t[i-1] forwarding case. +** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists. +** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig. +*/ +LJFOLD(ABC any ADD) +LJFOLDF(reassoc_abc) +{ + if (irref_isk(fright->op2)) { + IRIns *add2 = IR(fright->op1); + if (add2->o == IR_ADD && irref_isk(add2->op2) && + IR(fright->op2)->i == -IR(add2->op2)->i) { + IRRef ref = J->chain[IR_ABC]; + IRRef lim = add2->op1; + if (fins->op1 > lim) lim = fins->op1; + while (ref > lim) { + IRIns *ir = IR(ref); + if (ir->op1 == fins->op1 && ir->op2 == add2->op1) + return DROPFOLD; + ref = ir->prev; + } + } + } + return NEXTFOLD; +} + +/* -- Commutativity ------------------------------------------------------- */ + +/* The refs of commutative ops are canonicalized. Lower refs go to the right. +** Rationale behind this: +** - It (also) moves constants to the right. +** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices). +** - It helps CSE to find more matches. +** - The assembler generates better code with constants at the right. +*/ + +LJFOLD(ADD any any) +LJFOLD(MUL any any) +LJFOLD(ADDOV any any) +LJFOLDF(comm_swap) +{ + if (fins->op1 < fins->op2) { /* Move lower ref to the right. */ + IRRef1 tmp = fins->op1; + fins->op1 = fins->op2; + fins->op2 = tmp; + return RETRYFOLD; + } + return NEXTFOLD; +} + +LJFOLD(EQ any any) +LJFOLD(NE any any) +LJFOLDF(comm_equal) +{ + /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */ + if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) + return CONDFOLD(fins->o == IR_EQ); + return comm_swap(J); +} + +LJFOLD(LT any any) +LJFOLD(GE any any) +LJFOLD(LE any any) +LJFOLD(GT any any) +LJFOLD(ULT any any) +LJFOLD(UGE any any) +LJFOLD(ULE any any) +LJFOLD(UGT any any) +LJFOLDF(comm_comp) +{ + /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */ + if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) + return CONDFOLD(fins->o & 1); + if (fins->op1 < fins->op2) { /* Move lower ref to the right. 
*/ + IRRef1 tmp = fins->op1; + fins->op1 = fins->op2; + fins->op2 = tmp; + fins->o ^= 3; /* GT <-> LT, GE <-> LE, does not affect U */ + return RETRYFOLD; + } + return NEXTFOLD; +} + +LJFOLD(BAND any any) +LJFOLD(BOR any any) +LJFOLD(MIN any any) +LJFOLD(MAX any any) +LJFOLDF(comm_dup) +{ + if (fins->op1 == fins->op2) /* x o x ==> x */ + return LEFTFOLD; + return comm_swap(J); +} + +LJFOLD(BXOR any any) +LJFOLDF(comm_bxor) +{ + if (fins->op1 == fins->op2) /* i xor i ==> 0 */ + return INTFOLD(0); + return comm_swap(J); +} + +/* -- Simplification of compound expressions ------------------------------ */ + +static int32_t kfold_xload(IRIns *ir, const void *p) +{ +#if !LJ_TARGET_X86ORX64 +#error "Missing support for unaligned loads" +#endif + switch (irt_type(ir->t)) { + case IRT_I8: return (int32_t)*(int8_t *)p; + case IRT_U8: return (int32_t)*(uint8_t *)p; + case IRT_I16: return (int32_t)*(int16_t *)p; + case IRT_U16: return (int32_t)*(uint16_t *)p; + default: lua_assert(irt_isint(ir->t)); return (int32_t)*(int32_t *)p; + } +} + +/* Turn: string.sub(str, a, b) == kstr +** into: string.byte(str, a) == string.byte(kstr, 1) etc. +** Note: this creates unaligned XLOADs! +*/ +LJFOLD(EQ SNEW KGC) +LJFOLD(NE SNEW KGC) +LJFOLDF(merge_eqne_snew_kgc) +{ + GCstr *kstr = ir_kstr(fright); + int32_t len = (int32_t)kstr->len; + lua_assert(irt_isstr(fins->t)); + if (len <= 4) { /* Handle string lengths 0, 1, 2, 3, 4. */ + IROp op = (IROp)fins->o; + IRRef strref = fleft->op1; + lua_assert(IR(strref)->o == IR_STRREF); + if (op == IR_EQ) { + emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len)); + /* Caveat: fins/fleft/fright is no longer valid after emitir. */ + } else { + /* NE is not expanded since this would need an OR of two conds. */ + if (!irref_isk(fleft->op2)) /* Only handle the constant length case. */ + return NEXTFOLD; + if (IR(fleft->op2)->i != len) + return DROPFOLD; + } + if (len > 0) { + /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */ + uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) : + len == 2 ? IRT(IR_XLOAD, IRT_U16) : + IRTI(IR_XLOAD)); + TRef tmp = emitir(ot, strref, len > 1 ? IRXLOAD_UNALIGNED : 0); + TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr))); + if (len == 3) + tmp = emitir(IRTI(IR_BAND), tmp, + lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00))); + fins->op1 = (IRRef1)tmp; + fins->op2 = (IRRef1)val; + fins->ot = (IROpT)IRTGI(op); + return RETRYFOLD; + } else { + return DROPFOLD; + } + } + return NEXTFOLD; +} + +/* -- Loads --------------------------------------------------------------- */ + +/* Loads cannot be folded or passed on to CSE in general. +** Alias analysis is needed to check for forwarding opportunities. +** +** Caveat: *all* loads must be listed here or they end up at CSE! +*/ + +LJFOLD(ALOAD any) +LJFOLDX(lj_opt_fwd_aload) + +LJFOLD(HLOAD any) +LJFOLDX(lj_opt_fwd_hload) + +LJFOLD(ULOAD any) +LJFOLDX(lj_opt_fwd_uload) + +LJFOLD(TLEN any) +LJFOLDX(lj_opt_fwd_tlen) + +/* Upvalue refs are really loads, but there are no corresponding stores. +** So CSE is ok for them, except for UREFO across a GC step (see below). +** If the referenced function is const, its upvalue addresses are const, too. +** This can be used to improve CSE by looking for the same address, +** even if the upvalues originate from a different function. 
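+**
+** A small Lua sketch of the latter case (illustrative only):
+**
+**   local n = 0
+**   local function get() return n end
+**   local function inc() n = n + 1 end
+**
+** get and inc are distinct function objects, but both close over the
+** same upvalue n, so their upvalue refs resolve to the same GCupval
+** and can be commoned by the rule below.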
+*/ +LJFOLD(UREFO KGC any) +LJFOLD(UREFC KGC any) +LJFOLDF(cse_uref) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { + IRRef ref = J->chain[fins->o]; + GCfunc *fn = ir_kfunc(fleft); + GCupval *uv = gco2uv(gcref(fn->l.uvptr[fins->op2])); + while (ref > 0) { + IRIns *ir = IR(ref); + if (irref_isk(ir->op1)) { + GCfunc *fn2 = ir_kfunc(IR(ir->op1)); + if (gco2uv(gcref(fn2->l.uvptr[ir->op2])) == uv) { + if (fins->o == IR_UREFO && gcstep_barrier(J, ref)) + break; + return ref; + } + } + ref = ir->prev; + } + } + return EMITFOLD; +} + +/* We can safely FOLD/CSE array/hash refs and field loads, since there +** are no corresponding stores. But NEWREF may invalidate all of them. +** Lacking better disambiguation for table references, these optimizations +** are simply disabled across any NEWREF. +** Only HREF needs the NEWREF check -- AREF and HREFK already depend on +** FLOADs. And NEWREF itself is treated like a store (see below). +*/ +LJFOLD(HREF any any) +LJFOLDF(cse_href) +{ + TRef tr = lj_opt_cse(J); + return tref_ref(tr) < J->chain[IR_NEWREF] ? EMITFOLD : tr; +} + +LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) +LJFOLDF(fload_tab_tnew_asize) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF]) + return INTFOLD(fleft->op1); + return NEXTFOLD; +} + +LJFOLD(FLOAD TNEW IRFL_TAB_HMASK) +LJFOLDF(fload_tab_tnew_hmask) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF]) + return INTFOLD((1 << fleft->op2)-1); + return NEXTFOLD; +} + +LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE) +LJFOLDF(fload_tab_tdup_asize) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF]) + return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize); + return NEXTFOLD; +} + +LJFOLD(FLOAD TDUP IRFL_TAB_HMASK) +LJFOLDF(fload_tab_tdup_hmask) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF]) + return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask); + return NEXTFOLD; +} + +LJFOLD(FLOAD any IRFL_TAB_ARRAY) +LJFOLD(FLOAD any IRFL_TAB_NODE) +LJFOLD(FLOAD any IRFL_TAB_ASIZE) +LJFOLD(FLOAD any IRFL_TAB_HMASK) +LJFOLDF(fload_tab_ah) +{ + TRef tr = lj_opt_cse(J); + return tref_ref(tr) < J->chain[IR_NEWREF] ? EMITFOLD : tr; +} + +/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */ +LJFOLD(FLOAD KGC IRFL_STR_LEN) +LJFOLDF(fload_str_len) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) + return INTFOLD((int32_t)ir_kstr(fleft)->len); + return NEXTFOLD; +} + +LJFOLD(FLOAD any IRFL_STR_LEN) +LJFOLDX(lj_opt_cse) + +/* All other field loads need alias analysis. */ +LJFOLD(FLOAD any any) +LJFOLDX(lj_opt_fwd_fload) + +/* This is for LOOP only. Recording handles SLOADs internally. */ +LJFOLD(SLOAD any any) +LJFOLDF(fwd_sload) +{ + lua_assert(J->slot[fins->op1] != 0); + return J->slot[fins->op1]; +} + +/* Strings are immutable, so we can safely FOLD/CSE an XLOAD of a string. */ +LJFOLD(XLOAD STRREF any) +LJFOLDF(xload_str) +{ + if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { + GCstr *str = ir_kstr(IR(fleft->op1)); + int32_t ofs = IR(fleft->op2)->i; + lua_assert((MSize)ofs < str->len); + lua_assert((MSize)(ofs + (1<<((fins->op2>>8)&3))) <= str->len); + return INTFOLD(kfold_xload(fins, strdata(str)+ofs)); + } + return CSEFOLD; +} +/* No XLOAD of non-strings (yet), so we don't need a (XLOAD any any) rule. */ + +/* -- Write barriers ------------------------------------------------------ */ + +/* Write barriers are amenable to CSE, but not across any incremental +** GC steps. 
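+** (E.g. the object may be marked black again by an intervening GC
+** step, so an earlier barrier would no longer cover later stores.)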
+** +** The same logic applies to open upvalue references, because the stack +** may be resized during a GC step. +*/ +LJFOLD(TBAR any) +LJFOLD(OBAR any any) +LJFOLD(UREFO any any) +LJFOLDF(barrier_tab) +{ + TRef tr = lj_opt_cse(J); + if (gcstep_barrier(J, tref_ref(tr))) /* CSE across GC step? */ + return EMITFOLD; /* Raw emit. Assumes fins is left intact by CSE. */ + return tr; +} + +LJFOLD(TBAR TNEW) +LJFOLD(TBAR TDUP) +LJFOLDF(barrier_tnew_tdup) +{ + /* New tables are always white and never need a barrier. */ + if (fins->op1 < J->chain[IR_LOOP]) /* Except across a GC step. */ + return NEXTFOLD; + return DROPFOLD; +} + +/* -- Stores and allocations ---------------------------------------------- */ + +/* Stores and allocations cannot be folded or passed on to CSE in general. +** But some stores can be eliminated with dead-store elimination (DSE). +** +** Caveat: *all* stores and allocs must be listed here or they end up at CSE! +*/ + +LJFOLD(ASTORE any any) +LJFOLD(HSTORE any any) +LJFOLDX(lj_opt_dse_ahstore) + +LJFOLD(USTORE any any) +LJFOLDX(lj_opt_dse_ustore) + +LJFOLD(FSTORE any any) +LJFOLDX(lj_opt_dse_fstore) + +LJFOLD(NEWREF any any) /* Treated like a store. */ +LJFOLD(TNEW any any) +LJFOLD(TDUP any) +LJFOLDF(store_raw) +{ + return EMITFOLD; +} + +/* ------------------------------------------------------------------------ */ + +/* Every entry in the generated hash table is a 32 bit pattern: +** +** xxxxxxxx iiiiiiii llllllll rrrrrrrr +** +** xxxxxxxx = 8 bit index into fold function table +** iiiiiiii = 8 bit folded instruction opcode +** llllllll = 8 bit left instruction opcode +** rrrrrrrr = 8 bit right instruction opcode or 8 bits from literal field +*/ + +#include "lj_folddef.h" + +/* ------------------------------------------------------------------------ */ + +/* Fold IR instruction. */ +TRef LJ_FASTCALL lj_opt_fold(jit_State *J) +{ + uint32_t key, any; + IRRef ref; + + if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { + lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | + JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT); + /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ + if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) + return lj_opt_cse(J); + + /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */ + if ((J->flags & (JIT_F_OPT_FWD|JIT_F_OPT_CSE)) != + (JIT_F_OPT_FWD|JIT_F_OPT_CSE) && + irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD) + return lj_ir_emit(J); + + /* DSE disabled? Emit raw IR for stores. */ + if (!(J->flags & JIT_F_OPT_DSE) && irm_kind(lj_ir_mode[fins->o]) == IRM_S) + return lj_ir_emit(J); + } + + /* Fold engine start/retry point. */ +retry: + /* Construct key from opcode and operand opcodes (unless literal/none). */ + key = ((uint32_t)fins->o << 16); + if (fins->op1 >= J->cur.nk) { + key += (uint32_t)IR(fins->op1)->o << 8; + *fleft = *IR(fins->op1); + } + if (fins->op2 >= J->cur.nk) { + key += (uint32_t)IR(fins->op2)->o; + *fright = *IR(fins->op2); + } else { + key += (fins->op2 & 0xffu); /* For IRFPM_* and IRFL_*. */ + } + + /* Check for a match in order from most specific to least specific. */ + any = 0; + for (;;) { + uint32_t k = key | any; + uint32_t h = fold_hashkey(k); + uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. 
 */
+    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
+      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
+      if (ref != NEXTFOLD)
+        break;
+    }
+    if (any == 0xffff)  /* Exhausted folding. Pass on to CSE. */
+      return lj_opt_cse(J);
+    any = (any | (any >> 8)) ^ 0xff00;
+  }
+
+  /* Return value processing, ordered by frequency. */
+  if (LJ_LIKELY(ref >= MAX_FOLD))
+    return TREF(ref, irt_t(IR(ref)->t));
+  if (ref == RETRYFOLD)
+    goto retry;
+  if (ref == KINTFOLD)
+    return lj_ir_kint(J, fins->i);
+  if (ref == FAILFOLD)
+    lj_trace_err(J, LJ_TRERR_GFAIL);
+  lua_assert(ref == DROPFOLD);
+  return REF_DROP;
+}
+
+/* -- Common-Subexpression Elimination ------------------------------------ */
+
+/* CSE an IR instruction. This is very fast due to the skip-list chains. */
+TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
+{
+  /* Avoid narrow to wide store-to-load forwarding stall */
+  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
+  IROp op = fins->o;
+  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
+    /* Limited search for same operands in per-opcode chain. */
+    IRRef ref = J->chain[op];
+    IRRef lim = fins->op1;
+    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
+    while (ref > lim) {
+      if (IR(ref)->op12 == op12)
+        return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
+      ref = IR(ref)->prev;
+    }
+  }
+  /* Otherwise emit IR (inlined for speed). */
+  {
+    IRRef ref = lj_ir_nextins(J);
+    IRIns *ir = IR(ref);
+    ir->prev = J->chain[op];
+    ir->op12 = op12;
+    J->chain[op] = (IRRef1)ref;
+    ir->o = fins->o;
+    J->guardemit.irt |= fins->t.irt;
+    return TREF(ref, irt_t((ir->t = fins->t)));
+  }
+}
+
+/* ------------------------------------------------------------------------ */
+
+#undef IR
+#undef fins
+#undef fleft
+#undef fright
+#undef knumleft
+#undef knumright
+#undef emitir
+
+#endif
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
new file mode 100644
index 0000000000..adc0c4763b
--- /dev/null
+++ b/src/lj_opt_loop.c
@@ -0,0 +1,358 @@
+/*
+** LOOP: Loop Optimizations.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_opt_loop_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASJIT
+
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_ir.h"
+#include "lj_jit.h"
+#include "lj_iropt.h"
+#include "lj_trace.h"
+#include "lj_snap.h"
+#include "lj_vm.h"
+
+/* Loop optimization:
+**
+** Traditional Loop-Invariant Code Motion (LICM) splits the instructions
+** of a loop into invariant and variant instructions. The invariant
+** instructions are hoisted out of the loop and only the variant
+** instructions remain inside the loop body.
+**
+** Unfortunately LICM is mostly useless for compiling dynamic languages.
+** The IR has many guards and most of the subsequent instructions are
+** control-dependent on them. The first non-hoistable guard would
+** effectively prevent hoisting of all subsequent instructions.
+**
+** That's why we use a special form of unrolling using copy-substitution,
+** combined with redundancy elimination:
+**
+** The recorded instruction stream is re-emitted to the compiler pipeline
+** with substituted operands. The substitution table is filled with the
+** refs returned by re-emitting each instruction. This can be done
+** on-the-fly, because the IR is in strict SSA form, where every ref is
+** defined before its use.
+**
+** This approach generates two code sections, separated by the LOOP
+** instruction:
+**
+** 1.
The recorded instructions form a kind of pre-roll for the loop. It
+**    contains a mix of invariant and variant instructions and performs
+**    exactly one loop iteration (but not necessarily the 1st iteration).
+**
+** 2. The loop body contains only the variant instructions and performs
+**    all remaining loop iterations.
+**
+** At first sight that looks like a waste of space, because the variant
+** instructions are present twice. But the key insight is that the
+** pre-roll honors the control-dependencies for *both* the pre-roll itself
+** *and* the loop body!
+**
+** It also means one doesn't have to explicitly model control-dependencies
+** (which, BTW, wouldn't help LICM much). And it's much easier to
+** integrate sparse snapshotting with this approach.
+**
+** One of the nicest aspects of this approach is that all of the
+** optimizations of the compiler pipeline (FOLD, CSE, FWD, etc.) can be
+** reused with only minor restrictions (e.g. one should not fold
+** instructions across loop-carried dependencies).
+**
+** But in general all optimizations can be applied which only need to look
+** backwards into the generated instruction stream. At any point in time
+** during the copy-substitution process this contains both a static loop
+** iteration (the pre-roll) and a dynamic one (from the to-be-copied
+** instruction up to the end of the partial loop body).
+**
+** Since control-dependencies are implicitly kept, CSE also applies to all
+** kinds of guards. The major advantage is that all invariant guards can
+** be hoisted, too.
+**
+** Load/store forwarding works across loop iterations, too. This is
+** important if loop-carried dependencies are kept in upvalues or tables.
+** E.g. 'self.idx = self.idx + 1' deep down in some OO-style method may
+** become a forwarded loop-recurrence after inlining.
+**
+** Since the IR is in SSA form, loop-carried dependencies have to be
+** modeled with PHI instructions. The potential candidates for PHIs are
+** collected on-the-fly during copy-substitution. After eliminating the
+** redundant ones, PHI instructions are emitted *below* the loop body.
+**
+** Note that this departure from traditional SSA form doesn't change the
+** semantics of the PHI instructions themselves. But it greatly simplifies
+** on-the-fly generation of the IR and the machine code.
+*/
+
+/* Some local macros to save typing. Undef'd at the end. */
+#define IR(ref)     (&J->cur.ir[(ref)])
+
+/* Pass IR on to next optimization in chain (FOLD). */
+#define emitir(ot, a, b)    (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
+
+/* Emit raw IR without passing through optimizations. */
+#define emitir_raw(ot, a, b)    (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
+
+/* -- PHI elimination ----------------------------------------------------- */
+
+/* Emit or eliminate collected PHIs. */
+static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi)
+{
+  int pass2 = 0;
+  IRRef i, nslots;
+  IRRef invar = J->chain[IR_LOOP];
+  /* Pass #1: mark redundant and potentially redundant PHIs. */
+  for (i = 0; i < nphi; i++) {
+    IRRef lref = phi[i];
+    IRRef rref = subst[lref];
+    if (lref == rref || rref == REF_DROP) {  /* Invariants are redundant. */
+      irt_setmark(IR(lref)->t);
+    } else if (!(IR(rref)->op1 == lref || IR(rref)->op2 == lref)) {
+      /* Quick check for simple recurrences failed, need pass2. */
+      irt_setmark(IR(lref)->t);
+      pass2 = 1;
+    }
+  }
+  /* Pass #2: traverse variant part and clear marks of non-redundant PHIs.
 */
+  if (pass2) {
+    for (i = J->cur.nins-1; i > invar; i--) {
+      IRIns *ir = IR(i);
+      if (!irref_isk(ir->op1)) irt_clearmark(IR(ir->op1)->t);
+      if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t);
+    }
+  }
+  /* Pass #3: add PHIs for variant slots without a corresponding SLOAD. */
+  nslots = J->baseslot+J->maxslot;
+  for (i = 1; i < nslots; i++) {
+    IRRef ref = tref_ref(J->slot[i]);
+    if (!irref_isk(ref) && ref != subst[ref]) {
+      IRIns *ir = IR(ref);
+      irt_clearmark(ir->t);  /* Unmark potential uses, too. */
+      if (!irt_isphi(ir->t) && !irt_ispri(ir->t)) {
+        irt_setphi(ir->t);
+        if (nphi >= LJ_MAX_PHI)
+          lj_trace_err(J, LJ_TRERR_PHIOV);
+        phi[nphi++] = (IRRef1)ref;
+      }
+    }
+  }
+  /* Pass #4: emit PHI instructions or eliminate PHIs. */
+  for (i = 0; i < nphi; i++) {
+    IRRef lref = phi[i];
+    IRIns *ir = IR(lref);
+    if (!irt_ismarked(ir->t)) {  /* Emit PHI if not marked. */
+      IRRef rref = subst[lref];
+      if (rref > invar)
+        irt_setphi(IR(rref)->t);
+      emitir_raw(IRT(IR_PHI, irt_type(ir->t)), lref, rref);
+    } else {  /* Otherwise eliminate PHI. */
+      irt_clearmark(ir->t);
+      irt_clearphi(ir->t);
+    }
+  }
+}
+
+/* -- Loop unrolling using copy-substitution ------------------------------ */
+
+/* Unroll loop. */
+static void loop_unroll(jit_State *J)
+{
+  IRRef1 phi[LJ_MAX_PHI];
+  uint32_t nphi = 0;
+  IRRef1 *subst;
+  SnapShot *osnap, *snap;
+  IRRef2 *loopmap;
+  BCReg loopslots;
+  MSize nsnap, nsnapmap;
+  IRRef ins, invar, osnapref;
+
+  /* Use temp buffer for substitution table.
+  ** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
+  ** Note: don't call into the VM or run the GC or the buffer may be gone.
+  */
+  invar = J->cur.nins;
+  subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf,
+                                   (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS;
+  subst[REF_BASE] = REF_BASE;
+
+  /* LOOP separates the pre-roll from the loop body. */
+  emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0);
+
+  /* Ensure size for copy-substituted snapshots (minus #0 and loop snapshot). */
+  nsnap = J->cur.nsnap;
+  if (LJ_UNLIKELY(2*nsnap-2 > J->sizesnap)) {
+    MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
+    if (2*nsnap-2 > maxsnap)
+      lj_trace_err(J, LJ_TRERR_SNAPOV);
+    lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
+    J->cur.snap = J->snapbuf;
+  }
+  nsnapmap = J->cur.nsnapmap;  /* Use temp. copy to avoid undo. */
+  if (LJ_UNLIKELY(nsnapmap*2 > J->sizesnapmap)) {
+    J->snapmapbuf = (IRRef2 *)lj_mem_realloc(J->L, J->snapmapbuf,
+                                             J->sizesnapmap*sizeof(IRRef2),
+                                             2*J->sizesnapmap*sizeof(IRRef2));
+    J->cur.snapmap = J->snapmapbuf;
+    J->sizesnapmap *= 2;
+  }
+
+  /* The loop snapshot is used for fallback substitutions. */
+  snap = &J->cur.snap[nsnap-1];
+  loopmap = &J->cur.snapmap[snap->mapofs];
+  loopslots = snap->nslots;
+  /* The PC of snapshot #0 and the loop snapshot must match. */
+  lua_assert(loopmap[loopslots] == J->cur.snapmap[J->cur.snap[0].nslots]);
+
+  /* Start substitution with snapshot #1 (#0 is empty for root traces). */
+  osnap = &J->cur.snap[1];
+  osnapref = osnap->ref;
+
+  /* Copy and substitute all recorded instructions and snapshots. */
+  for (ins = REF_FIRST; ins < invar; ins++) {
+    IRIns *ir;
+    IRRef op1, op2;
+
+    /* Copy-substitute snapshot. */
+    if (ins >= osnapref) {
+      IRRef2 *nmap, *omap = &J->cur.snapmap[osnap->mapofs];
+      BCReg s, nslots;
+      uint32_t nmapofs, nframelinks;
+      if (irt_isguard(J->guardemit)) {  /* Guard in between? */
+        nmapofs = nsnapmap;
+        snap++;  /* Add new snapshot. */
+      } else {
+        nmapofs = snap->mapofs;  /* Overwrite previous snapshot.
 */
+      }
+      J->guardemit.irt = 0;
+      nslots = osnap->nslots;
+      nframelinks = osnap->nframelinks;
+      snap->mapofs = (uint16_t)nmapofs;
+      snap->ref = (IRRef1)J->cur.nins;
+      snap->nslots = (uint8_t)nslots;
+      snap->nframelinks = (uint8_t)nframelinks;
+      snap->count = 0;
+      osnap++;
+      osnapref = osnap->ref;
+      nsnapmap = nmapofs + nslots + nframelinks;
+      nmap = &J->cur.snapmap[nmapofs];
+      /* Substitute snapshot slots. */
+      for (s = 0; s < nslots; s++) {
+        IRRef ref = snap_ref(omap[s]);
+        if (ref) {
+          if (!irref_isk(ref))
+            ref = subst[ref];
+        } else if (s < loopslots) {
+          ref = loopmap[s];
+        }
+        nmap[s] = ref;
+      }
+      /* Copy frame links. */
+      nmap += nslots;
+      omap += nslots;
+      for (s = 0; s < nframelinks; s++)
+        nmap[s] = omap[s];
+    }
+
+    /* Substitute instruction operands. */
+    ir = IR(ins);
+    op1 = ir->op1;
+    if (!irref_isk(op1)) op1 = subst[op1];
+    op2 = ir->op2;
+    if (!irref_isk(op2)) op2 = subst[op2];
+    if (irm_kind(lj_ir_mode[ir->o]) == IRM_N &&
+        op1 == ir->op1 && op2 == ir->op2) {  /* Regular invariant ins? */
+      subst[ins] = (IRRef1)ins;  /* Shortcut. */
+    } else {
+      /* Re-emit substituted instruction to the FOLD/CSE/etc. pipeline. */
+      IRType1 t = ir->t;  /* Get this first, since emitir may invalidate ir. */
+      IRRef ref = tref_ref(emitir(ir->ot & ~IRT_ISPHI, op1, op2));
+      subst[ins] = (IRRef1)ref;
+      if (ref != ins && ref < invar) {  /* Loop-carried dependency? */
+        IRIns *irr = IR(ref);
+        /* Potential PHI? */
+        if (!irref_isk(ref) && !irt_isphi(irr->t) && !irt_ispri(irr->t)) {
+          irt_setphi(irr->t);
+          if (nphi >= LJ_MAX_PHI)
+            lj_trace_err(J, LJ_TRERR_PHIOV);
+          phi[nphi++] = (IRRef1)ref;
+        }
+        /* Check all loop-carried dependencies for type instability. */
+        if (!irt_sametype(t, irr->t)) {
+          if (irt_isnum(t) && irt_isinteger(irr->t))  /* Fix int->num case. */
+            subst[ins] = tref_ref(emitir(IRTN(IR_TONUM), ref, 0));
+          else
+            lj_trace_err(J, LJ_TRERR_TYPEINS);
+        }
+      }
+    }
+  }
+  if (irt_isguard(J->guardemit)) {  /* Guard in between? */
+    J->cur.nsnapmap = (uint16_t)nsnapmap;
+    snap++;
+  } else {
+    J->cur.nsnapmap = (uint16_t)snap->mapofs;  /* Last snapshot is redundant. */
+  }
+  J->cur.nsnap = (uint16_t)(snap - J->cur.snap);
+  lua_assert(J->cur.nsnapmap <= J->sizesnapmap);
+
+  loop_emit_phi(J, subst, phi, nphi);
+}
+
+/* Undo any partial changes made by the loop optimization. */
+static void loop_undo(jit_State *J, IRRef ins)
+{
+  lj_ir_rollback(J, ins);
+  for (ins--; ins >= REF_FIRST; ins--) {  /* Remove flags. */
+    IRIns *ir = IR(ins);
+    irt_clearphi(ir->t);
+    irt_clearmark(ir->t);
+  }
+}
+
+/* Protected callback for loop optimization. */
+static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud)
+{
+  UNUSED(L); UNUSED(dummy);
+  loop_unroll((jit_State *)ud);
+  return NULL;
+}
+
+/* Loop optimization. */
+int lj_opt_loop(jit_State *J)
+{
+  IRRef nins = J->cur.nins;
+  int errcode = lj_vm_cpcall(J->L, cploop_opt, NULL, J);
+  if (LJ_UNLIKELY(errcode)) {
+    lua_State *L = J->L;
+    if (errcode == LUA_ERRRUN && tvisnum(L->top-1)) {  /* Trace error? */
+      int32_t e = lj_num2int(numV(L->top-1));
+      switch ((TraceError)e) {
+      case LJ_TRERR_TYPEINS:  /* Type instability. */
+      case LJ_TRERR_GFAIL:  /* Guard would always fail. */
+        /* Unrolling via recording fixes many cases, e.g. a flipped boolean. */
+        if (--J->instunroll < 0)  /* But do not unroll forever. */
+          break;
+        L->top--;  /* Remove error object. */
+        J->guardemit.irt = 0;
+        loop_undo(J, nins);
+        return 1;  /* Loop optimization failed, continue recording. */
+      default:
+        break;
+      }
+    }
+    lj_err_throw(L, errcode);  /* Propagate all other errors.
*/ + } + return 0; /* Loop optimization is ok. */ +} + +#undef IR +#undef emitir +#undef emitir_raw + +#endif diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c new file mode 100644 index 0000000000..77a9c0e72b --- /dev/null +++ b/src/lj_opt_mem.c @@ -0,0 +1,550 @@ +/* +** Memory access optimizations. +** AA: Alias Analysis using high-level semantic disambiguation. +** FWD: Load Forwarding (L2L) + Store Forwarding (S2L). +** DSE: Dead-Store Elimination. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_opt_mem_c +#define LUA_CORE + +#include "lj_obj.h" + +#if LJ_HASJIT + +#include "lj_tab.h" +#include "lj_ir.h" +#include "lj_jit.h" +#include "lj_iropt.h" + +/* Some local macros to save typing. Undef'd at the end. */ +#define IR(ref) (&J->cur.ir[(ref)]) +#define fins (&J->fold.ins) + +/* +** Caveat #1: return value is not always a TRef -- only use with tref_ref(). +** Caveat #2: FWD relies on active CSE for xREF operands -- see lj_opt_fold(). +*/ + +/* Return values from alias analysis. */ +typedef enum { + ALIAS_NO, /* The two refs CANNOT alias (exact). */ + ALIAS_MAY, /* The two refs MAY alias (inexact). */ + ALIAS_MUST /* The two refs MUST alias (exact). */ +} AliasRet; + +/* -- ALOAD/HLOAD forwarding and ASTORE/HSTORE elimination ---------------- */ + +/* Alias analysis for array and hash access using key-based disambiguation. */ +static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) +{ + IRRef ka = refa->op2; + IRRef kb = refb->op2; + IRIns *keya, *keyb; + if (refa == refb) + return ALIAS_MUST; /* Shortcut for same refs. */ + keya = IR(ka); + if (keya->o == IR_KSLOT) { ka = keya->op1; keya = IR(ka); } + keyb = IR(kb); + if (keyb->o == IR_KSLOT) { kb = keyb->op1; keyb = IR(kb); } + if (ka == kb) { + /* Same key. Check for same table with different ref (NEWREF vs. HREF). */ + IRIns *ta = refa; + IRIns *tb = refb; + if (ta->o == IR_HREFK || ta->o == IR_AREF) ta = IR(ta->op1); + if (tb->o == IR_HREFK || tb->o == IR_AREF) tb = IR(tb->op1); + if (ta->op1 == tb->op1) + return ALIAS_MUST; /* Same key, same table. */ + else + return ALIAS_MAY; /* Same key, possibly different table. */ + } + if (irref_isk(ka) && irref_isk(kb)) + return ALIAS_NO; /* Different constant keys. */ + if (refa->o == IR_AREF) { + /* Disambiguate array references based on index arithmetic. */ + lua_assert(refb->o == IR_AREF); + if (refa->op1 == refb->op1) { + /* Same table, different non-const array keys. */ + int32_t ofsa = 0, ofsb = 0; + IRRef basea = ka, baseb = kb; + /* Gather base and offset from t[base] or t[base+-ofs]. */ + if (keya->o == IR_ADD && irref_isk(keya->op2)) { + basea = keya->op1; + ofsa = IR(keya->op2)->i; + if (basea == kb && ofsa != 0) + return ALIAS_NO; /* t[base+-ofs] vs. t[base]. */ + } + if (keyb->o == IR_ADD && irref_isk(keyb->op2)) { + baseb = keyb->op1; + ofsb = IR(keyb->op2)->i; + if (ka == baseb && ofsb != 0) + return ALIAS_NO; /* t[base] vs. t[base+-ofs]. */ + } + if (basea == baseb && ofsa != ofsb) + return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */ + } + } else { + /* Disambiguate hash references based on the type of their keys. */ + lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && + (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF)); + if (!irt_sametype(keya->t, keyb->t)) + return ALIAS_NO; /* Different key types. */ + } + return ALIAS_MAY; /* Anything else: we just don't know. */ +} + +/* Array and hash load forwarding. 
 */
+static TRef fwd_ahload(jit_State *J, IRRef xref)
+{
+  IRIns *xr = IR(xref);
+  IRRef lim = xref;  /* Search limit. */
+  IRRef ref;
+
+  /* Search for conflicting stores. */
+  ref = J->chain[fins->o+IRDELTA_L2S];
+  while (ref > xref) {
+    IRIns *store = IR(ref);
+    switch (aa_ahref(J, xr, IR(store->op1))) {
+    case ALIAS_NO:   break;  /* Continue searching. */
+    case ALIAS_MAY:  lim = ref; goto conflict;  /* Limit search for load. */
+    case ALIAS_MUST: return store->op2;  /* Store forwarding. */
+    }
+    ref = store->prev;
+  }
+
+  /* No conflicting store (yet): const-fold loads from allocations. */
+  {
+    IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr;
+    IRRef tab = ir->op1;
+    ir = IR(tab);
+    if (ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) {
+      /* A NEWREF with a number key may end up pointing to the array part.
+      ** But it's referenced from HSTORE and not found in the ASTORE chain.
+      ** For now simply consider this a conflict without forwarding anything.
+      */
+      if (xr->o == IR_AREF) {
+        IRRef ref2 = J->chain[IR_NEWREF];
+        while (ref2 > tab) {
+          IRIns *newref = IR(ref2);
+          if (irt_isnum(IR(newref->op2)->t))
+            goto conflict;
+          ref2 = newref->prev;
+        }
+      }
+      /* NEWREF inhibits CSE for HREF, and dependent FLOADs from HREFK/AREF.
+      ** But the above search for conflicting stores was limited by xref.
+      ** So continue searching, limited by the TNEW/TDUP. Store forwarding
+      ** is ok, too. A conflict does NOT limit the search for a matching load.
+      */
+      while (ref > tab) {
+        IRIns *store = IR(ref);
+        switch (aa_ahref(J, xr, IR(store->op1))) {
+        case ALIAS_NO:   break;  /* Continue searching. */
+        case ALIAS_MAY:  goto conflict;  /* Conflicting store. */
+        case ALIAS_MUST: return store->op2;  /* Store forwarding. */
+        }
+        ref = store->prev;
+      }
+      lua_assert(ir->o != IR_TNEW || irt_isnil(fins->t));
+      if (irt_ispri(fins->t)) {
+        return TREF_PRI(irt_type(fins->t));
+      } else if (irt_isnum(fins->t) || irt_isstr(fins->t)) {
+        TValue keyv;
+        cTValue *tv;
+        IRIns *key = IR(xr->op2);
+        if (key->o == IR_KSLOT) key = IR(key->op1);
+        lj_ir_kvalue(J->L, &keyv, key);
+        tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
+        lua_assert(itype2irt(tv) == irt_type(fins->t));
+        if (irt_isnum(fins->t))
+          return lj_ir_knum_nn(J, tv->u64);
+        else
+          return lj_ir_kstr(J, strV(tv));
+      }
+      /* Otherwise: don't intern as a constant. */
+    }
+  }
+
+conflict:
+  /* Try to find a matching load. Below the conflicting store, if any. */
+  ref = J->chain[fins->o];
+  while (ref > lim) {
+    IRIns *load = IR(ref);
+    if (load->op1 == xref)
+      return ref;  /* Load forwarding. */
+    ref = load->prev;
+  }
+  return 0;  /* Conflict or no match. */
+}
+
+/* Reassociate ALOAD across PHIs to handle t[i-1] forwarding case. */
+static TRef fwd_aload_reassoc(jit_State *J)
+{
+  IRIns *irx = IR(fins->op1);
+  IRIns *key = IR(irx->op2);
+  if (key->o == IR_ADD && irref_isk(key->op2)) {
+    IRIns *add2 = IR(key->op1);
+    if (add2->o == IR_ADD && irref_isk(add2->op2) &&
+        IR(key->op2)->i == -IR(add2->op2)->i) {
+      IRRef ref = J->chain[IR_AREF];
+      IRRef lim = add2->op1;
+      if (irx->op1 > lim) lim = irx->op1;
+      while (ref > lim) {
+        IRIns *ir = IR(ref);
+        if (ir->op1 == irx->op1 && ir->op2 == add2->op1)
+          return fwd_ahload(J, ref);
+        ref = ir->prev;
+      }
+    }
+  }
+  return 0;
+}
+
+/* ALOAD forwarding. */
+TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J)
+{
+  IRRef ref;
+  if ((ref = fwd_ahload(J, fins->op1)) ||
+      (ref = fwd_aload_reassoc(J)))
+    return ref;
+  return EMITFOLD;
+}
+
+/* HLOAD forwarding.
*/ +TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J) +{ + IRRef ref = fwd_ahload(J, fins->op1); + if (ref) + return ref; + return EMITFOLD; +} + +/* ASTORE/HSTORE elimination. */ +TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) +{ + IRRef xref = fins->op1; /* xREF reference. */ + IRRef val = fins->op2; /* Stored value reference. */ + IRIns *xr = IR(xref); + IRRef1 *refp = &J->chain[fins->o]; + IRRef ref = *refp; + while (ref > xref) { /* Search for redundant or conflicting stores. */ + IRIns *store = IR(ref); + switch (aa_ahref(J, xr, IR(store->op1))) { + case ALIAS_NO: + break; /* Continue searching. */ + case ALIAS_MAY: /* Store to MAYBE the same location. */ + if (store->op2 != val) /* Conflict if the value is different. */ + goto doemit; + break; /* Otherwise continue searching. */ + case ALIAS_MUST: /* Store to the same location. */ + if (store->op2 == val) /* Same value: drop the new store. */ + return DROPFOLD; + /* Different value: try to eliminate the redundant store. */ + if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ + IRIns *ir; + /* Check for any intervening guards (includes conflicting loads). */ + for (ir = IR(J->cur.nins-1); ir > store; ir--) + if (irt_isguard(ir->t)) + goto doemit; /* No elimination possible. */ + /* Remove redundant store from chain and replace with NOP. */ + *refp = store->prev; + store->o = IR_NOP; /* Unchained NOP -- does anybody care? */ + store->t.irt = IRT_NIL; + store->op1 = store->op2 = 0; + store->prev = 0; + /* Now emit the new store instead. */ + } + goto doemit; + } + ref = *(refp = &store->prev); + } +doemit: + return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ +} + +/* -- ULOAD forwarding ---------------------------------------------------- */ + +/* The current alias analysis for upvalues is very simplistic. It only +** disambiguates between the unique upvalues of the same function. +** This is good enough for now, since most upvalues are read-only. +** +** A more precise analysis would be feasible with the help of the parser: +** generate a unique key for every upvalue, even across all prototypes. +** Lacking a realistic use-case, it's unclear whether this is beneficial. +*/ +static AliasRet aa_uref(IRIns *refa, IRIns *refb) +{ + if (refa->o != refb->o) + return ALIAS_NO; /* Different UREFx type. */ + if (refa->op1 != refb->op1) + return ALIAS_MAY; /* Different function. */ + else if (refa->op2 == refb->op2) + return ALIAS_MUST; /* Same function, same upvalue idx. */ + else + return ALIAS_NO; /* Same function, different upvalue idx. */ +} + +/* ULOAD forwarding. */ +TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J) +{ + IRRef uref = fins->op1; + IRRef lim = uref; /* Search limit. */ + IRIns *xr = IR(uref); + IRRef ref; + + /* Search for conflicting stores. */ + ref = J->chain[IR_USTORE]; + while (ref > uref) { + IRIns *store = IR(ref); + switch (aa_uref(xr, IR(store->op1))) { + case ALIAS_NO: break; /* Continue searching. */ + case ALIAS_MAY: lim = ref; goto conflict; /* Limit search for load. */ + case ALIAS_MUST: return store->op2; /* Store forwarding. */ + } + ref = store->prev; + } + +conflict: + /* Try to find a matching load. Below the conflicting store, if any. */ + ref = J->chain[IR_ULOAD]; + while (ref > lim) { + IRIns *load = IR(ref); + if (load->op1 == uref) + return ref; /* Load forwarding. */ + ref = load->prev; + } + return EMITFOLD; /* Conflict or no match. */ +} + +/* USTORE elimination. 
*/ +TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J) +{ + IRRef xref = fins->op1; /* xREF reference. */ + IRRef val = fins->op2; /* Stored value reference. */ + IRIns *xr = IR(xref); + IRRef1 *refp = &J->chain[IR_USTORE]; + IRRef ref = *refp; + while (ref > xref) { /* Search for redundant or conflicting stores. */ + IRIns *store = IR(ref); + switch (aa_uref(xr, IR(store->op1))) { + case ALIAS_NO: + break; /* Continue searching. */ + case ALIAS_MAY: /* Store to MAYBE the same location. */ + if (store->op2 != val) /* Conflict if the value is different. */ + goto doemit; + break; /* Otherwise continue searching. */ + case ALIAS_MUST: /* Store to the same location. */ + if (store->op2 == val) /* Same value: drop the new store. */ + return DROPFOLD; + /* Different value: try to eliminate the redundant store. */ + if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ + IRIns *ir; + /* Check for any intervening guards (includes conflicting loads). */ + for (ir = IR(J->cur.nins-1); ir > store; ir--) + if (irt_isguard(ir->t)) + goto doemit; /* No elimination possible. */ + /* Remove redundant store from chain and replace with NOP. */ + *refp = store->prev; + store->o = IR_NOP; /* Unchained NOP -- does anybody care? */ + store->t.irt = IRT_NIL; + store->op1 = store->op2 = 0; + store->prev = 0; + /* Now emit the new store instead. */ + } + goto doemit; + } + ref = *(refp = &store->prev); + } +doemit: + return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ +} + +/* -- FLOAD forwarding and FSTORE elimination ----------------------------- */ + +/* Alias analysis for field access. +** Field loads are cheap and field stores are rare. +** Simple disambiguation based on field types is good enough. +*/ +static AliasRet aa_fref(IRIns *refa, IRIns *refb) +{ + if (refa->op2 != refb->op2) + return ALIAS_NO; /* Different fields. */ + if (refa->op1 == refb->op1) + return ALIAS_MUST; /* Same field, same object. */ + else + return ALIAS_MAY; /* Same field, possibly different object. */ +} + +/* Only the loads for mutable fields end up here (see FOLD). */ +TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J) +{ + IRRef oref = fins->op1; /* Object reference. */ + IRRef fid = fins->op2; /* Field ID. */ + IRRef lim = oref; /* Search limit. */ + IRRef ref; + + /* Search for conflicting stores. */ + ref = J->chain[IR_FSTORE]; + while (ref > oref) { + IRIns *store = IR(ref); + switch (aa_fref(fins, IR(store->op1))) { + case ALIAS_NO: break; /* Continue searching. */ + case ALIAS_MAY: lim = ref; goto conflict; /* Limit search for load. */ + case ALIAS_MUST: return store->op2; /* Store forwarding. */ + } + ref = store->prev; + } + + /* No conflicting store: const-fold field loads from allocations. */ + if (fid == IRFL_TAB_META) { + IRIns *ir = IR(oref); + if (ir->o == IR_TNEW || ir->o == IR_TDUP) + return lj_ir_knull(J, IRT_TAB); + } + +conflict: + /* Try to find a matching load. Below the conflicting store, if any. */ + ref = J->chain[IR_FLOAD]; + while (ref > lim) { + IRIns *load = IR(ref); + if (load->op1 == oref && load->op2 == fid) + return ref; /* Load forwarding. */ + ref = load->prev; + } + return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ +} + +/* FSTORE elimination. */ +TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J) +{ + IRRef fref = fins->op1; /* FREF reference. */ + IRRef val = fins->op2; /* Stored value reference. 
*/ + IRIns *xr = IR(fref); + IRRef1 *refp = &J->chain[IR_FSTORE]; + IRRef ref = *refp; + while (ref > fref) { /* Search for redundant or conflicting stores. */ + IRIns *store = IR(ref); + switch (aa_fref(xr, IR(store->op1))) { + case ALIAS_NO: + break; /* Continue searching. */ + case ALIAS_MAY: + if (store->op2 != val) /* Conflict if the value is different. */ + goto doemit; + break; /* Otherwise continue searching. */ + case ALIAS_MUST: + if (store->op2 == val) /* Same value: drop the new store. */ + return DROPFOLD; + /* Different value: try to eliminate the redundant store. */ + if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ + IRIns *ir; + /* Check for any intervening guards or conflicting loads. */ + for (ir = IR(J->cur.nins-1); ir > store; ir--) + if (irt_isguard(ir->t) || (ir->o == IR_FLOAD && ir->op2 == xr->op2)) + goto doemit; /* No elimination possible. */ + /* Remove redundant store from chain and replace with NOP. */ + *refp = store->prev; + store->o = IR_NOP; /* Unchained NOP -- does anybody care? */ + store->t.irt = IRT_NIL; + store->op1 = store->op2 = 0; + store->prev = 0; + /* Now emit the new store instead. */ + } + goto doemit; + } + ref = *(refp = &store->prev); + } +doemit: + return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ +} + +/* -- TLEN forwarding ----------------------------------------------------- */ + +/* This is rather simplistic right now, but better than nothing. */ +TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J) +{ + IRRef tab = fins->op1; /* Table reference. */ + IRRef lim = tab; /* Search limit. */ + IRRef ref; + + /* Any ASTORE is a conflict and limits the search. */ + if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE]; + + /* Search for conflicting HSTORE with numeric key. */ + ref = J->chain[IR_HSTORE]; + while (ref > lim) { + IRIns *store = IR(ref); + IRIns *href = IR(store->op1); + IRIns *key = IR(href->op2); + if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) { + lim = ref; /* Conflicting store found, limits search for TLEN. */ + break; + } + ref = store->prev; + } + + /* Try to find a matching load. Below the conflicting store, if any. */ + ref = J->chain[IR_TLEN]; + while (ref > lim) { + IRIns *tlen = IR(ref); + if (tlen->op1 == tab) + return ref; /* Load forwarding. */ + ref = tlen->prev; + } + return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ +} + +/* -- ASTORE/HSTORE previous type analysis -------------------------------- */ + +/* Check whether the previous value for a table store is non-nil. +** This can be derived either from a previous store or from a previous +** load (because all loads from tables perform a type check). +** +** The result of the analysis can be used to avoid the metatable check +** and the guard against HREF returning niltv. Both of these are cheap, +** so let's not spend too much effort on the analysis. +** +** A result of 1 is exact: previous value CANNOT be nil. +** A result of 0 is inexact: previous value MAY be nil. +*/ +int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref) +{ + /* First check stores. */ + IRRef ref = J->chain[loadop+IRDELTA_L2S]; + while (ref > xref) { + IRIns *store = IR(ref); + if (store->op1 == xref) { /* Same xREF. */ + /* A nil store MAY alias, but a non-nil store MUST alias. */ + return !irt_isnil(store->t); + } else if (irt_isnil(store->t)) { /* Must check any nil store. */ + IRRef skref = IR(store->op1)->op2; + IRRef xkref = IR(xref)->op2; + /* Same key type MAY alias. 
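+** (keys of a different type can never be equal to the key of xREF, so +** only same-typed nil stores need the constant-key check below).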
*/ + if (irt_sametype(IR(skref)->t, IR(xkref)->t)) { + if (skref == xkref || !irref_isk(skref) || !irref_isk(xkref)) + return 0; /* A nil store with same const key or var key MAY alias. */ + /* Different const keys CANNOT alias. */ + } /* Different key types CANNOT alias. */ + } /* Other non-nil stores MAY alias. */ + ref = store->prev; + } + + /* Check loads since nothing could be derived from stores. */ + ref = J->chain[loadop]; + while (ref > xref) { + IRIns *load = IR(ref); + if (load->op1 == xref) { /* Same xREF. */ + /* A nil load MAY alias, but a non-nil load MUST alias. */ + return !irt_isnil(load->t); + } /* Other non-nil loads MAY alias. */ + ref = load->prev; + } + return 0; /* Nothing derived at all, previous value MAY be nil. */ +} + +/* ------------------------------------------------------------------------ */ + +#undef IR +#undef fins + +#endif diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c new file mode 100644 index 0000000000..60a6afb8eb --- /dev/null +++ b/src/lj_opt_narrow.c @@ -0,0 +1,430 @@ +/* +** NARROW: Narrowing of numbers to integers (double to int32_t). +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_opt_narrow_c +#define LUA_CORE + +#include "lj_obj.h" + +#if LJ_HASJIT + +#include "lj_str.h" +#include "lj_bc.h" +#include "lj_ir.h" +#include "lj_jit.h" +#include "lj_iropt.h" +#include "lj_trace.h" + +/* Rationale for narrowing optimizations: +** +** Lua has only a single number type and this is a FP double by default. +** Narrowing doubles to integers does not pay off for the interpreter on a +** current-generation x86/x64 machine. Most FP operations need the same +** amount of execution resources as their integer counterparts, except +** with slightly longer latencies. Longer latencies are a non-issue for +** the interpreter, since they are usually hidden by other overhead. +** +** The total CPU execution bandwidth is the sum of the bandwidth of the FP +** and the integer units, because they execute in parallel. The FP units +** have an equal or higher bandwidth than the integer units. Not using +** them means losing execution bandwidth. Moving work away from them to +** the already quite busy integer units is a losing proposition. +** +** The situation for JIT-compiled code is a bit different: the higher code +** density makes the extra latencies much more visible. Tight loops expose +** the latencies for updating the induction variables. Array indexing +** requires narrowing conversions with high latencies and additional +** guards (to check that the index is really an integer). And many common +** optimizations only work on integers. +** +** One solution would be speculative, eager narrowing of all number loads. +** This causes many problems, like losing -0 or the need to resolve type +** mismatches between traces. It also effectively forces the integer type +** to have overflow-checking semantics. This impedes many basic +** optimizations and requires adding overflow checks to all integer +** arithmetic operations (whereas FP arithmetic can do without them). +** +** Always replacing an FP op with an integer op plus an overflow check is +** counter-productive on a current-generation super-scalar CPU. Although +** the overflow check branches are highly predictable, they will clog the +** execution port for the branch unit and tie up reorder buffers. This is +** turning a pure data-flow dependency into a different data-flow +** dependency (with slightly lower latency) *plus* a control dependency.
+** In general, you don't want to do this since latencies due to data-flow +** dependencies can be well hidden by out-of-order execution. +** +** A better solution is to keep all numbers as FP values and only narrow +** when it's beneficial to do so. LuaJIT uses predictive narrowing for +** induction variables and demand-driven narrowing for index expressions +** and bit operations. Additionally it can eliminate or hoist most of the +** resulting overflow checks. Regular arithmetic computations are never +** narrowed to integers. +** +** The integer type in the IR has convenient wrap-around semantics and +** ignores overflow. Extra operations have been added for +** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type. +** Apart from reducing overall complexity of the compiler, this also +** nicely solves the problem where you want to apply algebraic +** simplifications to ADD, but not to ADDOV. And the assembler can use lea +** instead of an add for integer ADD, but not for ADDOV (lea does not +** affect the flags, but it helps to avoid register moves). +** +** Note that all of the above has to be reconsidered if LuaJIT is to be +** ported to architectures with slow FP operations or with no hardware FPU +** at all. In the latter case an integer-only port may be the best overall +** solution (if this still meets user demands). +*/ + +/* Some local macros to save typing. Undef'd at the end. */ +#define IR(ref) (&J->cur.ir[(ref)]) +#define fins (&J->fold.ins) + +/* Pass IR on to next optimization in chain (FOLD). */ +#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) + +#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) + +/* -- Elimination of narrowing type conversions --------------------------- */ + +/* Narrowing of index expressions and bit operations is demand-driven. The +** trace recorder emits a narrowing type conversion (TOINT or TOBIT) in +** all of these cases (e.g. array indexing or string indexing). FOLD +** already takes care of eliminating simple redundant conversions like +** TOINT(TONUM(x)) ==> x. +** +** But the surrounding code is FP-heavy and all arithmetic operations are +** performed on FP numbers. Consider a common example such as 'x=t[i+1]', +** with 'i' already an integer (due to induction variable narrowing). The +** index expression would be recorded as TOINT(ADD(TONUM(i), 1)), which is +** clearly suboptimal. +** +** One can do better by recursively backpropagating the narrowing type +** conversion across FP arithmetic operations. This turns FP ops into +** their corresponding integer counterparts. Depending on the semantics of +** the conversion they also need to check for overflow. Currently only ADD +** and SUB are supported. +** +** The above example can be rewritten as ADDOV(TOINT(TONUM(i)), 1) and +** then into ADDOV(i, 1) after folding of the conversions. The original FP +** ops remain in the IR and are eliminated by DCE since all references to +** them are gone. +** +** Special care has to be taken to avoid narrowing across an operation +** which is potentially operating on non-integral operands. One obvious +** case is when an expression contains a non-integral constant, but ends +** up as an integer index at runtime (like t[x+1.5] with x=0.5). +** +** Operations with two non-constant operands illustrate a similar problem +** (like t[a+b] with a=1.5 and b=2.5). Backpropagation has to stop there, +** unless it can be proven that either operand is integral (e.g.
by CSEing +** a previous conversion). As a not-so-obvious corollary this logic also +** applies for a whole expression tree (e.g. t[(a+1)+(b+1)]). +** +** Correctness of the transformation is guaranteed by never expanding the +** tree with more conversions than the one we would need to emit if not +** backpropagating. TOBIT employs a more optimistic rule, because the +** conversion has special semantics, designed to make the life of the +** compiler writer easier. ;-) +** +** Using on-the-fly backpropagation of an expression tree doesn't work +** because it's unknown whether the transform is correct until the end. +** This either requires IR rollback and cache invalidation for every +** subtree or a two-pass algorithm. The former didn't work out too well, +** so the code now combines a recursive collector with a stack-based +** emitter. +** +** [A recursive backpropagation algorithm with backtracking, employing +** skip-list lookup and round-robin caching, emitting stack operations +** on-the-fly for a stack-based interpreter -- and all of that in a meager +** kilobyte? Yep, compilers are a great treasure chest. Throw away your +** textbooks and read the codebase of a compiler today!] +** +** There's another optimization opportunity for array indexing: it's +** always accompanied by an array bounds-check. The outermost overflow +** check may be delegated to the ABC operation. This works because ABC is +** an unsigned comparison and wrap-around due to overflow creates negative +** numbers. +** +** But this optimization is only valid for constants that cannot overflow +** an int32_t into the range of valid array indexes [0..2^27+1). A check +** for +-2^30 is safe since -2^31 - 2^30 wraps to 2^30 and 2^31-1 + 2^30 +** wraps to -2^30-1. +** +** It's also good enough in practice, since e.g. t[i+1] or t[i-10] are +** quite common. So the above example finally ends up as ADD(i, 1)! +** +** Later on, the assembler is able to fuse the whole array reference and +** the ADD into the memory operands of loads and other instructions. This +** is why LuaJIT is able to generate very pretty (and fast) machine code +** for array indexing. And that, my dear, concludes another story about +** one of the hidden secrets of LuaJIT ... +*/ + +/* Maximum backpropagation depth and maximum stack size. */ +#define NARROW_MAX_BACKPROP 100 +#define NARROW_MAX_STACK 256 + +/* Context used for narrowing of type conversions. */ +typedef struct NarrowConv { + jit_State *J; /* JIT compiler state. */ + IRRef2 *sp; /* Current stack pointer. */ + IRRef2 *maxsp; /* Maximum stack pointer minus redzone. */ + int lim; /* Limit on the number of emitted conversions. */ + IRRef mode; /* Conversion mode (IRTOINT_*). */ + IRRef2 stack[NARROW_MAX_STACK]; /* Stack holding the stack-machine code. */ +} NarrowConv; + +/* The stack machine has a 32 bit instruction format: [IROpT | IRRef1] +** The lower 16 bits hold a reference (or 0). The upper 16 bits hold +** the IR opcode + type or one of the following special opcodes: +*/ +enum { + NARROW_REF, /* Push ref. */ + NARROW_CONV, /* Push conversion of ref. */ + NARROW_INT /* Push KINT ref. The next code holds an int32_t. */ +}; + +/* Lookup a reference in the backpropagation cache. */ +static IRRef narrow_bpc_get(jit_State *J, IRRef1 key, IRRef mode) +{ + ptrdiff_t i; + for (i = 0; i < BPROP_SLOTS; i++) { + BPropEntry *bp = &J->bpropcache[i]; + if (bp->key == key && bp->mode <= mode) /* Stronger checks are ok, too.
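+** (modes are ordered from strongest to weakest check, so a cached entry +** with bp->mode <= mode guarantees at least what the caller asked for).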
*/ + return bp->val; + } + return 0; +} + +/* Add an entry to the backpropagation cache. */ +static void narrow_bpc_set(jit_State *J, IRRef1 key, IRRef1 val, IRRef mode) +{ + uint32_t slot = J->bpropslot; + BPropEntry *bp = &J->bpropcache[slot]; + J->bpropslot = (slot + 1) & (BPROP_SLOTS-1); + bp->key = key; + bp->val = val; + bp->mode = mode; +} + +/* Backpropagate narrowing conversion. Return number of needed conversions. */ +static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) +{ + jit_State *J = nc->J; + IRIns *ir = IR(ref); + IRRef cref; + + /* Check the easy cases first. */ + if (ir->o == IR_TONUM) { /* Undo inverse conversion. */ + *nc->sp++ = IRREF2(ir->op1, NARROW_REF); + return 0; + } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ + lua_Number n = ir_knum(ir)->n; + if (nc->mode == IRTOINT_TOBIT) { /* Allows a wider range of constants. */ + int64_t k64 = (int64_t)n; + if (n == cast_num(k64)) { /* Only if constant doesn't lose precision. */ + *nc->sp++ = IRREF2(0, NARROW_INT); + *nc->sp++ = (IRRef2)k64; /* But always truncate to 32 bits. */ + return 0; + } + } else { + int32_t k = lj_num2int(n); + if (n == cast_num(k)) { /* Only if constant is really an integer. */ + *nc->sp++ = IRREF2(0, NARROW_INT); + *nc->sp++ = (IRRef2)k; + return 0; + } + } + return 10; /* Never narrow other FP constants (this is rare). */ + } + + /* Try to CSE the conversion. Stronger checks are ok, too. */ + for (cref = J->chain[fins->o]; cref > ref; cref = IR(cref)->prev) + if (IR(cref)->op1 == ref && + irt_isguard(IR(cref)->t) >= irt_isguard(fins->t)) { + *nc->sp++ = IRREF2(cref, NARROW_REF); + return 0; /* Already there, no additional conversion needed. */ + } + + /* Backpropagate across ADD/SUB. */ + if (ir->o == IR_ADD || ir->o == IR_SUB) { + /* Try cache lookup first. */ + IRRef bpref, mode = nc->mode; + if (mode == IRTOINT_INDEX && depth > 0) + mode = IRTOINT_CHECK; /* Inner conversions need a stronger check. */ + bpref = narrow_bpc_get(nc->J, (IRRef1)ref, mode); + if (bpref) { + *nc->sp++ = IRREF2(bpref, NARROW_REF); + return 0; + } + if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) { + IRRef2 *savesp = nc->sp; + int count = narrow_conv_backprop(nc, ir->op1, depth); + count += narrow_conv_backprop(nc, ir->op2, depth); + if (count <= nc->lim) { /* Limit total number of conversions. */ + *nc->sp++ = IRREF2(ref, IRTI(ir->o)); + return count; + } + nc->sp = savesp; /* Too many conversions, need to backtrack. */ + } + } + + /* Otherwise add a conversion. */ + *nc->sp++ = IRREF2(ref, NARROW_CONV); + return 1; +} + +/* Emit the conversions collected during backpropagation. */ +static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) +{ + /* The fins fields must be saved now -- emitir() overwrites them. */ + IROpT guardot = irt_isguard(fins->t) ? IRTG(IR_ADDOV-IR_ADD, 0) : 0; + IROpT convot = fins->ot; + IRRef1 convop2 = fins->op2; + IRRef2 *next = nc->stack; /* List of instructions from backpropagation. */ + IRRef2 *last = nc->sp; + IRRef2 *sp = nc->stack; /* Recycle the stack to store operands. */ + while (next < last) { /* Simple stack machine to process the ins. list. */ + IRRef2 ref = *next++; + IROpT op = ref >> 16; + if (op == NARROW_REF) { + *sp++ = ref; + } else if (op == NARROW_CONV) { + *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ + } else if (op == NARROW_INT) { + lua_assert(next < last); + *sp++ = lj_ir_kint(J, *next++); + } else { /* Regular IROpT. Pops two operands and pushes one result. 
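+** E.g. for the TOINT(ADD(TONUM(i), 1)) example above, the collected list +** is [NARROW_REF i, NARROW_INT, 1, ADD]: the first two codes push i and +** KINT(1), then the ADD pops both and pushes the emitted result -- +** ADDOV(i, 1), or a plain ADD once the guard is delegated to the ABC.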
*/ + IRRef mode = nc->mode; + lua_assert(sp >= nc->stack+2); + sp--; + /* Omit some overflow checks for array indexing. See comments above. */ + if (mode == IRTOINT_INDEX) { + if (next == last && irref_isk((IRRef1)sp[0]) && + (uint32_t)IR((IRRef1)sp[0])->i + 0x40000000 < 0x80000000) + guardot = 0; + else + mode = IRTOINT_CHECK; /* Otherwise cache a stronger check. */ + } + sp[-1] = emitir(op+guardot, sp[-1], sp[0]); + narrow_bpc_set(J, (IRRef1)ref, (IRRef1)sp[-1], mode); /* Add to cache. */ + } + } + lua_assert(sp == nc->stack+1); + return nc->stack[0]; +} + +/* Narrow a type conversion of an arithmetic operation. */ +TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J) +{ + if ((J->flags & JIT_F_OPT_NARROW)) { + NarrowConv nc; + nc.J = J; + nc.sp = nc.stack; + nc.maxsp = &nc.stack[NARROW_MAX_STACK-4]; + if (fins->o == IR_TOBIT) { + nc.mode = IRTOINT_TOBIT; /* Used only in the backpropagation cache. */ + nc.lim = 2; /* TOBIT can use a more optimistic rule. */ + } else { + nc.mode = fins->op2; + nc.lim = 1; + } + if (narrow_conv_backprop(&nc, fins->op1, 0) <= nc.lim) + return narrow_conv_emit(J, &nc); + } + return NEXTFOLD; +} + +/* -- Narrowing of arithmetic operators ----------------------------------- */ + +/* Check whether a number fits into an int32_t (-0 is ok, too). */ +static int numisint(lua_Number n) +{ + return (n == cast_num(lj_num2int(n))); +} + +/* Narrowing of modulo operator. */ +TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc) +{ + TRef tmp; + if ((J->flags & JIT_F_OPT_NARROW) && + tref_isk(rc) && tref_isint(rc)) { /* Optimize x % k. */ + int32_t k = IR(tref_ref(rc))->i; + if (k > 0 && (k & (k-1)) == 0) { /* i % 2^k ==> band(i, 2^k-1) */ + if (tref_isint(rb)) + return emitir(IRTI(IR_BAND), rb, lj_ir_kint(J, k-1)); + } + } + /* b % c ==> b - floor(b/c)*c */ + rb = lj_ir_tonum(J, rb); + rc = lj_ir_tonum(J, rc); + tmp = emitir(IRTN(IR_DIV), rb, rc); + tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_FLOOR); + tmp = emitir(IRTN(IR_MUL), tmp, rc); + return emitir(IRTN(IR_SUB), rb, tmp); +} + +/* Narrowing of power operator or math.pow. */ +TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) +{ + lua_Number n; + if (tvisstr(vc) && !lj_str_numconv(strVdata(vc), vc)) + lj_trace_err(J, LJ_TRERR_BADTYPE); + n = numV(vc); + /* Limit narrowing for pow to small exponents (or for two constants). */ + if ((tref_isint(rc) && tref_isk(rc) && tref_isk(rb)) || + ((J->flags & JIT_F_OPT_NARROW) && + (numisint(n) && n >= -65536.0 && n <= 65536.0))) { + TRef tmp; + if (!tref_isinteger(rc)) { + if (tref_isstr(rc)) + rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0); + rc = emitir(IRTGI(IR_TOINT), rc, IRTOINT_CHECK); /* Guarded TOINT! */ + } + if (!tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */ + tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536-2147483647-1)); + emitir(IRTGI(IR_LE), tmp, lj_ir_kint(J, 2*65536-2147483647-1)); + } + return emitir(IRTN(IR_POWI), rb, rc); + } + /* FOLD covers most cases, but some are easier to do here. */ + if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb))))) + return rb; /* 1 ^ x ==> 1 */ + rc = lj_ir_tonum(J, rc); + if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5) + return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */ + /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. 
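+** (for b > 0 this follows from b^c == (2^log2(b))^c == 2^(c*log2(b))).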
*/ + rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2); + rc = emitir(IRTN(IR_MUL), rb, rc); + return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2); +} + +/* -- Predictive narrowing of induction variables ------------------------- */ + +/* Narrow the FORL index type by looking at the runtime values. */ +IRType lj_opt_narrow_forl(cTValue *forbase) +{ + lua_assert(tvisnum(&forbase[FORL_IDX]) && + tvisnum(&forbase[FORL_STOP]) && + tvisnum(&forbase[FORL_STEP])); + /* Narrow only if the runtime values of start/stop/step are all integers. */ + if (numisint(numV(&forbase[FORL_IDX])) && + numisint(numV(&forbase[FORL_STOP])) && + numisint(numV(&forbase[FORL_STEP]))) { + /* And if the loop index can't possibly overflow. */ + lua_Number step = numV(&forbase[FORL_STEP]); + lua_Number sum = numV(&forbase[FORL_STOP]) + step; + if (0 <= step ? sum <= 2147483647.0 : sum >= -2147483648.0) + return IRT_INT; + } + return IRT_NUM; +} + +#undef IR +#undef fins +#undef emitir +#undef emitir_raw + +#endif diff --git a/src/lj_parse.c b/src/lj_parse.c new file mode 100644 index 0000000000..663525abbf --- /dev/null +++ b/src/lj_parse.c @@ -0,0 +1,2198 @@ +/* +** Lua parser (source code -> bytecode). +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Major portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +*/ + +#define lj_parse_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_func.h" +#include "lj_state.h" +#include "lj_bc.h" +#include "lj_lex.h" +#include "lj_parse.h" +#include "lj_vm.h" +#include "lj_vmevent.h" + +/* -- Parser structures and definitions ----------------------------------- */ + +/* Expression kinds. */ +typedef enum { + /* Constant expressions must be first and in this order: */ + VKNIL, + VKFALSE, + VKTRUE, + VKSTR, /* sval = string value */ + VKNUM, /* nval = numerical value */ + VKLAST = VKNUM, + /* Non-constant expressions follow: */ + VLOCAL, /* info = local register */ + VUPVAL, /* info = upvalue index */ + VGLOBAL, /* sval = string value */ + VINDEXED, /* info = table register, aux = index reg/byte/string const */ + VJMP, /* info = instruction PC */ + VRELOCABLE, /* info = instruction PC */ + VNONRELOC, /* info = result register */ + VCALL, /* info = instruction PC, aux = base */ + VVOID +} ExpKind; + +/* Expression descriptor. */ +typedef struct ExpDesc { + union { + struct { uint32_t info, aux; } s; + TValue nval; + GCstr *sval; + } u; + ExpKind k; + BCPos t; /* true condition exit list */ + BCPos f; /* false condition exit list */ +} ExpDesc; + +/* Tests for expression types */ +#define isK(e) ((uint32_t)((e)->k) <= VKLAST) +#define isnumK(e) ((e)->k == VKNUM) +#define isstrK(e) ((e)->k == VKSTR) +#define expnumV(e) check_exp(isnumK((e)), numV(&(e)->u.nval)) + +#define hasjumps(e) ((e)->t != (e)->f) +#define isKexp(e) (isK(e) && !hasjumps(e)) +#define isnumKexp(e) (isnumK(e) && !hasjumps(e)) + +#define priKk(k) check_exp((k) <= VKTRUE, (k) - VKNIL) +#define priK(e) priKk((e)->k) + +/* Per-function linked list of blocks. 
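+** One FuncBlock is kept per active syntactic block; breaklist chains the +** jumps emitted by 'break' until the enclosing loop block is closed.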
*/ +typedef struct FuncBlock { + struct FuncBlock *previous; /* chain */ + BCPos breaklist; /* list of jumps out of this loop */ + uint8_t nactvar; /* # active locals outside the breakable structure */ + uint8_t upval; /* true if some variable in the block is an upvalue */ + uint8_t isbreakable; /* true if `block' is a loop */ +} FuncBlock; + +typedef struct UpValDesc { + uint8_t k; + uint8_t info; +} UpValDesc; + +/* Per-function state. */ +typedef struct FuncState { + GCproto *pt; /* current function header */ + GCtab *kt; /* table to find (and reuse) elements in `k' */ + struct FuncState *prev; /* enclosing function */ + struct LexState *ls; /* lexical state */ + struct lua_State *L; /* copy of the Lua state */ + struct FuncBlock *bl; /* chain of current blocks */ + BCPos pc; /* next bytecode position */ + BCPos lasttarget; /* PC of last jump target */ + BCPos jpc; /* list of pending jumps to PC */ + BCReg freereg; /* first free register */ + BCReg nkn, nkgc; /* number of lua_Number/GCobj constants */ + uint16_t nlocvars; /* number of elements in `locvars' */ + uint8_t nactvar; /* number of active local variables */ + uint8_t nuv; /* number of upvalues */ + UpValDesc upvalues[LJ_MAX_UPVAL]; /* upvalues */ + uint16_t actvar[LJ_MAX_LOCVAR]; /* declared-variable stack */ +} FuncState; + +/* Binary and unary operators. ORDER OPR */ +typedef enum BinOpr { + OPR_ADD, OPR_SUB, OPR_MUL, OPR_DIV, OPR_MOD, OPR_POW, /* ORDER ARITH */ + OPR_CONCAT, + OPR_NE, OPR_EQ, + OPR_LT, OPR_GE, OPR_LE, OPR_GT, + OPR_AND, OPR_OR, + OPR_NOBINOPR +} BinOpr; + +LJ_STATIC_ASSERT((int)BC_ISGE-(int)BC_ISLT == (int)OPR_GE-(int)OPR_LT); +LJ_STATIC_ASSERT((int)BC_ISLE-(int)BC_ISLT == (int)OPR_LE-(int)OPR_LT); +LJ_STATIC_ASSERT((int)BC_ISGT-(int)BC_ISLT == (int)OPR_GT-(int)OPR_LT); +LJ_STATIC_ASSERT((int)BC_SUBVV-(int)BC_ADDVV == (int)OPR_SUB-(int)OPR_ADD); +LJ_STATIC_ASSERT((int)BC_MULVV-(int)BC_ADDVV == (int)OPR_MUL-(int)OPR_ADD); +LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD); +LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD); + +typedef enum UnOpr { OPR_MINUS, OPR_NOT, OPR_LEN, OPR_NOUNOPR } UnOpr; + +/* -- Error handling ------------------------------------------------------ */ + +LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) +{ + lj_lex_error(ls, ls->token, em); +} + +LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token) +{ + lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token)); +} + +LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) +{ + if (fs->pt->linedefined == 0) + lj_lex_error(fs->ls, 0, LJ_ERR_XLIMM, limit, what); + else + lj_lex_error(fs->ls, 0, LJ_ERR_XLIMF, fs->pt->linedefined, limit, what); +} + +#define checklimit(fs, v, l, m) if ((v) >= (l)) err_limit(fs, l, m) +#define checklimitgt(fs, v, l, m) if ((v) > (l)) err_limit(fs, l, m) +#define checkcond(ls, c, em) { if (!(c)) err_syntax(ls, em); } + +/* -- Code emitter: branches ---------------------------------------------- */ + +static BCPos getjump(FuncState *fs, BCPos pc) +{ + ptrdiff_t delta = bc_j(fs->pt->bc[pc]); + if ((BCPos)delta == NO_JMP) + return NO_JMP; + else + return (BCPos)(((ptrdiff_t)pc+1)+delta); +} + +static int need_value(FuncState *fs, BCPos list) +{ + for (; list != NO_JMP; list = getjump(fs, list)) { + BCOp op = bc_op(fs->pt->bc[list >= 1 ? list-1 : list]); + if (!(op == BC_ISTC || op == BC_ISFC)) return 1; + } + return 0; /* Not found. 
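+** All jumps in the list sit behind ISTC/ISFC, which can store the tested +** value themselves, so exp2reg need not emit an extra KPRI/JMP pair.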
*/ +} + +static int patchtestreg(FuncState *fs, BCPos pc, BCReg reg) +{ + BCIns *i = &fs->pt->bc[pc >= 1 ? pc-1 : pc]; + BCOp op = bc_op(*i); + if (!(op == BC_ISTC || op == BC_ISFC)) + return 0; /* cannot patch other instructions */ + if (reg != NO_REG && reg != bc_d(*i)) { + setbc_a(i, reg); + } else { /* no register to put value or register already has the value */ + setbc_op(i, op+(BC_IST-BC_ISTC)); + setbc_a(i, 0); + } + return 1; +} + +static void removevalues(FuncState *fs, BCPos list) +{ + for (; list != NO_JMP; list = getjump(fs, list)) + patchtestreg(fs, list, NO_REG); +} + +static void fixjump(FuncState *fs, BCPos pc, BCPos dest) +{ + BCIns *jmp = &fs->pt->bc[pc]; + BCPos offset = dest-(pc+1)+BCBIAS_J; + lua_assert(dest != NO_JMP); + if (offset > BCMAX_D) + err_syntax(fs->ls, LJ_ERR_XJUMP); + setbc_d(jmp, offset); +} + +static void concatjumps(FuncState *fs, BCPos *l1, BCPos l2) +{ + if (l2 == NO_JMP) return; + else if (*l1 == NO_JMP) { + *l1 = l2; + } else { + BCPos list = *l1; + BCPos next; + while ((next = getjump(fs, list)) != NO_JMP) /* find last element */ + list = next; + fixjump(fs, list, l2); + } +} + +static void patchlistaux(FuncState *fs, BCPos list, BCPos vtarget, + BCReg reg, BCPos dtarget) +{ + while (list != NO_JMP) { + BCPos next = getjump(fs, list); + if (patchtestreg(fs, list, reg)) + fixjump(fs, list, vtarget); + else + fixjump(fs, list, dtarget); /* jump to default target */ + list = next; + } +} + +static void patchtohere(FuncState *fs, BCPos list) +{ + fs->lasttarget = fs->pc; + concatjumps(fs, &fs->jpc, list); +} + +static void patchlist(FuncState *fs, BCPos list, BCPos target) +{ + if (target == fs->pc) { + patchtohere(fs, list); + } else { + lua_assert(target < fs->pc); + patchlistaux(fs, list, target, NO_REG, target); + } +} + +/* -- Code emitter: instructions ------------------------------------------ */ + +static BCPos emitINS(FuncState *fs, BCIns i) +{ + GCproto *pt; + patchlistaux(fs, fs->jpc, fs->pc, NO_REG, fs->pc); + fs->jpc = NO_JMP; + pt = fs->pt; + if (LJ_UNLIKELY(fs->pc >= pt->sizebc)) { + checklimit(fs, fs->pc, LJ_MAX_BCINS, "bytecode instructions"); + lj_mem_growvec(fs->L, pt->bc, pt->sizebc, LJ_MAX_BCINS, BCIns); + lj_mem_growvec(fs->L, pt->lineinfo, pt->sizelineinfo, LJ_MAX_BCINS, BCLine); + } + pt->bc[fs->pc] = i; + pt->lineinfo[fs->pc] = fs->ls->lastline; + return fs->pc++; +} + +#define emitABC(fs, o, a, b, c) emitINS(fs, BCINS_ABC(o, a, b, c)) +#define emitAD(fs, o, a, d) emitINS(fs, BCINS_AD(o, a, d)) +#define emitAJ(fs, o, a, j) emitINS(fs, BCINS_AJ(o, a, j)) + +#define bcptr(fs, e) (&(fs)->pt->bc[(e)->u.s.info]) + +static BCPos emit_jump(FuncState *fs) +{ + BCPos jpc = fs->jpc; /* save list of jumps to here */ + BCPos j = fs->pc - 1; + fs->jpc = NO_JMP; + if ((int32_t)j >= (int32_t)fs->lasttarget && bc_op(fs->pt->bc[j]) == BC_UCLO) + setbc_j(&fs->pt->bc[j], NO_JMP); + else + j = emitAJ(fs, BC_JMP, fs->freereg, NO_JMP); + concatjumps(fs, &j, jpc); /* keep them on hold */ + return j; +} + +/* -- Code emitter: constants --------------------------------------------- */ + +static BCReg numK(FuncState *fs, ExpDesc *e) +{ + lua_State *L = fs->L; + TValue *val; + lua_assert(isnumK(e)); + val = lj_tab_set(L, fs->kt, &e->u.nval); + if (tvisnum(val)) + return val->u32.lo; + val->u64 = fs->nkn; + return fs->nkn++; +} + +static BCReg gcK(FuncState *fs, GCobj *gc, int itype) +{ + lua_State *L = fs->L; + TValue o, *val; + setgcV(L, &o, &gc->gch, itype); + val = lj_tab_set(L, fs->kt, &o); + if (tvisnum(val)) + return val->u32.lo; + val->u64 = 
fs->nkgc; + return fs->nkgc++; +} + +static BCReg strK(FuncState *fs, ExpDesc *e) +{ + lua_assert(isstrK(e) || e->k == VGLOBAL); + return gcK(fs, obj2gco(e->u.sval), LJ_TSTR); +} + +GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t len) +{ + lua_State *L = ls->L; + GCstr *s = lj_str_new(L, str, len); + TValue *tv = lj_tab_setstr(L, ls->fs->kt, s); + if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */ + return s; +} + +static void keep_token(LexState *ls) +{ + if (ls->token == TK_name || ls->token == TK_string) { + TValue *tv = lj_tab_setstr(ls->L, ls->fs->kt, strV(&ls->tokenval)); + if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */ + } +} + +static void nilK(FuncState *fs, BCReg from, BCReg n) +{ + BCIns *pr; + if (fs->pc > fs->lasttarget) { /* no jumps to current position? */ + BCReg pfrom, pto; + pr = &fs->pt->bc[fs->pc-1]; + pfrom = bc_a(*pr); + switch (bc_op(*pr)) { + case BC_KPRI: + if (bc_d(*pr) != ~LJ_TNIL) break; + if (from == pfrom) { + if (n == 1) return; + } else if (from == pfrom+1) { + from = pfrom; + n++; + } else { + break; + } + fs->pc--; + break; + case BC_KNIL: + pto = bc_d(*pr); + if (pfrom <= from && from <= pto+1) { /* can connect both? */ + if (from+n-1 > pto) + setbc_d(pr, from+n-1); + return; + } + break; + default: + break; + } + } + emitINS(fs, n == 1 ? BCINS_AD(BC_KPRI, from, priKk(VKNIL)) + : BCINS_AD(BC_KNIL, from, from+n-1)); +} + +/* -- Code emitter: registers --------------------------------------------- */ + +static void checkframe(FuncState *fs, BCReg n) +{ + BCReg sz = fs->freereg + n; + if (sz > fs->pt->framesize) { + if (sz >= LJ_MAX_SLOTS) + err_syntax(fs->ls, LJ_ERR_XSLOTS); + fs->pt->framesize = cast_byte(sz); + } +} + +static void reserveregs(FuncState *fs, BCReg n) +{ + checkframe(fs, n); + fs->freereg += n; +} + +static void freereg(FuncState *fs, BCReg reg) +{ + if (reg >= fs->nactvar) { + fs->freereg--; + lua_assert(reg == fs->freereg); + } +} + +static void freeexp(FuncState *fs, ExpDesc *e) +{ + if (e->k == VNONRELOC) + freereg(fs, e->u.s.info); +} + +/* -- Code emitter: expressions ------------------------------------------- */ + +static void dischargevars(FuncState *fs, ExpDesc *e) +{ + BCIns ins; + switch (e->k) { + case VUPVAL: + ins = BCINS_AD(BC_UGET, 0, e->u.s.info); + break; + case VGLOBAL: + ins = BCINS_AD(BC_GGET, 0, strK(fs, e)); + break; + case VINDEXED: { + /* TGET[VSB] key = reg, string const or byte const */ + BCReg rc = e->u.s.aux; + if ((int32_t)rc < 0) { + ins = BCINS_ABC(BC_TGETS, 0, e->u.s.info, ~rc); + } else if (rc > BCMAX_C) { + ins = BCINS_ABC(BC_TGETB, 0, e->u.s.info, rc-(BCMAX_C+1)); + } else { + freereg(fs, rc); + ins = BCINS_ABC(BC_TGETV, 0, e->u.s.info, rc); + } + freereg(fs, e->u.s.info); + break; + } + case VCALL: + e->u.s.info = e->u.s.aux; + /* fallthrough */ + case VLOCAL: + e->k = VNONRELOC; + /* fallthrough */ + default: + return; + } + e->u.s.info = emitINS(fs, ins); + e->k = VRELOCABLE; +} + +static void discharge2reg(FuncState *fs, ExpDesc *e, BCReg reg) +{ + BCIns ins; + dischargevars(fs, e); + switch (e->k) { + case VKNIL: case VKFALSE: case VKTRUE: + ins = BCINS_AD(BC_KPRI, reg, priK(e)); + break; + case VKSTR: + ins = BCINS_AD(BC_KSTR, reg, strK(fs, e)); + break; + case VKNUM: { + lua_Number n = expnumV(e); + int32_t k = lj_num2int(n); + if (checki16(k) && n == cast_num(k)) + ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k); + else + ins = BCINS_AD(BC_KNUM, reg, numK(fs, e)); + break; + } + case VRELOCABLE: + setbc_a(bcptr(fs, e), reg); + goto noins; + 
case VNONRELOC: + if (reg == e->u.s.info) + goto noins; + ins = BCINS_AD(BC_MOV, reg, e->u.s.info); + break; + default: + lua_assert(e->k == VVOID || e->k == VJMP); + return; /* nothing to do... */ + } + emitINS(fs, ins); +noins: + e->u.s.info = reg; + e->k = VNONRELOC; +} + +static void exp2reg(FuncState *fs, ExpDesc *e, BCReg reg) +{ + discharge2reg(fs, e, reg); + if (e->k == VJMP) + concatjumps(fs, &e->t, e->u.s.info); /* put this jump in `t' list */ + if (hasjumps(e)) { + BCPos final; /* position after whole expression */ + BCPos p_f = NO_JMP; /* position of an eventual LOAD false */ + BCPos p_t = NO_JMP; /* position of an eventual LOAD true */ + if (need_value(fs, e->t) || need_value(fs, e->f)) { + BCPos fj = (e->k == VJMP) ? NO_JMP : emit_jump(fs); + p_f = emitAD(fs, BC_KPRI, reg, priKk(VKFALSE)); + emitAJ(fs, BC_JMP, fs->freereg, 1); + p_t = emitAD(fs, BC_KPRI, reg, priKk(VKTRUE)); + patchtohere(fs, fj); + } + final = fs->pc; + fs->lasttarget = final; + patchlistaux(fs, e->f, final, reg, p_f); + patchlistaux(fs, e->t, final, reg, p_t); + } + e->f = e->t = NO_JMP; + e->u.s.info = reg; + e->k = VNONRELOC; +} + +static void exp2nextreg(FuncState *fs, ExpDesc *e) +{ + dischargevars(fs, e); + freeexp(fs, e); + reserveregs(fs, 1); + exp2reg(fs, e, fs->freereg - 1); +} + +static BCReg exp2anyreg(FuncState *fs, ExpDesc *e) +{ + dischargevars(fs, e); + if (e->k == VNONRELOC) { + if (!hasjumps(e)) return e->u.s.info; /* exp is already in a register */ + if (e->u.s.info >= fs->nactvar) { /* reg. is not a local? */ + exp2reg(fs, e, e->u.s.info); /* put value on it */ + return e->u.s.info; + } + } + exp2nextreg(fs, e); /* default */ + return e->u.s.info; +} + +static void exp2val(FuncState *fs, ExpDesc *e) +{ + if (hasjumps(e)) + exp2anyreg(fs, e); + else + dischargevars(fs, e); +} + +static void storevar(FuncState *fs, ExpDesc *var, ExpDesc *e) +{ + BCIns ins; + switch (var->k) { + case VLOCAL: + freeexp(fs, e); + exp2reg(fs, e, var->u.s.info); + return; + case VUPVAL: + exp2val(fs, e); + switch (e->k) { + case VKNIL: case VKFALSE: case VKTRUE: + ins = BCINS_AD(BC_USETP, var->u.s.info, priK(e)); + break; + case VKSTR: + ins = BCINS_AD(BC_USETS, var->u.s.info, strK(fs, e)); + break; + case VKNUM: + ins = BCINS_AD(BC_USETN, var->u.s.info, numK(fs, e)); + break; + default: + ins = BCINS_AD(BC_USETV, var->u.s.info, exp2anyreg(fs, e)); + break; + } + break; + case VGLOBAL: { + BCReg ra = exp2anyreg(fs, e); + ins = BCINS_AD(BC_GSET, ra, strK(fs, var)); + break; + } + case VINDEXED: { + /* TSET[VSB] key = reg, string const or byte const */ + BCReg ra = exp2anyreg(fs, e); + BCReg rc = var->u.s.aux; + if ((int32_t)rc < 0) { + ins = BCINS_ABC(BC_TSETS, ra, var->u.s.info, ~rc); + } else if (rc > BCMAX_C) { + ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1)); + } else { + /* Free late alloced key reg to avoid assert on free of value reg. */ + /* This can only happen when called from constructor(). 
*/ + lua_assert(e->k != VNONRELOC || ra < fs->nactvar || + rc < ra || (freereg(fs, rc),1)); + ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc); + } + break; + } + default: + lua_assert(0); /* invalid var kind to store */ + return; + } + emitINS(fs, ins); + freeexp(fs, e); +} + +static void indexexp(FuncState *fs, ExpDesc *t, ExpDesc *e) +{ + /* already called: exp2val(fs, e) */ + t->k = VINDEXED; + if (isnumK(e)) { + lua_Number n = expnumV(e); + int32_t k = lj_num2int(n); + if (checku8(k) && n == cast_num(k)) { + t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */ + return; + } + } else if (isstrK(e)) { + BCReg idx = strK(fs, e); + if (idx <= BCMAX_C) { + t->u.s.aux = ~idx; /* -256..-1: const string key */ + return; + } + } + t->u.s.aux = exp2anyreg(fs, e); /* 0..255: register */ +} + +static void methodexp(FuncState *fs, ExpDesc *e, ExpDesc *key) +{ + BCReg idx, func, tab = exp2anyreg(fs, e); + freeexp(fs, e); + func = fs->freereg; + emitAD(fs, BC_MOV, func+1, tab); + lua_assert(isstrK(key)); + idx = strK(fs, key); + if (idx <= BCMAX_C) { + reserveregs(fs, 2); + emitABC(fs, BC_TGETS, func, tab, idx); + } else { + reserveregs(fs, 3); + emitAD(fs, BC_KSTR, func+2, idx); + emitABC(fs, BC_TGETV, func, tab, func+2); + fs->freereg--; + } + e->u.s.info = func; + e->k = VNONRELOC; +} + +/* -- Code emitter: conditionals ------------------------------------------ */ + +static void invertjump(FuncState *fs, ExpDesc *e) +{ + BCIns *i = bcptr(fs, e) - 1; + setbc_op(i, bc_op(*i)^1); +} + +static BCPos jumponcond(FuncState *fs, ExpDesc *e, int cond) +{ + if (e->k == VRELOCABLE) { + BCIns *i = bcptr(fs, e); + if (bc_op(*i) == BC_NOT) { + *i = BCINS_AD(cond ? BC_ISF : BC_IST, 0, bc_d(*i)); + return emit_jump(fs); + } + /* else go through */ + } + if (e->k != VNONRELOC) { + reserveregs(fs, 1); + discharge2reg(fs, e, fs->freereg-1); + } + freeexp(fs, e); + emitAD(fs, cond ? BC_ISTC : BC_ISFC, NO_REG, e->u.s.info); + return emit_jump(fs); +} + +static void goiftrue(FuncState *fs, ExpDesc *e) +{ + BCPos pc; /* PC of last jump. */ + dischargevars(fs, e); + switch (e->k) { + case VKSTR: case VKNUM: case VKTRUE: + pc = NO_JMP; /* always true; do nothing */ + break; + case VJMP: + invertjump(fs, e); + pc = e->u.s.info; + break; + case VKFALSE: + if (!hasjumps(e)) { + pc = emit_jump(fs); /* always jump */ + break; + } + /* fallthrough */ + default: + pc = jumponcond(fs, e, 0); + break; + } + concatjumps(fs, &e->f, pc); /* insert last jump in `f' list */ + patchtohere(fs, e->t); + e->t = NO_JMP; +} + +static void goiffalse(FuncState *fs, ExpDesc *e) +{ + BCPos pc; /* PC of last jump. */ + dischargevars(fs, e); + switch (e->k) { + case VKNIL: case VKFALSE: + pc = NO_JMP; /* always false; do nothing */ + break; + case VJMP: + pc = e->u.s.info; + break; + case VKTRUE: + if (!hasjumps(e)) { + pc = emit_jump(fs); /* always jump */ + break; + } + /* fallthrough */ + default: + pc = jumponcond(fs, e, 1); + break; + } + concatjumps(fs, &e->t, pc); /* insert last jump in `t' list */ + patchtohere(fs, e->f); + e->f = NO_JMP; +} + +/* -- Code emitter: operators --------------------------------------------- */ + +static int foldarith(BinOpr opr, ExpDesc *e1, ExpDesc *e2) +{ + TValue o; + if (!isnumKexp(e1) || !isnumKexp(e2)) return 0; + setnumV(&o, lj_vm_foldarith(expnumV(e1), expnumV(e2), (int)opr-OPR_ADD)); + if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. 
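+** (constants are interned through the kt table, keyed by number value: +** NaN is not a usable table key and -0 would collide with +0).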
*/ + setnumV(&e1->u.nval, numV(&o)); + return 1; +} + +static void codearith(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2) +{ + BCReg rb, rc, t; + uint32_t op; + if (foldarith(opr, e1, e2)) + return; + if (opr == OPR_POW) { + op = BC_POW; + rc = exp2anyreg(fs, e2); + rb = exp2anyreg(fs, e1); + } else { + op = opr-OPR_ADD+BC_ADDVV; + /* must discharge 2nd operand first since VINDEXED might free regs */ + exp2val(fs, e2); + if (isnumK(e2) && (rc = numK(fs, e2)) <= BCMAX_C) + op -= BC_ADDVV-BC_ADDVN; + else + rc = exp2anyreg(fs, e2); + /* emit_prebinop discharges 1st operand, but may need to use KNUM/KSHORT */ + lua_assert(isnumK(e1) || e1->k == VNONRELOC); + exp2val(fs, e1); + /* avoid two consts to satisfy bytecode constraints */ + if (isnumK(e1) && !isnumK(e2) && (t = numK(fs, e1)) <= BCMAX_B) { + rb = rc; rc = t; op -= BC_ADDVV-BC_ADDNV; + } else { + rb = exp2anyreg(fs, e1); + } + } + /* using freeexp might cause asserts if the order is wrong */ + if (e1->k == VNONRELOC && e1->u.s.info >= fs->nactvar) fs->freereg--; + if (e2->k == VNONRELOC && e2->u.s.info >= fs->nactvar) fs->freereg--; + e1->u.s.info = emitABC(fs, op, 0, rb, rc); + e1->k = VRELOCABLE; +} + +static void codecomp(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2) +{ + ExpDesc *eret = e1; + BCIns ins; + exp2val(fs, e1); + if (opr == OPR_EQ || opr == OPR_NE) { + BCOp op = opr == OPR_EQ ? BC_ISEQV : BC_ISNEV; + BCReg ra; + if (isK(e1)) { e1 = e2; e2 = eret; } /* need constant in 2nd arg */ + ra = exp2anyreg(fs, e1); /* first arg must be in a reg */ + exp2val(fs, e2); + switch (e2->k) { + case VKNIL: case VKFALSE: case VKTRUE: + ins = BCINS_AD(op+(BC_ISEQP-BC_ISEQV), ra, priK(e2)); + break; + case VKSTR: + ins = BCINS_AD(op+(BC_ISEQS-BC_ISEQV), ra, strK(fs, e2)); + break; + case VKNUM: + ins = BCINS_AD(op+(BC_ISEQN-BC_ISEQV), ra, numK(fs, e2)); + break; + default: + ins = BCINS_AD(op, ra, exp2anyreg(fs, e2)); + break; + } + } else { + uint32_t op = opr-OPR_LT+BC_ISLT; + BCReg ra; + if ((op-BC_ISLT) & 1) { /* GT -> LT, GE -> LE */ + e1 = e2; e2 = eret; /* swap operands */ + op = ((op-BC_ISLT)^3)+BC_ISLT; + } + ra = exp2anyreg(fs, e1); + ins = BCINS_AD(op, ra, exp2anyreg(fs, e2)); + } + /* using freeexp might cause asserts if the order is wrong */ + if (e1->k == VNONRELOC && e1->u.s.info >= fs->nactvar) fs->freereg--; + if (e2->k == VNONRELOC && e2->u.s.info >= fs->nactvar) fs->freereg--; + emitINS(fs, ins); + eret->u.s.info = emit_jump(fs); + eret->k = VJMP; +} + +static void emit_unop(FuncState *fs, UnOpr uop, ExpDesc *e) +{ + BCOp op = BC_LEN; + switch (uop) { + case OPR_MINUS: + if (isnumKexp(e) && expnumV(e) != 0) { /* Avoid const-folding to -0. 
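+** (negating the constant 0 would yield -0, which must instead be +** materialized at runtime by BC_UNM; see foldarith for the same rule).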
*/ + setnumV(&e->u.nval, -expnumV(e)); + return; + } + op = BC_UNM; + /* fallthrough */ + case OPR_LEN: + exp2anyreg(fs, e); + break; + case OPR_NOT: + /* interchange true and false lists */ + { BCPos temp = e->f; e->f = e->t; e->t = temp; } + removevalues(fs, e->f); + removevalues(fs, e->t); + dischargevars(fs, e); + switch (e->k) { + case VKNIL: case VKFALSE: + e->k = VKTRUE; + return; + case VKSTR: case VKNUM: case VKTRUE: + e->k = VKFALSE; + return; + case VJMP: + invertjump(fs, e); + return; + case VRELOCABLE: + reserveregs(fs, 1); + setbc_a(bcptr(fs, e), fs->freereg-1); + e->u.s.info = fs->freereg-1; + e->k = VNONRELOC; + break; + case VNONRELOC: + break; + default: lua_assert(0); return; + } + op = BC_NOT; + break; + default: lua_assert(0); return; + } + freeexp(fs, e); + e->u.s.info = emitAD(fs, op, 0, e->u.s.info); + e->k = VRELOCABLE; +} + +static void prepare_binop(FuncState *fs, BinOpr op, ExpDesc *e) +{ + switch (op) { + case OPR_AND: + goiftrue(fs, e); + break; + case OPR_OR: + goiffalse(fs, e); + break; + case OPR_CONCAT: + exp2nextreg(fs, e); /* operand must be on the `stack' */ + break; + case OPR_EQ: case OPR_NE: + if (!isKexp(e)) exp2anyreg(fs, e); + break; + default: + if (!isnumKexp(e)) exp2anyreg(fs, e); + break; + } +} + +static void emit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2) +{ + switch (op) { + case OPR_AND: + lua_assert(e1->t == NO_JMP); /* list must be closed */ + dischargevars(fs, e2); + concatjumps(fs, &e2->f, e1->f); + *e1 = *e2; + break; + case OPR_OR: + lua_assert(e1->f == NO_JMP); /* list must be closed */ + dischargevars(fs, e2); + concatjumps(fs, &e2->t, e1->t); + *e1 = *e2; + break; + case OPR_CONCAT: + exp2val(fs, e2); + if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) { + lua_assert(e1->u.s.info == bc_b(*bcptr(fs, e2))-1); + freeexp(fs, e1); + setbc_b(bcptr(fs, e2), e1->u.s.info); + e1->u.s.info = e2->u.s.info; + } else { + exp2nextreg(fs, e2); + freeexp(fs, e2); + freeexp(fs, e1); + e1->u.s.info = emitABC(fs, BC_CAT, 0, e1->u.s.info, e2->u.s.info); + } + e1->k = VRELOCABLE; + break; + case OPR_ADD: case OPR_SUB: case OPR_MUL: + case OPR_DIV: case OPR_MOD: case OPR_POW: + codearith(fs, op, e1, e2); + break; + case OPR_EQ: case OPR_NE: + case OPR_LT: case OPR_LE: case OPR_GT: case OPR_GE: + codecomp(fs, op, e1, e2); + break; + default: lua_assert(0); break; + } +} + +/* -- Lexer support ------------------------------------------------------- */ + +static int testnext(LexState *ls, LexToken tok) +{ + if (ls->token == tok) { + lj_lex_next(ls); + return 1; + } + return 0; +} + +static void checknext(LexState *ls, LexToken tok) +{ + if (ls->token != tok) + err_token(ls, tok); + lj_lex_next(ls); +} + +static void checkmatch(LexState *ls, LexToken what, LexToken who, BCLine line) +{ + if (!testnext(ls, what)) { + if (line == ls->linenumber) { + err_token(ls, what); + } else { + const char *swhat = lj_lex_token2str(ls, what); + const char *swho = lj_lex_token2str(ls, who); + lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line); + } + } +} + +static GCstr *str_checkname(LexState *ls) +{ + GCstr *s; + if (ls->token != TK_name) + err_token(ls, TK_name); + s = strV(&ls->tokenval); + lj_lex_next(ls); + return s; +} + +static void init_exp(ExpDesc *e, ExpKind k, uint32_t info) +{ + e->k = k; + e->u.s.info = info; + e->f = e->t = NO_JMP; +} + +static void checkname(LexState *ls, ExpDesc *e) +{ + init_exp(e, VKSTR, 0); + e->u.sval = str_checkname(ls); +} + +/* -- Variable handling 
--------------------------------------------------- */ + +#define getlocvar(fs, i) ((fs)->pt->varinfo[(fs)->actvar[(i)]]) + +static BCReg registerlocalvar(LexState *ls, GCstr *name) +{ + FuncState *fs = ls->fs; + GCproto *pt = fs->pt; + if (LJ_UNLIKELY(fs->nlocvars >= pt->sizevarinfo)) { + MSize oldsize = pt->sizevarinfo; + checklimit(fs, fs->nlocvars, 32767, "local variables"); + lj_mem_growvec(fs->L, pt->varinfo, pt->sizevarinfo, 32767, VarInfo); + while (oldsize < pt->sizevarinfo) pt->varinfo[oldsize++].name = NULL; + } + pt->varinfo[fs->nlocvars].name = name; + lj_gc_objbarrier(ls->L, pt, name); + return fs->nlocvars++; +} + +static void new_localvar(LexState *ls, GCstr *name, BCReg n) +{ + FuncState *fs = ls->fs; + checklimit(fs, fs->nactvar+n, LJ_MAX_LOCVAR, "local variables"); + fs->actvar[fs->nactvar+n] = cast(uint16_t, registerlocalvar(ls, name)); +} + +#define new_localvarliteral(ls,v,n) \ + new_localvar(ls, lj_parse_keepstr(ls, "" v, sizeof(v)-1), n) + +static void adjustlocalvars(LexState *ls, BCReg nvars) +{ + FuncState *fs = ls->fs; + fs->nactvar = cast_byte(fs->nactvar + nvars); + for (; nvars; nvars--) + getlocvar(fs, fs->nactvar - nvars).startpc = fs->pc; +} + +static void removevars(LexState *ls, BCReg tolevel) +{ + FuncState *fs = ls->fs; + while (fs->nactvar > tolevel) + getlocvar(fs, --fs->nactvar).endpc = fs->pc; +} + +static uint32_t indexupvalue(FuncState *fs, GCstr *name, ExpDesc *v) +{ + uint32_t i; + GCproto *pt = fs->pt; + for (i = 0; i < fs->nuv; i++) { + if (fs->upvalues[i].k == v->k && fs->upvalues[i].info == v->u.s.info) { + lua_assert(pt->uvname[i] == name); + return i; + } + } + /* Not found, create a new upvalue for this name. */ + if (LJ_UNLIKELY(fs->nuv >= pt->sizeuvname)) { + MSize oldsize = pt->sizeuvname; + checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues"); + lj_mem_growvec(fs->L, pt->uvname, pt->sizeuvname, LJ_MAX_UPVAL, GCstr *); + while (oldsize < pt->sizeuvname) pt->uvname[oldsize++] = NULL; + } + pt->uvname[fs->nuv] = name; + lj_gc_objbarrier(fs->L, pt, name); + lua_assert(v->k == VLOCAL || v->k == VUPVAL); + fs->upvalues[fs->nuv].k = cast_byte(v->k); + fs->upvalues[fs->nuv].info = cast_byte(v->u.s.info); + return fs->nuv++; +} + +static BCReg searchvar(FuncState *fs, GCstr *n) +{ + int i; + for (i = fs->nactvar-1; i >= 0; i--) { + if (n == getlocvar(fs, i).name) + return (BCReg)i; + } + return (BCReg)-1; /* Not found. */ +} + +static void markupval(FuncState *fs, BCReg level) +{ + FuncBlock *bl = fs->bl; + while (bl && bl->nactvar > level) bl = bl->previous; + if (bl) bl->upval = 1; +} + +static int singlevaraux(FuncState *fs, GCstr *name, ExpDesc *e, int first) +{ + if (fs == NULL) { /* no more levels? */ + init_exp(e, VGLOBAL, 0); /* default is global variable */ + e->u.sval = name; + return 1; + } else { + BCReg reg = searchvar(fs, name); /* look up at current level */ + if ((int32_t)reg >= 0) { + init_exp(e, VLOCAL, reg); + if (!first) + markupval(fs, reg); /* local will be used as an upval */ + return 0; + } else { /* not found at current level; try upper one */ + if (singlevaraux(fs->prev, name, e, 0)) /* global? 
*/ + return 1; + e->u.s.info = indexupvalue(fs, name, e); /* else was local or upvalue */ + e->k = VUPVAL; /* upvalue in this level */ + return 0; + } + } +} + +#define singlevar(ls, e) singlevaraux((ls)->fs, str_checkname(ls), (e), 1) + +static void adjust_assign(LexState *ls, BCReg nvars, BCReg nexps, ExpDesc *e) +{ + FuncState *fs = ls->fs; + int32_t extra = (int32_t)nvars - (int32_t)nexps; + if (e->k == VCALL) { + extra++; /* includes call itself */ + if (extra < 0) extra = 0; + setbc_b(bcptr(fs, e), extra+1); + if (extra > 1) reserveregs(fs, (BCReg)extra-1); + } else { + if (e->k != VVOID) exp2nextreg(fs, e); /* close last expression */ + if (extra > 0) { + BCReg reg = fs->freereg; + reserveregs(fs, (BCReg)extra); + nilK(fs, reg, (BCReg)extra); + } + } +} + +/* -- Function handling --------------------------------------------------- */ + +/* Forward declaration. */ +static void chunk(LexState *ls); + +static void open_func(LexState *ls, FuncState *fs) +{ + lua_State *L = ls->L; + GCproto *pt = lj_func_newproto(L); + fs->pt = pt; + fs->prev = ls->fs; /* linked list of funcstates */ + fs->ls = ls; + fs->L = L; + ls->fs = fs; + fs->pc = 0; + fs->lasttarget = 0; + fs->jpc = NO_JMP; + fs->freereg = 0; + fs->nkgc = 0; + fs->nkn = 0; + fs->nlocvars = 0; + fs->nactvar = 0; + fs->nuv = 0; + fs->bl = NULL; + pt->chunkname = ls->chunkname; + pt->framesize = 2; /* registers 0/1 are always valid */ + fs->kt = lj_tab_new(L, 0, 0); + /* anchor table of constants and prototype (to avoid being collected) */ + settabV(L, L->top, fs->kt); + incr_top(L); + setprotoV(L, L->top, pt); + incr_top(L); +} + +static void collectk(FuncState *fs, GCproto *pt) +{ + GCtab *kt; + TValue *array; + Node *node; + BCReg nkgc; + MSize i, hmask, sizek; + GCRef *kstart; + checklimitgt(fs, fs->nkn, BCMAX_D+1, "constants"); + checklimitgt(fs, fs->nkgc, BCMAX_D+1, "constants"); + nkgc = round_nkgc(fs->nkgc); + sizek = (MSize)(nkgc*sizeof(MRef) + fs->nkn*sizeof(lua_Number)); + kstart = lj_mem_newt(fs->L, sizek, GCRef); + if (nkgc) setgcrefnull(kstart[0]); /* May be uninitialized otherwise. 
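+** The constant area is a single allocation: the nkgc GCRefs are indexed +** downwards from pt->k.gc (as pt->k.gc[~kidx]) and the numeric constants +** upwards from pt->k.n.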
*/ + pt->k.gc = kstart + nkgc; + pt->sizekn = fs->nkn; + pt->sizekgc = fs->nkgc; + kt = fs->kt; + array = tvref(kt->array); + for (i = 0; i < kt->asize; i++) + if (tvisnum(&array[i])) + pt->k.n[array[i].u32.lo] = cast_num(i); + node = noderef(kt->node); + hmask = kt->hmask; + for (i = 0; i <= hmask; i++) { + Node *n = &node[i]; + if (tvisnum(&n->val)) { + ptrdiff_t kidx = (ptrdiff_t)n->val.u32.lo; + if (tvisnum(&n->key)) { + pt->k.n[kidx] = numV(&n->key); + } else { + GCobj *o = gcV(&n->key); + setgcref(pt->k.gc[~kidx], o); + lj_gc_objbarrier(fs->L, pt, o); + } + } + } +} + +static void collectuv(FuncState *fs, GCproto *pt) +{ + uint32_t i; + pt->uv = lj_mem_newvec(fs->L, fs->nuv, int16_t); + pt->sizeuv = fs->nuv; + for (i = 0; i < pt->sizeuv; i++) { + uint32_t v = fs->upvalues[i].info; + if (fs->upvalues[i].k == VUPVAL) v = ~v; + pt->uv[i] = (int16_t)v; + } +} + +static void finalret(FuncState *fs, GCproto *pt) +{ + BCPos lastpc = fs->pc; + if (lastpc > fs->lasttarget) { + switch (bc_op(pt->bc[lastpc-1])) { + case BC_CALLMT: case BC_CALLT: + case BC_RETM: case BC_RET: case BC_RET0: case BC_RET1: + goto suppress_return; /* already got a return */ + default: break; + } + } + if (fs->pt->flags & PROTO_HAS_FNEW) + emitAJ(fs, BC_UCLO, 0, 0); + emitAD(fs, BC_RET0, 0, 1); /* final return */ +suppress_return: + /* may need to fixup returns encoded before first function was created */ + if (fs->pt->flags & PROTO_FIXUP_RETURN) { + BCPos pc; + for (pc = 0; pc < lastpc; pc++) { + BCIns i = pt->bc[pc]; + BCPos offset; + switch (bc_op(i)) { + case BC_CALLMT: case BC_CALLT: + case BC_RETM: case BC_RET: case BC_RET0: case BC_RET1: + offset = emitINS(fs, i)-(pc+1)+BCBIAS_J; /* copy return ins */ + if (offset > BCMAX_D) + err_syntax(fs->ls, LJ_ERR_XFIXUP); + pt->bc[pc] = BCINS_AD(BC_UCLO, 0, offset); /* replace w/ UCLO+branch */ + break; + case BC_UCLO: return; /* we're done */ + default: break; + } + } + } +} + +static void close_func(LexState *ls) +{ + lua_State *L = ls->L; + FuncState *fs = ls->fs; + GCproto *pt = fs->pt; + removevars(ls, 0); + finalret(fs, pt); + lj_mem_reallocvec(L, pt->bc, pt->sizebc, fs->pc, BCIns); + pt->sizebc = fs->pc; + collectk(fs, pt); + collectuv(fs, pt); + lj_mem_reallocvec(L, pt->lineinfo, pt->sizelineinfo, fs->pc, BCLine); + pt->sizelineinfo = fs->pc; + lj_mem_reallocvec(L, pt->varinfo, pt->sizevarinfo, fs->nlocvars, VarInfo); + pt->sizevarinfo = fs->nlocvars; + lj_mem_reallocvec(L, pt->uvname, pt->sizeuvname, fs->nuv, GCstr *); + pt->sizeuvname = fs->nuv; + lua_assert(fs->bl == NULL); + lj_vmevent_send(L, BC, + setprotoV(L, L->top++, pt); + ); + ls->fs = fs->prev; + L->top -= 2; /* Remove table and prototype from the stack. */ + lua_assert(ls->fs != NULL || ls->token == TK_eof); + keep_token(ls); /* Re-anchor last token. */ +} + +GCproto *lj_parse(LexState *ls) +{ + struct FuncState fs; + ls->level = 0; + open_func(ls, &fs); + fs.pt->flags |= PROTO_IS_VARARG; /* Main chunk is always a vararg func. */ + lj_lex_next(ls); /* Read-ahead first token. */ + chunk(ls); + if (ls->token != TK_eof) + err_token(ls, TK_eof); + fs.pt->lastlinedefined = ls->linenumber; + close_func(ls); + lua_assert(fs.prev == NULL); + lua_assert(fs.pt->sizeuv == 0); + lua_assert(ls->fs == NULL); + return fs.pt; +} + +/* -- Expressions --------------------------------------------------------- */ + +/* forward declaration */ +static void expr(LexState *ls, ExpDesc *v); + +static void field(LexState *ls, ExpDesc *v) +{ + /* field -> ['.' 
| ':'] NAME */ + FuncState *fs = ls->fs; + ExpDesc key; + exp2anyreg(fs, v); + lj_lex_next(ls); /* skip the dot or colon */ + checkname(ls, &key); + indexexp(fs, v, &key); +} + +static void yindex(LexState *ls, ExpDesc *v) +{ + /* index -> '[' expr ']' */ + lj_lex_next(ls); /* skip the '[' */ + expr(ls, v); + exp2val(ls->fs, v); + checknext(ls, ']'); +} + +static void kexp2tv(TValue *v, ExpDesc *e) +{ + switch (e->k) { + case VKNIL: case VKFALSE: case VKTRUE: v->it = ~(int32_t)e->k; break; + case VKSTR: + setgcref(v->gcr, obj2gco(e->u.sval)); v->it = LJ_TSTR; break; + case VKNUM: setnumV(v, expnumV(e)); break; + default: lua_assert(0); break; + } +} + +static void constructor(LexState *ls, ExpDesc *e) +{ + FuncState *fs = ls->fs; + BCLine line = ls->linenumber; + GCtab *t = NULL; + int vcall = 0, needarr = 0; + int32_t narr = 1; /* first array index */ + uint32_t nhash = 0; /* number of hash entries */ + BCReg freg = fs->freereg; + BCPos pc = emitAD(fs, BC_TNEW, freg, 0); + init_exp(e, VNONRELOC, freg); + reserveregs(fs, 1); + freg++; + checknext(ls, '{'); + while (ls->token != '}') { + ExpDesc key, val; + vcall = 0; + if (ls->token == '[') { + yindex(ls, &key); /* already calls exp2val */ + if (!isK(&key)) indexexp(fs, e, &key); + if (isnumK(&key) && expnumV(&key) == 0) needarr = 1; else nhash++; + checknext(ls, '='); + } else if (ls->token == TK_name && lj_lex_lookahead(ls) == '=') { + checkname(ls, &key); + checknext(ls, '='); + nhash++; + } else { + init_exp(&key, VKNUM, 0); + setintV(&key.u.nval, narr); + narr++; + needarr = vcall = 1; + } + expr(ls, &val); + if (isKexp(&val) && isK(&key) && key.k != VKNIL) { + TValue k; + if (!t) { /* create template table on demand */ + BCReg kidx; + t = lj_tab_new(fs->L, 0, 0); + kidx = gcK(fs, obj2gco(t), LJ_TTAB); + fs->pt->bc[pc] = BCINS_AD(BC_TDUP, freg-1, kidx); + } + vcall = 0; + kexp2tv(&k, &key); + kexp2tv(lj_tab_set(fs->L, t, &k), &val); + if (val.k == VKSTR) + lj_gc_objbarriert(fs->L, t, val.u.sval); + } else { + if (isK(&key)) indexexp(fs, e, &key); + if (val.k != VCALL) vcall = 0; + storevar(fs, e, &val); + } + fs->freereg = freg; + if (!testnext(ls, ',') && !testnext(ls, ';')) break; + } + checkmatch(ls, '}', '{', line); + if (vcall) { + BCIns *i = &fs->pt->bc[fs->pc-1]; + ExpDesc en; + lua_assert(bc_a(*i)==freg && bc_op(*i) == (narr>256?BC_TSETV:BC_TSETB)); + init_exp(&en, VKNUM, 0); + setintV(&en.u.nval, narr-1); + if (narr > 256) { fs->pc--; i--; } + *i = BCINS_AD(BC_TSETM, freg, numK(fs, &en)); + setbc_b(i-1, 0); + } + if (pc == fs->pc-1) { /* make expr relocable if possible */ + e->u.s.info = pc; + fs->freereg--; + e->k = VRELOCABLE; + } else { + e->k = VNONRELOC; /* indexexp may have changed it */ + } + if (!t) { /* Construct TNEW RD: hhhhhaaaaaaaaaaa. */ + if (!needarr) narr = 0; + else if (narr < 3) narr = 3; + else if (narr > 0x7ff) narr = 0x7ff; + setbc_d(&fs->pt->bc[pc], (uint32_t)narr | (hsize2hbits(nhash) << 11)); + } +} + +static void parlist(LexState *ls) +{ + /* parlist -> [ param { `,' param } ] */ + FuncState *fs = ls->fs; + GCproto *pt = fs->pt; + BCReg nparams = 0; + if (ls->token != ')') { /* is `parlist' not empty? */ + do { + switch (ls->token) { + case TK_name: /* param -> NAME */ + new_localvar(ls, str_checkname(ls), nparams++); + break; + case TK_dots: /* param -> `...' 
*/ + lj_lex_next(ls); + pt->flags |= PROTO_IS_VARARG; + break; + default: + err_syntax(ls, LJ_ERR_XPARAM); + break; + } + } while (!(pt->flags & PROTO_IS_VARARG) && testnext(ls, ',')); + } + adjustlocalvars(ls, nparams); + pt->numparams = cast_byte(fs->nactvar); + reserveregs(fs, fs->nactvar); /* reserve register for parameters */ +} + +static void body(LexState *ls, ExpDesc *e, int needself, BCLine line) +{ + /* body -> `(' parlist `)' chunk END */ + FuncState *fs, new_fs; + BCReg kidx; + open_func(ls, &new_fs); + new_fs.pt->linedefined = line; + checknext(ls, '('); + if (needself) { + new_localvarliteral(ls, "self", 0); + adjustlocalvars(ls, 1); + } + parlist(ls); + checknext(ls, ')'); + chunk(ls); + new_fs.pt->lastlinedefined = ls->linenumber; + checkmatch(ls, TK_end, TK_function, line); + close_func(ls); + fs = ls->fs; + kidx = gcK(fs, obj2gco(new_fs.pt), LJ_TPROTO); + init_exp(e, VRELOCABLE, emitAD(fs, BC_FNEW, 0, kidx)); + if (!(fs->pt->flags & PROTO_HAS_FNEW)) { + if (fs->pt->flags & PROTO_HAS_RETURN) + fs->pt->flags |= PROTO_FIXUP_RETURN; + fs->pt->flags |= PROTO_HAS_FNEW; + } +} + +static BCReg explist1(LexState *ls, ExpDesc *v) +{ + /* explist1 -> expr { `,' expr } */ + BCReg n = 1; /* at least one expression */ + expr(ls, v); + while (testnext(ls, ',')) { + exp2nextreg(ls->fs, v); + expr(ls, v); + n++; + } + return n; +} + +static void funcargs(LexState *ls, ExpDesc *e) +{ + FuncState *fs = ls->fs; + ExpDesc args; + BCIns ins; + BCReg base; + BCLine line = ls->linenumber; + switch (ls->token) { + case '(': { /* funcargs -> `(' [ explist1 ] `)' */ + if (line != ls->lastline) + err_syntax(ls, LJ_ERR_XAMBIG); + lj_lex_next(ls); + if (ls->token == ')') { /* arg list is empty? */ + args.k = VVOID; + } else { + explist1(ls, &args); + if (args.k == VCALL) + setbc_b(bcptr(fs, &args), 0); + } + checkmatch(ls, ')', '(', line); + break; + } + case '{': { /* funcargs -> constructor */ + constructor(ls, &args); + break; + } + case TK_string: { /* funcargs -> STRING */ + init_exp(&args, VKSTR, 0); + args.u.sval = strV(&ls->tokenval); + lj_lex_next(ls); /* must use `seminfo' before `next' */ + break; + } + default: { + err_syntax(ls, LJ_ERR_XFUNARG); + return; + } + } + lua_assert(e->k == VNONRELOC); + base = e->u.s.info; /* base register for call */ + if (args.k == VCALL) { + ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1); + } else { + if (args.k != VVOID) + exp2nextreg(fs, &args); /* close last argument */ + ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base); + } + init_exp(e, VCALL, emitINS(fs, ins)); + e->u.s.aux = base; + fs->pt->lineinfo[fs->pc - 1] = line; + fs->freereg = base+1; /* call removes function and arguments and leaves + (unless changed) one result */ +} + +static void prefixexp(LexState *ls, ExpDesc *v) +{ + /* prefixexp -> NAME | '(' expr ')' */ + switch (ls->token) { + case '(': { + BCLine line = ls->linenumber; + lj_lex_next(ls); + expr(ls, v); + checkmatch(ls, ')', '(', line); + dischargevars(ls->fs, v); + return; + } + case TK_name: { + singlevar(ls, v); + return; + } + default: { + err_syntax(ls, LJ_ERR_XSYMBOL); + return; + } + } +} + +static void primaryexp(LexState *ls, ExpDesc *v) +{ + /* primaryexp -> + prefixexp { `.' 
NAME | `[' exp `]' | `:' NAME funcargs | funcargs } */ + FuncState *fs = ls->fs; + prefixexp(ls, v); + for (;;) { + switch (ls->token) { + case '.': /* field */ + field(ls, v); + break; + case '[': { /* `[' exp1 `]' */ + ExpDesc key; + exp2anyreg(fs, v); + yindex(ls, &key); + indexexp(fs, v, &key); + break; + } + case ':': { /* `:' NAME funcargs */ + ExpDesc key; + lj_lex_next(ls); + checkname(ls, &key); + methodexp(fs, v, &key); + funcargs(ls, v); + break; + } + case '(': case TK_string: case '{': /* funcargs */ + exp2nextreg(fs, v); + funcargs(ls, v); + break; + default: return; + } + } +} + +static void simpleexp(LexState *ls, ExpDesc *v) +{ + /* simpleexp -> NUMBER | STRING | NIL | true | false | ... | + constructor | FUNCTION body | primaryexp */ + switch (ls->token) { + case TK_number: + init_exp(v, VKNUM, 0); + setnumV(&v->u.nval, numV(&ls->tokenval)); + break; + case TK_string: + init_exp(v, VKSTR, 0); + v->u.sval = strV(&ls->tokenval); + break; + case TK_nil: + init_exp(v, VKNIL, 0); + break; + case TK_true: + init_exp(v, VKTRUE, 0); + break; + case TK_false: + init_exp(v, VKFALSE, 0); + break; + case TK_dots: { /* vararg */ + FuncState *fs = ls->fs; + BCReg base; + checkcond(ls, fs->pt->flags & PROTO_IS_VARARG, LJ_ERR_XDOTS); + reserveregs(fs, 1); + base = fs->freereg-1; + init_exp(v, VCALL, emitABC(fs, BC_VARG, base, 2, 1)); + v->u.s.aux = base; + break; + } + case '{': /* constructor */ + constructor(ls, v); + return; + case TK_function: + lj_lex_next(ls); + body(ls, v, 0, ls->linenumber); + return; + default: + primaryexp(ls, v); + return; + } + lj_lex_next(ls); +} + +static void enterlevel(LexState *ls) +{ + if (++ls->level >= LJ_MAX_XLEVEL) + lj_lex_error(ls, 0, LJ_ERR_XLEVELS); +} + +#define leavelevel(ls) ((ls)->level--) + +static UnOpr getunopr(LexToken tok) +{ + switch (tok) { + case TK_not: return OPR_NOT; + case '-': return OPR_MINUS; + case '#': return OPR_LEN; + default: return OPR_NOUNOPR; + } +} + +static BinOpr getbinopr(LexToken tok) +{ + switch (tok) { + case '+': return OPR_ADD; + case '-': return OPR_SUB; + case '*': return OPR_MUL; + case '/': return OPR_DIV; + case '%': return OPR_MOD; + case '^': return OPR_POW; + case TK_concat: return OPR_CONCAT; + case TK_ne: return OPR_NE; + case TK_eq: return OPR_EQ; + case '<': return OPR_LT; + case TK_le: return OPR_LE; + case '>': return OPR_GT; + case TK_ge: return OPR_GE; + case TK_and: return OPR_AND; + case TK_or: return OPR_OR; + default: return OPR_NOBINOPR; + } +} + +static const struct { + uint8_t left; /* left priority for each binary operator */ + uint8_t right; /* right priority */ +} priority[] = { /* ORDER OPR */ + {6,6}, {6,6}, {7,7}, {7,7}, {7,7}, /* ADD SUB MUL DIV MOD */ + {10,9}, {5,4}, /* POW CONCAT (right associative) */ + {3,3}, {3,3}, /* EQ NE */ + {3,3}, {3,3}, {3,3}, {3,3}, /* LT GE GT LE */ + {2,2}, {1,1} /* AND OR */ +}; + +#define UNARY_PRIORITY 8 /* priority for unary operators */ + +/* +** subexpr -> (simpleexp | unop subexpr) { binop subexpr } +** where `binop' is any binary operator with a priority higher than `limit' +*/ +static BinOpr subexpr(LexState *ls, ExpDesc *v, uint32_t limit) +{ + BinOpr op; + UnOpr uop; + enterlevel(ls); + uop = getunopr(ls->token); + if (uop != OPR_NOUNOPR) { + lj_lex_next(ls); + subexpr(ls, v, UNARY_PRIORITY); + emit_unop(ls->fs, uop, v); + } else { + simpleexp(ls, v); + } + /* expand while operators have priorities higher than `limit' */ + op = getbinopr(ls->token); + while (op != OPR_NOBINOPR && priority[op].left > limit) { + ExpDesc v2; + BinOpr nextop; 
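+    /* Example: in "a+b*c" the recursive subexpr() call below runs with
+    ** ADD's right priority 6, so it consumes the "*" itself, since MUL's
+    ** left priority 7 is higher. Likewise "-a^b" parses as -(a^b): the
+    ** unary operand is read at UNARY_PRIORITY 8, which POW's left
+    ** priority 10 exceeds.
+    */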
+ lj_lex_next(ls); + prepare_binop(ls->fs, op, v); + /* read sub-expression with higher priority */ + nextop = subexpr(ls, &v2, priority[op].right); + emit_binop(ls->fs, op, v, &v2); + op = nextop; + } + leavelevel(ls); + return op; /* return first untreated operator */ +} + +static void expr(LexState *ls, ExpDesc *v) +{ + subexpr(ls, v, 0); +} + +static BCPos condexpr(LexState *ls) +{ + /* cond -> exp */ + ExpDesc v; + expr(ls, &v); /* read condition */ + if (v.k == VKNIL) v.k = VKFALSE; /* `falses' are all equal here */ + goiftrue(ls->fs, &v); + return v.f; +} + +/* -- Scope handling ------------------------------------------------------ */ + +static void enterblock(FuncState *fs, FuncBlock *bl, int isbreakable) +{ + bl->breaklist = NO_JMP; + bl->isbreakable = (uint8_t)isbreakable; + bl->nactvar = fs->nactvar; + bl->upval = 0; + bl->previous = fs->bl; + fs->bl = bl; + lua_assert(fs->freereg == fs->nactvar); +} + +static void leaveblock(FuncState *fs) +{ + FuncBlock *bl = fs->bl; + fs->bl = bl->previous; + removevars(fs->ls, bl->nactvar); + fs->freereg = fs->nactvar; /* free registers */ + lua_assert(bl->nactvar == fs->nactvar); + /* a block either controls scope or breaks (never both) */ + lua_assert(!bl->isbreakable || !bl->upval); + if (bl->upval) + emitAJ(fs, BC_UCLO, bl->nactvar, 0); + else /* avoid in upval case, it clears lasttarget and kills UCLO+JMP join */ + patchtohere(fs, bl->breaklist); +} + +static void block(LexState *ls) +{ + /* block -> chunk */ + FuncState *fs = ls->fs; + FuncBlock bl; + enterblock(fs, &bl, 0); + chunk(ls); + lua_assert(bl.breaklist == NO_JMP); + leaveblock(fs); +} + +/* -- Statements ---------------------------------------------------------- */ + +/* +** structure to chain all variables in the left-hand side of an +** assignment +*/ +struct LHS_assign { + ExpDesc v; /* variable (global, local, upvalue, or indexed) */ + struct LHS_assign *prev; +}; + +/* +** check whether, in an assignment to a local variable, the local variable +** is needed in a previous assignment (to a table). If so, save original +** local value in a safe place and use this safe copy in the previous +** assignment. +*/ +static void check_conflict(LexState *ls, struct LHS_assign *lh, + const ExpDesc *v) +{ + FuncState *fs = ls->fs; + BCReg reg = fs->freereg; /* eventual position to save local variable */ + int conflict = 0; + for (; lh; lh = lh->prev) { + if (lh->v.k == VINDEXED) { + if (lh->v.u.s.info == v->u.s.info) { /* conflict? */ + conflict = 1; + lh->v.u.s.info = reg; /* previous assignment will use safe copy */ + } + if (lh->v.u.s.aux == v->u.s.info) { /* conflict? 
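+      ** (i.e. the pending indexed store uses this local as its index register)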
*/ + conflict = 1; + lh->v.u.s.aux = reg; /* previous assignment will use safe copy */ + } + } + } + if (conflict) { + emitAD(fs, BC_MOV, reg, v->u.s.info); /* make copy */ + reserveregs(fs, 1); + } +} + +static void assignment(LexState *ls, struct LHS_assign *lh, BCReg nvars) +{ + ExpDesc e; + checkcond(ls, VLOCAL <= lh->v.k && lh->v.k <= VINDEXED, LJ_ERR_XSYNTAX); + if (testnext(ls, ',')) { /* assignment -> `,' primaryexp assignment */ + struct LHS_assign nv; + nv.prev = lh; + primaryexp(ls, &nv.v); + if (nv.v.k == VLOCAL) + check_conflict(ls, lh, &nv.v); + checklimit(ls->fs, ls->level + nvars, LJ_MAX_XLEVEL, "variable names"); + assignment(ls, &nv, nvars+1); + } else { /* assignment -> `=' explist1 */ + BCReg nexps; + checknext(ls, '='); + nexps = explist1(ls, &e); + if (nexps == nvars) { + if (e.k == VCALL) { + if (bc_op(*bcptr(ls->fs, &e)) == BC_VARG) { + ls->fs->freereg--; + e.k = VRELOCABLE; + } else { + e.u.s.info = e.u.s.aux; + e.k = VNONRELOC; + } + } + storevar(ls->fs, &lh->v, &e); + return; + } + adjust_assign(ls, nvars, nexps, &e); + if (nexps > nvars) + ls->fs->freereg -= nexps - nvars; /* remove extra values */ + } + init_exp(&e, VNONRELOC, ls->fs->freereg-1); /* default assignment */ + storevar(ls->fs, &lh->v, &e); +} + +static void breakstat(LexState *ls) +{ + FuncState *fs = ls->fs; + FuncBlock *bl = fs->bl; + int upval = 0; + while (bl && !bl->isbreakable) { + upval |= bl->upval; + bl = bl->previous; + } + if (!bl) + err_syntax(ls, LJ_ERR_XBREAK); + if (upval) + emitAJ(fs, BC_UCLO, bl->nactvar, 0); + concatjumps(fs, &bl->breaklist, emit_jump(fs)); +} + +static void whilestat(LexState *ls, BCLine line) +{ + /* whilestat -> WHILE cond DO block END */ + FuncState *fs = ls->fs; + BCPos start, loop, condexit; + FuncBlock bl; + lj_lex_next(ls); /* skip WHILE */ + start = fs->lasttarget = fs->pc; + condexit = condexpr(ls); + enterblock(fs, &bl, 1); + checknext(ls, TK_do); + loop = emitAD(fs, BC_LOOP, fs->nactvar, 0); + block(ls); + patchlist(fs, emit_jump(fs), start); + checkmatch(ls, TK_end, TK_while, line); + leaveblock(fs); + patchtohere(fs, condexit); /* false conditions finish the loop */ + fixjump(fs, loop, fs->pc); +} + +static void repeatstat(LexState *ls, BCLine line) +{ + /* repeatstat -> REPEAT block UNTIL cond */ + FuncState *fs = ls->fs; + BCPos loop = fs->lasttarget = fs->pc; + BCPos condexit; + FuncBlock bl1, bl2; + enterblock(fs, &bl1, 1); /* loop block */ + enterblock(fs, &bl2, 0); /* scope block */ + lj_lex_next(ls); /* skip REPEAT */ + emitAD(fs, BC_LOOP, fs->nactvar, 0); + chunk(ls); + checkmatch(ls, TK_until, TK_repeat, line); + condexit = condexpr(ls); /* read condition (inside scope block) */ + if (!bl2.upval) { /* no upvalues? */ + leaveblock(fs); /* finish scope */ + } else { /* complete semantics when there are upvalues */ + breakstat(ls); /* if condition then break */ + patchtohere(fs, condexit); /* else... */ + leaveblock(fs); /* finish scope... */ + condexit = emit_jump(fs); /* and repeat */ + } + patchlist(fs, condexit, loop); /* close the loop */ + fixjump(fs, loop, fs->pc); + leaveblock(fs); /* finish loop */ +} + +static void exp1(LexState *ls) +{ + ExpDesc e; + expr(ls, &e); + exp2nextreg(ls->fs, &e); +} + +static void forbody(LexState *ls, BCReg base, BCLine line, BCReg nvars, + int isnum) +{ + /* forbody -> DO block */ + FuncBlock bl; + FuncState *fs = ls->fs; + BCPos loop, loopend; + adjustlocalvars(ls, 3); /* control variables */ + checknext(ls, TK_do); + loop = isnum ? 
emitAJ(fs, BC_FORI, base, NO_JMP) : + emitAJ(fs, BC_JMP, fs->freereg, NO_JMP); + enterblock(fs, &bl, 0); /* scope for declared variables */ + adjustlocalvars(ls, nvars); + reserveregs(fs, nvars); + block(ls); + leaveblock(fs); /* end of scope for declared variables */ + if (isnum) { + loopend = emitAJ(fs, BC_FORL, base, NO_JMP); + fixjump(fs, loop, fs->pc); + } else { + fixjump(fs, loop, fs->pc); + emitABC(fs, BC_ITERC, base+3, nvars+1, 2+1); + loopend = emitAJ(fs, BC_ITERL, base+3, NO_JMP); + fs->pt->lineinfo[loopend-1] = line; + } + fs->pt->lineinfo[loopend] = line; /* pretend last op starts the loop */ + fixjump(fs, loopend, loop+1); +} + +static void fornum(LexState *ls, GCstr *varname, BCLine line) +{ + /* fornum -> NAME = exp1,exp1[,exp1] forbody */ + FuncState *fs = ls->fs; + BCReg base = fs->freereg; + new_localvarliteral(ls, "(for index)", FORL_IDX); + new_localvarliteral(ls, "(for limit)", FORL_STOP); + new_localvarliteral(ls, "(for step)", FORL_STEP); + new_localvar(ls, varname, FORL_EXT); + checknext(ls, '='); + exp1(ls); /* initial value */ + checknext(ls, ','); + exp1(ls); /* limit */ + if (testnext(ls, ',')) { + exp1(ls); /* optional step */ + } else { /* default step = 1 */ + emitAD(fs, BC_KSHORT, fs->freereg, 1); + reserveregs(fs, 1); + } + forbody(ls, base, line, 1, 1); +} + +static void forlist(LexState *ls, GCstr *indexname) +{ + /* forlist -> NAME {,NAME} IN explist1 forbody */ + FuncState *fs = ls->fs; + ExpDesc e; + BCReg nvars = 0; + BCLine line; + BCReg base = fs->freereg; + /* create control variables */ + new_localvarliteral(ls, "(for generator)", nvars++); + new_localvarliteral(ls, "(for state)", nvars++); + new_localvarliteral(ls, "(for control)", nvars++); + /* create declared variables */ + new_localvar(ls, indexname, nvars++); + while (testnext(ls, ',')) + new_localvar(ls, str_checkname(ls), nvars++); + checknext(ls, TK_in); + line = ls->linenumber; + adjust_assign(ls, 3, explist1(ls, &e), &e); + checkframe(fs, 3); /* extra space to call generator */ + forbody(ls, base, line, nvars - 3, 0); +} + +static void forstat(LexState *ls, BCLine line) +{ + /* forstat -> FOR (fornum | forlist) END */ + FuncState *fs = ls->fs; + GCstr *varname; + FuncBlock bl; + enterblock(fs, &bl, 1); /* scope for loop and control variables */ + lj_lex_next(ls); /* skip `for' */ + varname = str_checkname(ls); /* first variable name */ + switch (ls->token) { + case '=': fornum(ls, varname, line); break; + case ',': case TK_in: forlist(ls, varname); break; + default: err_syntax(ls, LJ_ERR_XFOR); + } + checkmatch(ls, TK_end, TK_for, line); + leaveblock(fs); /* loop scope (`break' jumps to this point) */ +} + +static BCPos test_then_block(LexState *ls) +{ + /* test_then_block -> [IF | ELSEIF] cond THEN block */ + BCPos condexit; + lj_lex_next(ls); /* skip IF or ELSEIF */ + condexit = condexpr(ls); + checknext(ls, TK_then); + block(ls); /* `then' part */ + return condexit; +} + +static void ifstat(LexState *ls, BCLine line) +{ + /* ifstat -> IF cond THEN block {ELSEIF cond THEN block} [ELSE block] END */ + FuncState *fs = ls->fs; + BCPos flist; + BCPos escapelist = NO_JMP; + flist = test_then_block(ls); /* IF cond THEN block */ + while (ls->token == TK_elseif) { + concatjumps(fs, &escapelist, emit_jump(fs)); + patchtohere(fs, flist); + flist = test_then_block(ls); /* ELSEIF cond THEN block */ + } + if (ls->token == TK_else) { + concatjumps(fs, &escapelist, emit_jump(fs)); + patchtohere(fs, flist); + lj_lex_next(ls); /* skip ELSE (after patch, for correct line info) */ + block(ls); /* 
`else' part */ + } else { + concatjumps(fs, &escapelist, flist); + } + patchtohere(fs, escapelist); + checkmatch(ls, TK_end, TK_if, line); +} + +static void localfunc(LexState *ls) +{ + ExpDesc v, b; + FuncState *fs = ls->fs; + new_localvar(ls, str_checkname(ls), 0); + init_exp(&v, VLOCAL, fs->freereg); + reserveregs(fs, 1); + adjustlocalvars(ls, 1); + body(ls, &b, 0, ls->linenumber); + storevar(fs, &v, &b); + /* debug information will only see the variable after this point! */ + getlocvar(fs, fs->nactvar - 1).startpc = fs->pc; +} + +static void localstat(LexState *ls) +{ + /* stat -> LOCAL NAME {`,' NAME} [`=' explist1] */ + BCReg nvars = 0; + BCReg nexps; + ExpDesc e; + do { + new_localvar(ls, str_checkname(ls), nvars++); + } while (testnext(ls, ',')); + if (testnext(ls, '=')) { + nexps = explist1(ls, &e); + } else { + e.k = VVOID; + nexps = 0; + } + adjust_assign(ls, nvars, nexps, &e); + adjustlocalvars(ls, nvars); +} + +static int func_name(LexState *ls, ExpDesc *v) +{ + /* func_name -> NAME {field} [`:' NAME] */ + int needself = 0; + singlevar(ls, v); + while (ls->token == '.') + field(ls, v); + if (ls->token == ':') { + needself = 1; + field(ls, v); + } + return needself; +} + +static void funcstat(LexState *ls, BCLine line) +{ + /* funcstat -> FUNCTION func_name body */ + FuncState *fs; + int needself; + ExpDesc v, b; + lj_lex_next(ls); /* skip FUNCTION */ + needself = func_name(ls, &v); + body(ls, &b, needself, line); + fs = ls->fs; + storevar(fs, &v, &b); + fs->pt->lineinfo[fs->pc - 1] = line; +} + +static void exprstat(LexState *ls) +{ + /* stat -> func | assignment */ + FuncState *fs = ls->fs; + struct LHS_assign v; + primaryexp(ls, &v.v); + if (v.v.k == VCALL) { /* stat -> func */ + setbc_b(bcptr(fs, &v.v), 1); /* call statement uses no results */ + } else { /* stat -> assignment */ + v.prev = NULL; + assignment(ls, &v, 1); + } +} + +static int block_follow(LexToken token) +{ + switch (token) { + case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: + return 1; + default: + return 0; + } +} + +static void retstat(LexState *ls) +{ + /* stat -> RETURN explist */ + BCIns ins; + FuncState *fs = ls->fs; + lj_lex_next(ls); /* skip RETURN */ + fs->pt->flags |= PROTO_HAS_RETURN; + if (block_follow(ls->token) || ls->token == ';') { + ins = BCINS_AD(BC_RET0, 0, 1); /* return no values */ + } else { + ExpDesc e; + BCReg nret = explist1(ls, &e); /* optional return values */ + if (nret == 1) { + if (e.k == VCALL) { + BCIns *i = bcptr(fs, &e); + /* It doesn't pay off to add BC_VARGT just for 'return ...'. 
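+        ** A VARG result takes the BC_RETM path below instead.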
*/ + if (bc_op(*i) == BC_VARG) goto notailcall; + fs->pc--; + ins = BCINS_AD(bc_op(*i)-BC_CALL+BC_CALLT, bc_a(*i), bc_c(*i)); + } else { + ins = BCINS_AD(BC_RET1, exp2anyreg(fs, &e), 2); + } + } else { + if (e.k == VCALL) { + notailcall: + setbc_b(bcptr(fs, &e), 0); + ins = BCINS_AD(BC_RETM, fs->nactvar, e.u.s.aux - fs->nactvar); + } else { + exp2nextreg(fs, &e); /* values must go to the `stack' */ + ins = BCINS_AD(BC_RET, fs->nactvar, nret+1); + } + } + } + if (fs->pt->flags & PROTO_HAS_FNEW) + emitAJ(fs, BC_UCLO, 0, 0); + emitINS(fs, ins); +} + +static int statement(LexState *ls) +{ + BCLine line = ls->linenumber; /* may be needed for error messages */ + switch (ls->token) { + case TK_if: + ifstat(ls, line); + return 0; + case TK_while: + whilestat(ls, line); + return 0; + case TK_do: + lj_lex_next(ls); /* skip DO */ + block(ls); + checkmatch(ls, TK_end, TK_do, line); + return 0; + case TK_for: + forstat(ls, line); + return 0; + case TK_repeat: + repeatstat(ls, line); + return 0; + case TK_function: + funcstat(ls, line); + return 0; + case TK_local: + lj_lex_next(ls); /* skip LOCAL */ + if (testnext(ls, TK_function)) /* local function? */ + localfunc(ls); + else + localstat(ls); + return 0; + case TK_return: + retstat(ls); + return 1; /* must be last statement */ + case TK_break: + lj_lex_next(ls); /* skip BREAK */ + breakstat(ls); + return 1; /* must be last statement */ + default: + exprstat(ls); + return 0; + } +} + +static void chunk(LexState *ls) +{ + /* chunk -> { stat [`;'] } */ + int islast = 0; + enterlevel(ls); + while (!islast && !block_follow(ls->token)) { + islast = statement(ls); + testnext(ls, ';'); + lua_assert(ls->fs->pt->framesize >= ls->fs->freereg && + ls->fs->freereg >= ls->fs->nactvar); + ls->fs->freereg = ls->fs->nactvar; /* free registers */ + } + leavelevel(ls); +} + diff --git a/src/lj_parse.h b/src/lj_parse.h new file mode 100644 index 0000000000..72aac2c6b1 --- /dev/null +++ b/src/lj_parse.h @@ -0,0 +1,15 @@ +/* +** Lua parser (source code -> bytecode). +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_PARSE_H +#define _LJ_PARSE_H + +#include "lj_obj.h" +#include "lj_lex.h" + +LJ_FUNC GCproto *lj_parse(LexState *ls); +LJ_FUNC GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t l); + +#endif diff --git a/src/lj_record.c b/src/lj_record.c new file mode 100644 index 0000000000..e101ba2344 --- /dev/null +++ b/src/lj_record.c @@ -0,0 +1,2136 @@ +/* +** Trace recorder (bytecode -> SSA IR). +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_record_c +#define LUA_CORE + +#include "lj_obj.h" + +#if LJ_HASJIT + +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_state.h" +#include "lj_frame.h" +#include "lj_bc.h" +#include "lj_ff.h" +#include "lj_ir.h" +#include "lj_jit.h" +#include "lj_iropt.h" +#include "lj_trace.h" +#include "lj_record.h" +#include "lj_snap.h" +#include "lj_asm.h" +#include "lj_dispatch.h" +#include "lj_vm.h" + +/* Some local macros to save typing. Undef'd at the end. */ +#define IR(ref) (&J->cur.ir[(ref)]) + +/* Pass IR on to next optimization in chain (FOLD). */ +#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) + +/* Emit raw IR without passing through optimizations. */ +#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) + +/* Context for recording an indexed load/store. */ +typedef struct RecordIndex { + TValue tabv; /* Runtime value of table (or indexed object). 
*/ + TValue keyv; /* Runtime value of key. */ + TValue valv; /* Runtime value of stored value. */ + TValue mobjv; /* Runtime value of metamethod object. */ + GCtab *mtv; /* Runtime value of metatable object. */ + cTValue *oldv; /* Runtime value of previously stored value. */ + TRef tab; /* Table (or indexed object) reference. */ + TRef key; /* Key reference. */ + TRef val; /* Value reference for a store or 0 for a load. */ + TRef mt; /* Metatable reference. */ + TRef mobj; /* Metamethod object reference. */ + int idxchain; /* Index indirections left or 0 for raw lookup. */ +} RecordIndex; + +/* Requested results from rec_call(). */ +enum { + /* Non-negative numbers are number of requested results. */ + CALLRES_MULTI = -1, /* Return multiple results. */ + CALLRES_TAILCALL = -2, /* Tail call. */ + CALLRES_PENDING = -3, /* Call is pending, no results yet. */ + CALLRES_CONT = -4 /* Continuation call. */ +}; + +/* Forward declarations. */ +static TRef rec_idx(jit_State *J, RecordIndex *ix); +static int rec_call(jit_State *J, BCReg func, int cres, int nargs); + +/* -- Sanity checks ------------------------------------------------------- */ + +#ifdef LUA_USE_ASSERT +/* Sanity check the whole IR -- sloooow. */ +static void rec_check_ir(jit_State *J) +{ + IRRef i, nins = J->cur.nins, nk = J->cur.nk; + lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); + for (i = nins-1; i >= nk; i--) { + IRIns *ir = IR(i); + uint32_t mode = lj_ir_mode[ir->o]; + IRRef op1 = ir->op1; + IRRef op2 = ir->op2; + switch (irm_op1(mode)) { + case IRMnone: lua_assert(op1 == 0); break; + case IRMref: lua_assert(op1 >= nk); + lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; + case IRMlit: break; + case IRMcst: lua_assert(i < REF_BIAS); continue; + } + switch (irm_op2(mode)) { + case IRMnone: lua_assert(op2 == 0); break; + case IRMref: lua_assert(op2 >= nk); + lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break; + case IRMlit: break; + case IRMcst: lua_assert(0); break; + } + if (ir->prev) { + lua_assert(ir->prev >= nk); + lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i); + lua_assert(IR(ir->prev)->o == ir->o); + } + } +} + +/* Sanity check the slots. */ +static void rec_check_slots(jit_State *J) +{ + BCReg s, nslots = J->baseslot + J->maxslot; + lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS); + lua_assert(nslots < LJ_MAX_JSLOTS); + for (s = 0; s < nslots; s++) { + TRef tr = J->slot[s]; + if (tr) { + IRRef ref = tref_ref(tr); + lua_assert(ref >= J->cur.nk && ref < J->cur.nins); + lua_assert(irt_t(IR(ref)->t) == tref_t(tr)); + } + } +} +#endif + +/* -- Type handling and specialization ------------------------------------ */ + +/* Note: these functions return tagged references (TRef). */ + +/* Specialize a slot to a specific type. Note: slot can be negative! */ +static TRef sloadt(jit_State *J, int32_t slot, IRType t, int mode) +{ + /* No guard, since none of the callers need a type-checking SLOAD. */ + TRef ref = emitir_raw(IRT(IR_SLOAD, t), (int32_t)J->baseslot+slot, mode); + J->base[slot] = ref; + return ref; +} + +/* Specialize a slot to the runtime type. Note: slot can be negative! */ +static TRef sload(jit_State *J, int32_t slot) +{ + IRType t = itype2irt(&J->L->base[slot]); + TRef ref = emitir_raw(IRTG(IR_SLOAD, t), (int32_t)J->baseslot+slot, 0); + if (irtype_ispri(t)) ref = TREF_PRI(t); /* Canonicalize primitive refs. */ + J->base[slot] = ref; + return ref; +} + +/* Get TRef from slot. Load slot and specialize if not done already. */ +#define getslot(J, s) (J->base[(s)] ? 
J->base[(s)] : sload(J, (int32_t)(s))) + +/* Get TRef for current function. */ +static TRef getcurrf(jit_State *J) +{ + if (J->base[-1]) { + IRIns *ir = IR(tref_ref(J->base[-1])); + if (ir->o == IR_FRAME) /* Shortcut if already specialized. */ + return TREF(ir->op2, IRT_FUNC); /* Return TRef of KFUNC. */ + return J->base[-1]; + } else { + lua_assert(J->baseslot == 1); + return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); + } +} + +/* Compare for raw object equality. +** Returns 0 if the objects are the same. +** Returns 1 if they are different, but the same type. +** Returns 2 for two different types. +** Comparisons between primitives always return 1 -- no caller cares about it. +*/ +static int rec_objcmp(jit_State *J, TRef a, TRef b, cTValue *av, cTValue *bv) +{ + int diff = !lj_obj_equal(av, bv); + if (!tref_isk2(a, b)) { /* Shortcut, also handles primitives. */ + IRType ta = tref_type(a); + IRType tb = tref_type(b); + if (ta != tb) { + /* Widen mixed number/int comparisons to number/number comparison. */ + if (ta == IRT_INT && tb == IRT_NUM) { + a = emitir(IRTN(IR_TONUM), a, 0); + ta = IRT_NUM; + } else if (ta == IRT_NUM && tb == IRT_INT) { + b = emitir(IRTN(IR_TONUM), b, 0); + } else { + return 2; /* Two different types are never equal. */ + } + } + emitir(IRTG(diff ? IR_NE : IR_EQ, ta), a, b); + } + return diff; +} + +/* -- Record loop ops ----------------------------------------------------- */ + +/* Loop event. */ +typedef enum { + LOOPEV_LEAVE, /* Loop is left or not entered. */ + LOOPEV_ENTER /* Loop is entered. */ +} LoopEvent; + +/* Canonicalize slots: convert integers to numbers. */ +static void canonicalize_slots(jit_State *J) +{ + BCReg s; + for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { + TRef tr = J->slot[s]; + if (tref_isinteger(tr)) { + IRIns *ir = IR(tref_ref(tr)); + if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY))) + J->slot[s] = emitir(IRTN(IR_TONUM), tr, 0); + } + } +} + +/* Stop recording. */ +static void rec_stop(jit_State *J, TraceNo lnk) +{ + lj_trace_end(J); + J->cur.link = (uint16_t)lnk; + if (lnk == J->curtrace) { /* Looping back? */ + if ((J->flags & JIT_F_OPT_LOOP)) /* Shall we try to create a loop? */ + goto nocanon; /* Do not canonicalize or we lose the narrowing. */ + if (J->cur.root) /* Otherwise ensure we always link to the root trace. */ + J->cur.link = J->cur.root; + } + canonicalize_slots(J); +nocanon: + /* Note: all loop ops must set J->pc to the following instruction! */ + lj_snap_add(J); /* Add loop snapshot. */ + J->needsnap = 0; + J->mergesnap = 1; /* In case recording continues. */ +} + +/* Peek before FORI to find a const initializer, otherwise load from slot. */ +static TRef fori_arg(jit_State *J, const BCIns *pc, BCReg slot, IRType t) +{ + /* A store to slot-1 means there's no conditional assignment for slot. */ + if (bc_a(pc[-1]) == slot-1 && bcmode_a(bc_op(pc[-1])) == BCMdst) { + BCIns ins = pc[0]; + if (bc_a(ins) == slot) { + if (bc_op(ins) == BC_KSHORT) { + int32_t k = (int32_t)(int16_t)bc_d(ins); + if (t == IRT_INT) + return lj_ir_kint(J, k); + else + return lj_ir_knum(J, cast_num(k)); + } else if (bc_op(ins) == BC_KNUM) { + lua_Number n = J->pt->k.n[bc_d(ins)]; + if (t == IRT_INT) + return lj_ir_kint(J, lj_num2int(n)); + else + return lj_ir_knum(J, n); + } + } + } + if (J->base[slot]) + return J->base[slot]; + else + return sloadt(J, (int32_t)slot, t, IRSLOAD_READONLY|IRSLOAD_INHERIT); +} + +/* Simulate the runtime behavior of the FOR loop iterator. +** It's important to exactly reproduce the semantics of the interpreter. 
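+** For example, with a positive step the loop is entered while idx <= stop
+** (guarded by IR_LE) and left via IR_GT; a negative step inverts this to
+** IR_GE/IR_LT on stop <= idx. The step sign is taken from the hi word of
+** the number, i.e. its IEEE-754 sign bit.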
+*/ +static LoopEvent for_iter(jit_State *J, IROp *op, BCReg ra, int isforl) +{ + cTValue *forbase = &J->L->base[ra]; + lua_Number stopv = numV(&forbase[FORL_STOP]); + lua_Number idxv = numV(&forbase[FORL_IDX]); + if (isforl) + idxv += numV(&forbase[FORL_STEP]); + if ((int32_t)forbase[FORL_STEP].u32.hi >= 0) { + if (idxv <= stopv) { *op = IR_LE; return LOOPEV_ENTER; } + *op = IR_GT; return LOOPEV_LEAVE; + } else { + if (stopv <= idxv) { *op = IR_GE; return LOOPEV_ENTER; } + *op = IR_LT; return LOOPEV_LEAVE; + } +} + +/* Record FORL/JFORL or FORI/JFORI. */ +static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) +{ + BCReg ra = bc_a(*fori); + IROp op; + LoopEvent ev = for_iter(J, &op, ra, isforl); + TRef *tr = &J->base[ra]; + TRef idx, stop; + IRType t; + if (isforl) { /* Handle FORL/JFORL opcodes. */ + TRef step; + idx = tr[FORL_IDX]; + if (!idx) idx = sloadt(J, (int32_t)(ra+FORL_IDX), IRT_NUM, 0); + t = tref_type(idx); + stop = fori_arg(J, fori-2, ra+FORL_STOP, t); + step = fori_arg(J, fori-1, ra+FORL_STEP, t); + tr[FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step); + } else { /* Handle FORI/JFORI opcodes. */ + BCReg i; + t = IRT_NUM; + for (i = FORL_IDX; i <= FORL_STEP; i++) { + lua_assert(J->base[ra+i] != 0); /* Assumes the slots are already set. */ + tr[i] = lj_ir_tonum(J, J->base[ra+i]); + } + idx = tr[FORL_IDX]; + stop = tr[FORL_STOP]; + if (!tref_isk(tr[FORL_STEP])) /* Non-const step: need direction guard. */ + emitir(IRTG(((op-IR_LT)>>1)+IR_LT, IRT_NUM), + tr[FORL_STEP], lj_ir_knum_zero(J)); + } + + tr[FORL_EXT] = idx; + if (ev == LOOPEV_LEAVE) { + J->maxslot = ra+FORL_EXT+1; + J->pc = fori+1; + } else { + J->maxslot = ra; + J->pc = fori+bc_j(*fori)+1; + } + lj_snap_add(J); + + emitir(IRTG(op, t), idx, stop); + + if (ev == LOOPEV_LEAVE) { + J->maxslot = ra; + J->pc = fori+bc_j(*fori)+1; + } else { + J->maxslot = ra+FORL_EXT+1; + J->pc = fori+1; + } + J->needsnap = 1; + return ev; +} + +/* Record ITERL/JITERL. */ +static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) +{ + BCReg ra = bc_a(iterins); + lua_assert(J->base[ra] != 0); + if (!tref_isnil(J->base[ra])) { /* Looping back? */ + J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ + J->maxslot = ra-1+bc_b(J->pc[-1]); + J->pc += bc_j(iterins)+1; + return LOOPEV_ENTER; + } else { + J->maxslot = ra-3; + J->pc++; + return LOOPEV_LEAVE; + } +} + +/* Record LOOP/JLOOP. Now, that was easy. */ +static LoopEvent rec_loop(jit_State *J, BCReg ra) +{ + J->maxslot = ra; + J->pc++; + return LOOPEV_ENTER; +} + +/* Check if a loop repeatedly failed to trace because it didn't loop back. */ +static int innerloopleft(jit_State *J, const BCIns *pc) +{ + ptrdiff_t i; + for (i = 0; i < PENALTY_SLOTS; i++) + if (J->penalty[i].pc == pc) { + if (J->penalty[i].reason == LJ_TRERR_LLEAVE && + J->penalty[i].val >= 2*HOTCOUNT_MIN_PENALTY) + return 1; + break; + } + return 0; +} + +/* Handle the case when an interpreted loop op is hit. */ +static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) +{ + if (J->parent == 0) { + if (pc == J->startpc && J->framedepth == 0) { /* Same loop? */ + if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ + lj_trace_err(J, LJ_TRERR_LLEAVE); + rec_stop(J, J->curtrace); /* Root trace forms a loop. */ + } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ + /* It's usually better to abort here and wait until the inner loop + ** is traced. But if the inner loop repeatedly didn't loop back, + ** this indicates a low trip count. 
In this case try unrolling + ** an inner loop even in a root trace. But it's better to be a bit + ** more conservative here and only do it for very short loops. + */ + if (!innerloopleft(J, pc)) + lj_trace_err(J, LJ_TRERR_LINNER); /* Root trace hit an inner loop. */ + if ((J->loopref && J->cur.nins - J->loopref > 8) || --J->loopunroll < 0) + lj_trace_err(J, LJ_TRERR_LUNROLL); /* Limit loop unrolling. */ + J->loopref = J->cur.nins; + } + } else if (ev != LOOPEV_LEAVE) { /* Side trace enters an inner loop. */ + J->loopref = J->cur.nins; + if (--J->loopunroll < 0) + lj_trace_err(J, LJ_TRERR_LUNROLL); /* Limit loop unrolling. */ + } /* Side trace continues across a loop that's left or not entered. */ +} + +/* Handle the case when an already compiled loop op is hit. */ +static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) +{ + if (J->parent == 0) { /* Root trace hit an inner loop. */ + /* Better let the inner loop spawn a side trace back here. */ + lj_trace_err(J, LJ_TRERR_LINNER); + } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ + J->instunroll = 0; /* Cannot continue across a compiled loop op. */ + if (J->pc == J->startpc && J->framedepth == 0) + lnk = J->curtrace; /* Can form an extra loop. */ + rec_stop(J, lnk); /* Link to the loop. */ + } /* Side trace continues across a loop that's left or not entered. */ +} + +/* -- Metamethod handling ------------------------------------------------- */ + +/* Prepare to record call to metamethod. */ +static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) +{ + BCReg s, top = curr_proto(J->L)->framesize; + TRef trcont; + setcont(&J->L->base[top], cont); +#if LJ_64 + trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin)); +#else + trcont = lj_ir_kptr(J, (void *)cont); +#endif + J->base[top] = emitir(IRTG(IR_FRAME, IRT_PTR), trcont, trcont); + for (s = J->maxslot; s < top; s++) + J->base[s] = 0; + return top+1; +} + +/* Record metamethod lookup. */ +static int rec_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) +{ + RecordIndex mix; + GCtab *mt; + if (tref_istab(ix->tab)) { + mt = tabref(tabV(&ix->tabv)->metatable); + mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_TAB_META); + } else if (tref_isudata(ix->tab)) { + mt = tabref(udataV(&ix->tabv)->metatable); + mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META); + } else { + /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */ + mt = tabref(J2G(J)->basemt[itypemap(&ix->tabv)]); + if (mt == NULL) + return 0; /* No metamethod. */ + mix.tab = lj_ir_ktab(J, mt); + goto nocheck; + } + ix->mt = mix.tab; + emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mix.tab, lj_ir_knull(J, IRT_TAB)); +nocheck: + if (mt) { + GCstr *mmstr = strref(J2G(J)->mmname[mm]); + cTValue *mo = lj_tab_getstr(mt, mmstr); + if (mo && !tvisnil(mo)) + copyTV(J->L, &ix->mobjv, mo); + ix->mtv = mt; + settabV(J->L, &mix.tabv, mt); + setstrV(J->L, &mix.keyv, mmstr); + mix.key = lj_ir_kstr(J, mmstr); + mix.val = 0; + mix.idxchain = 0; + ix->mobj = rec_idx(J, &mix); + return !tref_isnil(ix->mobj); /* 1 if metamethod found, 0 if not. */ + } + return 0; /* No metamethod. */ +} + +/* Record call to arithmetic metamethod (and MM_len). */ +static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) +{ + /* Set up metamethod call first to save ix->tab and ix->tabv. 
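+  ** The retry on the 2nd operand below overwrites ix->tab and ix->tabv.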
*/ + BCReg func = rec_mm_prep(J, lj_cont_ra); + TRef *base = J->base + func; + TValue *basev = J->L->base + func; + base[1] = ix->tab; base[2] = ix->key; + copyTV(J->L, basev+1, &ix->tabv); + copyTV(J->L, basev+2, &ix->keyv); + if (!rec_mm_lookup(J, ix, mm)) { /* Lookup metamethod on 1st operand. */ + if (mm != MM_len) { + ix->tab = ix->key; + copyTV(J->L, &ix->tabv, &ix->keyv); + if (rec_mm_lookup(J, ix, mm)) /* Lookup metamethod on 2nd operand. */ + goto ok; + } + lj_trace_err(J, LJ_TRERR_NOMM); + } +ok: + base[0] = ix->mobj; + copyTV(J->L, basev+0, &ix->mobjv); + return rec_call(J, func, CALLRES_CONT, 2) ? J->base[func] : 0; +} + +/* Call a comparison metamethod. */ +static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) +{ + BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); + TRef *base = J->base + func; + TValue *tv = J->L->base + func; + base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; + copyTV(J->L, tv+0, &ix->mobjv); + copyTV(J->L, tv+1, &ix->valv); + copyTV(J->L, tv+2, &ix->keyv); + rec_call(J, func, CALLRES_CONT, 2); + /* It doesn't matter whether this is immediately resolved or not. + ** Type specialization of the return type suffices to specialize + ** the control flow. + */ +} + +/* Record call to equality comparison metamethod (for tab and udata only). */ +static void rec_mm_equal(jit_State *J, RecordIndex *ix, int op) +{ + ix->tab = ix->val; + copyTV(J->L, &ix->tabv, &ix->valv); + if (rec_mm_lookup(J, ix, MM_eq)) { /* Lookup metamethod on 1st operand. */ + cTValue *bv; + TRef mo1 = ix->mobj; + TValue mo1v; + copyTV(J->L, &mo1v, &ix->mobjv); + /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */ + bv = &ix->keyv; + if (tvistab(bv) && tabref(tabV(bv)->metatable) == ix->mtv) { + TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_TAB_META); + emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt); + } else if (tvisudata(bv) && tabref(udataV(bv)->metatable) == ix->mtv) { + TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_UDATA_META); + emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt); + } else { /* Lookup metamethod on 2nd operand and compare both. */ + ix->tab = ix->key; + copyTV(J->L, &ix->tabv, bv); + if (!rec_mm_lookup(J, ix, MM_eq) || + rec_objcmp(J, mo1, ix->mobj, &mo1v, &ix->mobjv)) + return; + } + rec_mm_callcomp(J, ix, op); + } +} + +/* Record call to ordered comparison metamethods (for arbitrary objects). */ +static void rec_mm_comp(jit_State *J, RecordIndex *ix, int op) +{ + ix->tab = ix->val; + copyTV(J->L, &ix->tabv, &ix->valv); + while (1) { + MMS mm = (op & 2) ? MM_le : MM_lt; /* Try __le + __lt or only __lt. */ + if (rec_mm_lookup(J, ix, mm)) { /* Lookup metamethod on 1st operand. */ + cTValue *bv; + TRef mo1 = ix->mobj; + TValue mo1v; + copyTV(J->L, &mo1v, &ix->mobjv); + /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */ + bv = &ix->keyv; + if (tvistab(bv) && tabref(tabV(bv)->metatable) == ix->mtv) { + TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_TAB_META); + emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt); + } else if (tvisudata(bv) && tabref(udataV(bv)->metatable) == ix->mtv) { + TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_UDATA_META); + emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt); + } else { /* Lookup metamethod on 2nd operand and compare both. 
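+      ** If the lookup fails or the metamethods differ, fall through to
+      ** the nomatch retry below.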
*/ + ix->tab = ix->key; + copyTV(J->L, &ix->tabv, bv); + if (!rec_mm_lookup(J, ix, mm) || + rec_objcmp(J, mo1, ix->mobj, &mo1v, &ix->mobjv)) + goto nomatch; + } + rec_mm_callcomp(J, ix, op); + return; + } + nomatch: + /* First lookup failed. Retry with __lt and swapped operands. */ + if (!(op & 2)) break; /* Already at __lt. Interpreter will throw. */ + ix->tab = ix->key; ix->key = ix->val; ix->val = ix->tab; + copyTV(J->L, &ix->tabv, &ix->keyv); + copyTV(J->L, &ix->keyv, &ix->valv); + copyTV(J->L, &ix->valv, &ix->tabv); + op ^= 3; + } +} + +/* -- Indexed access ------------------------------------------------------ */ + +/* Record indexed key lookup. */ +static TRef rec_idx_key(jit_State *J, RecordIndex *ix) +{ + TRef key; + GCtab *t = tabV(&ix->tabv); + ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ + + /* Integer keys are looked up in the array part first. */ + key = ix->key; + if (tref_isnumber(key)) { + lua_Number n = numV(&ix->keyv); + int32_t k = lj_num2int(n); + lua_assert(tvisnum(&ix->keyv)); + /* Potential array key? */ + if ((MSize)k < LJ_MAX_ASIZE && n == cast_num(k)) { + TRef asizeref, ikey = key; + if (!tref_isinteger(ikey)) + ikey = emitir(IRTGI(IR_TOINT), ikey, IRTOINT_INDEX); + asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); + if ((MSize)k < t->asize) { /* Currently an array key? */ + TRef arrayref; + emitir(IRTGI(IR_ABC), asizeref, ikey); /* Bounds check. */ + arrayref = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_ARRAY); + return emitir(IRT(IR_AREF, IRT_PTR), arrayref, ikey); + } else { /* Currently not in array (may be an array extension)? */ + emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ + if (k == 0 && tref_isk(key)) + key = lj_ir_knum_zero(J); /* Canonicalize 0 or +-0.0 to +0.0. */ + /* And continue with the hash lookup. */ + } + } else if (!tref_isk(key)) { + /* We can rule out const numbers which failed the integerness test + ** above. But all other numbers are potential array keys. + */ + if (t->asize == 0) { /* True sparse tables have an empty array part. */ + /* Guard that the array part stays empty. */ + TRef tmp = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); + emitir(IRTGI(IR_EQ), tmp, lj_ir_kint(J, 0)); + } else { + lj_trace_err(J, LJ_TRERR_NYITMIX); + } + } + } + + /* Otherwise the key is located in the hash part. */ + if (tref_isinteger(key)) /* Hash keys are based on numbers, not ints. */ + ix->key = key = emitir(IRTN(IR_TONUM), key, 0); + if (tref_isk(key)) { + /* Optimize lookup of constant hash keys. */ + MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); + if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && + hslot <= 65535*(MSize)sizeof(Node)) { + TRef node, kslot; + TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); + emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); + node = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_NODE); + kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); + return emitir(IRTG(IR_HREFK, IRT_PTR), node, kslot); + } + } + /* Fall back to a regular hash lookup. */ + return emitir(IRT(IR_HREF, IRT_PTR), ix->tab, key); +} + +/* Determine whether a key is NOT one of the fast metamethod names. */ +static int nommstr(jit_State *J, TRef key) +{ + if (tref_isstr(key)) { + if (tref_isk(key)) { + GCstr *str = ir_kstr(IR(tref_ref(key))); + uint32_t i; + for (i = 0; i <= MM_FAST; i++) + if (strref(J2G(J)->mmname[i]) == str) + return 0; /* MUST be one of the fast metamethod names. 
*/ + } else { + return 0; /* Variable string key MAY be a metamethod name. */ + } + } + return 1; /* CANNOT be a metamethod name. */ +} + +/* Record indexed load/store. */ +static TRef rec_idx(jit_State *J, RecordIndex *ix) +{ + TRef xref; + IROp xrefop, loadop; + cTValue *oldv; + + while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ + lua_assert(ix->idxchain != 0); /* Never call raw rec_idx() on non-table. */ + if (!rec_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index)) + lj_trace_err(J, LJ_TRERR_NOMM); + handlemm: + if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ + BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); + TRef *base = J->base + func; + TValue *tv = J->L->base + func; + base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; + setfuncV(J->L, tv+0, funcV(&ix->mobjv)); + copyTV(J->L, tv+1, &ix->tabv); + copyTV(J->L, tv+2, &ix->keyv); + if (ix->val) { + base[3] = ix->val; + copyTV(J->L, tv+3, &ix->valv); + rec_call(J, func, CALLRES_CONT, 3); /* mobj(tab, key, val) */ + return 0; + } else { + /* res = mobj(tab, key) */ + return rec_call(J, func, CALLRES_CONT, 2) ? J->base[func] : 0; + } + } + /* Otherwise retry lookup with metaobject. */ + ix->tab = ix->mobj; + copyTV(J->L, &ix->tabv, &ix->mobjv); + if (--ix->idxchain == 0) + lj_trace_err(J, LJ_TRERR_IDXLOOP); + } + + /* First catch nil and NaN keys for tables. */ + if (tvisnil(&ix->keyv) || (tvisnum(&ix->keyv) && tvisnan(&ix->keyv))) { + if (ix->val) /* Better fail early. */ + lj_trace_err(J, LJ_TRERR_STORENN); + if (tref_isk(ix->key)) { + if (ix->idxchain && rec_mm_lookup(J, ix, MM_index)) + goto handlemm; + return TREF_NIL; + } + } + + /* Record the key lookup. */ + xref = rec_idx_key(J, ix); + xrefop = IR(tref_ref(xref))->o; + loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; + oldv = ix->oldv; + + if (ix->val == 0) { /* Indexed load */ + IRType t = itype2irt(oldv); + TRef res = emitir(IRTG(loadop, t), xref, 0); + if (t == IRT_NIL && ix->idxchain && rec_mm_lookup(J, ix, MM_index)) + goto handlemm; + if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ + return res; + } else { /* Indexed store. */ + GCtab *mt = tabref(tabV(&ix->tabv)->metatable); + if (tvisnil(oldv)) { /* Previous value was nil? */ + /* Need to duplicate the hasmm check for the early guards. */ + int hasmm = 0; + if (ix->idxchain && mt) { + cTValue *mo = lj_tab_getstr(mt, strref(J2G(J)->mmname[MM_newindex])); + hasmm = mo && !tvisnil(mo); + } + if (hasmm || oldv == niltvg(J2G(J))) + emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ + else if (xrefop == IR_HREF) + emitir(IRTG(IR_NE, IRT_PTR), xref, lj_ir_kptr(J, niltvg(J2G(J)))); + if (ix->idxchain && rec_mm_lookup(J, ix, MM_newindex)) { /* Metamethod? */ + lua_assert(hasmm); + goto handlemm; + } + lua_assert(!hasmm); + if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ + TRef key = ix->key; + if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ + key = emitir(IRTN(IR_TONUM), key, 0); + xref = emitir(IRT(IR_NEWREF, IRT_PTR), ix->tab, key); + } + } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { + /* Cannot derive that the previous value was non-nil, must do checks. */ + if (xrefop == IR_HREF) /* Guard against store to niltv. */ + emitir(IRTG(IR_NE, IRT_PTR), xref, lj_ir_kptr(J, niltvg(J2G(J)))); + if (ix->idxchain) { /* Metamethod lookup required? */ + /* A check for NULL metatable is cheaper (hoistable) than a load. 
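+      ** The metatable FLOAD is loop-invariant, so it and its EQ-null
+      ** guard can be hoisted; a guard on the reloaded value cannot.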
*/ + if (!mt) { + TRef mtref = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_TAB_META); + emitir(IRTG(IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB)); + } else { + IRType t = itype2irt(oldv); + emitir(IRTG(loadop, t), xref, 0); /* Guard for non-nil value. */ + } + } + } + if (tref_isinteger(ix->val)) /* Convert int to number before storing. */ + ix->val = emitir(IRTN(IR_TONUM), ix->val, 0); + emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val); + if (tref_isgcv(ix->val)) + emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); + /* Invalidate neg. metamethod cache for stores with certain string keys. */ + if (!nommstr(J, ix->key)) { + TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ix->tab, IRFL_TAB_NOMM); + emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); + } + J->needsnap = 1; + return 0; + } +} + +/* -- Upvalue access ------------------------------------------------------ */ + +/* Record upvalue load/store. */ +static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) +{ + GCupval *uvp = &gcref(J->fn->l.uvptr[uv])->uv; + TRef fn = getcurrf(J); + IRRef uref; + int needbarrier = 0; + if (!uvp->closed) { + /* In current stack? */ + if (uvp->v >= J->L->stack && uvp->v < J->L->maxstack) { + int32_t slot = (int32_t)(uvp->v - (J->L->base - J->baseslot)); + if (slot >= 0) { /* Aliases an SSA slot? */ + slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ + /* NYI: add IR to guard that it's still aliasing the same slot. */ + if (val == 0) { + return getslot(J, slot); + } else { + J->base[slot] = val; + if (slot >= (int32_t)J->maxslot) J->maxslot = (BCReg)(slot+1); + return 0; + } + } + } + uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PTR), fn, uv)); + } else { + needbarrier = 1; + uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PTR), fn, uv)); + } + if (val == 0) { /* Upvalue load */ + IRType t = itype2irt(uvp->v); + TRef res = emitir(IRTG(IR_ULOAD, t), uref, 0); + if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitive refs. */ + return res; + } else { /* Upvalue store. */ + if (tref_isinteger(val)) /* Convert int to number before storing. */ + val = emitir(IRTN(IR_TONUM), val, 0); + emitir(IRT(IR_USTORE, tref_type(val)), uref, val); + if (needbarrier && tref_isgcv(val)) + emitir(IRT(IR_OBAR, IRT_NIL), uref, val); + J->needsnap = 1; + return 0; + } +} + +/* -- Record calls to fast functions -------------------------------------- */ + +/* Note: The function and the arguments for the bytecode CALL instructions +** always occupy _new_ stack slots (above the highest active variable). +** This means they must have been stored there by previous instructions +** (MOV, K*, ADD etc.) which must be part of the same trace. This in turn +** means their reference slots are already valid and their types have +** already been specialized (i.e. getslot() would be redundant). +** The 1st slot beyond the arguments is set to 0 before calling recff_*. +*/ + +/* Data used by handlers to record a fast function. */ +typedef struct RecordFFData { + TValue *argv; /* Runtime argument values. */ + GCfunc *fn; /* The currently recorded function. */ + int nargs; /* Number of passed arguments. */ + int nres; /* Number of returned results (defaults to 1). */ + int cres; /* Wanted number of call results. */ + uint32_t data; /* Per-ffid auxiliary data (opcode, literal etc.). */ +} RecordFFData; + +/* Type of handler to record a fast function. */ +typedef void (*RecordFunc)(jit_State *J, TRef *res, RecordFFData *rd); + +/* Avoid carrying two pointers around. 
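+** arg[i] is res[i+1]: the arguments sit in the slots right above the
+** function, whose slot doubles as the result base.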
*/ +#define arg (res+1) + +/* Get runtime value of int argument. */ +static int32_t argv2int(jit_State *J, TValue *o) +{ + if (tvisstr(o) && !lj_str_numconv(strVdata(o), o)) + lj_trace_err(J, LJ_TRERR_BADTYPE); + return lj_num2bit(numV(o)); +} + +/* Get runtime value of string argument. */ +static GCstr *argv2str(jit_State *J, TValue *o) +{ + if (LJ_LIKELY(tvisstr(o))) { + return strV(o); + } else { + GCstr *s; + lua_assert(tvisnum(o)); + s = lj_str_fromnum(J->L, &o->n); + setstrV(J->L, o, s); + return s; + } +} + +/* Fallback handler for all fast functions that are not recorded (yet). */ +static void recff_nyi(jit_State *J, TRef *res, RecordFFData *rd) +{ + UNUSED(res); + setfuncV(J->L, &J->errinfo, rd->fn); + lj_trace_err_info(J, LJ_TRERR_NYIFF); +} + +LJ_NORET static void recff_err_ffu(jit_State *J, RecordFFData *rd) +{ + setfuncV(J->L, &J->errinfo, rd->fn); + lj_trace_err_info(J, LJ_TRERR_NYIFFU); +} + +/* C functions can have arbitrary side-effects and are not recorded (yet). */ +static void recff_c(jit_State *J, TRef *res, RecordFFData *rd) +{ + UNUSED(res); + setlightudV(&J->errinfo, (void *)rd->fn->c.f); + lj_trace_err_info(J, LJ_TRERR_NYICF); +} + +/* -- Base library fast functions ----------------------------------------- */ + +static void recff_assert(jit_State *J, TRef *res, RecordFFData *rd) +{ + /* Arguments already specialized. The interpreter throws for nil/false. */ + BCReg i; + for (i = 0; arg[i]; i++) /* Need to pass through all arguments. */ + res[i] = arg[i]; + rd->nres = (int)i; + UNUSED(J); +} + +static void recff_type(jit_State *J, TRef *res, RecordFFData *rd) +{ + /* Arguments already specialized. Result is a constant string. Neat, huh? */ + IRType t = tref_isinteger(arg[0]) ? IRT_NUM : tref_type(arg[0]); + res[0] = lj_ir_kstr(J, strV(&rd->fn->c.upvalue[t])); +} + +static void recff_getmetatable(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef tr = arg[0]; + if (tref_istab(tr)) { + RecordIndex ix; + ix.tab = tr; + copyTV(J->L, &ix.tabv, &rd->argv[0]); + if (rec_mm_lookup(J, &ix, MM_metatable)) + res[0] = ix.mobj; + else + res[0] = ix.mt; + } /* else: Interpreter will throw. */ +} + +static void recff_setmetatable(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef tr = arg[0]; + TRef mt = arg[1]; + if (tref_istab(tr) && (tref_istab(mt) || (mt && tref_isnil(mt)))) { + TRef fref, mtref; + RecordIndex ix; + ix.tab = tr; + copyTV(J->L, &ix.tabv, &rd->argv[0]); + rec_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable field. */ + fref = emitir(IRT(IR_FREF, IRT_PTR), tr, IRFL_TAB_META); + mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt; + emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref); + if (!tref_isnil(mt)) + emitir(IRT(IR_TBAR, IRT_TAB), tr, 0); + res[0] = tr; + J->needsnap = 1; + } /* else: Interpreter will throw. */ +} + +static void recff_rawget(jit_State *J, TRef *res, RecordFFData *rd) +{ + if (tref_istab(arg[0]) && arg[1]) { + RecordIndex ix; + ix.tab = arg[0]; ix.key = arg[1]; ix.val = 0; ix.idxchain = 0; + settabV(J->L, &ix.tabv, tabV(&rd->argv[0])); + copyTV(J->L, &ix.keyv, &rd->argv[1]); + res[0] = rec_idx(J, &ix); + } /* else: Interpreter will throw. 
*/ +} + +static void recff_rawset(jit_State *J, TRef *res, RecordFFData *rd) +{ + if (tref_istab(arg[0]) && arg[1] && arg[2]) { + RecordIndex ix; + ix.tab = arg[0]; ix.key = arg[1]; ix.val = arg[2]; ix.idxchain = 0; + settabV(J->L, &ix.tabv, tabV(&rd->argv[0])); + copyTV(J->L, &ix.keyv, &rd->argv[1]); + copyTV(J->L, &ix.valv, &rd->argv[2]); + rec_idx(J, &ix); + res[0] = arg[0]; /* Returns table. */ + } /* else: Interpreter will throw. */ +} + +static void recff_rawequal(jit_State *J, TRef *res, RecordFFData *rd) +{ + if (arg[0] && arg[1]) { + int diff = rec_objcmp(J, arg[0], arg[1], &rd->argv[0], &rd->argv[1]); + res[0] = diff ? TREF_FALSE : TREF_TRUE; + } /* else: Interpreter will throw. */ +} + +static void recff_tonumber(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef tr = arg[0]; + if (tref_isnumber_str(tr)) { + if (arg[1]) { + TRef base = lj_ir_toint(J, arg[1]); + if (!tref_isk(base) || IR(tref_ref(base))->i != 10) + recff_err_ffu(J, rd); + } + if (tref_isstr(tr)) + tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); + } else { + tr = TREF_NIL; + } + res[0] = tr; + UNUSED(rd); +} + +static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef tr = arg[0]; + if (tref_isstr(tr)) { + /* Ignore __tostring in the string base metatable. */ + res[0] = tr; + } else { + RecordIndex ix; + ix.tab = tr; + copyTV(J->L, &ix.tabv, &rd->argv[0]); + if (rec_mm_lookup(J, &ix, MM_tostring)) { /* Has __tostring metamethod? */ + res[0] = ix.mobj; + copyTV(J->L, rd->argv - 1, &ix.mobjv); + if (!rec_call(J, (BCReg)(res - J->base), 1, 1)) /* Pending call? */ + rd->cres = CALLRES_PENDING; + /* Otherwise res[0] already contains the result. */ + } else if (tref_isnumber(tr)) { + res[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); + } else { + recff_err_ffu(J, rd); + } + } +} + +static void recff_ipairs_aux(jit_State *J, TRef *res, RecordFFData *rd) +{ + RecordIndex ix; + ix.tab = arg[0]; + if (tref_istab(ix.tab)) { + if (!tvisnum(&rd->argv[1])) /* No support for string coercion. */ + lj_trace_err(J, LJ_TRERR_BADTYPE); + setnumV(&ix.keyv, numV(&rd->argv[1])+(lua_Number)1); + settabV(J->L, &ix.tabv, tabV(&rd->argv[0])); + ix.val = 0; ix.idxchain = 0; + ix.key = lj_ir_toint(J, arg[1]); + res[0] = ix.key = emitir(IRTI(IR_ADD), ix.key, lj_ir_kint(J, 1)); + res[1] = rec_idx(J, &ix); + rd->nres = tref_isnil(res[1]) ? 0 : 2; + } /* else: Interpreter will throw. */ +} + +static void recff_ipairs(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef tab = arg[0]; + if (tref_istab(tab)) { + res[0] = lj_ir_kfunc(J, funcV(&rd->fn->c.upvalue[0])); + res[1] = tab; + res[2] = lj_ir_kint(J, 0); + rd->nres = 3; + } /* else: Interpreter will throw. */ +} + +static void recff_pcall(jit_State *J, TRef *res, RecordFFData *rd) +{ + if (rd->nargs >= 1) { + BCReg parg = (BCReg)(arg - J->base); + if (rec_call(J, parg, CALLRES_MULTI, rd->nargs - 1)) { /* Resolved call. */ + res[0] = TREF_TRUE; /* Prepend true result. No need to move results. */ + rd->nres = (int)((J->maxslot - parg) + 1); + } else { /* Propagate pending call. */ + rd->cres = CALLRES_PENDING; + } + } /* else: Interpreter will throw. */ +} + +/* Struct to pass context across lj_vm_cpcall. 
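+** lj_vm_cpcall only forwards a single ud pointer, so the argument base,
+** the argument count and the resolved flag are bundled here.
+*/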
*/ +typedef struct RecordXpcall { + BCReg parg; + int nargs; + int resolved; +} RecordXpcall; + +static TValue *recff_xpcall_cp(lua_State *L, lua_CFunction dummy, void *ud) +{ + jit_State *J = L2J(L); + RecordXpcall *rx = (RecordXpcall *)ud; + UNUSED(dummy); + rx->resolved = rec_call(J, rx->parg, CALLRES_MULTI, rx->nargs); + return NULL; +} + +static void recff_xpcall(jit_State *J, TRef *res, RecordFFData *rd) +{ + if (rd->nargs >= 2) { + RecordXpcall rx; + BCReg parg = (BCReg)(arg - J->base) + 1; + TRef tmp; + TValue argv0, argv1; + ptrdiff_t oargv; + int errcode; + /* Swap function and traceback. */ + tmp = arg[0]; arg[0] = arg[1]; arg[1] = tmp; + copyTV(J->L, &argv0, &rd->argv[0]); + copyTV(J->L, &argv1, &rd->argv[1]); + copyTV(J->L, &rd->argv[0], &argv1); + copyTV(J->L, &rd->argv[1], &argv0); + oargv = savestack(J->L, rd->argv); + /* Need to protect rec_call because the recorder may throw. */ + rx.parg = parg; + rx.nargs = rd->nargs - 2; + errcode = lj_vm_cpcall(J->L, recff_xpcall_cp, NULL, &rx); + /* Always undo Lua stack swap to avoid confusing the interpreter. */ + rd->argv = restorestack(J->L, oargv); /* Stack may have been resized. */ + copyTV(J->L, &rd->argv[0], &argv0); + copyTV(J->L, &rd->argv[1], &argv1); + if (errcode) + lj_err_throw(J->L, errcode); /* Propagate errors. */ + if (rx.resolved) { /* Resolved call. */ + int i, nres = (int)(J->maxslot - parg); + rd->nres = nres + 1; + res[0] = TREF_TRUE; /* Prepend true result. */ + for (i = 1; i <= nres; i++) /* Move results down. */ + res[i] = res[i+1]; + } else { /* Propagate pending call. */ + rd->cres = CALLRES_PENDING; + } + } /* else: Interpreter will throw. */ +} + +/* -- Math library fast functions ----------------------------------------- */ + +static void recff_math_abs(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef tr = lj_ir_tonum(J, arg[0]); + res[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J)); + UNUSED(rd); +} + +/* Record rounding functions math.floor and math.ceil. */ +static void recff_math_round(jit_State *J, TRef *res, RecordFFData *rd) +{ + if (tref_isinteger(arg[0])) + res[0] = arg[0]; + else + res[0] = emitir(IRTN(IR_FPMATH), lj_ir_tonum(J, arg[0]), rd->data); + /* Note: result is integral (or NaN/Inf), but may not fit into an integer. */ +} + +/* Record unary math.* functions, mapped to IR_FPMATH opcode. */ +static void recff_math_unary(jit_State *J, TRef *res, RecordFFData *rd) +{ + res[0] = emitir(IRTN(IR_FPMATH), lj_ir_tonum(J, arg[0]), rd->data); +} + +/* Record binary math.* functions math.atan2 and math.ldexp. */ +static void recff_math_binary(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef tr = lj_ir_tonum(J, arg[0]); + res[0] = emitir(IRTN(rd->data), tr, lj_ir_tonum(J, arg[1])); +} + +/* Record math.asin, math.acos, math.atan. 
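+** All three reduce to atan2: atan(x) = atan2(x, 1),
+** asin(x) = atan2(x, sqrt(1-x^2)) and acos(x) = atan2(sqrt(1-x^2), x).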
*/ +static void recff_math_atrig(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef y = lj_ir_tonum(J, arg[0]); + TRef x = lj_ir_knum_one(J); + uint32_t ffid = rd->data; + if (ffid != FF_math_atan) { + TRef tmp = emitir(IRTN(IR_MUL), y, y); + tmp = emitir(IRTN(IR_SUB), x, tmp); + tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_SQRT); + if (ffid == FF_math_asin) { x = tmp; } else { x = y; y = tmp; } + } + res[0] = emitir(IRTN(IR_ATAN2), y, x); +} + +static void recff_math_modf(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef tr = arg[0]; + if (tref_isinteger(arg[0])) { + res[0] = tr; + res[1] = lj_ir_kint(J, 0); + } else { + tr = lj_ir_tonum(J, tr); + res[0] = emitir(IRTN(IR_FPMATH), tr, IRFPM_TRUNC); + res[1] = emitir(IRTN(IR_SUB), tr, res[0]); + } + rd->nres = 2; +} + +static void recff_math_degrad(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef tr = lj_ir_tonum(J, arg[0]); + res[0] = emitir(IRTN(IR_MUL), tr, lj_ir_knum(J, numV(&rd->fn->c.upvalue[0]))); +} + +static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd) +{ + if (!tref_isnumber_str(arg[1])) + lj_trace_err(J, LJ_TRERR_BADTYPE); + res[0] = lj_opt_narrow_pow(J, lj_ir_tonum(J, arg[0]), arg[1], &rd->argv[1]); + UNUSED(rd); +} + +static void recff_math_minmax(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef tr = lj_ir_tonum(J, arg[0]); + uint32_t op = rd->data; + BCReg i; + for (i = 1; arg[i]; i++) + tr = emitir(IRTN(op), tr, lj_ir_tonum(J, arg[i])); + res[0] = tr; +} + +/* -- Bit library fast functions ------------------------------------------ */ + +/* Record unary bit.tobit, bit.bnot, bit.bswap. */ +static void recff_bit_unary(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef tr = lj_ir_tobit(J, arg[0]); + res[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); +} + +/* Record N-ary bit.band, bit.bor, bit.bxor. */ +static void recff_bit_nary(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef tr = lj_ir_tobit(J, arg[0]); + uint32_t op = rd->data; + BCReg i; + for (i = 1; arg[i]; i++) + tr = emitir(IRTI(op), tr, lj_ir_tobit(J, arg[i])); + res[0] = tr; +} + +/* Record bit shifts. */ +static void recff_bit_shift(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef tr = lj_ir_tobit(J, arg[0]); + TRef tsh = lj_ir_tobit(J, arg[1]); +#if !LJ_TARGET_MASKEDSHIFT + if (!tref_isk(tsh)) + tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); +#endif + res[0] = emitir(IRTI(rd->data), tr, tsh); +} + +/* -- String library fast functions --------------------------------------- */ + +static void recff_string_len(jit_State *J, TRef *res, RecordFFData *rd) +{ + res[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, arg[0]), IRFL_STR_LEN); + UNUSED(rd); +} + +/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */ +static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef trstr = lj_ir_tostr(J, arg[0]); + TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN); + TRef tr0 = lj_ir_kint(J, 0); + TRef trstart, trend; + GCstr *str = argv2str(J, &rd->argv[0]); + int32_t start, end; + if (rd->data) { /* string.sub(str, start [,end]) */ + trstart = lj_ir_toint(J, arg[1]); + trend = tref_isnil(arg[2]) ? lj_ir_kint(J, -1) : lj_ir_toint(J, arg[2]); + start = argv2int(J, &rd->argv[1]); + end = tref_isnil(arg[2]) ? -1 : argv2int(J, &rd->argv[2]); + } else { /* string.byte(str [,start [,end]]) */ + if (arg[1]) { + trstart = lj_ir_toint(J, arg[1]); + trend = tref_isnil(arg[2]) ? trstart : lj_ir_toint(J, arg[2]); + start = argv2int(J, &rd->argv[1]); + end = tref_isnil(arg[2]) ?
start : argv2int(J, &rd->argv[2]); + } else { + trend = trstart = lj_ir_kint(J, 1); + end = start = 1; + } + } + if (end < 0) { + emitir(IRTGI(IR_LT), trend, tr0); + trend = emitir(IRTI(IR_ADD), emitir(IRTI(IR_ADD), trlen, trend), + lj_ir_kint(J, 1)); + end = end+(int32_t)str->len+1; + } else if ((MSize)end <= str->len) { + emitir(IRTGI(IR_ULE), trend, trlen); + } else { + emitir(IRTGI(IR_GT), trend, trlen); + end = (int32_t)str->len; + trend = trlen; + } + if (start < 0) { + emitir(IRTGI(IR_LT), trstart, tr0); + trstart = emitir(IRTI(IR_ADD), trlen, trstart); + start = start+(int32_t)str->len; + emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0); + if (start < 0) { + trstart = tr0; + start = 0; + } + } else { + if (start == 0) { + emitir(IRTGI(IR_EQ), trstart, tr0); + trstart = tr0; + } else { + trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1)); + emitir(IRTGI(IR_GE), trstart, tr0); + start--; + } + } + if (rd->data) { /* Return string.sub result. */ + if (end - start >= 0) { + /* Also handle empty range here, to avoid extra traces. */ + TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); + emitir(IRTGI(IR_GE), trslen, tr0); + trptr = emitir(IRT(IR_STRREF, IRT_PTR), trstr, trstart); + res[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); + } else { /* Range underflow: return empty string. */ + emitir(IRTGI(IR_LT), trend, trstart); + res[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0)); + } + } else { /* Return string.byte result(s). */ + int32_t i, len = end - start; + if (len > 0) { + TRef trslen = emitir(IRTI(IR_SUB), trend, trstart); + emitir(IRTGI(IR_EQ), trslen, lj_ir_kint(J, len)); + if (res + len > J->slot + LJ_MAX_JSLOTS) + lj_trace_err(J, LJ_TRERR_STACKOV); + rd->nres = len; + for (i = 0; i < len; i++) { + TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, i)); + tmp = emitir(IRT(IR_STRREF, IRT_PTR), trstr, tmp); + res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, 0); + } + } else { /* Empty range or range underflow: return no results. */ + emitir(IRTGI(IR_LE), trend, trstart); + rd->nres = 0; + } + } +} + +/* -- Table library fast functions ---------------------------------------- */ + +static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd) +{ + if (tref_istab(arg[0])) { + res[0] = emitir(IRTI(IR_TLEN), arg[0], 0); + } /* else: Interpreter will throw. */ + UNUSED(rd); +} + +/* -- Record calls and returns -------------------------------------------- */ + +#undef arg + +#include "lj_recdef.h" + +/* Record return. */ +static void rec_ret(jit_State *J, BCReg rbase, int gotresults) +{ + TValue *frame = J->L->base - 1; + TRef *res = J->base + rbase; + J->tailcalled = 0; + while (frame_ispcall(frame)) { + BCReg cbase = (BCReg)frame_delta(frame); + lua_assert(J->baseslot > 1); + J->baseslot -= (BCReg)cbase; + J->base -= cbase; + *--res = TREF_TRUE; /* Prepend true to results. */ + gotresults++; + J->framedepth--; + frame = frame_prevd(frame); + } + if (J->framedepth-- <= 0) + lj_trace_err(J, LJ_TRERR_NYIRETL); + lua_assert(J->baseslot > 1); + if (frame_islua(frame)) { + BCIns callins = *(J->pc = frame_pc(frame)-1); + ptrdiff_t nresults = bc_b(callins) ? (int)bc_b(callins)-1 : gotresults; + BCReg cbase = bc_a(callins); + int i; + for (i = 0; i < nresults; i++) + J->base[i-1] = i < gotresults ? 
res[i] : TREF_NIL; + J->maxslot = cbase+(BCReg)nresults; + J->baseslot -= cbase+1; + J->base -= cbase+1; + } else if (frame_iscont(frame)) { + ASMFunction cont = frame_contf(frame); + BCReg i, cbase = (BCReg)frame_delta(frame); + J->pc = frame_contpc(frame)-1; + J->baseslot -= (BCReg)cbase; + J->base -= cbase; + /* Shrink maxslot as much as possible after return from continuation. */ + for (i = cbase-2; i > 0 && J->base[i] == 0; i--) ; + J->maxslot = i; + if (cont == lj_cont_ra) { + /* Copy result to destination slot. */ + BCReg dst = bc_a(*J->pc); + J->base[dst] = res[0]; + if (dst > J->maxslot) J->maxslot = dst+1; + } else if (cont == lj_cont_nop) { + /* Nothing to do here. */ + } else if (cont == lj_cont_cat) { + lua_assert(0); + } else { + /* Result type already specialized. */ + lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); + } + } else { + lua_assert(0); + } + lua_assert(J->baseslot >= 1); +} + +/* Check unroll limits for calls. */ +static void check_call_unroll(jit_State *J, GCfunc *fn) +{ + TValue *first = J->L->base - J->baseslot; + TValue *frame = J->L->base - 1; + int count = 0; + while (frame > first) { + if (frame_func(frame) == fn) + count++; + if (frame_isvarg(frame)) + frame = frame_prevd(frame); + frame = frame_prev(frame); + } + if (frame_func(first) == fn && bc_op(J->cur.startins) == BC_CALL) { + if (count >= J->param[JIT_P_recunroll]) + lj_trace_err(J, LJ_TRERR_NYIRECU); + } else { + if (count >= J->param[JIT_P_callunroll]) + lj_trace_err(J, LJ_TRERR_CUNROLL); + } +} + +/* Record call. Returns 0 for pending calls and 1 for resolved calls. */ +static int rec_call(jit_State *J, BCReg func, int cres, int nargs) +{ + RecordFFData rd; + TRef *res = &J->base[func]; + TValue *tv = &J->L->base[func]; + + if (tref_isfunc(res[0])) { /* Regular function call. */ + rd.fn = funcV(tv); + rd.argv = tv+1; + } else { /* Otherwise resolve __call metamethod for called object. */ + RecordIndex ix; + int i; + ix.tab = res[0]; + copyTV(J->L, &ix.tabv, tv); + if (!rec_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) + lj_trace_err(J, LJ_TRERR_NOMM); + /* Update the recorder state, but not the Lua stack. */ + for (i = ++nargs; i > 0; i--) + res[i] = res[i-1]; + res[0] = ix.mobj; + rd.fn = funcV(&ix.mobjv); + rd.argv = tv; /* The called object is the 1st arg. */ + } + + /* Specialize to the runtime value of the called function. */ + res[0] = emitir(IRTG(IR_FRAME, IRT_FUNC), res[0], lj_ir_kfunc(J, rd.fn)); + + if (isluafunc(rd.fn)) { /* Record call to Lua function. */ + GCproto *pt = funcproto(rd.fn); + if ((pt->flags & PROTO_NO_JIT)) + lj_trace_err(J, LJ_TRERR_CJITOFF); + if ((pt->flags & PROTO_IS_VARARG)) { + if (rd.fn->l.gate != lj_gate_lv) + lj_trace_err(J, LJ_TRERR_NYILNKF); + lj_trace_err(J, LJ_TRERR_NYIVF); + } else { + if (rd.fn->l.gate != lj_gate_lf) + lj_trace_err(J, LJ_TRERR_NYILNKF); + } + check_call_unroll(J, rd.fn); + if (cres == CALLRES_TAILCALL) { + int i; + /* Tailcalls can form a loop, so count towards the loop unroll limit. */ + if (++J->tailcalled > J->loopunroll) + lj_trace_err(J, LJ_TRERR_LUNROLL); + for (i = 0; i <= nargs; i++) /* Move func + args down. */ + J->base[i-1] = res[i]; + /* Note: the new FRAME is now at J->base[-1] (even for slot #0). */ + } else { /* Regular call. */ + J->base += func+1; + J->baseslot += func+1; + J->framedepth++; + } + if (J->baseslot + pt->framesize >= LJ_MAX_JSLOTS) + lj_trace_err(J, LJ_TRERR_STACKOV); + /* Fill up missing args with nil. 
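+ ** E.g. recording a call f(1) against function f(a, b, c) fills base[1]
+ ** and base[2] with TREF_NIL here, matching the nil-filling the
+ ** interpreter's call gate performs for missing arguments.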
*/ + while (nargs < pt->numparams) + J->base[nargs++] = TREF_NIL; + /* The remaining slots should never be read before they are written. */ + J->maxslot = pt->numparams; + return 0; /* No result yet. */ + } else { /* Record call to C function or fast function. */ + uint32_t m = 0; + res[1+nargs] = 0; + rd.nargs = nargs; + if (rd.fn->c.ffid < sizeof(recff_idmap)/sizeof(recff_idmap[0])) + m = recff_idmap[rd.fn->c.ffid]; + rd.data = m & 0xff; + rd.cres = cres; + rd.nres = 1; /* Default is one result. */ + (recff_func[m >> 8])(J, res, &rd); /* Call recff_* handler. */ + cres = rd.cres; + if (cres >= 0) { + /* Caller takes fixed number of results: local a,b = f() */ + J->maxslot = func + (BCReg)cres; + while (rd.nres < cres) /* Fill up missing results with nil. */ + res[rd.nres++] = TREF_NIL; + } else if (cres == CALLRES_MULTI) { + /* Caller takes any number of results: return 1,f() */ + J->maxslot = func + (BCReg)rd.nres; + } else if (cres == CALLRES_TAILCALL) { + /* Tail call: return f() */ + rec_ret(J, func, rd.nres); + } else if (cres == CALLRES_CONT) { + /* Note: immediately resolved continuations must not change J->maxslot. */ + res[rd.nres] = TREF_NIL; /* Turn 0 results into nil result. */ + } else { + J->framedepth++; + lua_assert(cres == CALLRES_PENDING); + return 0; /* Pending call, no result yet. */ + } + return 1; /* Result resolved immediately. */ + } +} + +/* -- Record allocations -------------------------------------------------- */ + +static TRef rec_tnew(jit_State *J, uint32_t ah) +{ + uint32_t asize = ah & 0x7ff; + uint32_t hbits = ah >> 11; + if (asize == 0x7ff) asize = 0x801; + return emitir(IRT(IR_TNEW, IRT_TAB), asize, hbits); +} + +/* -- Record bytecode ops ------------------------------------------------- */ + +/* Optimize state after comparison. */ +static void optstate_comp(jit_State *J, int cond) +{ + BCIns jmpins = J->pc[1]; + const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0); + SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; + /* Avoid re-recording the comparison in side traces. */ + J->cur.snapmap[snap->mapofs + snap->nslots] = u32ptr(npc); + J->needsnap = 1; + /* Shrink last snapshot if possible. */ + if (bc_a(jmpins) < J->maxslot) { + J->maxslot = bc_a(jmpins); + lj_snap_shrink(J); + } +} + +/* Record the next bytecode instruction (_before_ it's executed). */ +void lj_record_ins(jit_State *J) +{ + cTValue *lbase; + RecordIndex ix; + const BCIns *pc; + BCIns ins; + BCOp op; + TRef ra, rb, rc; + + /* Need snapshot before recording next bytecode (e.g. after a store). */ + if (J->needsnap) { + J->needsnap = 0; + lj_snap_add(J); + J->mergesnap = 1; + } + + /* Record only closed loops for root traces. */ + pc = J->pc; + if (J->framedepth == 0 && + (MSize)((char *)pc - (char *)J->bc_min) >= J->bc_extent) + lj_trace_err(J, LJ_TRERR_LLEAVE); + +#ifdef LUA_USE_ASSERT + rec_check_slots(J); + rec_check_ir(J); +#endif + + /* Keep a copy of the runtime values of var/num/str operands. */ +#define rav (&ix.valv) +#define rbv (&ix.tabv) +#define rcv (&ix.keyv) + + lbase = J->L->base; + ins = *pc; + op = bc_op(ins); + ra = bc_a(ins); + ix.val = 0; + switch (bcmode_a(op)) { + case BCMvar: + copyTV(J->L, rav, &lbase[ra]); ix.val = ra = getslot(J, ra); break; + default: break; /* Handled later. */ + } + rb = bc_b(ins); + rc = bc_c(ins); + switch (bcmode_b(op)) { + case BCMnone: rb = 0; rc = bc_d(ins); break; /* Upgrade rc to 'rd'. 
*/ + case BCMvar: + copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break; + case BCMnum: { lua_Number n = J->pt->k.n[rb]; + setnumV(rbv, n); ix.tab = rb = lj_ir_knumint(J, n); } break; + default: break; /* Handled later. */ + } + switch (bcmode_c(op)) { + case BCMvar: + copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; + case BCMpri: setitype(rcv, (int32_t)~rc); rc = TREF_PRI(IRT_NIL+rc); break; + case BCMnum: { lua_Number n = J->pt->k.n[rc]; + setnumV(rcv, n); ix.key = rc = lj_ir_knumint(J, n); } break; + case BCMstr: { GCstr *s = strref(J->pt->k.gc[~rc]); + setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break; + default: break; /* Handled later. */ + } + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + /* Emit nothing for two numeric or string consts. */ + if (!(tref_isk2(ra,rc) && tref_isnumber_str(ra) && tref_isnumber_str(rc))) { + IRType ta = tref_type(ra); + IRType tc = tref_type(rc); + int irop; + if (ta != tc) { + /* Widen mixed number/int comparisons to number/number comparison. */ + if (ta == IRT_INT && tc == IRT_NUM) { + ra = emitir(IRTN(IR_TONUM), ra, 0); + ta = IRT_NUM; + } else if (ta == IRT_NUM && tc == IRT_INT) { + rc = emitir(IRTN(IR_TONUM), rc, 0); + } else if (!((ta == IRT_FALSE || ta == IRT_TRUE) && + (tc == IRT_FALSE || tc == IRT_TRUE))) { + break; /* Interpreter will throw for two different types. */ + } + } + lj_snap_add(J); + irop = (int)op - (int)BC_ISLT + (int)IR_LT; + if (ta == IRT_NUM) { + if ((irop & 1)) irop ^= 4; /* ISGE/ISGT are unordered. */ + if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 5; + } else if (ta == IRT_INT) { + if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1; + } else if (ta == IRT_STR) { + if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; + } else { + rec_mm_comp(J, &ix, (int)op); + break; + } + emitir(IRTG(irop, ta), ra, rc); + optstate_comp(J, ((int)op ^ irop) & 1); + } + break; + + case BC_ISEQV: case BC_ISNEV: + case BC_ISEQS: case BC_ISNES: + case BC_ISEQN: case BC_ISNEN: + case BC_ISEQP: case BC_ISNEP: + /* Emit nothing for two non-table, non-udata consts. */ + if (!(tref_isk2(ra, rc) && !(tref_istab(ra) || tref_isudata(ra)))) { + int diff; + lj_snap_add(J); + diff = rec_objcmp(J, ra, rc, rav, rcv); + if (diff == 1 && (tref_istab(ra) || tref_isudata(ra))) { + /* Only check __eq if different, but the same type (table or udata). */ + rec_mm_equal(J, &ix, (int)op); + break; + } + optstate_comp(J, ((int)op & 1) == !diff); + } + break; + + /* -- Unary test and copy ops ------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: + if ((op & 1) == tref_istruecond(rc)) + rc = 0; /* Don't store if condition is not true. */ + /* fallthrough */ + case BC_IST: case BC_ISF: /* Type specialization suffices. */ + if (bc_a(pc[1]) < J->maxslot) + J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ + break; + + /* -- Unary ops --------------------------------------------------------- */ + + case BC_NOT: + /* Type specialization already forces const result. */ + rc = tref_istruecond(rc) ? 
TREF_FALSE : TREF_TRUE; + break; + + case BC_LEN: + if (tref_isstr(rc)) { + rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); + } else if (tref_istab(rc)) { + rc = emitir(IRTI(IR_TLEN), rc, 0); + } else { + ix.tab = rc; + copyTV(J->L, &ix.tabv, &ix.keyv); + ix.key = TREF_NIL; + setnilV(&ix.keyv); + rc = rec_mm_arith(J, &ix, MM_len); + } + break; + + /* -- Arithmetic ops ---------------------------------------------------- */ + + case BC_UNM: + if (tref_isnumber_str(rc)) { + rc = lj_ir_tonum(J, rc); + rc = emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J)); + } else { + ix.tab = rc; + copyTV(J->L, &ix.tabv, &ix.keyv); + rc = rec_mm_arith(J, &ix, MM_unm); + } + break; + + case BC_ADDNV: case BC_SUBNV: case BC_MULNV: case BC_DIVNV: case BC_MODNV: + ix.tab = rc; ix.key = rc = rb; rb = ix.tab; + copyTV(J->L, &ix.valv, &ix.tabv); + copyTV(J->L, &ix.tabv, &ix.keyv); + copyTV(J->L, &ix.keyv, &ix.valv); + if (op == BC_MODNV) + goto recmod; + /* fallthrough */ + case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN: + case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: { + MMS mm = bcmode_mm(op); + if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) { + rb = lj_ir_tonum(J, rb); + rc = lj_ir_tonum(J, rc); + rc = emitir(IRTN((int)mm - (int)MM_add + (int)IR_ADD), rb, rc); + } else { + rc = rec_mm_arith(J, &ix, mm); + } + break; + } + + case BC_MODVN: case BC_MODVV: + recmod: + if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) + rc = lj_opt_narrow_mod(J, rb, rc); + else + rc = rec_mm_arith(J, &ix, MM_mod); + break; + + case BC_POW: + if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) + rc = lj_opt_narrow_pow(J, lj_ir_tonum(J, rb), rc, rcv); + else + rc = rec_mm_arith(J, &ix, MM_pow); + break; + + /* -- Constant and move ops --------------------------------------------- */ + + case BC_KSTR: case BC_KNUM: case BC_KPRI: case BC_MOV: + break; + case BC_KSHORT: + rc = lj_ir_kint(J, (int32_t)(int16_t)rc); + break; + case BC_KNIL: + while (ra <= rc) + J->base[ra++] = TREF_NIL; + if (rc >= J->maxslot) J->maxslot = rc+1; + break; + + /* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + rc = rec_upvalue(J, rc, 0); + break; + case BC_USETV: case BC_USETS: case BC_USETN: case BC_USETP: + rec_upvalue(J, ra, rc); + break; + + /* -- Table ops --------------------------------------------------------- */ + + case BC_GGET: case BC_GSET: + settabV(J->L, &ix.tabv, tabref(J->fn->l.env)); + ix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), getcurrf(J), IRFL_FUNC_ENV); + ix.idxchain = LJ_MAX_IDXCHAIN; + rc = rec_idx(J, &ix); + break; + + case BC_TGETB: case BC_TSETB: + setintV(&ix.keyv, (int32_t)rc); + ix.key = lj_ir_kint(J, (int32_t)rc); + /* fallthrough */ + case BC_TGETV: case BC_TGETS: case BC_TSETV: case BC_TSETS: + ix.idxchain = LJ_MAX_IDXCHAIN; + rc = rec_idx(J, &ix); + break; + + case BC_TNEW: + rc = rec_tnew(J, rc); + break; + case BC_TDUP: + rc = emitir(IRT(IR_TDUP, IRT_TAB), + lj_ir_ktab(J, tabref(J->pt->k.gc[~rc])), 0); + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_ITERC: + J->base[ra] = getslot(J, ra-3); + J->base[ra+1] = getslot(J, ra-2); + J->base[ra+2] = getslot(J, ra-1); + { /* Have to do the actual copy now because rec_call needs the values.
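+ ** rec_call reads the runtime argument values out of J->L->base (via
+ ** rd.argv), but the interpreter copies the iterator triplet only when
+ ** ITERC actually executes, which is after this recording step.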
*/ + TValue *b = &J->L->base[ra]; + copyTV(J->L, b, b-3); + copyTV(J->L, b+1, b-2); + copyTV(J->L, b+2, b-1); + } + goto callop; + + case BC_CALLMT: + rb = (TRef)(CALLRES_TAILCALL+1); + /* fallthrough */ + case BC_CALLM: + /* L->top is set to L->base+ra+rc+NRESULTS-1+1, see lj_dispatch_ins(). */ + rc = (BCReg)(J->L->top - J->L->base) - ra; + goto callop; + + case BC_CALLT: + rb = (TRef)(CALLRES_TAILCALL+1); + /* fallthrough */ + case BC_CALL: + callop: + rec_call(J, ra, (int)(rb-1), (int)(rc-1)); + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + /* L->top is set to L->base+ra+rc+NRESULTS-1, see lj_dispatch_ins(). */ + rc = (BCReg)(J->L->top - J->L->base) - ra + 1; + /* fallthrough */ + case BC_RET: case BC_RET0: case BC_RET1: + rec_ret(J, ra, (int)(rc-1)); + break; + + /* -- Loops and branches ------------------------------------------------ */ + + case BC_FORI: + if (rec_for(J, pc, 0) != LOOPEV_LEAVE) + J->loopref = J->cur.nins; + break; + case BC_JFORI: + lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); + if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ + rec_stop(J, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); + /* Continue tracing if the loop is not entered. */ + break; + + case BC_FORL: + rec_loop_interp(J, pc, rec_for(J, pc+((ptrdiff_t)rc-BCBIAS_J), 1)); + break; + case BC_ITERL: + rec_loop_interp(J, pc, rec_iterl(J, *pc)); + break; + case BC_LOOP: + rec_loop_interp(J, pc, rec_loop(J, ra)); + break; + + case BC_JFORL: + rec_loop_jit(J, rc, rec_for(J, pc+bc_j(J->trace[rc]->startins), 1)); + break; + case BC_JITERL: + rec_loop_jit(J, rc, rec_iterl(J, J->trace[rc]->startins)); + break; + case BC_JLOOP: + rec_loop_jit(J, rc, rec_loop(J, ra)); + break; + + case BC_IFORL: + case BC_IITERL: + case BC_ILOOP: + lj_trace_err_info(J, LJ_TRERR_LBLACKL); + break; + + case BC_JMP: + if (ra < J->maxslot) + J->maxslot = ra; /* Shrink used slots. */ + break; + + case BC_CAT: + case BC_UCLO: + case BC_FNEW: + case BC_TSETM: + case BC_VARG: + default: + setintV(&J->errinfo, (int32_t)op); + lj_trace_err_info(J, LJ_TRERR_NYIBC); + break; + } + + /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ + if (bcmode_a(op) == BCMdst && rc) { + J->base[ra] = rc; + if (ra >= J->maxslot) J->maxslot = ra+1; + } + +#undef rav +#undef rbv +#undef rcv + + /* Limit the number of recorded IR instructions. */ + if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord]) + lj_trace_err(J, LJ_TRERR_TRACEOV); +} + +/* -- Recording setup ----------------------------------------------------- */ + +/* Setup recording for a FORL loop. */ +static void rec_setup_forl(jit_State *J, const BCIns *fori) +{ + BCReg ra = bc_a(*fori); + cTValue *forbase = &J->L->base[ra]; + IRType t = (J->flags & JIT_F_OPT_NARROW) ? lj_opt_narrow_forl(forbase) + : IRT_NUM; + TRef stop = fori_arg(J, fori-2, ra+FORL_STOP, t); + TRef step = fori_arg(J, fori-1, ra+FORL_STEP, t); + int dir = (0 <= numV(&forbase[FORL_STEP])); + lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); + if (!tref_isk(step)) { + /* Non-constant step: need a guard for the direction. */ + TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J); + emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero); + /* Add hoistable overflow checks for a narrowed FORL index. */ + if (t == IRT_INT) { + if (tref_isk(stop)) { + /* Constant stop: optimize check away or to a range check for step.
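+ ** E.g. for i=start,100 with a variable, positive integer step this
+ ** emits a single guard step <= 0x7fffffff-100, so idx+step cannot
+ ** wrap around before the stop check triggers an exit.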
*/ + int32_t k = IR(tref_ref(stop))->i; + if (dir) { + if (k > 0) + emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k)); + } else { + if (k < 0) + emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k)); + } + } else { + /* Stop+step variable: need full overflow check (with dead result). */ + emitir(IRTGI(IR_ADDOV), step, stop); + } + } + } else if (t == IRT_INT && !tref_isk(stop)) { + /* Constant step: optimize overflow check to a range check for stop. */ + int32_t k = IR(tref_ref(step))->i; + k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k; + emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k)); + } + J->base[ra+FORL_EXT] = sloadt(J, (int32_t)(ra+FORL_IDX), t, IRSLOAD_INHERIT); + J->maxslot = ra+FORL_EXT+1; +} + +/* Setup recording for a root trace started by a hot loop. */ +static const BCIns *rec_setup_root(jit_State *J) +{ + /* Determine the next PC and the bytecode range for the loop. */ + const BCIns *pcj, *pc = J->pc; + BCIns ins = *pc; + BCReg ra = bc_a(ins); + switch (bc_op(ins)) { + case BC_FORL: + J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); + pc += 1+bc_j(ins); + J->bc_min = pc; + break; + case BC_ITERL: + lua_assert(bc_op(pc[-1]) == BC_ITERC); + J->maxslot = ra + bc_b(pc[-1]) - 1; + J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); + pc += 1+bc_j(ins); + lua_assert(bc_op(pc[-1]) == BC_JMP); + J->bc_min = pc; + break; + case BC_LOOP: + /* Only check BC range for real loops, but not for "repeat until true". */ + pcj = pc + bc_j(ins); + ins = *pcj; + if (bc_op(ins) == BC_JMP && bc_j(ins) < 0) { + J->bc_min = pcj+1 + bc_j(ins); + J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); + } + J->maxslot = ra; + pc++; + break; + default: + lua_assert(0); + break; + } + return pc; +} + +/* Setup recording for a side trace. */ +static void rec_setup_side(jit_State *J, Trace *T) +{ + SnapShot *snap = &T->snap[J->exitno]; + IRRef2 *map = &T->snapmap[snap->mapofs]; + BCReg s, nslots = snap->nslots; + BloomFilter seen = 0; + for (s = 0; s < nslots; s++) { + IRRef ref = snap_ref(map[s]); + if (ref) { + IRIns *ir = &T->ir[ref]; + TRef tr = 0; + /* The bloom filter avoids O(nslots^2) overhead for de-duping slots. */ + if (bloomtest(seen, ref)) { + BCReg j; + for (j = 0; j < s; j++) + if (snap_ref(map[j]) == ref) { + if (ir->o == IR_FRAME && irt_isfunc(ir->t)) + J->baseslot = s+1; + tr = J->slot[j]; + goto dupslot; + } + } + bloomset(seen, ref); + switch ((IROp)ir->o) { + case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break; + case IR_KINT: tr = lj_ir_kint(J, ir->i); break; + case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break; + case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; + case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ + if (irt_isfunc(ir->t)) { + J->baseslot = s+1; + J->framedepth++; + tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); + tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); + } else { + tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void)); + tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr); + } + break; + case IR_SLOAD: /* Inherited SLOADs don't need a guard. */ + tr = emitir_raw(ir->ot & ~IRT_GUARD, s, + (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT); + break; + default: /* Parent refs are already typed and don't need a guard. */ + tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s, + IRSLOAD_INHERIT|IRSLOAD_PARENT); + break; + } + dupslot: + J->slot[s] = tr; + } + } + J->base = J->slot + J->baseslot; + J->maxslot = nslots - J->baseslot; + lj_snap_add(J); +} + +/* Setup for recording a new trace. 
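+** Resets the per-trace slot/chain state, emits the fixed BASE and KPRI
+** instructions, then branches: side traces inherit the parent snapshot
+** via rec_setup_side(), root traces get their bytecode range from
+** rec_setup_root().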
*/ +void lj_record_setup(jit_State *J) +{ + uint32_t i; + + /* Initialize state related to current trace. */ + memset(J->slot, 0, sizeof(J->slot)); + memset(J->chain, 0, sizeof(J->chain)); + memset(J->bpropcache, 0, sizeof(J->bpropcache)); + + J->baseslot = 1; /* Invoking function is at base[-1]. */ + J->base = J->slot + J->baseslot; + J->maxslot = 0; + J->framedepth = 0; + + J->instunroll = J->param[JIT_P_instunroll]; + J->loopunroll = J->param[JIT_P_loopunroll]; + J->tailcalled = 0; + J->loopref = 0; + + J->bc_min = NULL; /* Means no limit. */ + J->bc_extent = ~(MSize)0; + + /* Emit instructions for fixed references. Also triggers initial IR alloc. */ + emitir_raw(IRT(IR_BASE, IRT_PTR), J->parent, J->exitno); + for (i = 0; i <= 2; i++) { + IRIns *ir = IR(REF_NIL-i); + ir->i = 0; + ir->t.irt = (uint8_t)(IRT_NIL+i); + ir->o = IR_KPRI; + ir->prev = 0; + } + J->cur.nk = REF_TRUE; + + setgcref(J->cur.startpt, obj2gco(J->pt)); + J->startpc = J->pc; + if (J->parent) { /* Side trace. */ + Trace *T = J->trace[J->parent]; + TraceNo root = T->root ? T->root : J->parent; + J->cur.root = (uint16_t)root; + J->cur.startins = BCINS_AD(BC_JMP, 0, 0); + /* Check whether we could at least potentially form an extra loop. */ + if (J->exitno == 0 && T->snap[0].nslots == 1 && T->snapmap[0] == 0) { + /* We can narrow a FORL for some side traces, too. */ + if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI && + bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { + lj_snap_add(J); + rec_setup_forl(J, J->pc-1); + goto sidecheck; + } + } else { + J->startpc = NULL; /* Prevent forming an extra loop. */ + } + rec_setup_side(J, T); + sidecheck: + if (J->trace[J->cur.root]->nchild >= J->param[JIT_P_maxside] || + T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + + J->param[JIT_P_tryside]) + rec_stop(J, TRACE_INTERP); + } else { /* Root trace. */ + J->cur.root = 0; + if (J->pc >= J->pt->bc) { /* Not a hot CALL? */ + J->cur.startins = *J->pc; + J->pc = rec_setup_root(J); + /* Note: the loop instruction itself is recorded at the end and not + ** at the start! So snapshot #0 needs to point to the *next* instruction. + */ + } else { + J->cur.startins = BCINS_ABC(BC_CALL, 0, 0, 0); + } + lj_snap_add(J); + if (bc_op(J->cur.startins) == BC_FORL) + rec_setup_forl(J, J->pc-1); + if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) + lj_trace_err(J, LJ_TRERR_STACKOV); + } +} + +#undef IR +#undef emitir_raw +#undef emitir + +#endif diff --git a/src/lj_record.h b/src/lj_record.h new file mode 100644 index 0000000000..7bb7952c40 --- /dev/null +++ b/src/lj_record.h @@ -0,0 +1,17 @@ +/* +** Trace recorder (bytecode -> SSA IR). +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_RECORD_H +#define _LJ_RECORD_H + +#include "lj_obj.h" +#include "lj_jit.h" + +#if LJ_HASJIT +LJ_FUNC void lj_record_ins(jit_State *J); +LJ_FUNC void lj_record_setup(jit_State *J); +#endif + +#endif diff --git a/src/lj_snap.c b/src/lj_snap.c new file mode 100644 index 0000000000..09cd095c9f --- /dev/null +++ b/src/lj_snap.c @@ -0,0 +1,286 @@ +/* +** Snapshot handling. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_snap_c +#define LUA_CORE + +#include "lj_obj.h" + +#if LJ_HASJIT + +#include "lj_gc.h" +#include "lj_state.h" +#include "lj_frame.h" +#include "lj_ir.h" +#include "lj_jit.h" +#include "lj_iropt.h" +#include "lj_trace.h" +#include "lj_snap.h" +#include "lj_target.h" + +/* Some local macros to save typing. Undef'd at the end. 
*/ +#define IR(ref) (&J->cur.ir[(ref)]) + +/* -- Snapshot generation ------------------------------------------------- */ + +/* NYI: Snapshots are in need of a redesign. The current storage model for +** snapshot maps is too wasteful. They could be compressed (1D or 2D) and +** made more flexible at the same time. Iterators should no longer need to +** skip unmodified slots. IR_FRAME should be eliminated, too. +*/ + +/* Add all modified slots to the snapshot. */ +static void snapshot_slots(jit_State *J, IRRef2 *map, BCReg nslots) +{ + BCReg s; + for (s = 0; s < nslots; s++) { + IRRef ref = tref_ref(J->slot[s]); + if (ref) { + IRIns *ir = IR(ref); + if (ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT)) + ref = 0; + } + map[s] = (IRRef2)ref; + } +} + +/* Add frame links at the end of the snapshot. */ +static MSize snapshot_framelinks(jit_State *J, IRRef2 *map) +{ + cTValue *frame = J->L->base - 1; + cTValue *lim = J->L->base - J->baseslot; + MSize f = 0; + map[f++] = u32ptr(J->pc); + while (frame > lim) { + if (frame_islua(frame)) { + map[f++] = u32ptr(frame_pc(frame)); + frame = frame_prevl(frame); + } else if (frame_ispcall(frame)) { + map[f++] = (uint32_t)frame_ftsz(frame); + frame = frame_prevd(frame); + } else if (frame_iscont(frame)) { + map[f++] = (uint32_t)frame_ftsz(frame); + map[f++] = u32ptr(frame_contpc(frame)); + frame = frame_prevd(frame); + } else { + lua_assert(0); + } + } + return f; +} + +/* Take a snapshot of the current stack. */ +static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) +{ + BCReg nslots = J->baseslot + J->maxslot; + MSize nsm, nframelinks; + IRRef2 *p; + /* Conservative estimate. Continuation frames need 2 slots. */ + nsm = nsnapmap + nslots + (uint32_t)J->framedepth*2+1; + if (LJ_UNLIKELY(nsm > J->sizesnapmap)) { /* Need to grow snapshot map? */ + if (nsm < 2*J->sizesnapmap) + nsm = 2*J->sizesnapmap; + else if (nsm < 64) + nsm = 64; + J->snapmapbuf = (IRRef2 *)lj_mem_realloc(J->L, J->snapmapbuf, + J->sizesnapmap*sizeof(IRRef2), nsm*sizeof(IRRef2)); + J->cur.snapmap = J->snapmapbuf; + J->sizesnapmap = nsm; + } + p = &J->cur.snapmap[nsnapmap]; + snapshot_slots(J, p, nslots); + nframelinks = snapshot_framelinks(J, p + nslots); + J->cur.nsnapmap = (uint16_t)(nsnapmap + nslots + nframelinks); + snap->mapofs = (uint16_t)nsnapmap; + snap->ref = (IRRef1)J->cur.nins; + snap->nslots = (uint8_t)nslots; + snap->nframelinks = (uint8_t)nframelinks; + snap->count = 0; +} + +/* Add or merge a snapshot. */ +void lj_snap_add(jit_State *J) +{ + MSize nsnap = J->cur.nsnap; + MSize nsnapmap = J->cur.nsnapmap; + /* Merge if no ins. inbetween or if requested and no guard inbetween. */ + if (J->mergesnap ? !irt_isguard(J->guardemit) : + (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { + nsnapmap = J->cur.snap[--nsnap].mapofs; + } else { + /* Need to grow snapshot buffer? */ + if (LJ_UNLIKELY(nsnap >= J->sizesnap)) { + MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; + if (nsnap >= maxsnap) + lj_trace_err(J, LJ_TRERR_SNAPOV); + lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); + J->cur.snap = J->snapbuf; + } + J->cur.nsnap = (uint16_t)(nsnap+1); + } + J->mergesnap = 0; + J->guardemit.irt = 0; + snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap); +} + +/* Shrink last snapshot. 
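+** Called when a comparison lowered J->maxslot: the tail of the slot map
+** is dropped and the frame links are moved down to the new end.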
*/ +void lj_snap_shrink(jit_State *J) +{ + BCReg nslots = J->baseslot + J->maxslot; + SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; + IRRef2 *oflinks = &J->cur.snapmap[snap->mapofs + snap->nslots]; + IRRef2 *nflinks = &J->cur.snapmap[snap->mapofs + nslots]; + uint32_t s, nframelinks = snap->nframelinks; + lua_assert(nslots < snap->nslots); + snap->nslots = (uint8_t)nslots; + J->cur.nsnapmap = (uint16_t)(snap->mapofs + nslots + nframelinks); + for (s = 0; s < nframelinks; s++) /* Move frame links down. */ + nflinks[s] = oflinks[s]; +} + +/* -- Snapshot access ----------------------------------------------------- */ + +/* Initialize a Bloom Filter with all renamed refs. +** There are very few renames (often none), so the filter has +** very few bits set. This makes it suitable for negative filtering. +*/ +static BloomFilter snap_renamefilter(Trace *T, SnapNo lim) +{ + BloomFilter rfilt = 0; + IRIns *ir; + for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--) + if (ir->op2 <= lim) + bloomset(rfilt, ir->op1); + return rfilt; +} + +/* Process matching renames to find the original RegSP. */ +static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs) +{ + IRIns *ir; + for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--) + if (ir->op1 == ref && ir->op2 <= lim) + rs = ir->prev; + return rs; +} + +/* Convert a snapshot into a linear slot -> RegSP map. */ +void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno) +{ + SnapShot *snap = &T->snap[snapno]; + BCReg s, nslots = snap->nslots; + IRRef2 *map = &T->snapmap[snap->mapofs]; + BloomFilter rfilt = snap_renamefilter(T, snapno); + for (s = 0; s < nslots; s++) { + IRRef ref = snap_ref(map[s]); + if (!irref_isk(ref)) { + IRIns *ir = &T->ir[ref]; + uint32_t rs = ir->prev; + if (bloomtest(rfilt, ref)) + rs = snap_renameref(T, snapno, ref, rs); + rsmap[s] = (uint16_t)rs; + } + } +} + +/* Restore interpreter state from exit state with the help of a snapshot. */ +void lj_snap_restore(jit_State *J, void *exptr) +{ + ExitState *ex = (ExitState *)exptr; + SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ + Trace *T = J->trace[J->parent]; + SnapShot *snap = &T->snap[snapno]; + BCReg s, nslots = snap->nslots; + IRRef2 *map = &T->snapmap[snap->mapofs]; + IRRef2 *flinks = map + nslots + snap->nframelinks; + TValue *o, *newbase, *ntop; + BloomFilter rfilt = snap_renamefilter(T, snapno); + lua_State *L = J->L; + + /* Make sure the stack is big enough for the slots from the snapshot. */ + if (L->base + nslots >= L->maxstack) { + L->top = curr_topL(L); + lj_state_growstack(L, nslots - curr_proto(L)->framesize); + } + + /* Fill stack slots with data from the registers and spill slots. */ + newbase = NULL; + ntop = L->base; + for (s = 0, o = L->base-1; s < nslots; s++, o++) { + IRRef ref = snap_ref(map[s]); + if (ref) { + IRIns *ir = &T->ir[ref]; + if (irref_isk(ref)) { /* Restore constant slot. */ + lj_ir_kvalue(L, o, ir); + } else { + IRType1 t = ir->t; + RegSP rs = ir->prev; + if (LJ_UNLIKELY(bloomtest(rfilt, ref))) + rs = snap_renameref(T, snapno, ref, rs); + if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ + int32_t *sps = &ex->spill[regsp_spill(rs)]; + if (irt_isinteger(t)) { + setintV(o, *sps); + } else if (irt_isnum(t)) { + o->u64 = *(uint64_t *)sps; + } else { + lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ + setgcrefi(o->gcr, *sps); + setitype(o, irt_toitype(t)); + } + } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. 
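+ ** The snapshot's RegSP entry names a register, so the value is read
+ ** back from the exit state's ex->gpr[]/ex->fpr[] by register id.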
*/ + Reg r = regsp_reg(rs); + if (irt_isinteger(t)) { + setintV(o, ex->gpr[r-RID_MIN_GPR]); + } else if (irt_isnum(t)) { + setnumV(o, ex->fpr[r-RID_MIN_FPR]); + } else { + if (!irt_ispri(t)) + setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); + setitype(o, irt_toitype(t)); + } + } else { /* Restore frame slot. */ + lua_assert(ir->o == IR_FRAME); + /* This works for both PTR and FUNC IR_FRAME. */ + setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void)); + if (s != 0) /* Do not overwrite link to previous frame. */ + o->fr.tp.ftsz = (int32_t)*--flinks; + if (irt_isfunc(ir->t)) { + GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); + if (isluafunc(fn)) { + TValue *fs; + newbase = o+1; + fs = newbase + funcproto(fn)->framesize; + if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ + } + } + } + } + } else if (newbase) { + setnilV(o); /* Clear unreferenced slots of newly added frames. */ + } + } + if (newbase) { /* Clear remainder of newly added frames. */ + L->base = newbase; + if (ntop >= L->maxstack) { /* Need to grow the stack again. */ + MSize need = (MSize)(ntop - o); + L->top = o; + lj_state_growstack(L, need); + o = L->top; + ntop = o + need; + } + L->top = curr_topL(L); + for (; o < ntop; o++) + setnilV(o); + } else { /* Must not clear slots of existing frame. */ + L->top = curr_topL(L); + } + lua_assert(map + nslots == flinks-1); + J->pc = (const BCIns *)(uintptr_t)(*--flinks); +} + +#undef IR + +#endif diff --git a/src/lj_snap.h b/src/lj_snap.h new file mode 100644 index 0000000000..806047b129 --- /dev/null +++ b/src/lj_snap.h @@ -0,0 +1,19 @@ +/* +** Snapshot handling. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_SNAP_H +#define _LJ_SNAP_H + +#include "lj_obj.h" +#include "lj_jit.h" + +#if LJ_HASJIT +LJ_FUNC void lj_snap_add(jit_State *J); +LJ_FUNC void lj_snap_shrink(jit_State *J); +LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno); +LJ_FUNC void lj_snap_restore(jit_State *J, void *exptr); +#endif + +#endif diff --git a/src/lj_state.c b/src/lj_state.c new file mode 100644 index 0000000000..b4bc7a0cf8 --- /dev/null +++ b/src/lj_state.c @@ -0,0 +1,255 @@ +/* +** State and stack handling. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +*/ + +#define lj_state_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_func.h" +#include "lj_meta.h" +#include "lj_state.h" +#include "lj_frame.h" +#include "lj_trace.h" +#include "lj_dispatch.h" +#include "lj_vm.h" +#include "lj_lex.h" +#include "lj_alloc.h" + +/* -- Stack handling ------------------------------------------------------ */ + +/* Stack sizes. */ +#define LJ_STACK_MIN LUA_MINSTACK /* Min. stack size. */ +#define LJ_STACK_MAX LUAI_MAXSTACK /* Max. stack size. */ +#define LJ_STACK_START (2*LJ_STACK_MIN) /* Starting stack size. */ +#define LJ_STACK_MAXEX (LJ_STACK_MAX + 1 + LJ_STACK_EXTRA) + +/* Explanation of LJ_STACK_EXTRA: +** +** Calls to metamethods store their arguments beyond the current top +** without checking for the stack limit. This avoids stack resizes which +** would invalidate passed TValue pointers. The stack check is performed +** later by the call gate. This can safely resize the stack or raise an +** error. Thus we need some extra slots beyond the current stack limit. 
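+** So the limit visible to Lua code is L->maxstack, while the real
+** allocation has n+1+LJ_STACK_EXTRA slots (see resizestack() below).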
+** +** Most metamethods need 4 slots above top (cont, mobj, arg1, arg2) plus +** one extra slot if mobj is not a function. Only lj_meta_tset needs 5 +** slots above top, but then mobj is always a function. So we can get by +** with 5 extra slots. +*/ + +/* Resize stack slots and adjust pointers in state. */ +static void resizestack(lua_State *L, MSize n) +{ + TValue *oldst = L->stack; + ptrdiff_t delta; + MSize realsize = n + 1 + LJ_STACK_EXTRA; + GCobj *up; + lua_assert((MSize)(L->maxstack-L->stack) == L->stacksize-LJ_STACK_EXTRA-1); + lj_mem_reallocvec(L, L->stack, L->stacksize, realsize, TValue); + delta = (char *)L->stack - (char *)oldst; + L->maxstack = L->stack + n; + L->stacksize = realsize; + L->base = (TValue *)((char *)L->base + delta); + L->top = (TValue *)((char *)L->top + delta); + for (up = gcref(L->openupval); up != NULL; up = gcnext(up)) + gco2uv(up)->v = (TValue *)((char *)gco2uv(up)->v + delta); + if (obj2gco(L) == gcref(G(L)->jit_L)) + setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta); +} + +/* Relimit stack after error, in case the limit was overdrawn. */ +void lj_state_relimitstack(lua_State *L) +{ + if (L->stacksize > LJ_STACK_MAXEX && L->top - L->stack < LJ_STACK_MAX-1) + resizestack(L, LJ_STACK_MAX); +} + +/* Try to shrink the stack (called from GC). */ +void lj_state_shrinkstack(lua_State *L, MSize used) +{ + if (L->stacksize > LJ_STACK_MAXEX) + return; /* Avoid stack shrinking while handling stack overflow. */ + if (4*used < L->stacksize && + 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize && + obj2gco(L) != gcref(G(L)->jit_L)) /* Don't shrink stack of live trace. */ + resizestack(L, L->stacksize >> 1); +} + +/* Try to grow stack. */ +void lj_state_growstack(lua_State *L, MSize need) +{ + if (L->stacksize > LJ_STACK_MAXEX) /* overflow while handling overflow? */ + lj_err_throw(L, LUA_ERRERR); + resizestack(L, L->stacksize + (need > L->stacksize ? need : L->stacksize)); + if (L->stacksize > LJ_STACK_MAXEX) { + if (curr_funcisL(L)) { /* Clear slots of incomplete Lua frame. */ + TValue *top = curr_topL(L); + while (--top >= L->top) setnilV(top); + } + lj_err_msg(L, LJ_ERR_STKOV); /* ... to allow L->top = curr_topL(L). */ + } +} + +void lj_state_growstack1(lua_State *L) +{ + lj_state_growstack(L, 1); +} + +/* Allocate basic stack for new state. */ +static void stack_init(lua_State *L1, lua_State *L) +{ + L1->stack = lj_mem_newvec(L, LJ_STACK_START + LJ_STACK_EXTRA, TValue); + L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA; + L1->top = L1->stack; + L1->maxstack = L1->stack+(L1->stacksize - LJ_STACK_EXTRA)-1; + setthreadV(L1, L1->top, L1); /* needed for curr_funcisL() on empty stack */ + setnilV(L1->top); /* but clear its type */ + L1->base = ++L1->top; +} + +/* -- State handling ------------------------------------------------------ */ + +/* Open parts that may cause memory-allocation errors. */ +static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud) +{ + global_State *g = G(L); + UNUSED(dummy); + UNUSED(ud); + stack_init(L, L); + /* NOBARRIER: State initialization, all objects are white. */ + setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL))); + settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY)); + lj_str_resize(L, LJ_MIN_STRTAB-1); + lj_meta_init(L); + lj_lex_init(L); + fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. 
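+ ** The message is interned and marked fixed up front, so a later
+ ** out-of-memory error can be raised without allocating anything.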
*/ + g->gc.threshold = 4*g->gc.total; + return NULL; +} + +static void close_state(lua_State *L) +{ + global_State *g = G(L); +#ifndef LUAJIT_USE_SYSMALLOC + if (g->allocf == lj_alloc_f) { + lj_alloc_destroy(g->allocd); + } else +#endif + { + lj_func_closeuv(L, L->stack); + lj_gc_freeall(g); + lua_assert(gcref(g->gc.root) == obj2gco(L)); + lua_assert(g->strnum == 0); + lj_trace_freestate(g); + lj_mem_freevec(g, g->strhash, g->strmask+1, GCstr *); + lj_str_freebuf(g, &g->tmpbuf); + lj_mem_freevec(g, L->stack, L->stacksize, TValue); + lua_assert(g->gc.total == sizeof(GG_State)); + g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); + } +} + +LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) +{ + GG_State *GG = cast(GG_State *, f(ud, NULL, 0, sizeof(GG_State))); + lua_State *L = &GG->L; + global_State *g = &GG->g; + if (GG == NULL) return NULL; + memset(GG, 0, sizeof(GG_State)); + L->gct = ~LJ_TTHREAD; + L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */ + L->dummy_ffid = FF_C; + setmref(L->glref, g); + g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED; + g->allocf = f; + g->allocd = ud; + setgcref(g->mainthref, obj2gco(L)); + setgcref(g->uvhead.prev, obj2gco(&g->uvhead)); + setgcref(g->uvhead.next, obj2gco(&g->uvhead)); + g->strmask = ~(MSize)0; + setnilV(registry(L)); + setnilV(&g->nilnode.val); + setnilV(&g->nilnode.key); + lj_str_initbuf(L, &g->tmpbuf); + g->gc.state = GCSpause; + setgcref(g->gc.root, obj2gco(L)); + g->gc.sweep = &g->gc.root; + g->gc.total = sizeof(GG_State); + g->gc.pause = LUAI_GCPAUSE; + g->gc.stepmul = LUAI_GCMUL; + lj_dispatch_init((GG_State *)L); + L->status = LUA_ERRERR+1; /* Avoid touching the stack upon memory error. */ + if (lj_vm_cpcall(L, cpluaopen, NULL, NULL) != 0) { + /* Memory allocation error: free partial state. */ + close_state(L); + return NULL; + } + L->status = 0; + return L; +} + +static TValue *cpfinalize(lua_State *L, lua_CFunction dummy, void *ud) +{ + UNUSED(dummy); + UNUSED(ud); + lj_gc_finalizeudata(L); + /* Frame pop omitted. */ + return NULL; +} + +LUA_API void lua_close(lua_State *L) +{ + global_State *g = G(L); + L = mainthread(g); /* Only the main thread can be closed. */ + lj_func_closeuv(L, L->stack); + lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */ +#if LJ_HASJIT + G2J(g)->flags &= ~JIT_F_ON; + G2J(g)->state = LJ_TRACE_IDLE; + lj_dispatch_update(g); +#endif + do { + hook_enter(g); + L->status = 0; + L->cframe = NULL; + L->base = L->top = L->stack + 1; + } while (lj_vm_cpcall(L, cpfinalize, NULL, NULL) != 0); + close_state(L); +} + +lua_State *lj_state_new(lua_State *L) +{ + lua_State *L1 = lj_mem_newobj(L, lua_State); + L1->gct = ~LJ_TTHREAD; + L1->dummy_ffid = FF_C; + L1->status = 0; + L1->stacksize = 0; + L1->stack = NULL; + L1->cframe = NULL; + /* NOBARRIER: The lua_State is new (marked white). */ + setgcrefnull(L1->openupval); + setmrefr(L1->glref, L->glref); + setgcrefr(L1->env, L->env); + stack_init(L1, L); /* init stack */ + lua_assert(iswhite(obj2gco(L1))); + return L1; +} + +void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) +{ + lua_assert(L != mainthread(g)); + lj_func_closeuv(L, L->stack); + lua_assert(gcref(L->openupval) == NULL); + lj_mem_freevec(g, L->stack, L->stacksize, TValue); + lj_mem_freet(g, L); +} + diff --git a/src/lj_state.h b/src/lj_state.h new file mode 100644 index 0000000000..54e8540573 --- /dev/null +++ b/src/lj_state.h @@ -0,0 +1,31 @@ +/* +** State and stack handling. +** Copyright (C) 2005-2009 Mike Pall. 
See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_STATE_H +#define _LJ_STATE_H + +#include "lj_obj.h" + +#define incr_top(L) \ + (++L->top >= L->maxstack && (lj_state_growstack1(L), 0)) + +#define savestack(L, p) ((char *)(p) - (char *)L->stack) +#define restorestack(L, n) ((TValue *)((char *)L->stack + (n))) + +LJ_FUNC void lj_state_relimitstack(lua_State *L); +LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used); +LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need); +LJ_FUNCA void lj_state_growstack1(lua_State *L); + +static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) +{ + if ((MSize)((char *)L->maxstack-(char *)L->top) <= need*(MSize)sizeof(TValue)) + lj_state_growstack(L, need); +} + +LJ_FUNC lua_State *lj_state_new(lua_State *L); +LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); + +#endif diff --git a/src/lj_str.c b/src/lj_str.c new file mode 100644 index 0000000000..26f91cba0a --- /dev/null +++ b/src/lj_str.c @@ -0,0 +1,301 @@ +/* +** String handling. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +*/ + +#include + +#define lj_str_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_state.h" +#include "lj_ctype.h" + +/* -- String interning ---------------------------------------------------- */ + +/* Ordered compare of strings. Assumes string data is 4-byte aligned. */ +int32_t lj_str_cmp(GCstr *a, GCstr *b) +{ + MSize i, n = a->len > b->len ? b->len : a->len; + for (i = 0; i < n; i += 4) { + /* Note: innocuous access up to end of string + 3. */ + uint32_t va = *(const uint32_t *)(strdata(a)+i); + uint32_t vb = *(const uint32_t *)(strdata(b)+i); + if (va != vb) { +#if LJ_ARCH_ENDIAN == LUAJIT_LE + va = lj_bswap(va); vb = lj_bswap(vb); +#endif + i -= n; + if ((int32_t)i >= -3) { + va >>= 32+(i<<3); vb >>= 32+(i<<3); + if (va == vb) break; + } + return (int32_t)(va - vb); + } + } + return (int32_t)(a->len - b->len); +} + +/* Resize the string hash table (grow and shrink). */ +void lj_str_resize(lua_State *L, MSize newmask) +{ + global_State *g = G(L); + GCRef *newhash; + MSize i; + if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) + return; /* No resizing during GC traversal or if already too big. */ + newhash = lj_mem_newvec(L, newmask+1, GCRef); + memset(newhash, 0, (newmask+1)*sizeof(GCRef)); + for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */ + GCobj *p = gcref(g->strhash[i]); + while (p) { /* Follow each hash chain and reinsert all strings. */ + MSize h = gco2str(p)->hash & newmask; + GCobj *next = gcnext(p); + /* NOBARRIER: The string table is a GC root. */ + setgcrefr(p->gch.nextgc, newhash[h]); + setgcref(newhash[h], p); + p = next; + } + } + lj_mem_freevec(g, g->strhash, g->strmask+1, GCstr *); + g->strmask = newmask; + g->strhash = newhash; +} + +/* Intern a string and return string object. */ +GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) +{ + global_State *g; + GCstr *s; + GCobj *o; + MSize len = (MSize)lenx; + MSize h = len; + MSize step = (len>>5)+1; /* Partial hash. */ + MSize l1; + if (lenx >= LJ_MAX_STR) + lj_err_msg(L, LJ_ERR_STROV); + for (l1 = len; l1 >= step; l1 -= step) /* Compute string hash. */ + h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1])); + /* Check if the string has already been interned. 
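+ ** Note: the hash above samples at most ~32 characters (step is
+ ** len/32+1), so hashing long strings stays cheap; the memcmp below
+ ** catches any collisions this causes.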
*/ + g = G(L); + for (o = gcref(g->strhash[h & g->strmask]); o != NULL; o = gcnext(o)) { + GCstr *tso = gco2str(o); + if (tso->len == len && (memcmp(str, strdata(tso), len) == 0)) { + if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */ + return tso; /* Return existing string. */ + } + } + /* Nope, create a new string. */ + s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); + newwhite(g, s); + s->gct = ~LJ_TSTR; + s->len = len; + s->hash = h; + s->reserved = 0; + memcpy(strdatawr(s), str, len); + strdatawr(s)[len] = '\0'; /* Zero-terminate string. */ + /* Add it to string hash table. */ + h &= g->strmask; + s->nextgc = g->strhash[h]; + /* NOBARRIER: The string table is a GC root. */ + setgcref(g->strhash[h], obj2gco(s)); + if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */ + lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */ + return s; /* Return newly interned string. */ +} + +void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) +{ + g->strnum--; + lj_mem_free(g, s, sizestring(s)); +} + +/* -- Type conversions ---------------------------------------------------- */ + +/* Convert string to number. */ +int lj_str_numconv(const char *s, TValue *n) +{ + lua_Number sign = 1; + const uint8_t *p = (const uint8_t *)s; + while (lj_ctype_isspace(*p)) p++; + if (*p == '-') { p++; sign = -1; } else if (*p == '+') { p++; } + if ((uint32_t)(*p - '0') < 10) { + uint32_t k = (uint32_t)(*p++ - '0'); + if (k == 0 && ((*p & ~0x20) == 'X')) { + p++; + while (lj_ctype_isxdigit(*p)) { + if (k >= 0x10000000) goto parsedbl; + k = (k << 4) + (*p & 15u); + if (!lj_ctype_isdigit(*p)) k += 9; + p++; + } + } else { + while ((uint32_t)(*p - '0') < 10) { + if (k >= 0x19999999) goto parsedbl; + k = k * 10u + (uint32_t)(*p++ - '0'); + } + } + while (LJ_UNLIKELY(lj_ctype_isspace(*p))) p++; + if (LJ_LIKELY(*p == '\0')) { + setnumV(n, sign * cast_num(k)); + return 1; + } + } +parsedbl: + { + TValue tv; + char *endptr; + setnumV(&tv, lua_str2number(s, &endptr)); + if (endptr == s) return 0; /* conversion failed */ + if (LJ_UNLIKELY(*endptr != '\0')) { + while (lj_ctype_isspace((uint8_t)*endptr)) endptr++; + if (*endptr != '\0') return 0; /* invalid trailing characters? */ + } + if (LJ_LIKELY(!tvisnan(&tv))) + setnumV(n, numV(&tv)); + else + setnanV(n); /* Canonicalize injected NaNs. */ + return 1; + } +} + +/* Convert number to string. */ +GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) +{ + char s[LUAI_MAXNUMBER2STR]; + lua_Number n = *np; + size_t len = (size_t)lua_number2str(s, n); + return lj_str_new(L, s, len); +} + +/* Convert integer to string. */ +GCstr *lj_str_fromint(lua_State *L, int32_t k) +{ + char s[1+10]; + char *p = s+sizeof(s); + uint32_t i = (uint32_t)(k < 0 ? -k : k); + do { *--p = (char)('0' + i % 10); } while (i /= 10); + if (k < 0) *--p = '-'; + return lj_str_new(L, p, (size_t)(s+sizeof(s)-p)); +} + +/* -- String formatting --------------------------------------------------- */ + +static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len) +{ + char *p; + MSize i; + if (sb->n + len > sb->sz) { + MSize sz = sb->sz * 2; + while (sb->n + len > sz) sz = sz * 2; + lj_str_resizebuf(L, sb, sz); + } + p = sb->buf + sb->n; + sb->n += len; + for (i = 0; i < len; i++) p[i] = str[i]; +} + +static void addchar(lua_State *L, SBuf *sb, int c) +{ + if (sb->n + 1 > sb->sz) { + MSize sz = sb->sz * 2; + lj_str_resizebuf(L, sb, sz); + } + sb->buf[sb->n++] = cast(char, c); +} + +/* Push formatted message as a string object to Lua stack. va_list variant. 
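+** Only the format characters needed by the VM itself are implemented.
+** Typical use:
+**   lj_str_pushf(L, "%s: bad argument %d", name, (int32_t)narg)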
*/ +const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp) +{ + SBuf *sb = &G(L)->tmpbuf; + lj_str_needbuf(L, sb, (MSize)strlen(fmt)); + lj_str_resetbuf(sb); + for (;;) { + const char *e = strchr(fmt, '%'); + if (e == NULL) break; + addstr(L, sb, fmt, (MSize)(e-fmt)); + /* This function only handles %s, %c, %d, %f and %p formats. */ + switch (e[1]) { + case 's': { + const char *s = va_arg(argp, char *); + if (s == NULL) s = "(null)"; + addstr(L, sb, s, (MSize)strlen(s)); + break; + } + case 'c': + addchar(L, sb, va_arg(argp, int)); + break; + case 'd': { + char buff[1+10]; + char *p = buff+sizeof(buff); + int32_t k = va_arg(argp, int32_t); + uint32_t i = (uint32_t)(k < 0 ? -k : k); + do { *--p = (char)('0' + i % 10); } while (i /= 10); + if (k < 0) *--p = '-'; + addstr(L, sb, p, (MSize)(buff+sizeof(buff)-p)); + break; + } + case 'f': { + char buff[LUAI_MAXNUMBER2STR]; + lua_Number n = cast_num(va_arg(argp, LUAI_UACNUMBER)); + MSize len = (MSize)lua_number2str(buff, n); + addstr(L, sb, buff, len); + break; + } + case 'p': { +#define FMTP_CHARS (2*sizeof(ptrdiff_t)) + char buff[2+FMTP_CHARS]; + ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *)); + int i; + buff[0] = '0'; + buff[1] = 'x'; + for (i = 2+FMTP_CHARS-1; i >= 2; i--, p >>= 4) + buff[i] = "0123456789abcdef"[(p & 15)]; + addstr(L, sb, buff, 2+FMTP_CHARS); + break; + } + case '%': + addchar(L, sb, '%'); + break; + default: + addchar(L, sb, '%'); + addchar(L, sb, e[1]); + break; + } + fmt = e+2; + } + addstr(L, sb, fmt, (MSize)strlen(fmt)); + setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n)); + incr_top(L); + return strVdata(L->top - 1); +} + +/* Push formatted message as a string object to Lua stack. Vararg variant. */ +const char *lj_str_pushf(lua_State *L, const char *fmt, ...) +{ + const char *msg; + va_list argp; + va_start(argp, fmt); + msg = lj_str_pushvf(L, fmt, argp); + va_end(argp); + return msg; +} + +/* -- Buffer handling ----------------------------------------------------- */ + +char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz) +{ + if (sz > sb->sz) { + if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF; + lj_str_resizebuf(L, sb, sz); + } + return sb->buf; +} + diff --git a/src/lj_str.h b/src/lj_str.h new file mode 100644 index 0000000000..f7e56d16eb --- /dev/null +++ b/src/lj_str.h @@ -0,0 +1,45 @@ +/* +** String handling. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_STR_H +#define _LJ_STR_H + +#include + +#include "lj_obj.h" + +/* String interning. */ +LJ_FUNCA int32_t lj_str_cmp(GCstr *a, GCstr *b); +LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); +LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); +LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); + +#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) +#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) + +/* Type conversions. */ +LJ_FUNCA int lj_str_numconv(const char *s, TValue *n); +LJ_FUNCA GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np); +LJ_FUNCA GCstr *lj_str_fromint(lua_State *L, int32_t k); + +/* String formatting. */ +LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp); +LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...) +#if defined(__GNUC__) + __attribute__ ((format (printf, 2, 3))) +#endif + ; + +/* Resizable string buffers. Struct definition in lj_obj.h. 
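+** Typical lifecycle: lj_str_initbuf(L, sb), then lj_str_needbuf() or
+** lj_str_resizebuf() before writing to sb->buf, and lj_str_freebuf()
+** when done; sb->n tracks the used length.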
*/ +LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz); + +#define lj_str_initbuf(L, sb) ((sb)->buf = NULL, (sb)->sz = 0) +#define lj_str_resetbuf(sb) ((sb)->n = 0) +#define lj_str_resizebuf(L, sb, size) \ + ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \ + (sb)->sz = (size)) +#define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz) + +#endif diff --git a/src/lj_tab.c b/src/lj_tab.c new file mode 100644 index 0000000000..633ea20cec --- /dev/null +++ b/src/lj_tab.c @@ -0,0 +1,618 @@ +/* +** Table handling. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +** +** Major portions taken verbatim or adapted from the Lua interpreter. +** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h +*/ + +#define lj_tab_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_tab.h" + +/* -- Object hashing ------------------------------------------------------ */ + +/* Hash values are masked with the table hash mask and used as an index. */ +#define hashmask(t, x) (&noderef(t->node)[(x) & t->hmask]) + +/* String hashes are precomputed when they are interned. */ +#define hashstr(t, s) hashmask(t, (s)->hash) + +#define hashnum(t, o) hashrot(t, (o)->u32.lo, (o)->u32.hi&0x7fffffff) +#define hashgcref(t, r) hashrot(t, gcrefu(r), gcrefu(r)-0x04c11db7) + +/* Scramble the bits of numbers and pointers. */ +static LJ_AINLINE Node *hashrot(const GCtab *t, uint32_t lo, uint32_t hi) +{ + lo ^= hi; hi = lj_rol(hi, 14); + lo -= hi; hi = lj_rol(hi, 5); + hi ^= lo; hi -= lj_rol(lo, 27); + return hashmask(t, hi); +} + +/* Hash an arbitrary key and return its anchor position in the hash table. */ +static Node *hashkey(const GCtab *t, cTValue *key) +{ + if (tvisstr(key)) + return hashstr(t, strV(key)); + else if (tvisnum(key)) + return hashnum(t, key); + else if (tvisbool(key)) + return hashmask(t, boolV(key)); + else + return hashgcref(t, key->gcr); + /* Only hash 32 bits of lightuserdata on a 64 bit CPU. Good enough? */ +} + +/* -- Table creation and destruction -------------------------------------- */ + +/* Create new hash part for table. */ +static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits) +{ + uint32_t hsize; + Node *node; + lua_assert(hbits != 0); + if (hbits > LJ_MAX_HBITS) + lj_err_msg(L, LJ_ERR_TABOV); + hsize = 1u << hbits; + node = lj_mem_newvec(L, hsize, Node); + setmref(t->node, node); + t->hmask = hsize-1; + setmref(t->lastfree, &node[hsize]); +} + +/* +** Q: Why all of these copies of t->hmask, t->node etc. to local variables? +** A: Because alias analysis for C is _really_ tough. +** Even state-of-the-art C compilers won't produce good code without this. +*/ + +/* Clear hash part of table. */ +static LJ_AINLINE void clearhpart(GCtab *t) +{ + uint32_t i, hmask = t->hmask; + Node *node = noderef(t->node); + lua_assert(t->hmask != 0); + for (i = 0; i <= hmask; i++) { + Node *n = &node[i]; + setmref(n->next, NULL); + setnilV(&n->key); + setnilV(&n->val); + } +} + +/* Clear array part of table. */ +static LJ_AINLINE void clearapart(GCtab *t) +{ + uint32_t i, asize = t->asize; + TValue *array = tvref(t->array); + for (i = 0; i < asize; i++) + setnilV(&array[i]); +} + +/* Create a new table. Note: the slots are not initialized (yet). */ +static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) +{ + GCtab *t; + global_State *g; + /* First try to colocate the array part. 
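**
** Colocation allocates the TValue array and the GCtab header in a single
** block, with the array placed in front of the header (t->colo > 0 then
** holds the array size). The layout for asize == N is roughly:
**
**   [TValue 0][TValue 1] ... [TValue N-1][GCtab]
**    ^-- t->array                         ^-- t
**
** This saves a separate allocation (and usually a cache miss) for the
** many small arrays created by typical Lua code.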
*/ + if (LJ_MAX_COLOSIZE && asize > 0 && asize <= LJ_MAX_COLOSIZE) { + /* This is ugly. (sizeof(GCtab)&7) != 0. So prepend the colocated array. */ + TValue *array = lj_mem_newt(L, sizetabcolo(asize), TValue); + t = cast(GCtab *, array + asize); + g = G(L); + setgcrefr(t->nextgc, g->gc.root); + setgcref(g->gc.root, obj2gco(t)); + newwhite(g, t); + t->gct = ~LJ_TTAB; + t->nomm = cast_byte(~0); + t->colo = (int8_t)asize; + setmref(t->array, array); + setgcrefnull(t->metatable); + t->asize = asize; + t->hmask = 0; + setmref(t->node, &g->nilnode); + setmref(t->lastfree, &g->nilnode); + } else { /* Otherwise separately allocate the array part. */ + t = lj_mem_newobj(L, GCtab); + t->gct = ~LJ_TTAB; + t->nomm = cast_byte(~0); + t->colo = 0; + setmref(t->array, NULL); + setgcrefnull(t->metatable); + t->asize = 0; /* In case the array allocation fails. */ + t->hmask = 0; + g = G(L); + setmref(t->node, &g->nilnode); + setmref(t->lastfree, &g->nilnode); + if (asize > 0) { + if (asize > LJ_MAX_ASIZE) + lj_err_msg(L, LJ_ERR_TABOV); + setmref(t->array, lj_mem_newvec(L, asize, TValue)); + t->asize = asize; + } + } + if (hbits) + newhpart(L, t, hbits); + return t; +} + +/* Create a new table. +** +** IMPORTANT NOTE: The API differs from lua_createtable()! +** +** The array size is non-inclusive. E.g. asize=128 creates array slots +** for 0..127, but not for 128. If you need slots 1..128, pass asize=129 +** (slot 0 is wasted in this case). +** +** The hash size is given in hash bits. hbits=0 means no hash part. +** hbits=1 creates 2 hash slots, hbits=2 creates 4 hash slots and so on. +*/ +GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits) +{ + GCtab *t = newtab(L, asize, hbits); + clearapart(t); + if (t->hmask > 0) clearhpart(t); + return t; +} + +/* Duplicate a table. */ +GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) +{ + GCtab *t; + uint32_t asize, hmask; + t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0); + lua_assert(kt->asize == t->asize && kt->hmask == t->hmask); + t->nomm = 0; /* Keys with metamethod names may be present. */ + asize = kt->asize; + if (asize > 0) { + TValue *array = tvref(t->array); + TValue *karray = tvref(kt->array); + if (asize < 64) { /* An inlined loop beats memcpy for < 512 bytes. */ + uint32_t i; + for (i = 0; i < asize; i++) + copyTV(L, &array[i], &karray[i]); + } else { + memcpy(array, karray, asize*sizeof(TValue)); + } + } + hmask = kt->hmask; + if (hmask > 0) { + uint32_t i; + Node *node = noderef(t->node); + Node *knode = noderef(kt->node); + ptrdiff_t d = (char *)node - (char *)knode; + setmref(t->lastfree, (Node *)((char *)noderef(kt->lastfree) + d)); + for (i = 0; i <= hmask; i++) { + Node *kn = &knode[i]; + Node *n = &node[i]; + Node *next = nextnode(kn); + copyTV(L, &n->val, &kn->val); + copyTV(L, &n->key, &kn->key); + setmref(n->next, next == NULL? next : (Node *)((char *)next + d)); + } + } + return t; +} + +/* Free a table. */ +void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) +{ + if (t->hmask > 0) + lj_mem_freevec(g, noderef(t->node), t->hmask+1, Node); + if (LJ_MAX_COLOSIZE && t->colo) { + ptrdiff_t n; + if (t->colo < 0 && t->asize > 0) /* Array part was separated. 
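**
** t->colo < 0 means the table started out with a colocated array part,
** but a later resize had to detach it into a separately allocated vector
** (resizetab sets the 0x80 bit for this). The low bits still hold the
** original colocated size, so the detached vector is freed here first
** and the combined block is then freed below via that size.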
*/ + lj_mem_freevec(g, tvref(t->array), t->asize, TValue); + n = t->colo & 0x7f; + lj_mem_free(g, (TValue *)t - n, sizetabcolo((uint32_t)n)); + } else { + if (t->asize > 0) + lj_mem_freevec(g, tvref(t->array), t->asize, TValue); + lj_mem_freet(g, t); + } +} + +/* -- Table resizing ------------------------------------------------------ */ + +/* Resize a table to fit the new array/hash part sizes. */ +static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) +{ + Node *oldnode = noderef(t->node); + uint32_t oldasize = t->asize; + uint32_t oldhmask = t->hmask; + if (asize > oldasize) { /* Array part grows? */ + TValue *array; + uint32_t i; + if (asize > LJ_MAX_ASIZE) + lj_err_msg(L, LJ_ERR_TABOV); + if (LJ_MAX_COLOSIZE && t->colo > 0) { + /* A colocated array must be separated and copied. */ + TValue *oarray = tvref(t->array); + array = lj_mem_newvec(L, asize, TValue); + t->colo = (int8_t)(t->colo | 0x80); /* Mark as separated (colo < 0). */ + for (i = 0; i < oldasize; i++) + copyTV(L, &array[i], &oarray[i]); + } else { + array = (TValue *)lj_mem_realloc(L, tvref(t->array), + oldasize*sizeof(TValue), asize*sizeof(TValue)); + } + setmref(t->array, array); + t->asize = asize; + for (i = oldasize; i < asize; i++) /* Clear newly allocated slots. */ + setnilV(&array[i]); + } + /* Create new (empty) hash part. */ + if (hbits) { + newhpart(L, t, hbits); + clearhpart(t); + } else { + global_State *g = G(L); + setmref(t->node, &g->nilnode); + setmref(t->lastfree, &g->nilnode); + t->hmask = 0; + } + if (asize < oldasize) { /* Array part shrinks? */ + TValue *array = tvref(t->array); + uint32_t i; + t->asize = asize; /* Note: This 'shrinks' even colocated arrays. */ + for (i = asize; i < oldasize; i++) /* Reinsert old array values. */ + if (!tvisnil(&array[i])) + copyTV(L, lj_tab_setinth(L, t, (int32_t)i), &array[i]); + /* Physically shrink only separated arrays. */ + if (LJ_MAX_COLOSIZE && t->colo <= 0) + setmref(t->array, lj_mem_realloc(L, array, + oldasize*sizeof(TValue), asize*sizeof(TValue))); + } + if (oldhmask > 0) { /* Reinsert pairs from old hash part. */ + global_State *g; + uint32_t i; + for (i = 0; i <= oldhmask; i++) { + Node *n = &oldnode[i]; + if (!tvisnil(&n->val)) + copyTV(L, lj_tab_set(L, t, &n->key), &n->val); + } + g = G(L); + lj_mem_freevec(g, oldnode, oldhmask+1, Node); + } +} + +static uint32_t countint(cTValue *key, uint32_t *bins) +{ + if (tvisnum(key)) { + lua_Number nk = numV(key); + int32_t k = lj_num2int(nk); + if ((uint32_t)k < LJ_MAX_ASIZE && nk == cast_num(k)) { + bins[(k > 2 ? 
lj_fls((uint32_t)(k-1)) : 0)]++; + return 1; + } + } + return 0; +} + +static uint32_t countarray(const GCtab *t, uint32_t *bins) +{ + uint32_t na, b, i; + if (t->asize == 0) return 0; + for (na = i = b = 0; b < LJ_MAX_ABITS; b++) { + uint32_t n, top = 2u << b; + TValue *array; + if (top >= t->asize) { + top = t->asize-1; + if (i > top) + break; + } + array = tvref(t->array); + for (n = 0; i <= top; i++) + if (!tvisnil(&array[i])) + n++; + bins[b] += n; + na += n; + } + return na; +} + +static uint32_t counthash(const GCtab *t, uint32_t *bins, uint32_t *narray) +{ + uint32_t total, na, i, hmask = t->hmask; + Node *node = noderef(t->node); + for (total = na = 0, i = 0; i <= hmask; i++) { + Node *n = &node[i]; + if (!tvisnil(&n->val)) { + na += countint(&n->key, bins); + total++; + } + } + *narray += na; + return total; +} + +static uint32_t bestasize(uint32_t bins[], uint32_t *narray) +{ + uint32_t b, sum, na = 0, sz = 0, nn = *narray; + for (b = 0, sum = 0; (1u<<b) <= nn && sum != nn; b++) + if (bins[b] > 0 && (sum += bins[b]) >= (1u<<b)) { + sz = (2u<<b)+1; + na = sum; + } + *narray = sz; + return na; +} + +static void rehashtab(lua_State *L, GCtab *t, cTValue *ek) +{ + uint32_t bins[LJ_MAX_ABITS]; + uint32_t total, asize, na, i; + for (i = 0; i < LJ_MAX_ABITS; i++) bins[i] = 0; + asize = countarray(t, bins); + total = 1 + asize; + total += counthash(t, bins, &asize); + asize += countint(ek, bins); + na = bestasize(bins, &asize); + total -= na; + resizetab(L, t, asize, hsize2hbits(total)); +} + +void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) +{ + resizetab(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0); +} + +/* -- Table getters ------------------------------------------------------- */ + +cTValue *lj_tab_getinth(GCtab *t, int32_t key) +{ + TValue k; + Node *n; + k.n = cast_num(key); + n = hashnum(t, &k); + do { + if (tvisnum(&n->key) && n->key.n == k.n) + return &n->val; + } while ((n = nextnode(n))); + return NULL; +} + +cTValue *lj_tab_getstr(GCtab *t, GCstr *key) +{ + Node *n = hashstr(t, key); + do { + if (tvisstr(&n->key) && strV(&n->key) == key) + return &n->val; + } while ((n = nextnode(n))); + return NULL; +} + +cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key) +{ + if (tvisstr(key)) { + cTValue *tv = lj_tab_getstr(t, strV(key)); + if (tv) + return tv; + } else if (tvisnum(key)) { + lua_Number nk = numV(key); + int32_t k = lj_num2int(nk); + if (nk == cast_num(k)) { + cTValue *tv = lj_tab_getint(t, k); + if (tv) + return tv; + } else { + goto genlookup; /* Else use the generic lookup. */ + } + } else if (!tvisnil(key)) { + Node *n; + genlookup: + n = hashkey(t, key); + do { + if (lj_obj_equal(&n->key, key)) + return &n->val; + } while ((n = nextnode(n))); + } + return niltv(L); +} + +/* -- Table setters ------------------------------------------------------- */ + +static Node *getfreepos(GCtab *t) +{ + Node *node = noderef(t->node); + Node *lastfree = noderef(t->lastfree); + while (lastfree > node) { + lastfree--; + setmref(t->lastfree, lastfree); + if (tvisnil(&lastfree->key)) + return lastfree; + } + return NULL; /* could not find a free place */ +} + +/* +** inserts a new key into a hash table; first, check whether key's main +** position is free. If not, check whether colliding node is in its main +** position or not: if it is not, move colliding node to an empty place and +** put new key in its main position; otherwise (colliding node is in its main +** position), new key goes to an empty position. +*/ +TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) +{ + Node *mp = hashkey(t, key); + if (!tvisnil(&mp->val) || t->hmask == 0) { + Node *othern; + Node *n = getfreepos(t); /* get a free place */ + if (n == NULL) { /* cannot find a free place? */ + rehashtab(L, t, key); /* grow table */ + return lj_tab_set(L, t, key); /* re-insert key into grown table */ + } + lua_assert(n != &G(L)->nilnode); + othern = hashkey(t, &mp->key); + if (othern != mp) { /* is colliding node out of its main position? 
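**
** A small worked example (illustrative): let keys A and C both hash to
** slot 3, with A inserted first. Inserting C finds A in its own main
** position, so C is placed in a free slot and chained behind A. If slot 3
** instead holds a key B that merely collided there (B's main position is
** elsewhere), B is relocated to a free slot, its chain is relinked, and C
** takes slot 3, which is C's main position.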
*/ + /* yes; move colliding node into free position */ + while (noderef(othern->next) != mp) + othern = nextnode(othern); /* find previous */ + setmref(othern->next, n); /* redo the chain with `n' in place of `mp' */ + *n = *mp; /* copy colliding node into free pos. (mp->next also goes) */ + setmref(mp->next, NULL); /* now `mp' is free */ + setnilV(&mp->val); + } else { /* colliding node is in its own main position */ + /* new node will go into free position */ + setmrefr(n->next, mp->next); /* chain new position */ + setmref(mp->next, n); + mp = n; + } + } + mp->key.u64 = key->u64; + if (LJ_UNLIKELY(tvismzero(&mp->key))) + mp->key.u64 = 0; + lj_gc_barriert(L, t, key); + lua_assert(tvisnil(&mp->val)); + return &mp->val; +} + +TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key) +{ + TValue k; + Node *n; + k.n = cast_num(key); + n = hashnum(t, &k); + do { + if (tvisnum(&n->key) && n->key.n == k.n) + return &n->val; + } while ((n = nextnode(n))); + return lj_tab_newkey(L, t, &k); +} + +TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key) +{ + TValue k; + Node *n = hashstr(t, key); + do { + if (tvisstr(&n->key) && strV(&n->key) == key) + return &n->val; + } while ((n = nextnode(n))); + setstrV(L, &k, key); + return lj_tab_newkey(L, t, &k); +} + +TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key) +{ + Node *n; + t->nomm = 0; /* Invalidate negative metamethod cache. */ + if (tvisstr(key)) { + return lj_tab_setstr(L, t, strV(key)); + } else if (tvisnum(key)) { + lua_Number nk = numV(key); + int32_t k = lj_num2int(nk); + if (nk == cast_num(k)) + return lj_tab_setint(L, t, k); + if (tvisnan(key)) + lj_err_msg(L, LJ_ERR_NANIDX); + /* Else use the generic lookup. */ + } else if (tvisnil(key)) { + lj_err_msg(L, LJ_ERR_NILIDX); + } + n = hashkey(t, key); + do { + if (lj_obj_equal(&n->key, key)) + return &n->val; + } while ((n = nextnode(n))); + return lj_tab_newkey(L, t, key); +} + +/* -- Table traversal ----------------------------------------------------- */ + +/* Get the traversal index of a key. */ +static uint32_t keyindex(lua_State *L, GCtab *t, cTValue *key) +{ + if (tvisnum(key)) { + lua_Number nk = numV(key); + int32_t k = lj_num2int(nk); + if ((uint32_t)k < t->asize && nk == cast_num(k)) + return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */ + } + if (!tvisnil(key)) { + Node *n = hashkey(t, key); + do { + if (lj_obj_equal(&n->key, key) || + (itype(&n->key) == LJ_TDEADKEY && tvisgcv(key) && + gcV(&n->key) == gcV(key))) + return t->asize + (uint32_t)(n - noderef(t->node)); + /* Hash key indexes: [t->asize..t->asize+t->hmask] */ + } while ((n = nextnode(n))); + lj_err_msg(L, LJ_ERR_NEXTIDX); + return 0; /* unreachable */ + } + return ~0u; /* A nil key starts the traversal. */ +} + +/* Advance to the next step in a table traversal. */ +int lj_tab_next(lua_State *L, GCtab *t, TValue *key) +{ + uint32_t i = keyindex(L, t, key); /* Find predecessor key index. */ + for (i++; i < t->asize; i++) /* First traverse the array keys. */ + if (!tvisnil(arrayslot(t, i))) { + setintV(key, i); + copyTV(L, key+1, arrayslot(t, i)); + return 1; + } + for (i -= t->asize; i <= t->hmask; i++) { /* Then traverse the hash keys. */ + Node *n = &noderef(t->node)[i]; + if (!tvisnil(&n->val)) { + copyTV(L, key, &n->key); + copyTV(L, key+1, &n->val); + return 1; + } + } + return 0; /* End of traversal. 
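**
** This is the traversal behind lua_next(): array keys first, then hash
** keys. A sketch of a full traversal (assuming key points at two free
** stack slots, with the first set to nil to start the iteration):
**
**   setnilV(key);
**   while (lj_tab_next(L, t, key)) {
**     ... key[0] now holds the key, key[1] the value ...
**   }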
*/ +} + +/* -- Table length calculation -------------------------------------------- */ + +static MSize unbound_search(GCtab *t, MSize j) +{ + cTValue *tv; + MSize i = j; /* i is zero or a present index */ + j++; + /* find `i' and `j' such that i is present and j is not */ + while ((tv = lj_tab_getint(t, cast(int32_t, j))) && !tvisnil(tv)) { + i = j; + j *= 2; + if (j > (MSize)(INT_MAX-2)) { /* overflow? */ + /* table was built with bad purposes: resort to linear search */ + i = 1; + while ((tv = lj_tab_getint(t, cast(int32_t, i))) && !tvisnil(tv)) i++; + return i - 1; + } + } + /* now do a binary search between them */ + while (j - i > 1) { + MSize m = (i+j)/2; + cTValue *tvb = lj_tab_getint(t, cast(int32_t, m)); + if (tvb && !tvisnil(tvb)) i = m; else j = m; + } + return i; +} + +/* +** Try to find a boundary in table `t'. A `boundary' is an integer index +** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). +*/ +MSize lj_tab_len(GCtab *t) +{ + MSize j = (MSize)t->asize; + if (j > 1 && tvisnil(arrayslot(t, j-1))) { + MSize i = 1; + while (j - i > 1) { + MSize m = (i+j)/2; + if (tvisnil(arrayslot(t, m-1))) j = m; else i = m; + } + return i-1; + } + if (j) j--; + if (t->hmask <= 0) + return j; + return unbound_search(t, j); +} + diff --git a/src/lj_tab.h b/src/lj_tab.h new file mode 100644 index 0000000000..e9e8bcd10a --- /dev/null +++ b/src/lj_tab.h @@ -0,0 +1,41 @@ +/* +** Table handling. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_TAB_H +#define _LJ_TAB_H + +#include "lj_obj.h" + +#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) + +LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); +LJ_FUNCA GCtab *lj_tab_dup(lua_State *L, const GCtab *kt); +LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); +LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); + +/* Caveat: all getters except lj_tab_get() can return NULL! */ + +LJ_FUNCA cTValue *lj_tab_getinth(GCtab *t, int32_t key); +LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key); +LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key); + +/* Caveat: all setters require a write barrier for the stored value. */ + +LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); +LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); +LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); +LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); + +#define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize) +#define arrayslot(t, i) (&tvref((t)->array)[(i)]) +#define lj_tab_getint(t, key) \ + (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_getinth((t), (key))) +#define lj_tab_setint(L, t, key) \ + (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) + +LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); +LJ_FUNCA MSize lj_tab_len(GCtab *t); + +#endif diff --git a/src/lj_target.h b/src/lj_target.h new file mode 100644 index 0000000000..0b464d3f05 --- /dev/null +++ b/src/lj_target.h @@ -0,0 +1,132 @@ +/* +** Definitions for target CPU. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_TARGET_H +#define _LJ_TARGET_H + +#include "lj_def.h" +#include "lj_arch.h" + +/* -- Registers and spill slots ------------------------------------------- */ + +/* Register type (uint8_t in ir->r). */ +typedef uint32_t Reg; + +/* The hi-bit is NOT set for an allocated register. 
This means the value +** can be directly used without masking. The hi-bit is set for a register +** allocation hint or for RID_INIT. +*/ +#define RID_NONE 0x80 +#define RID_MASK 0x7f +#define RID_INIT (RID_NONE|RID_MASK) + +#define ra_noreg(r) ((r) & RID_NONE) +#define ra_hasreg(r) (!((r) & RID_NONE)) + +/* The ra_hashint() macro assumes a previous test for ra_noreg(). */ +#define ra_hashint(r) ((r) != RID_INIT) +#define ra_gethint(r) ((Reg)((r) & RID_MASK)) +#define ra_sethint(rr, r) rr = (uint8_t)((r)|RID_NONE) +#define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0) + +/* Spill slot 0 means no spill slot has been allocated. */ +#define SPS_NONE 0 + +#define ra_hasspill(s) ((s) != SPS_NONE) + +/* Combined register and spill slot (uint16_t in ir->prev). */ +typedef uint32_t RegSP; + +#define REGSP(r, s) ((r) + ((s) << 8)) +#define REGSP_HINT(r) ((r)|RID_NONE) +#define REGSP_INIT REGSP(RID_INIT, 0) + +#define regsp_reg(rs) ((rs) & 255) +#define regsp_spill(rs) ((rs) >> 8) +#define regsp_used(rs) \ + (((rs) & ~REGSP(RID_MASK, 0)) != REGSP(RID_NONE, 0)) + +/* -- Register sets ------------------------------------------------------- */ + +/* Bitset for registers. 32 registers suffice right now. +** Note that one set holds bits for both GPRs and FPRs. +*/ +typedef uint32_t RegSet; + +#define RID2RSET(r) (((RegSet)1) << (r)) +#define RSET_EMPTY 0 +#define RSET_RANGE(lo, hi) ((RID2RSET((hi)-(lo))-1) << (lo)) + +#define rset_test(rs, r) (((rs) >> (r)) & 1) +#define rset_set(rs, r) (rs |= RID2RSET(r)) +#define rset_clear(rs, r) (rs &= ~RID2RSET(r)) +#define rset_exclude(rs, r) (rs & ~RID2RSET(r)) +#define rset_picktop(rs) ((Reg)lj_fls(rs)) +#define rset_pickbot(rs) ((Reg)lj_ffs(rs)) + +/* -- Register allocation cost -------------------------------------------- */ + +/* The register allocation heuristic keeps track of the cost for allocating +** a specific register: +** +** A free register (obviously) has a cost of 0 and a 1-bit in the free mask. +** +** An already allocated register has the (non-zero) IR reference in the lowest +** bits and the result of a blended cost-model in the higher bits. +** +** The allocator first checks the free mask for a hit. Otherwise an (unrolled) +** linear search for the minimum cost is used. The search doesn't need to +** keep track of the position of the minimum, which makes it very fast. +** The lowest bits of the minimum cost show the desired IR reference whose +** register is the one to evict. +** +** Without the cost-model this degenerates to the standard heuristics for +** (reverse) linear-scan register allocation. Since code generation is done +** in reverse, a live interval extends from the last use to the first def. +** For an SSA IR the IR reference is the first (and only) def and thus +** trivially marks the end of the interval. The LSRA heuristics says to pick +** the register whose live interval has the furthest extent, i.e. the lowest +** IR reference in our case. +** +** A cost-model should take into account other factors, like spill-cost and +** restore- or rematerialization-cost, which depend on the kind of instruction. +** E.g. constants have zero spill costs, variant instructions have higher +** costs than invariants and PHIs should preferably never be spilled. +** +** Here's a first cut at simple, but effective blended cost-model for R-LSRA: +** - Due to careful design of the IR, constants already have lower IR +** references than invariants and invariants have lower IR references +** than variants. 
+** - The cost in the upper 16 bits is the sum of the IR reference and a +** weighted score. The score currently only takes into account whether +** the IRT_ISPHI bit is set in the instruction type. +** - The PHI weight is the minimum distance (in IR instructions) a PHI +** reference has to be further apart from a non-PHI reference to be spilled. +** - It should be a power of two (for speed) and must be between 2 and 32768. +** Good values for the PHI weight seem to be between 40 and 150. +** - Further study is required. +*/ +#define REGCOST_PHI_WEIGHT 64 + +/* Cost for allocating a specific register. */ +typedef uint32_t RegCost; + +/* Note: assumes 16 bit IRRef1. */ +#define REGCOST(cost, ref) ((RegCost)(ref) + ((RegCost)(cost) << 16)) +#define regcost_ref(rc) ((IRRef1)(rc)) + +#define REGCOST_T(t) \ + ((RegCost)((t)&IRT_ISPHI) * (((RegCost)(REGCOST_PHI_WEIGHT)<<16)/IRT_ISPHI)) +#define REGCOST_REF_T(ref, t) (REGCOST((ref), (ref)) + REGCOST_T((t))) + +/* -- Target-specific definitions ----------------------------------------- */ + +#if LJ_TARGET_X86ORX64 +#include "lj_target_x86.h" +#else +#error "Missing include for target CPU" +#endif + +#endif diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h new file mode 100644 index 0000000000..3ee4fa006f --- /dev/null +++ b/src/lj_target_x86.h @@ -0,0 +1,257 @@ +/* +** Definitions for x86 and x64 CPUs. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_TARGET_X86_H +#define _LJ_TARGET_X86_H + +/* -- Registers IDs ------------------------------------------------------- */ + +#if LJ_64 +#define GPRDEF(_) \ + _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) \ + _(R8D) _(R9D) _(R10D) _(R11D) _(R12D) _(R13D) _(R14D) _(R15D) +#define FPRDEF(_) \ + _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \ + _(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15) +#else +#define GPRDEF(_) \ + _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) +#define FPRDEF(_) \ + _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) +#endif + +#define RIDENUM(name) RID_##name, + +enum { + GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ + FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ + RID_MAX, + RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ + + /* Calling conventions. */ + RID_RET = RID_EAX, + + /* These definitions must match with the *.dasc file(s): */ + RID_BASE = RID_EDX, /* Interpreter BASE. */ + RID_PC = RID_ESI, /* Interpreter PC. */ + RID_DISPATCH = RID_EBX, /* Interpreter DISPATCH table. */ + + /* Register ranges [min, max) and number of registers. */ + RID_MIN_GPR = RID_EAX, + RID_MIN_FPR = RID_XMM0, + RID_MAX_GPR = RID_MIN_FPR, + RID_MAX_FPR = RID_MAX, + RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, + RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR, +}; + +/* -- Register sets ------------------------------------------------------- */ + +/* Make use of all registers, except the stack pointer. */ +#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) +#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) +#define RSET_ALL (RSET_GPR|RSET_FPR) + +#if LJ_64 +/* Note: this requires the use of FORCE_REX! */ +#define RSET_GPR8 RSET_GPR +#else +#define RSET_GPR8 (RSET_RANGE(RID_EAX, RID_EBX+1)) +#endif + +/* ABI-specific register sets. */ +#define RSET_ACD (RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX)) +#if LJ_64 +#ifdef _WIN64 +/* Windows x64 ABI. 
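**
** Only RAX, RCX, RDX, R8-R11 and XMM0-XMM5 are caller-save on Windows/x64;
** RSI, RDI and XMM6-XMM15 are callee-save, unlike in the SysV x64 ABI.
** Hence the smaller scratch set here.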
*/ +#define RSET_SCRATCH \ + (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1)) +#else +/* The rest of the civilized x64 world has a common ABI. */ +#define RSET_SCRATCH \ + (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR) +#endif +#else +/* Common x86 ABI. */ +#define RSET_SCRATCH (RSET_ACD|RSET_FPR) +#endif + +#if LJ_64 +/* Prefer the low 8 regs of each type to reduce REX prefixes. */ +#undef rset_picktop +#define rset_picktop(rs) (lj_fls(lj_bswap(rs)) ^ 0x18) +#endif + +/* -- Spill slots --------------------------------------------------------- */ + +/* Stack layout for the compiled machine code (after stack adjustment). */ +enum { + SPS_TEMP1, /* Temps (3*dword) for calls and asm_x87load. */ + SPS_TEMP2, + SPS_TEMP3, + SPS_FIRST, /* First spill slot for general use. */ + + /* This definition must match with the *.dasc file(s). */ + SPS_FIXED = 6 /* Available fixed spill slots in interpreter frame. */ +}; + +/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */ +#define sps_scale(slot) (4 * (int32_t)(slot)) +#define sps_adjust(as) (sps_scale((as->evenspill-SPS_FIXED+3)&~3)) + +/* -- Exit state ---------------------------------------------------------- */ + +/* This definition must match with the *.dasc file(s). */ +typedef struct { + lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ + int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ + int32_t spill[256]; /* Spill slots. */ +} ExitState; + +/* -- x86 ModRM operand encoding ------------------------------------------ */ + +typedef enum { + XM_OFS0 = 0x00, XM_OFS8 = 0x40, XM_OFS32 = 0x80, XM_REG = 0xc0, + XM_SCALE1 = 0x00, XM_SCALE2 = 0x40, XM_SCALE4 = 0x80, XM_SCALE8 = 0xc0, + XM_MASK = 0xc0 +} x86Mode; + +/* Structure to hold variable ModRM operand. */ +typedef struct { + int32_t ofs; /* Offset. */ + uint8_t base; /* Base register or RID_NONE. */ + uint8_t idx; /* Index register or RID_NONE. */ + uint8_t scale; /* Index scale (XM_SCALE1 .. XM_SCALE8). */ +} x86ModRM; + +/* -- Opcodes ------------------------------------------------------------- */ + +/* Macros to construct variable-length x86 opcodes. -(len+1) is in LSB. */ +#define XO_(o) ((uint32_t)(0x0000fe + (0x##o<<24))) +#define XO_FPU(a,b) ((uint32_t)(0x00fd + (0x##a<<16)+(0x##b<<24))) +#define XO_0f(o) ((uint32_t)(0x0f00fd + (0x##o<<24))) +#define XO_66(o) ((uint32_t)(0x6600fd + (0x##o<<24))) +#define XO_660f(o) ((uint32_t)(0x0f66fc + (0x##o<<24))) +#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24))) +#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24))) + +/* This list of x86 opcodes is not intended to be complete. Opcodes are only +** included when needed. Take a look at DynASM or jit.dis_x86 to see the +** whole mess. +*/ +typedef enum { + /* Fixed length opcodes. XI_* prefix. */ + XI_NOP = 0x90, + XI_CALL = 0xe8, + XI_JMP = 0xe9, + XI_JMPs = 0xeb, + XI_JCCs = 0x70, /* Really 7x. */ + XI_JCCn = 0x80, /* Really 0f8x. */ + XI_LEA = 0x8d, + XI_MOVri = 0xb8, /* Really b8+r. */ + XI_ARITHib = 0x80, + XI_ARITHi = 0x81, + XI_ARITHi8 = 0x83, + XI_PUSHi8 = 0x6a, + XI_TEST = 0x85, + XI_MOVmi = 0xc7, + XI_BSWAP = 0xc8, /* Really 0fc8+r. */ + + /* Note: little-endian byte-order! */ + XI_FLDZ = 0xeed9, + XI_FLD1 = 0xe8d9, + XI_FLDLG2 = 0xecd9, + XI_FLDLN2 = 0xedd9, + XI_FPOP = 0xd8dd, /* Really fstp st0. */ + XI_FPOP1 = 0xd9dd, /* Really fstp st1. 
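**
** Note the little-endian byte order mentioned above: 0xd9dd is emitted as
** the byte sequence dd d9, i.e. opcode dd with ModRM byte d9, which
** encodes fstp st1.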
*/ + XI_FRNDINT = 0xfcd9, + XI_FSIN = 0xfed9, + XI_FCOS = 0xffd9, + XI_FPTAN = 0xf2d9, + XI_FPATAN = 0xf3d9, + XI_FSCALE = 0xfdd9, + XI_FYL2X = 0xf1d9, + + /* Variable-length opcodes. XO_* prefix. */ + XO_MOV = XO_(8b), + XO_MOVto = XO_(89), + XO_MOVtow = XO_66(89), + XO_MOVtob = XO_(88), + XO_MOVmi = XO_(c7), + XO_MOVmib = XO_(c6), + XO_LEA = XO_(8d), + XO_ARITHib = XO_(80), + XO_ARITHi = XO_(81), + XO_ARITHi8 = XO_(83), + XO_SHIFTi = XO_(c1), + XO_SHIFT1 = XO_(d1), + XO_SHIFTcl = XO_(d3), + XO_IMULi8 = XO_(6b), + XO_CMP = XO_(3b), + XO_TEST = XO_(85), + XO_GROUP3b = XO_(f6), + XO_GROUP3 = XO_(f7), + XO_MOVZXb = XO_0f(b6), + XO_MOVZXw = XO_0f(b7), + XO_MOVSXb = XO_0f(be), + XO_MOVSXw = XO_0f(bf), + + XO_MOVSD = XO_f20f(10), + XO_MOVSDto = XO_f20f(11), + XO_MOVLPD = XO_660f(12), + XO_MOVAPS = XO_0f(28), + XO_XORPS = XO_0f(57), + XO_ANDPS = XO_0f(54), + XO_ADDSD = XO_f20f(58), + XO_SUBSD = XO_f20f(5c), + XO_MULSD = XO_f20f(59), + XO_DIVSD = XO_f20f(5e), + XO_SQRTSD = XO_f20f(51), + XO_MINSD = XO_f20f(5d), + XO_MAXSD = XO_f20f(5f), + XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ + XO_UCOMISD = XO_660f(2e), + XO_CVTSI2SD = XO_f20f(2a), + XO_CVTSD2SI = XO_f20f(2d), + XO_CVTTSD2SI= XO_f20f(2c), + XO_MOVDto = XO_660f(7e), + + XO_FLDq = XO_(dd), XOg_FLDq = 0, + XO_FILDd = XO_(db), XOg_FILDd = 0, + XO_FSTPq = XO_(dd), XOg_FSTPq = 3, + XO_FISTPq = XO_(df), XOg_FISTPq = 7, +} x86Op; + +/* x86 opcode groups. */ +typedef uint32_t x86Group; + +#define XG_(i8, i, g) ((x86Group)(((i8) << 16) + ((i) << 8) + (g))) +#define XG_ARITHi(g) XG_(XI_ARITHi8, XI_ARITHi, g) + +#define XO_ARITH(a) ((x86Op)(0x030000fe + ((a)<<27))) + +typedef enum { + XOg_ADD, XOg_OR, XOg_ADC, XOg_SBB, XOg_AND, XOg_SUB, XOg_XOR, XOg_CMP +} x86Arith; + +typedef enum { + XOg_ROL, XOg_ROR, XOg_RCL, XOg_RCR, XOg_SHL, XOg_SHR, XOg_SAL, XOg_SAR +} x86Shift; + +typedef enum { + XOg_TEST, XOg_TEST_, XOg_NOT, XOg_NEG, XOg_MUL, XOg_IMUL, XOg_DIV, XOg_IDIV +} x86Group3; + +/* x86 condition codes. */ +typedef enum { + CC_O, CC_NO, CC_B, CC_NB, CC_E, CC_NE, CC_BE, CC_NBE, + CC_S, CC_NS, CC_P, CC_NP, CC_L, CC_NL, CC_LE, CC_NLE, + CC_C = CC_B, CC_NAE = CC_C, CC_NC = CC_NB, CC_AE = CC_NB, + CC_Z = CC_E, CC_NZ = CC_NE, CC_NA = CC_BE, CC_A = CC_NBE, + CC_PE = CC_P, CC_PO = CC_NP, CC_NGE = CC_L, CC_GE = CC_NL, + CC_NG = CC_LE, CC_G = CC_NLE +} x86CC; + +#endif diff --git a/src/lj_trace.c b/src/lj_trace.c new file mode 100644 index 0000000000..6ceb56338b --- /dev/null +++ b/src/lj_trace.c @@ -0,0 +1,591 @@ +/* +** Trace management. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_trace_c +#define LUA_CORE + +#include "lj_obj.h" + +#if LJ_HASJIT + +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_str.h" +#include "lj_frame.h" +#include "lj_state.h" +#include "lj_bc.h" +#include "lj_ir.h" +#include "lj_jit.h" +#include "lj_iropt.h" +#include "lj_mcode.h" +#include "lj_trace.h" +#include "lj_snap.h" +#include "lj_gdbjit.h" +#include "lj_record.h" +#include "lj_asm.h" +#include "lj_dispatch.h" +#include "lj_vm.h" +#include "lj_vmevent.h" +#include "lj_target.h" + +/* -- Error handling ------------------------------------------------------ */ + +/* Synchronous abort with error message. */ +void lj_trace_err(jit_State *J, TraceError e) +{ + setnilV(&J->errinfo); /* No error info. */ + setintV(J->L->top++, (int32_t)e); + lj_err_throw(J->L, LUA_ERRRUN); +} + +/* Synchronous abort with error message and error info. 
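**
** Unlike lj_trace_err() above, this variant expects the caller to have
** stored the extra info in J->errinfo first, e.g. (a sketch of a typical
** NYI path in the recorder):
**
**   setintV(&J->errinfo, (int32_t)op);
**   lj_trace_err_info(J, LJ_TRERR_NYIBC);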
*/ +void lj_trace_err_info(jit_State *J, TraceError e) +{ + setintV(J->L->top++, (int32_t)e); + lj_err_throw(J->L, LUA_ERRRUN); +} + +/* -- Trace management ---------------------------------------------------- */ + +/* The current trace is first assembled in J->cur. The variable length +** arrays point to shared, growable buffers (J->irbuf etc.). The trace is +** kept in this state until a new trace needs to be created. Then the current +** trace and its data structures are copied to a new (compact) Trace object. +*/ + +/* Find a free trace number. */ +static TraceNo trace_findfree(jit_State *J) +{ + MSize osz, lim; + if (J->freetrace == 0) + J->freetrace = 1; + for (; J->freetrace < J->sizetrace; J->freetrace++) + if (J->trace[J->freetrace] == NULL) + return J->freetrace++; + /* Need to grow trace array. */ + lim = (MSize)J->param[JIT_P_maxtrace] + 1; + if (lim < 2) lim = 2; else if (lim > 65535) lim = 65535; + osz = J->sizetrace; + if (osz >= lim) + return 0; /* Too many traces. */ + lj_mem_growvec(J->L, J->trace, J->sizetrace, lim, Trace *); + while (osz < J->sizetrace) + J->trace[osz++] = NULL; + return J->freetrace; +} + +#define TRACE_COPYELEM(field, szfield, tp) \ + T2->field = (tp *)p; \ + memcpy(p, T->field, T->szfield*sizeof(tp)); \ + p += T->szfield*sizeof(tp); + +/* Save a trace by copying and compacting it. */ +static Trace *trace_save(jit_State *J, Trace *T) +{ + size_t sztr = ((sizeof(Trace)+7)&~7); + size_t szins = (T->nins-T->nk)*sizeof(IRIns); + size_t sz = sztr + szins + + T->nsnap*sizeof(SnapShot) + + T->nsnapmap*sizeof(IRRef2); + Trace *T2 = lj_mem_newt(J->L, (MSize)sz, Trace); + char *p = (char *)T2 + sztr; + memcpy(T2, T, sizeof(Trace)); + T2->ir = (IRIns *)p - T->nk; + memcpy(p, T->ir+T->nk, szins); + p += szins; + TRACE_COPYELEM(snap, nsnap, SnapShot) + TRACE_COPYELEM(snapmap, nsnapmap, IRRef2) + lj_gc_barriertrace(J2G(J), T); + return T2; +} + +/* Free a trace. */ +static void trace_free(jit_State *J, TraceNo traceno) +{ + lua_assert(traceno != 0); + if (traceno < J->freetrace) + J->freetrace = traceno; + lj_gdbjit_deltrace(J, J->trace[traceno]); + if (traceno == J->curtrace) { + lua_assert(J->trace[traceno] == &J->cur); + J->trace[traceno] = NULL; + J->curtrace = 0; + } else { + Trace *T = J->trace[traceno]; + lua_assert(T != NULL && T != &J->cur); + J->trace[traceno] = NULL; + lj_mem_free(J2G(J), T, + ((sizeof(Trace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) + + T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(IRRef2)); + } +} + +/* Free all traces associated with a prototype. No unpatching needed. */ +void lj_trace_freeproto(global_State *g, GCproto *pt) +{ + jit_State *J = G2J(g); + TraceNo traceno; + /* Free all root traces. */ + for (traceno = pt->trace; traceno != 0; ) { + TraceNo side, nextroot = J->trace[traceno]->nextroot; + /* Free all side traces. */ + for (side = J->trace[traceno]->nextside; side != 0; ) { + TraceNo next = J->trace[side]->nextside; + trace_free(J, side); + side = next; + } + /* Now free the trace itself. */ + trace_free(J, traceno); + traceno = nextroot; + } +} + +/* Re-enable compiling a prototype by unpatching any modified bytecode. */ +void lj_trace_reenableproto(GCproto *pt) +{ + if ((pt->flags & PROTO_HAS_ILOOP)) { + BCIns *bc = pt->bc; + BCPos i, sizebc = pt->sizebc; + pt->flags &= ~PROTO_HAS_ILOOP; + for (i = 0; i < sizebc; i++) { + BCOp op = bc_op(bc[i]); + if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP) + setbc_op(&bc[i], (int)op+(int)BC_LOOP-(int)BC_ILOOP); + } + } +} + +/* Unpatch the bytecode modified by a root trace. 
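**
** This is the inverse of the patching done in trace_stop() below: the
** compiler turned e.g. BC_FORL into BC_JFORL with the trace number in
** operand D. T->startins still holds the original instruction, so it is
** simply stored back over the patched location, which is recovered from
** the PC slot of snapshot 0.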
*/ +static void trace_unpatch(jit_State *J, Trace *T) +{ + BCOp op = bc_op(T->startins); + uint32_t pcofs = T->snap[0].mapofs + T->snap[0].nslots; + BCIns *pc = ((BCIns *)(uintptr_t)T->snapmap[pcofs]) - 1; + switch (op) { + case BC_FORL: + lua_assert(bc_op(*pc) == BC_JFORI); + setbc_op(pc, BC_FORI); /* Unpatch JFORI, too. */ + pc += bc_j(*pc); + lua_assert(bc_op(*pc) == BC_JFORL && J->trace[bc_d(*pc)] == T); + *pc = T->startins; + break; + case BC_LOOP: + lua_assert(bc_op(*pc) == BC_JLOOP && J->trace[bc_d(*pc)] == T); + *pc = T->startins; + break; + case BC_ITERL: + lua_assert(bc_op(*pc) == BC_JMP); + pc += bc_j(*pc)+2; + lua_assert(bc_op(*pc) == BC_JITERL && J->trace[bc_d(*pc)] == T); + *pc = T->startins; + break; + case BC_CALL: + lj_trace_err(J, LJ_TRERR_NYILNKF); + break; + case BC_JMP: /* No need to unpatch branches in parent traces (yet). */ + default: + lua_assert(0); + break; + } +} + +/* Flush a root trace and any attached side traces. */ +void lj_trace_flush(jit_State *J, TraceNo traceno) +{ + Trace *T = NULL; + GCproto *pt; + if (traceno > 0 && traceno <= J->sizetrace) + T = J->trace[traceno]; + if (T == NULL) + return; + pt = &gcref(T->startpt)->pt; + if (T->root == 0 && pt != NULL) { + TraceNo side; + /* First unpatch any modified bytecode. */ + trace_unpatch(J, T); + /* Unlink root trace from chain anchored in prototype. */ + if (pt->trace == traceno) { /* Trace is first in chain. Easy. */ + pt->trace = T->nextroot; + } else { /* Otherwise search in chain of root traces. */ + Trace *T2 = J->trace[pt->trace]; + while (T2->nextroot != traceno) { + lua_assert(T2->nextroot != 0); + T2 = J->trace[T2->nextroot]; + } + T2->nextroot = T->nextroot; /* Unlink from chain. */ + } + /* Free all side traces. */ + for (side = T->nextside; side != 0; ) { + TraceNo next = J->trace[side]->nextside; + trace_free(J, side); + side = next; + } + /* Now free the trace itself. */ + trace_free(J, traceno); + } /* Flush for non-root traces is currently ignored. */ +} + +/* Flush all traces associated with a prototype. */ +void lj_trace_flushproto(global_State *g, GCproto *pt) +{ + while (pt->trace != 0) + lj_trace_flush(G2J(g), pt->trace); +} + +/* Flush all traces. */ +int lj_trace_flushall(lua_State *L) +{ + jit_State *J = L2J(L); + ptrdiff_t i; + if ((J2G(J)->hookmask & HOOK_GC)) + return 1; + for (i = (ptrdiff_t)J->sizetrace-1; i > 0; i--) + lj_trace_flush(J, (TraceNo)i); +#ifdef LUA_USE_ASSERT + for (i = 0; i < (ptrdiff_t)J->sizetrace; i++) + lua_assert(J->trace[i] == NULL); +#endif + J->freetrace = 0; + /* Free the whole machine code and invalidate all exit stub groups. */ + lj_mcode_free(J); + memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup)); + lj_vmevent_send(L, TRACE, + setstrV(L, L->top++, lj_str_newlit(L, "flush")); + ); + return 0; +} + +/* Free everything associated with the JIT compiler state. */ +void lj_trace_freestate(global_State *g) +{ + jit_State *J = G2J(g); +#ifdef LUA_USE_ASSERT + { /* This assumes all traces have already been freed. 
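**
** Traces are expected to have been flushed via lj_trace_flushall() before
** the JIT state is torn down; the loop below only verifies this in
** assertion-enabled builds.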
*/ + ptrdiff_t i; + for (i = 0; i < (ptrdiff_t)J->sizetrace; i++) + lua_assert(J->trace[i] == NULL); + } +#endif + lj_mcode_free(J); + lj_ir_knum_freeall(J); + lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, IRRef2); + lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); + lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); + lj_mem_freevec(g, J->trace, J->sizetrace, Trace *); +} + +/* -- Trace compiler state machine ---------------------------------------- */ + +/* Penalize a bytecode instruction by bumping its hot counter. */ +static void hotpenalty(jit_State *J, const BCIns *pc, TraceError e) +{ + uint32_t i, val = HOTCOUNT_MIN_PENALTY; + for (i = 0; i < PENALTY_SLOTS; i++) + if (J->penalty[i].pc == pc) { + val = ((uint32_t)J->penalty[i].val << 1) + 1; + if (val > HOTCOUNT_MAX_PENALTY) val = HOTCOUNT_MAX_PENALTY; + goto setpenalty; + } + i = J->penaltyslot; + J->penaltyslot = (J->penaltyslot + 1) & (PENALTY_SLOTS-1); + J->penalty[i].pc = pc; +setpenalty: + J->penalty[i].val = (uint16_t)val; + J->penalty[i].reason = e; + hotcount_set(J2GG(J), pc+1, val); +} + +/* Start tracing. */ +static void trace_start(jit_State *J) +{ + lua_State *L; + + if (J->curtrace != 0 && J->trace[J->curtrace] == &J->cur) { + J->trace[J->curtrace] = trace_save(J, &J->cur); /* Save current trace. */ + J->curtrace = 0; + } + + if ((J->pt->flags & PROTO_NO_JIT)) { /* JIT disabled for this proto? */ + if (J->parent == 0) { + if (J->pc >= J->pt->bc) { + /* Lazy bytecode patching to disable hotcount events. */ + setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP); + J->pt->flags |= PROTO_HAS_ILOOP; + } else { + /* NYI: lazy closure patching to disable hotcall events. */ + lua_assert(0); + } + } + J->state = LJ_TRACE_IDLE; /* Silently ignored. */ + return; + } + + /* Get a new trace number. */ + J->curtrace = trace_findfree(J); + if (LJ_UNLIKELY(J->curtrace == 0)) { /* No free trace? */ + lua_assert((J2G(J)->hookmask & HOOK_GC) == 0); + lj_trace_flushall(J->L); + J->state = LJ_TRACE_IDLE; /* Silently ignored. */ + return; + } + J->trace[J->curtrace] = &J->cur; + + /* Setup enough of the current trace to be able to send the vmevent. */ + memset(&J->cur, 0, sizeof(Trace)); + J->cur.nins = J->cur.nk = REF_BASE; + J->cur.ir = J->irbuf; + J->cur.snap = J->snapbuf; + J->cur.snapmap = J->snapmapbuf; + /* J->cur.nsnapmap = 0; */ + J->mergesnap = 0; + J->needsnap = 0; + J->guardemit.irt = 0; + + L = J->L; + lj_vmevent_send(L, TRACE, + setstrV(L, L->top++, lj_str_newlit(L, "start")); + setintV(L->top++, J->curtrace); + setfuncV(L, L->top++, J->fn); + setintV(L->top++, J->pc - J->pt->bc + 1); + if (J->parent) { + setintV(L->top++, J->parent); + setintV(L->top++, J->exitno); + } + ); + lj_record_setup(J); +} + +/* Stop tracing. */ +static void trace_stop(jit_State *J) +{ + BCIns *pc = (BCIns *)J->startpc; /* Not const here. */ + BCOp op = bc_op(J->cur.startins); + GCproto *pt = &gcref(J->cur.startpt)->pt; + lua_State *L; + + switch (op) { + case BC_FORL: + setbc_op(pc+bc_j(J->cur.startins), BC_JFORI); /* Patch FORI, too. */ + /* fallthrough */ + case BC_LOOP: + case BC_ITERL: + /* Patch bytecode of starting instruction in root trace. */ + setbc_op(pc, (int)op+(int)BC_JLOOP-(int)BC_LOOP); + setbc_d(pc, J->curtrace); + /* Add to root trace chain in prototype. */ + J->cur.nextroot = pt->trace; + pt->trace = (TraceNo1)J->curtrace; + break; + case BC_CALL: + lj_trace_err(J, LJ_TRERR_NYILNKF); + break; + case BC_JMP: + /* Patch exit branch in parent to side trace entry. 
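**
** After this patch, taking that exit in the parent jumps straight into
** the side trace's machine code instead of unwinding to the interpreter
** through the exit handler.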
*/ + lua_assert(J->parent != 0 && J->cur.root != 0); + lj_asm_patchexit(J, J->trace[J->parent], J->exitno, J->cur.mcode); + /* Avoid compiling a side trace twice (stack resizing uses parent exit). */ + J->trace[J->parent]->snap[J->exitno].count = SNAPCOUNT_DONE; + /* Add to side trace chain in root trace. */ + { + Trace *root = J->trace[J->cur.root]; + root->nchild++; + J->cur.nextside = root->nextside; + root->nextside = (TraceNo1)J->curtrace; + } + break; + default: + lua_assert(0); + break; + } + + /* Commit new mcode only after all patching is done. */ + lj_mcode_commit(J, J->cur.mcode); + lj_gdbjit_addtrace(J, &J->cur, J->curtrace); + + L = J->L; + lj_vmevent_send(L, TRACE, + setstrV(L, L->top++, lj_str_newlit(L, "stop")); + setintV(L->top++, J->curtrace); + ); +} + +/* Abort tracing. */ +static int trace_abort(jit_State *J) +{ + lua_State *L = J->L; + TraceError e = LJ_TRERR_RECERR; + lj_mcode_abort(J); + if (tvisnum(L->top-1)) + e = (TraceError)lj_num2int(numV(L->top-1)); + if (e == LJ_TRERR_MCODELM) { + J->state = LJ_TRACE_ASM; + return 1; /* Retry ASM with new MCode area. */ + } + if (J->parent == 0) + hotpenalty(J, J->startpc, e); /* Penalize starting instruction. */ + if (J->curtrace) { /* Is there anything to abort? */ + ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. */ + lj_vmevent_send(L, TRACE, + setstrV(L, L->top++, lj_str_newlit(L, "abort")); + setintV(L->top++, J->curtrace); + setfuncV(L, L->top++, J->fn); + setintV(L->top++, J->pc - J->pt->bc + 1); + copyTV(L, L->top++, restorestack(L, errobj)); + copyTV(L, L->top++, &J->errinfo); + ); + /* Drop aborted trace after the vmevent (which may still access it). */ + J->trace[J->curtrace] = NULL; + if (J->curtrace < J->freetrace) + J->freetrace = J->curtrace; + J->curtrace = 0; + } + L->top--; /* Remove error object */ + if (e == LJ_TRERR_MCODEAL) + lj_trace_flushall(L); + return 0; +} + +/* State machine for the trace compiler. Protected callback. */ +static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) +{ + jit_State *J = (jit_State *)ud; + UNUSED(dummy); + do { + switch (J->state) { + case LJ_TRACE_START: + J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */ + trace_start(J); + lj_dispatch_update(J2G(J)); + break; + + case LJ_TRACE_RECORD: + setvmstate(J2G(J), RECORD); + lj_vmevent_send(L, RECORD, + setintV(L->top++, J->curtrace); + setfuncV(L, L->top++, J->fn); + setintV(L->top++, J->pc - J->pt->bc + 1); + setintV(L->top++, J->framedepth); + if (bcmode_mm(bc_op(*J->pc)) == MM_call) { + cTValue *o = &L->base[bc_a(*J->pc)]; + if (bc_op(*J->pc) == BC_ITERC) o -= 3; + copyTV(L, L->top++, o); + } + ); + lj_record_ins(J); + break; + + case LJ_TRACE_END: + J->loopref = 0; + if ((J->flags & JIT_F_OPT_LOOP) && J->cur.link == J->curtrace) { + setvmstate(J2G(J), OPT); + lj_opt_dce(J); + if (lj_opt_loop(J)) { /* Loop optimization failed? */ + J->loopref = J->cur.nins; + J->state = LJ_TRACE_RECORD; /* Try to continue recording. */ + break; + } + J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */ + } + J->state = LJ_TRACE_ASM; + break; + + case LJ_TRACE_ASM: + setvmstate(J2G(J), ASM); + lj_asm_trace(J, &J->cur); + trace_stop(J); + setvmstate(J2G(J), INTERP); + J->state = LJ_TRACE_IDLE; + lj_dispatch_update(J2G(J)); + return NULL; + + default: /* Trace aborted asynchronously. */ + setintV(L->top++, (int32_t)LJ_TRERR_RECERR); + /* fallthrough */ + case LJ_TRACE_ERR: + if (trace_abort(J)) + break; /* Retry. 
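**
** trace_abort() returns 1 only for LJ_TRERR_MCODELM, i.e. the assembler
** hit the mcode limit: the state is reset to LJ_TRACE_ASM and assembly
** is retried with a new mcode area.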
*/ + setvmstate(J2G(J), INTERP); + J->state = LJ_TRACE_IDLE; + lj_dispatch_update(J2G(J)); + return NULL; + } + } while (J->state > LJ_TRACE_RECORD); + return NULL; +} + +/* -- Event handling ------------------------------------------------------ */ + +/* A bytecode instruction is about to be executed. Record it. */ +void lj_trace_ins(jit_State *J) +{ + while (lj_vm_cpcall(J->L, trace_state, NULL, (void *)J) != 0) + J->state = LJ_TRACE_ERR; +} + +/* Start recording a new trace. */ +static void trace_new(jit_State *J) +{ + /* Only start a new trace if not inside __gc call or vmevent. */ + if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) { + lua_assert(J->state == LJ_TRACE_IDLE); + J->state = LJ_TRACE_START; + J->fn = curr_func(J->L); + J->pt = funcproto(J->fn); + lj_trace_ins(J); + } +} + +/* A hotcount triggered. Start recording a root trace. */ +void lj_trace_hot(jit_State *J, const BCIns *pc) +{ + lua_State *L = J->L; + L->top = curr_topL(L); /* Only called from Lua and NRESULTS is not used. */ + hotcount_set(J2GG(J), pc, J->param[JIT_P_hotloop]+1); /* Reset hotcount. */ + J->parent = 0; /* Root trace. */ + J->exitno = 0; + J->pc = pc-1; /* The interpreter bytecode PC is offset by 1. */ + trace_new(J); +} + +/* A trace exited. Restore interpreter state and check for hot exits. */ +void *lj_trace_exit(jit_State *J, void *exptr) +{ + lua_State *L = J->L; + void *cf; + + /* Restore interpreter state. */ + lj_snap_restore(J, exptr); + cf = cframe_raw(L->cframe); + cframe_pc(cf) = J->pc; + + lj_vmevent_send(L, TEXIT, + ExitState *ex = (ExitState *)exptr; + uint32_t i; + lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); + setintV(L->top++, J->parent); + setintV(L->top++, J->exitno); + setintV(L->top++, RID_NUM_GPR); + setintV(L->top++, RID_NUM_FPR); + for (i = 0; i < RID_NUM_GPR; i++) + setintV(L->top++, ex->gpr[i]); + for (i = 0; i < RID_NUM_FPR; i++) { + setnumV(L->top, ex->fpr[i]); + if (LJ_UNLIKELY(tvisnan(L->top))) + setnanV(L->top); + L->top++; + } + ); + + { /* Check for a hot exit. */ + SnapShot *snap = &J->trace[J->parent]->snap[J->exitno]; + if (snap->count != SNAPCOUNT_DONE && + ++snap->count >= J->param[JIT_P_hotexit]) + trace_new(J); /* Start recording a side trace. */ + } + + return cf; /* Return the interpreter C frame. */ +} + +#endif diff --git a/src/lj_trace.h b/src/lj_trace.h new file mode 100644 index 0000000000..9d8eb79016 --- /dev/null +++ b/src/lj_trace.h @@ -0,0 +1,52 @@ +/* +** Trace management. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_TRACE_H +#define _LJ_TRACE_H + +#if LJ_HASJIT + +#include "lj_obj.h" +#include "lj_jit.h" +#include "lj_dispatch.h" + +/* Trace errors. */ +typedef enum { +#define TREDEF(name, msg) LJ_TRERR_##name, +#include "lj_traceerr.h" + LJ_TRERR__MAX +} TraceError; + +LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e); +LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e); + +/* Trace management. */ +LJ_FUNC void lj_trace_freeproto(global_State *g, GCproto *pt); +LJ_FUNC void lj_trace_reenableproto(GCproto *pt); +LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); +LJ_FUNC void lj_trace_flush(jit_State *J, TraceNo traceno); +LJ_FUNC int lj_trace_flushall(lua_State *L); +LJ_FUNC void lj_trace_freestate(global_State *g); + +/* Event handling. 
*/ +LJ_FUNC void lj_trace_ins(jit_State *J); +LJ_FUNCA void lj_trace_hot(jit_State *J, const BCIns *pc); +LJ_FUNCA void *lj_trace_exit(jit_State *J, void *exptr); + +/* Signal asynchronous abort of trace or end of trace. */ +#define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE) +#define lj_trace_end(J) (J->state = LJ_TRACE_END) + +#else + +#define lj_trace_flushall(L) (UNUSED(L), 0) +#define lj_trace_freestate(g) UNUSED(g) +#define lj_trace_freeproto(g, pt) (UNUSED(g), UNUSED(pt), (void)0) +#define lj_trace_abort(g) UNUSED(g) +#define lj_trace_end(J) UNUSED(J) + +#endif + +#endif diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h new file mode 100644 index 0000000000..2c8347b073 --- /dev/null +++ b/src/lj_traceerr.h @@ -0,0 +1,59 @@ +/* +** Trace compiler error messages. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +/* This file may be included multiple times with different TREDEF macros. */ + +/* Recording. */ +TREDEF(RECERR, "error thrown or hook called during recording") +TREDEF(TRACEOV, "trace too long") +TREDEF(STACKOV, "trace too deep") +TREDEF(SNAPOV, "too many snapshots") +TREDEF(NYIBC, "NYI: bytecode %d") + +/* Recording loop ops. */ +TREDEF(LLEAVE, "leaving loop in root trace") +TREDEF(LINNER, "inner loop in root trace") +TREDEF(LUNROLL, "loop unroll limit reached") +TREDEF(LBLACKL, "blacklisted loop") + +/* Recording calls/returns. */ +TREDEF(BADTYPE, "bad argument type") +TREDEF(CJITOFF, "call to JIT-disabled function") +TREDEF(CUNROLL, "call unroll limit reached") +TREDEF(NYIRECU, "NYI: recursive calls") +TREDEF(NYILNKF, "NYI: linking/patching function calls") +TREDEF(NYIVF, "NYI: vararg function") +TREDEF(NYICF, "NYI: C function %p") +TREDEF(NYIFF, "NYI: FastFunc %s") +TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") +TREDEF(NYIRETL, "NYI: return to lower frame") + +/* Recording indexed load/store. */ +TREDEF(STORENN, "store with nil or NaN key") +TREDEF(NOMM, "missing metamethod") +TREDEF(IDXLOOP, "looping index lookup") +TREDEF(NYITMIX, "NYI: mixed sparse/dense table") + +/* Optimizations. */ +TREDEF(GFAIL, "guard would always fail") +TREDEF(PHIOV, "too many PHIs") +TREDEF(TYPEINS, "persistent type instability") + +/* Assembler. */ +TREDEF(MCODEAL, "failed to allocate mcode memory") +TREDEF(MCODEOV, "machine code too long") +TREDEF(MCODELM, "hit mcode limit (retrying)") +TREDEF(SPILLOV, "too many spill slots") +TREDEF(BADRA, "inconsistent register allocation") +TREDEF(NYIIR, "NYI: cannot assemble IR instruction %d") +TREDEF(NYIPHI, "NYI: PHI shuffling too complex") +TREDEF(NYICOAL, "NYI: register coalescing too complex") +TREDEF(NYIGCF, "NYI: gcstep sync with frames") + +#undef TREDEF + +/* Detecting unused error messages: + awk -F, '/^TREDEF/ { gsub(/TREDEF./, ""); printf "grep -q LJ_TRERR_%s *.[ch] || echo %s\n", $1, $1}' lj_traceerr.h | sh +*/ diff --git a/src/lj_udata.c b/src/lj_udata.c new file mode 100644 index 0000000000..863889c968 --- /dev/null +++ b/src/lj_udata.c @@ -0,0 +1,33 @@ +/* +** Userdata handling. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_udata_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_udata.h" + +GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env) +{ + GCudata *ud = lj_mem_newt(L, sizeof(GCudata) + sz, GCudata); + global_State *g = G(L); + newwhite(g, ud); /* Not finalized. */ + ud->gct = ~LJ_TUDATA; + ud->len = sz; + /* NOBARRIER: The GCudata is new (marked white). 
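**
** A write barrier is only required when a black object is made to point
** at a white one. A freshly allocated object is white and thus cannot be
** black yet, so the two reference stores below are safe without one.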
*/ + setgcrefnull(ud->metatable); + setgcref(ud->env, obj2gco(env)); + /* Chain to userdata list (after main thread). */ + setgcrefr(ud->nextgc, mainthread(g)->nextgc); + setgcref(mainthread(g)->nextgc, obj2gco(ud)); + return ud; +} + +void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud) +{ + lj_mem_free(g, ud, sizeudata(ud)); +} + diff --git a/src/lj_udata.h b/src/lj_udata.h new file mode 100644 index 0000000000..981304f83a --- /dev/null +++ b/src/lj_udata.h @@ -0,0 +1,14 @@ +/* +** Userdata handling. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_UDATA_H +#define _LJ_UDATA_H + +#include "lj_obj.h" + +LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env); +LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud); + +#endif diff --git a/src/lj_vm.h b/src/lj_vm.h new file mode 100644 index 0000000000..f50614bbe4 --- /dev/null +++ b/src/lj_vm.h @@ -0,0 +1,66 @@ +/* +** Assembler VM interface definitions. +** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_VM_H +#define _LJ_VM_H + +#include "lj_obj.h" + +/* Entry points for ASM parts of VM. */ +LJ_ASMF void lj_vm_call(lua_State *L, TValue *base, int nres1); +LJ_ASMF int lj_vm_pcall(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); +typedef TValue *(*lua_CPFunction)(lua_State *L, lua_CFunction func, void *ud); +LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CPFunction cp, lua_CFunction func, + void *ud); +LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); +LJ_ASMF_NORET void lj_vm_unwind_c(void *cframe, int errcode); +LJ_ASMF_NORET void lj_vm_unwind_ff(void *cframe); + +/* Miscellaneous functions. */ +#if LJ_TARGET_X86ORX64 +LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]); +#endif +LJ_ASMF double lj_vm_foldarith(double x, double y, int op); +LJ_ASMF double lj_vm_foldfpm(double x, int op); + +/* Dispatch targets for recording and hooks. */ +LJ_ASMF void lj_vm_record(void); +LJ_ASMF void lj_vm_hook(void); + +/* Trace exit handling. */ +LJ_ASMF void lj_vm_exit_handler(void); +LJ_ASMF void lj_vm_exit_interp(void); + +/* Handlers callable from compiled code. */ +LJ_ASMF void lj_vm_floor(void); +LJ_ASMF void lj_vm_ceil(void); +LJ_ASMF void lj_vm_trunc(void); +LJ_ASMF void lj_vm_exp(void); +LJ_ASMF void lj_vm_exp2(void); +LJ_ASMF void lj_vm_pow(void); +LJ_ASMF void lj_vm_powi(void); + +/* Call gates for functions. */ +LJ_ASMF void lj_gate_lf(void); +LJ_ASMF void lj_gate_lv(void); +LJ_ASMF void lj_gate_c(void); + +/* Continuations for metamethods. */ +LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */ +LJ_ASMF void lj_cont_ra(void); /* Store result in RA from instruction. */ +LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */ +LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */ +LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */ + +/* Start of the ASM code. */ +LJ_ASMF void lj_vm_asm_begin(void); + +/* Opcode handler offsets, relative to lj_vm_asm_begin. */ +LJ_ASMF const uint16_t lj_vm_op_ofs[]; + +#define makeasmfunc(ofs) \ + ((ASMFunction)((char *)lj_vm_asm_begin + (ofs))) + +#endif diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c new file mode 100644 index 0000000000..657eb8d72b --- /dev/null +++ b/src/lj_vmevent.c @@ -0,0 +1,56 @@ +/* +** VM event handling. +** Copyright (C) 2005-2009 Mike Pall. 
See Copyright Notice in luajit.h
+*/
+
+#include <stdio.h>
+
+#define lj_vmevent_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_state.h"
+#include "lj_dispatch.h"
+#include "lj_vm.h"
+#include "lj_vmevent.h"
+
+ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev)
+{
+  global_State *g = G(L);
+  GCstr *s = lj_str_newlit(L, LJ_VMEVENTS_REGKEY);
+  cTValue *tv = lj_tab_getstr(tabV(registry(L)), s);
+  if (tvistab(tv)) {
+    int hash = VMEVENT_HASH(ev);
+    tv = lj_tab_getint(tabV(tv), hash);
+    if (tv && tvisfunc(tv)) {
+      lj_state_checkstack(L, LUA_MINSTACK);
+      setfuncV(L, L->top++, funcV(tv));
+      return savestack(L, L->top);
+    }
+  }
+  g->vmevmask &= ~VMEVENT_MASK(ev);  /* No handler: cache this fact. */
+  return 0;
+}
+
+void lj_vmevent_call(lua_State *L, ptrdiff_t argbase)
+{
+  global_State *g = G(L);
+  uint8_t oldmask = g->vmevmask;
+  uint8_t oldh = hook_save(g);
+  int status;
+  g->vmevmask = 0;  /* Disable all events. */
+  hook_vmevent(g);
+  status = lj_vm_pcall(L, restorestack(L, argbase), 0+1, 0);
+  if (LJ_UNLIKELY(status)) {
+    /* Really shouldn't use stderr here, but where else to complain? */
+    L->top--;
+    fprintf(stderr, "VM handler failed: %s\n",
+            tvisstr(L->top) ? strVdata(L->top) : "?");
+  }
+  hook_restore(g, oldh);
+  if (g->vmevmask != VMEVENT_NOCACHE)
+    g->vmevmask = oldmask;  /* Restore event mask, but not if not modified. */
+}
+
diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h
new file mode 100644
index 0000000000..9eaa52e1d0
--- /dev/null
+++ b/src/lj_vmevent.h
@@ -0,0 +1,49 @@
+/*
+** VM event handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_VMEVENT_H
+#define _LJ_VMEVENT_H
+
+#include "lj_obj.h"
+
+/* Registry key for VM event handler table. */
+#define LJ_VMEVENTS_REGKEY "_VMEVENTS"
+#define LJ_VMEVENTS_HSIZE 4
+
+#define VMEVENT_MASK(ev) ((uint8_t)1 << ((int)(ev) & 7))
+#define VMEVENT_HASH(ev) ((int)(ev) & ~7)
+#define VMEVENT_HASHIDX(h) ((int)(h) << 3)
+#define VMEVENT_NOCACHE 255
+
+#define VMEVENT_DEF(name, hash) \
+  LJ_VMEVENT_##name##_, \
+  LJ_VMEVENT_##name = ((LJ_VMEVENT_##name##_) & 7)|((hash) << 3)
+
+/* VM event IDs. */
+typedef enum {
+  VMEVENT_DEF(BC, 0x0000140b),
+  VMEVENT_DEF(TRACE, 0x10ea574d),
+  VMEVENT_DEF(RECORD, 0x5698231c),
+  VMEVENT_DEF(TEXIT, 0x12d984a7),
+  LJ_VMEVENT__MAX
+} VMEvent;
+
+#ifdef LUAJIT_DISABLE_VMEVENT
+#define lj_vmevent_send(L, ev, args) UNUSED(L)
+#else
+#define lj_vmevent_send(L, ev, args) \
+  if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \
+    ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \
+    if (argbase) { \
+      args \
+      lj_vmevent_call(L, argbase); \
+    } \
+  }
+
+LJ_FUNC ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev);
+LJ_FUNC void lj_vmevent_call(lua_State *L, ptrdiff_t argbase);
+#endif
+
+#endif
diff --git a/src/ljamalg.c b/src/ljamalg.c
new file mode 100644
index 0000000000..46d0e21f65
--- /dev/null
+++ b/src/ljamalg.c
@@ -0,0 +1,70 @@
+/*
+** LuaJIT core and libraries amalgamation.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+/*
++--------------------------------------------------------------------------+
+| WARNING: Compiling the amalgamation needs a lot of virtual memory        |
+| (around 160 MB with GCC 4.x)! If you don't have enough physical memory   |
+| your machine will start swapping to disk and the compile will not finish |
+| within a reasonable amount of time.                                      |
+| So either compile on a bigger machine or use the non-amalgamated build.  |
++--------------------------------------------------------------------------+
+*/
+
+#define ljamalg_c
+#define LUA_CORE
+
+/* To get the mremap prototype. Must be defined before any system includes. */
+#if defined(__linux__) && !defined(_GNU_SOURCE)
+#define _GNU_SOURCE
+#endif
+
+#include "lua.h"
+#include "lauxlib.h"
+
+#include "lj_gc.c"
+#include "lj_err.c"
+#include "lj_ctype.c"
+#include "lj_bc.c"
+#include "lj_obj.c"
+#include "lj_str.c"
+#include "lj_tab.c"
+#include "lj_func.c"
+#include "lj_udata.c"
+#include "lj_meta.c"
+#include "lj_state.c"
+#include "lj_dispatch.c"
+#include "lj_vmevent.c"
+#include "lj_api.c"
+#include "lj_lex.c"
+#include "lj_parse.c"
+#include "lj_lib.c"
+#include "lj_ir.c"
+#include "lj_opt_mem.c"
+#include "lj_opt_fold.c"
+#include "lj_opt_narrow.c"
+#include "lj_opt_dce.c"
+#include "lj_opt_loop.c"
+#include "lj_mcode.c"
+#include "lj_snap.c"
+#include "lj_record.c"
+#include "lj_asm.c"
+#include "lj_trace.c"
+#include "lj_gdbjit.c"
+#include "lj_alloc.c"
+
+#include "lib_aux.c"
+#include "lib_base.c"
+#include "lib_math.c"
+#include "lib_string.c"
+#include "lib_table.c"
+#include "lib_io.c"
+#include "lib_os.c"
+#include "lib_package.c"
+#include "lib_debug.c"
+#include "lib_bit.c"
+#include "lib_jit.c"
+#include "lib_init.c"
+
diff --git a/src/lua.h b/src/lua.h
new file mode 100644
index 0000000000..0e98b3744c
--- /dev/null
+++ b/src/lua.h
@@ -0,0 +1,388 @@
+/*
+** $Id: lua.h,v 1.218.1.5 2008/08/06 13:30:12 roberto Exp $
+** Lua - An Extensible Extension Language
+** Lua.org, PUC-Rio, Brazil (http://www.lua.org)
+** See Copyright Notice at the end of this file
+*/
+
+
+#ifndef lua_h
+#define lua_h
+
+#include <stdarg.h>
+#include <stddef.h>
+
+
+#include "luaconf.h"
+
+
+#define LUA_VERSION "Lua 5.1"
+#define LUA_RELEASE "Lua 5.1.4"
+#define LUA_VERSION_NUM 501
+#define LUA_COPYRIGHT "Copyright (C) 1994-2008 Lua.org, PUC-Rio"
+#define LUA_AUTHORS "R. Ierusalimschy, L. H. de Figueiredo & W. 
Celes" + + +/* mark for precompiled code (`Lua') */ +#define LUA_SIGNATURE "\033Lua" + +/* option for multiple returns in `lua_pcall' and `lua_call' */ +#define LUA_MULTRET (-1) + + +/* +** pseudo-indices +*/ +#define LUA_REGISTRYINDEX (-10000) +#define LUA_ENVIRONINDEX (-10001) +#define LUA_GLOBALSINDEX (-10002) +#define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i)) + + +/* thread status; 0 is OK */ +#define LUA_YIELD 1 +#define LUA_ERRRUN 2 +#define LUA_ERRSYNTAX 3 +#define LUA_ERRMEM 4 +#define LUA_ERRERR 5 + + +typedef struct lua_State lua_State; + +typedef int (*lua_CFunction) (lua_State *L); + + +/* +** functions that read/write blocks when loading/dumping Lua chunks +*/ +typedef const char * (*lua_Reader) (lua_State *L, void *ud, size_t *sz); + +typedef int (*lua_Writer) (lua_State *L, const void* p, size_t sz, void* ud); + + +/* +** prototype for memory-allocation functions +*/ +typedef void * (*lua_Alloc) (void *ud, void *ptr, size_t osize, size_t nsize); + + +/* +** basic types +*/ +#define LUA_TNONE (-1) + +#define LUA_TNIL 0 +#define LUA_TBOOLEAN 1 +#define LUA_TLIGHTUSERDATA 2 +#define LUA_TNUMBER 3 +#define LUA_TSTRING 4 +#define LUA_TTABLE 5 +#define LUA_TFUNCTION 6 +#define LUA_TUSERDATA 7 +#define LUA_TTHREAD 8 + + + +/* minimum Lua stack available to a C function */ +#define LUA_MINSTACK 20 + + +/* +** generic extra include file +*/ +#if defined(LUA_USER_H) +#include LUA_USER_H +#endif + + +/* type of numbers in Lua */ +typedef LUA_NUMBER lua_Number; + + +/* type for integer functions */ +typedef LUA_INTEGER lua_Integer; + + + +/* +** state manipulation +*/ +LUA_API lua_State *(lua_newstate) (lua_Alloc f, void *ud); +LUA_API void (lua_close) (lua_State *L); +LUA_API lua_State *(lua_newthread) (lua_State *L); + +LUA_API lua_CFunction (lua_atpanic) (lua_State *L, lua_CFunction panicf); + + +/* +** basic stack manipulation +*/ +LUA_API int (lua_gettop) (lua_State *L); +LUA_API void (lua_settop) (lua_State *L, int idx); +LUA_API void (lua_pushvalue) (lua_State *L, int idx); +LUA_API void (lua_remove) (lua_State *L, int idx); +LUA_API void (lua_insert) (lua_State *L, int idx); +LUA_API void (lua_replace) (lua_State *L, int idx); +LUA_API int (lua_checkstack) (lua_State *L, int sz); + +LUA_API void (lua_xmove) (lua_State *from, lua_State *to, int n); + + +/* +** access functions (stack -> C) +*/ + +LUA_API int (lua_isnumber) (lua_State *L, int idx); +LUA_API int (lua_isstring) (lua_State *L, int idx); +LUA_API int (lua_iscfunction) (lua_State *L, int idx); +LUA_API int (lua_isuserdata) (lua_State *L, int idx); +LUA_API int (lua_type) (lua_State *L, int idx); +LUA_API const char *(lua_typename) (lua_State *L, int tp); + +LUA_API int (lua_equal) (lua_State *L, int idx1, int idx2); +LUA_API int (lua_rawequal) (lua_State *L, int idx1, int idx2); +LUA_API int (lua_lessthan) (lua_State *L, int idx1, int idx2); + +LUA_API lua_Number (lua_tonumber) (lua_State *L, int idx); +LUA_API lua_Integer (lua_tointeger) (lua_State *L, int idx); +LUA_API int (lua_toboolean) (lua_State *L, int idx); +LUA_API const char *(lua_tolstring) (lua_State *L, int idx, size_t *len); +LUA_API size_t (lua_objlen) (lua_State *L, int idx); +LUA_API lua_CFunction (lua_tocfunction) (lua_State *L, int idx); +LUA_API void *(lua_touserdata) (lua_State *L, int idx); +LUA_API lua_State *(lua_tothread) (lua_State *L, int idx); +LUA_API const void *(lua_topointer) (lua_State *L, int idx); + + +/* +** push functions (C -> stack) +*/ +LUA_API void (lua_pushnil) (lua_State *L); +LUA_API void (lua_pushnumber) (lua_State *L, 
lua_Number n); +LUA_API void (lua_pushinteger) (lua_State *L, lua_Integer n); +LUA_API void (lua_pushlstring) (lua_State *L, const char *s, size_t l); +LUA_API void (lua_pushstring) (lua_State *L, const char *s); +LUA_API const char *(lua_pushvfstring) (lua_State *L, const char *fmt, + va_list argp); +LUA_API const char *(lua_pushfstring) (lua_State *L, const char *fmt, ...); +LUA_API void (lua_pushcclosure) (lua_State *L, lua_CFunction fn, int n); +LUA_API void (lua_pushboolean) (lua_State *L, int b); +LUA_API void (lua_pushlightuserdata) (lua_State *L, void *p); +LUA_API int (lua_pushthread) (lua_State *L); + + +/* +** get functions (Lua -> stack) +*/ +LUA_API void (lua_gettable) (lua_State *L, int idx); +LUA_API void (lua_getfield) (lua_State *L, int idx, const char *k); +LUA_API void (lua_rawget) (lua_State *L, int idx); +LUA_API void (lua_rawgeti) (lua_State *L, int idx, int n); +LUA_API void (lua_createtable) (lua_State *L, int narr, int nrec); +LUA_API void *(lua_newuserdata) (lua_State *L, size_t sz); +LUA_API int (lua_getmetatable) (lua_State *L, int objindex); +LUA_API void (lua_getfenv) (lua_State *L, int idx); + + +/* +** set functions (stack -> Lua) +*/ +LUA_API void (lua_settable) (lua_State *L, int idx); +LUA_API void (lua_setfield) (lua_State *L, int idx, const char *k); +LUA_API void (lua_rawset) (lua_State *L, int idx); +LUA_API void (lua_rawseti) (lua_State *L, int idx, int n); +LUA_API int (lua_setmetatable) (lua_State *L, int objindex); +LUA_API int (lua_setfenv) (lua_State *L, int idx); + + +/* +** `load' and `call' functions (load and run Lua code) +*/ +LUA_API void (lua_call) (lua_State *L, int nargs, int nresults); +LUA_API int (lua_pcall) (lua_State *L, int nargs, int nresults, int errfunc); +LUA_API int (lua_cpcall) (lua_State *L, lua_CFunction func, void *ud); +LUA_API int (lua_load) (lua_State *L, lua_Reader reader, void *dt, + const char *chunkname); + +LUA_API int (lua_dump) (lua_State *L, lua_Writer writer, void *data); + + +/* +** coroutine functions +*/ +LUA_API int (lua_yield) (lua_State *L, int nresults); +LUA_API int (lua_resume) (lua_State *L, int narg); +LUA_API int (lua_status) (lua_State *L); + +/* +** garbage-collection function and options +*/ + +#define LUA_GCSTOP 0 +#define LUA_GCRESTART 1 +#define LUA_GCCOLLECT 2 +#define LUA_GCCOUNT 3 +#define LUA_GCCOUNTB 4 +#define LUA_GCSTEP 5 +#define LUA_GCSETPAUSE 6 +#define LUA_GCSETSTEPMUL 7 + +LUA_API int (lua_gc) (lua_State *L, int what, int data); + + +/* +** miscellaneous functions +*/ + +LUA_API int (lua_error) (lua_State *L); + +LUA_API int (lua_next) (lua_State *L, int idx); + +LUA_API void (lua_concat) (lua_State *L, int n); + +LUA_API lua_Alloc (lua_getallocf) (lua_State *L, void **ud); +LUA_API void lua_setallocf (lua_State *L, lua_Alloc f, void *ud); + + + +/* +** =============================================================== +** some useful macros +** =============================================================== +*/ + +#define lua_pop(L,n) lua_settop(L, -(n)-1) + +#define lua_newtable(L) lua_createtable(L, 0, 0) + +#define lua_register(L,n,f) (lua_pushcfunction(L, (f)), lua_setglobal(L, (n))) + +#define lua_pushcfunction(L,f) lua_pushcclosure(L, (f), 0) + +#define lua_strlen(L,i) lua_objlen(L, (i)) + +#define lua_isfunction(L,n) (lua_type(L, (n)) == LUA_TFUNCTION) +#define lua_istable(L,n) (lua_type(L, (n)) == LUA_TTABLE) +#define lua_islightuserdata(L,n) (lua_type(L, (n)) == LUA_TLIGHTUSERDATA) +#define lua_isnil(L,n) (lua_type(L, (n)) == LUA_TNIL) +#define lua_isboolean(L,n) 
(lua_type(L, (n)) == LUA_TBOOLEAN)
+#define lua_isthread(L,n) (lua_type(L, (n)) == LUA_TTHREAD)
+#define lua_isnone(L,n) (lua_type(L, (n)) == LUA_TNONE)
+#define lua_isnoneornil(L, n) (lua_type(L, (n)) <= 0)
+
+#define lua_pushliteral(L, s) \
+ lua_pushlstring(L, "" s, (sizeof(s)/sizeof(char))-1)
+
+#define lua_setglobal(L,s) lua_setfield(L, LUA_GLOBALSINDEX, (s))
+#define lua_getglobal(L,s) lua_getfield(L, LUA_GLOBALSINDEX, (s))
+
+#define lua_tostring(L,i) lua_tolstring(L, (i), NULL)
+
+
+
+/*
+** compatibility macros and functions
+*/
+
+#define lua_open() luaL_newstate()
+
+#define lua_getregistry(L) lua_pushvalue(L, LUA_REGISTRYINDEX)
+
+#define lua_getgccount(L) lua_gc(L, LUA_GCCOUNT, 0)
+
+#define lua_Chunkreader lua_Reader
+#define lua_Chunkwriter lua_Writer
+
+
+/* hack */
+LUA_API void lua_setlevel (lua_State *from, lua_State *to);
+
+
+/*
+** {======================================================================
+** Debug API
+** =======================================================================
+*/
+
+
+/*
+** Event codes
+*/
+#define LUA_HOOKCALL 0
+#define LUA_HOOKRET 1
+#define LUA_HOOKLINE 2
+#define LUA_HOOKCOUNT 3
+#define LUA_HOOKTAILRET 4
+
+
+/*
+** Event masks
+*/
+#define LUA_MASKCALL (1 << LUA_HOOKCALL)
+#define LUA_MASKRET (1 << LUA_HOOKRET)
+#define LUA_MASKLINE (1 << LUA_HOOKLINE)
+#define LUA_MASKCOUNT (1 << LUA_HOOKCOUNT)
+
+typedef struct lua_Debug lua_Debug;  /* activation record */
+
+
+/* Functions to be called by the debugger in specific events */
+typedef void (*lua_Hook) (lua_State *L, lua_Debug *ar);
+
+
+LUA_API int lua_getstack (lua_State *L, int level, lua_Debug *ar);
+LUA_API int lua_getinfo (lua_State *L, const char *what, lua_Debug *ar);
+LUA_API const char *lua_getlocal (lua_State *L, const lua_Debug *ar, int n);
+LUA_API const char *lua_setlocal (lua_State *L, const lua_Debug *ar, int n);
+LUA_API const char *lua_getupvalue (lua_State *L, int funcindex, int n);
+LUA_API const char *lua_setupvalue (lua_State *L, int funcindex, int n);
+
+LUA_API int lua_sethook (lua_State *L, lua_Hook func, int mask, int count);
+LUA_API lua_Hook lua_gethook (lua_State *L);
+LUA_API int lua_gethookmask (lua_State *L);
+LUA_API int lua_gethookcount (lua_State *L);
+
+
+struct lua_Debug {
+  int event;
+  const char *name; /* (n) */
+  const char *namewhat; /* (n) `global', `local', `field', `method' */
+  const char *what; /* (S) `Lua', `C', `main', `tail' */
+  const char *source; /* (S) */
+  int currentline; /* (l) */
+  int nups; /* (u) number of upvalues */
+  int linedefined; /* (S) */
+  int lastlinedefined; /* (S) */
+  char short_src[LUA_IDSIZE]; /* (S) */
+  /* private part */
+  int i_ci; /* active function */
+};
+
+/* }====================================================================== */
+
+
+/******************************************************************************
+* Copyright (C) 1994-2008 Lua.org, PUC-Rio. All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+******************************************************************************/
+
+
+#endif
diff --git a/src/lua.hpp b/src/lua.hpp
new file mode 100644
index 0000000000..ec417f5946
--- /dev/null
+++ b/src/lua.hpp
@@ -0,0 +1,9 @@
+// lua.hpp
+// Lua header files for C++
+// <<extern "C">> not supplied automatically because Lua also compiles as C++
+
+extern "C" {
+#include "lua.h"
+#include "lualib.h"
+#include "lauxlib.h"
+}
diff --git a/src/luaconf.h b/src/luaconf.h
new file mode 100644
index 0000000000..4d4f109915
--- /dev/null
+++ b/src/luaconf.h
@@ -0,0 +1,133 @@
+/*
+** Configuration header.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef luaconf_h
+#define luaconf_h
+
+#include <limits.h>
+#include <stddef.h>
+
+/* Try to determine supported features for a couple of standard platforms. */
+#if defined(_WIN32)
+#define LUA_USE_WIN
+#define LUA_DL_DLL
+#elif defined(__linux__) || defined(__solaris__) || defined(__CYGWIN__) || \
+      defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
+      (defined(__MACH__) && defined(__APPLE__))
+#define LUA_USE_POSIX
+#define LUA_DL_DLOPEN
+#endif
+
+/* Default path for loading Lua and C modules with require(). */
+#ifdef LUA_USE_WIN
+/*
+** In Windows, any exclamation mark ('!') in the path is replaced by the
+** path of the directory of the executable file of the current process.
+*/
+#define LUA_LDIR "!\\lua\\"
+#define LUA_CDIR "!\\"
+#define LUA_PATH_DEFAULT \
+  ".\\?.lua;" LUA_LDIR"?.lua;" LUA_LDIR"?\\init.lua;"
+#define LUA_CPATH_DEFAULT \
+  ".\\?.dll;" LUA_CDIR"?.dll;" LUA_CDIR"loadall.dll"
+#else
+#define LUA_ROOT "/usr/local/"
+#define LUA_JDIR LUA_ROOT "share/luajit-2.0.0-beta1/"
+#define LUA_LDIR LUA_ROOT "share/lua/5.1/"
+#define LUA_CDIR LUA_ROOT "lib/lua/5.1/"
+#define LUA_PATH_DEFAULT \
+  "./?.lua;" LUA_JDIR"?.lua;" LUA_LDIR"?.lua;" LUA_LDIR"?/init.lua;"
+#define LUA_CPATH_DEFAULT \
+  "./?.so;" LUA_CDIR"?.so;" LUA_CDIR"loadall.so"
+#endif
+
+/* Environment variable names for path overrides and initialization code. */
+#define LUA_PATH "LUA_PATH"
+#define LUA_CPATH "LUA_CPATH"
+#define LUA_INIT "LUA_INIT"
+
+/* Special file system characters. */
+#ifdef LUA_USE_WIN
+#define LUA_DIRSEP "\\"
+#else
+#define LUA_DIRSEP "/"
+#endif
+#define LUA_PATHSEP ";"
+#define LUA_PATH_MARK "?"
+#define LUA_EXECDIR "!"
+#define LUA_IGMARK "-"
+#define LUA_PATH_CONFIG \
+  LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \
+  LUA_EXECDIR "\n" LUA_IGMARK
+
+/* Quoting in error messages. */
+#define LUA_QL(x) "'" x "'"
+#define LUA_QS LUA_QL("%s")
+
+/* Various tunables. */
+#define LUAI_MAXSTACK 65500 /* Max. # of stack slots for a thread (<64K). */
+#define LUAI_MAXCSTACK 8000 /* Max. # of stack slots for a C func (<10K). */
+#define LUAI_GCPAUSE 200 /* Pause GC until memory is at 200%. */
+#define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */
+#define LUA_MAXCAPTURES 32 /* Max. pattern captures. */
+
+/* Compatibility with older library function names. */
+#define LUA_COMPAT_MOD /* OLD: math.mod, NEW: math.fmod */
+#define LUA_COMPAT_GFIND /* OLD: string.gfind, NEW: string.gmatch */
+
+/* Configuration for the frontend (the luajit executable). */
+#if defined(luajit_c)
+#define LUA_PROGNAME "luajit" /* Fallback frontend name. */
+#define LUA_PROMPT "> " /* Interactive prompt. */
+#define LUA_PROMPT2 ">> " /* Continuation prompt. */
+#define LUA_MAXINPUT 512 /* Max. input line length. */
+#endif
+
+/* Note: changing the following defines breaks the Lua 5.1 ABI. */
+#define LUA_INTEGER ptrdiff_t
+#define LUA_IDSIZE 60 /* Size of lua_Debug.short_src. */
+#define LUAL_BUFFERSIZE BUFSIZ /* Size of lauxlib and io.* buffers. */
+
+/* The following defines are here only for compatibility with luaconf.h
+** from the standard Lua distribution. They must not be changed for LuaJIT.
+*/
+#define LUA_NUMBER_DOUBLE
+#define LUA_NUMBER double
+#define LUAI_UACNUMBER double
+#define LUA_NUMBER_SCAN "%lf"
+#define LUA_NUMBER_FMT "%.14g"
+#define lua_number2str(s, n) sprintf((s), LUA_NUMBER_FMT, (n))
+#define LUAI_MAXNUMBER2STR 32
+#define lua_str2number(s, p) strtod((s), (p))
+#define LUA_INTFRMLEN "l"
+#define LUA_INTFRM_T long
+
+/* Linkage of public API functions. */
+#if defined(LUA_BUILD_AS_DLL)
+#if defined(LUA_CORE) || defined(LUA_LIB)
+#define LUA_API __declspec(dllexport)
+#else
+#define LUA_API __declspec(dllimport)
+#endif
+#else
+#define LUA_API extern
+#endif
+
+#define LUALIB_API LUA_API
+
+/* Support for internal assertions. */
+#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
+#include <assert.h>
+#endif
+#ifdef LUA_USE_ASSERT
+#define lua_assert(x) assert(x)
+#endif
+#ifdef LUA_USE_APICHECK
+#define luai_apicheck(L, o) { (void)L; assert(o); }
+#else
+#define luai_apicheck(L, o) { (void)L; }
+#endif
+
+#endif
diff --git a/src/luajit.c b/src/luajit.c
new file mode 100644
index 0000000000..9153975bd5
--- /dev/null
+++ b/src/luajit.c
@@ -0,0 +1,519 @@
+/*
+** LuaJIT frontend. Runs commands, scripts, read-eval-print (REPL) etc.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define luajit_c
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+#include "luajit.h"
+
+#if defined(LUA_USE_POSIX)
+#include <unistd.h>
+#define lua_stdin_is_tty() isatty(0)
+#elif defined(LUA_USE_WIN)
+#include <io.h>
+#ifdef __BORLANDC__
+#define lua_stdin_is_tty() isatty(_fileno(stdin))
+#else
+#define lua_stdin_is_tty() _isatty(_fileno(stdin))
+#endif
+#else
+#define lua_stdin_is_tty() 1
+#endif
+
+static lua_State *globalL = NULL;
+static const char *progname = LUA_PROGNAME;
+
+static void lstop(lua_State *L, lua_Debug *ar)
+{
+  (void)ar; /* unused arg. */
+  lua_sethook(L, NULL, 0, 0);
+  /* Avoid luaL_error -- a C hook doesn't add an extra frame. 
*/ + luaL_where(L, 0); + lua_pushfstring(L, "%sinterrupted!", lua_tostring(L, -1)); + lua_error(L); +} + +static void laction(int i) +{ + signal(i, SIG_DFL); /* if another SIGINT happens before lstop, + terminate process (default action) */ + lua_sethook(globalL, lstop, LUA_MASKCALL | LUA_MASKRET | LUA_MASKCOUNT, 1); +} + +static void print_usage(void) +{ + fprintf(stderr, + "usage: %s [options] [script [args]].\n" + "Available options are:\n" + " -e stat execute string " LUA_QL("stat") "\n" + " -l name require library " LUA_QL("name") "\n" + " -j cmd perform LuaJIT control command\n" + " -O[lvl] set LuaJIT optimization level\n" + " -i enter interactive mode after executing " LUA_QL("script") "\n" + " -v show version information\n" + " -- stop handling options\n" + " - execute stdin and stop handling options\n" + , + progname); + fflush(stderr); +} + +static void l_message(const char *pname, const char *msg) +{ + if (pname) fprintf(stderr, "%s: ", pname); + fprintf(stderr, "%s\n", msg); + fflush(stderr); +} + +static int report(lua_State *L, int status) +{ + if (status && !lua_isnil(L, -1)) { + const char *msg = lua_tostring(L, -1); + if (msg == NULL) msg = "(error object is not a string)"; + l_message(progname, msg); + lua_pop(L, 1); + } + return status; +} + +static int traceback(lua_State *L) +{ + if (!lua_isstring(L, 1)) /* 'message' not a string? */ + return 1; /* keep it intact */ + lua_getfield(L, LUA_GLOBALSINDEX, "debug"); + if (!lua_istable(L, -1)) { + lua_pop(L, 1); + return 1; + } + lua_getfield(L, -1, "traceback"); + if (!lua_isfunction(L, -1)) { + lua_pop(L, 2); + return 1; + } + lua_pushvalue(L, 1); /* pass error message */ + lua_pushinteger(L, 2); /* skip this function and traceback */ + lua_call(L, 2, 1); /* call debug.traceback */ + return 1; +} + +static int docall(lua_State *L, int narg, int clear) +{ + int status; + int base = lua_gettop(L) - narg; /* function index */ + lua_pushcfunction(L, traceback); /* push traceback function */ + lua_insert(L, base); /* put it under chunk and args */ + signal(SIGINT, laction); + status = lua_pcall(L, narg, (clear ? 0 : LUA_MULTRET), base); + signal(SIGINT, SIG_DFL); + lua_remove(L, base); /* remove traceback function */ + /* force a complete garbage collection in case of errors */ + if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0); + return status; +} + +static void print_version(void) +{ + fprintf(stderr, + LUAJIT_VERSION " -- " LUAJIT_COPYRIGHT ". " LUAJIT_URL "\n"); +} + +static void print_jit_status(lua_State *L) +{ + int n; + const char *s; + lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); + lua_getfield(L, -1, "jit"); /* Get jit.* module table. */ + lua_remove(L, -2); + lua_getfield(L, -1, "status"); + lua_remove(L, -2); + n = lua_gettop(L); + lua_call(L, 0, LUA_MULTRET); + fputs(lua_toboolean(L, n) ? 
"JIT: ON" : "JIT: OFF", stderr); + for (n++; (s = lua_tostring(L, n)); n++) + fprintf(stderr, " %s", s); + fputs("\n", stdout); +} + +static int getargs(lua_State *L, char **argv, int n) +{ + int narg; + int i; + int argc = 0; + while (argv[argc]) argc++; /* count total number of arguments */ + narg = argc - (n + 1); /* number of arguments to the script */ + luaL_checkstack(L, narg + 3, "too many arguments to script"); + for (i = n+1; i < argc; i++) + lua_pushstring(L, argv[i]); + lua_createtable(L, narg, n + 1); + for (i = 0; i < argc; i++) { + lua_pushstring(L, argv[i]); + lua_rawseti(L, -2, i - n); + } + return narg; +} + +static int dofile(lua_State *L, const char *name) +{ + int status = luaL_loadfile(L, name) || docall(L, 0, 1); + return report(L, status); +} + +static int dostring(lua_State *L, const char *s, const char *name) +{ + int status = luaL_loadbuffer(L, s, strlen(s), name) || docall(L, 0, 1); + return report(L, status); +} + +static int dolibrary(lua_State *L, const char *name) +{ + lua_getglobal(L, "require"); + lua_pushstring(L, name); + return report(L, docall(L, 1, 1)); +} + +static void write_prompt(lua_State *L, int firstline) +{ + const char *p; + lua_getfield(L, LUA_GLOBALSINDEX, firstline ? "_PROMPT" : "_PROMPT2"); + p = lua_tostring(L, -1); + if (p == NULL) p = firstline ? LUA_PROMPT : LUA_PROMPT2; + fputs(p, stdout); + fflush(stdout); + lua_pop(L, 1); /* remove global */ +} + +static int incomplete(lua_State *L, int status) +{ + if (status == LUA_ERRSYNTAX) { + size_t lmsg; + const char *msg = lua_tolstring(L, -1, &lmsg); + const char *tp = msg + lmsg - (sizeof(LUA_QL("")) - 1); + if (strstr(msg, LUA_QL("")) == tp) { + lua_pop(L, 1); + return 1; + } + } + return 0; /* else... */ +} + +static int pushline(lua_State *L, int firstline) +{ + char buf[LUA_MAXINPUT]; + write_prompt(L, firstline); + if (fgets(buf, LUA_MAXINPUT, stdin)) { + size_t len = strlen(buf); + if (len > 0 && buf[len-1] == '\n') + buf[len-1] = '\0'; + if (firstline && buf[0] == '=') + lua_pushfstring(L, "return %s", buf+1); + else + lua_pushstring(L, buf); + return 1; + } + return 0; +} + +static int loadline(lua_State *L) +{ + int status; + lua_settop(L, 0); + if (!pushline(L, 1)) + return -1; /* no input */ + for (;;) { /* repeat until gets a complete line */ + status = luaL_loadbuffer(L, lua_tostring(L, 1), lua_strlen(L, 1), "=stdin"); + if (!incomplete(L, status)) break; /* cannot try to add lines? */ + if (!pushline(L, 0)) /* no more input? */ + return -1; + lua_pushliteral(L, "\n"); /* add a new line... */ + lua_insert(L, -2); /* ...between the two lines */ + lua_concat(L, 3); /* join them */ + } + lua_remove(L, 1); /* remove line */ + return status; +} + +static void dotty(lua_State *L) +{ + int status; + const char *oldprogname = progname; + progname = NULL; + while ((status = loadline(L)) != -1) { + if (status == 0) status = docall(L, 0, 0); + report(L, status); + if (status == 0 && lua_gettop(L) > 0) { /* any result to print? 
*/ + lua_getglobal(L, "print"); + lua_insert(L, 1); + if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) + l_message(progname, + lua_pushfstring(L, "error calling " LUA_QL("print") " (%s)", + lua_tostring(L, -1))); + } + } + lua_settop(L, 0); /* clear stack */ + fputs("\n", stdout); + fflush(stdout); + progname = oldprogname; +} + +static int handle_script(lua_State *L, char **argv, int n) +{ + int status; + const char *fname; + int narg = getargs(L, argv, n); /* collect arguments */ + lua_setglobal(L, "arg"); + fname = argv[n]; + if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0) + fname = NULL; /* stdin */ + status = luaL_loadfile(L, fname); + lua_insert(L, -(narg+1)); + if (status == 0) + status = docall(L, narg, 0); + else + lua_pop(L, narg); + return report(L, status); +} + +/* Load add-on module. */ +static int loadjitmodule(lua_State *L, const char *notfound) +{ + lua_getglobal(L, "require"); + lua_pushliteral(L, "jit."); + lua_pushvalue(L, -3); + lua_concat(L, 2); + if (lua_pcall(L, 1, 1, 0)) { + const char *msg = lua_tostring(L, -1); + if (msg && !strncmp(msg, "module ", 7)) { + err: + l_message(progname, notfound); + return 1; + } else { + return report(L, 1); + } + } + lua_getfield(L, -1, "start"); + if (lua_isnil(L, -1)) goto err; + lua_remove(L, -2); /* Drop module table. */ + return 0; +} + +/* Run command with options. */ +static int runcmdopt(lua_State *L, const char *opt) +{ + int narg = 0; + if (opt && *opt) { + for (;;) { /* Split arguments. */ + const char *p = strchr(opt, ','); + narg++; + if (!p) break; + if (p == opt) + lua_pushnil(L); + else + lua_pushlstring(L, opt, (size_t)(p - opt)); + opt = p + 1; + } + if (*opt) + lua_pushstring(L, opt); + else + lua_pushnil(L); + } + return report(L, lua_pcall(L, narg, 0, 0)); +} + +/* JIT engine control command: try jit library first or load add-on module. */ +static int dojitcmd(lua_State *L, const char *cmd) +{ + const char *opt = strchr(cmd, '='); + lua_pushlstring(L, cmd, opt ? (size_t)(opt - cmd) : strlen(cmd)); + lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); + lua_getfield(L, -1, "jit"); /* Get jit.* module table. */ + lua_remove(L, -2); + lua_pushvalue(L, -2); + lua_gettable(L, -2); /* Lookup library function. */ + if (!lua_isfunction(L, -1)) { + lua_pop(L, 2); /* Drop non-function and jit.* table, keep module name. */ + if (loadjitmodule(L, "unknown luaJIT command")) + return 1; + } else { + lua_remove(L, -2); /* Drop jit.* table. */ + } + lua_remove(L, -2); /* Drop module name. */ + return runcmdopt(L, opt ? opt+1 : opt); +} + +/* Optimization flags. */ +static int dojitopt(lua_State *L, const char *opt) +{ + lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); + lua_getfield(L, -1, "jit.opt"); /* Get jit.opt.* module table. */ + lua_remove(L, -2); + lua_getfield(L, -1, "start"); + lua_remove(L, -2); + return runcmdopt(L, opt); +} + +/* check that argument has no extra characters at the end */ +#define notail(x) {if ((x)[2] != '\0') return -1;} + +static int collectargs(char **argv, int *pi, int *pv, int *pe) +{ + int i; + for (i = 1; argv[i] != NULL; i++) { + if (argv[i][0] != '-') /* not an option? */ + return i; + switch (argv[i][1]) { /* option */ + case '-': + notail(argv[i]); + return (argv[i+1] != NULL ? 
i+1 : 0); + case '\0': + return i; + case 'i': + notail(argv[i]); + *pi = 1; /* go through */ + case 'v': + notail(argv[i]); + *pv = 1; + break; + case 'e': + *pe = 1; /* go through */ + case 'j': /* LuaJIT extension */ + case 'l': + if (argv[i][2] == '\0') { + i++; + if (argv[i] == NULL) return -1; + } + break; + case 'O': break; /* LuaJIT extension */ + default: return -1; /* invalid option */ + } + } + return 0; +} + +static int runargs(lua_State *L, char **argv, int n) +{ + int i; + for (i = 1; i < n; i++) { + if (argv[i] == NULL) continue; + lua_assert(argv[i][0] == '-'); + switch (argv[i][1]) { /* option */ + case 'e': { + const char *chunk = argv[i] + 2; + if (*chunk == '\0') chunk = argv[++i]; + lua_assert(chunk != NULL); + if (dostring(L, chunk, "=(command line)") != 0) + return 1; + break; + } + case 'l': { + const char *filename = argv[i] + 2; + if (*filename == '\0') filename = argv[++i]; + lua_assert(filename != NULL); + if (dolibrary(L, filename)) + return 1; /* stop if file fails */ + break; + } + case 'j': { /* LuaJIT extension */ + const char *cmd = argv[i] + 2; + if (*cmd == '\0') cmd = argv[++i]; + lua_assert(cmd != NULL); + if (dojitcmd(L, cmd)) + return 1; + break; + } + case 'O': /* LuaJIT extension */ + if (dojitopt(L, argv[i] + 2)) + return 1; + break; + default: break; + } + } + return 0; +} + +static int handle_luainit(lua_State *L) +{ + const char *init = getenv(LUA_INIT); + if (init == NULL) + return 0; /* status OK */ + else if (init[0] == '@') + return dofile(L, init+1); + else + return dostring(L, init, "=" LUA_INIT); +} + +struct Smain { + int argc; + char **argv; + int status; +}; + +static int pmain(lua_State *L) +{ + struct Smain *s = (struct Smain *)lua_touserdata(L, 1); + char **argv = s->argv; + int script; + int has_i = 0, has_v = 0, has_e = 0; + globalL = L; + if (argv[0] && argv[0][0]) progname = argv[0]; + LUAJIT_VERSION_SYM(); /* linker-enforced version check */ + lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */ + luaL_openlibs(L); /* open libraries */ + lua_gc(L, LUA_GCRESTART, 0); + s->status = handle_luainit(L); + if (s->status != 0) return 0; + script = collectargs(argv, &has_i, &has_v, &has_e); + if (script < 0) { /* invalid args? */ + print_usage(); + s->status = 1; + return 0; + } + if (has_v) print_version(); + s->status = runargs(L, argv, (script > 0) ? script : s->argc); + if (s->status != 0) return 0; + if (script) + s->status = handle_script(L, argv, script); + if (s->status != 0) return 0; + if (has_i) { + print_jit_status(L); + dotty(L); + } else if (script == 0 && !has_e && !has_v) { + if (lua_stdin_is_tty()) { + print_version(); + print_jit_status(L); + dotty(L); + } else { + dofile(L, NULL); /* executes stdin as a file */ + } + } + return 0; +} + +int main(int argc, char **argv) +{ + int status; + struct Smain s; + lua_State *L = lua_open(); /* create state */ + if (L == NULL) { + l_message(argv[0], "cannot create state: not enough memory"); + return EXIT_FAILURE; + } + s.argc = argc; + s.argv = argv; + status = lua_cpcall(L, pmain, &s); + report(L, status); + lua_close(L); + return (status || s.status) ? EXIT_FAILURE : EXIT_SUCCESS; +} + diff --git a/src/luajit.h b/src/luajit.h new file mode 100644 index 0000000000..019137558c --- /dev/null +++ b/src/luajit.h @@ -0,0 +1,68 @@ +/* +** LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ +** +** Copyright (C) 2005-2009 Mike Pall. All rights reserved. 
+** +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +*/ + +#ifndef _LUAJIT_H +#define _LUAJIT_H + +#include "lua.h" + +#define LUAJIT_VERSION "LuaJIT 2.0.0-beta1" +#define LUAJIT_VERSION_NUM 20000 /* Version 2.0.0 = 02.00.00. */ +#define LUAJIT_VERSION_SYM luaJIT_version_2_0_0_beta1 +#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2009 Mike Pall" +#define LUAJIT_URL "http://luajit.org/" + +/* Modes for luaJIT_setmode. */ +#define LUAJIT_MODE_MASK 0x00ff + +enum { + LUAJIT_MODE_ENGINE, /* Set mode for whole JIT engine. */ + LUAJIT_MODE_DEBUG, /* Set debug mode (idx = level). */ + + LUAJIT_MODE_FUNC, /* Change mode for a function. */ + LUAJIT_MODE_ALLFUNC, /* Recurse into subroutine protos. */ + LUAJIT_MODE_ALLSUBFUNC, /* Change only the subroutines. */ + + LUAJIT_MODE_TRACE, /* Flush a compiled trace. */ + + LUAJIT_MODE_MAX +}; + +/* Flags or'ed in to the mode. */ +#define LUAJIT_MODE_OFF 0x0000 /* Disable JIT compilation. */ +#define LUAJIT_MODE_ON 0x0100 /* (Re-)enable JIT compilation. */ +#define LUAJIT_MODE_FLUSH 0x0200 /* Flush JIT-compiled code. */ + +/* LuaJIT public C API. */ + +/* Control the JIT engine. */ +LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); + +/* Enforce (dynamic) linker error for version mismatches. Call from main. */ +LUA_API void LUAJIT_VERSION_SYM(void); + +#endif diff --git a/src/lualib.h b/src/lualib.h new file mode 100644 index 0000000000..c1ceb613b0 --- /dev/null +++ b/src/lualib.h @@ -0,0 +1,41 @@ +/* +** Standard library header. +** Copyright (C) 2005-2009 Mike Pall. 
See Copyright Notice in luajit.h +*/ + +#ifndef _LUALIB_H +#define _LUALIB_H + +#include "lua.h" + +#define LUA_FILEHANDLE "FILE*" + +#define LUA_COLIBNAME "coroutine" +#define LUA_MATHLIBNAME "math" +#define LUA_STRLIBNAME "string" +#define LUA_TABLIBNAME "table" +#define LUA_IOLIBNAME "io" +#define LUA_OSLIBNAME "os" +#define LUA_LOADLIBNAME "package" +#define LUA_DBLIBNAME "debug" +#define LUA_BITLIBNAME "bit" +#define LUA_JITLIBNAME "jit" + +LUALIB_API int luaopen_base(lua_State *L); +LUALIB_API int luaopen_math(lua_State *L); +LUALIB_API int luaopen_string(lua_State *L); +LUALIB_API int luaopen_table(lua_State *L); +LUALIB_API int luaopen_io(lua_State *L); +LUALIB_API int luaopen_os(lua_State *L); +LUALIB_API int luaopen_package(lua_State *L); +LUALIB_API int luaopen_debug(lua_State *L); +LUALIB_API int luaopen_bit(lua_State *L); +LUALIB_API int luaopen_jit(lua_State *L); + +LUALIB_API void luaL_openlibs(lua_State *L); + +#ifndef lua_assert +#define lua_assert(x) ((void)0) +#endif + +#endif diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat new file mode 100644 index 0000000000..8bdc4d8a7d --- /dev/null +++ b/src/msvcbuild.bat @@ -0,0 +1,53 @@ +@rem Script to build LuaJIT with MSVC. +@rem Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h +@rem +@rem Open a "Visual Studio .NET Command Prompt", cd to this directory +@rem and run this script. + +@if not defined INCLUDE goto :FAIL + +@setlocal +@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE +@set LJLINK=link /nologo +@set LJMT=mt /nologo +@set DASMDIR=..\dynasm +@set DASM=lua %DASMDIR%\dynasm.lua +@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c + +if not exist buildvm_x86.h^ + %DASM% -LN -o buildvm_x86.h buildvm_x86.dasc + +%LJCOMPILE% /I "." /I %DASMDIR% buildvm*.c +%LJLINK% /out:buildvm.exe buildvm*.obj +if exist buildvm.exe.manifest^ + %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe + +buildvm -m peobj -o lj_vm.obj +buildvm -m ffdef -o lj_ffdef.h %ALL_LIB% +buildvm -m libdef -o lj_libdef.h %ALL_LIB% +buildvm -m recdef -o lj_recdef.h %ALL_LIB% +buildvm -m vmdef -o ..\lib\vmdef.lua %ALL_LIB% +buildvm -m folddef -o lj_folddef.h lj_opt_fold.c + +@if "%1"=="amalg" goto :AMALGDLL +%LJCOMPILE% /DLUA_BUILD_AS_DLL lj_*.c lib_*.c +%LJLINK% /DLL /out:lua51.dll lj_*.obj lib_*.obj +@goto :MTDLL +:AMALGDLL +%LJCOMPILE% /DLUA_BUILD_AS_DLL ljamalg.c +%LJLINK% /DLL /out:lua51.dll ljamalg.obj lj_vm.obj +:MTDLL +if exist lua51.dll.manifest^ + %LJMT% -manifest lua51.dll.manifest -outputresource:lua51.dll;2 + +%LJCOMPILE% luajit.c +%LJLINK% /out:luajit.exe luajit.obj lua51.lib +if exist luajit.exe.manifest^ + %LJMT% -manifest luajit.exe.manifest -outputresource:luajit.exe + +del *.obj *.manifest buildvm.exe + +@goto :END +:FAIL +@echo You must open a "Visual Studio .NET Command Prompt" to run this script +:END
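
Editor's note: the public headers added above (lua.h, lualib.h, luajit.h) are all an embedding host needs. A minimal sketch of driving the engine from C follows; it assumes the standard lauxlib.h from this tree and a link against the built library, and the chunk string is purely illustrative:

/* Minimal embedding sketch. Uses only APIs declared in lua.h, lualib.h,
** lauxlib.h and luajit.h above; error handling is deliberately small. */
#include <stdio.h>
#include "lua.h"
#include "lualib.h"
#include "lauxlib.h"
#include "luajit.h"

int main(void)
{
  lua_State *L = lua_open();  /* Compatibility macro for luaL_newstate(). */
  if (L == NULL) return 1;
  luaL_openlibs(L);  /* Opens base, string, ..., plus bit.* and jit.*. */
  /* Toggle the JIT compiler for the whole engine via the luajit.h API. */
  luaJIT_setmode(L, 0, LUAJIT_MODE_ENGINE|LUAJIT_MODE_OFF);
  luaJIT_setmode(L, 0, LUAJIT_MODE_ENGINE|LUAJIT_MODE_ON);
  if (luaL_dostring(L, "print(jit.version)") != 0)
    fprintf(stderr, "%s\n", lua_tostring(L, -1));
  lua_close(L);
  return 0;
}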
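The Debug API block in lua.h is the mechanism the frontend's lstop/laction pair builds on. A hypothetical instruction-budget watchdog using only that API might look like this; the budget constant and function names are invented for the sketch, and note that an active hook can also affect how much of the hooked code the JIT compiles:

/* Hypothetical watchdog: abort a chunk after a fixed instruction budget.
** Like lstop() in luajit.c above, it avoids luaL_error, since a C hook
** does not add an extra call frame. */
#include "lua.h"
#include "lauxlib.h"

static void budget_hook(lua_State *L, lua_Debug *ar)
{
  (void)ar;  /* unused arg. */
  lua_sethook(L, NULL, 0, 0);  /* One-shot: remove the hook. */
  luaL_where(L, 0);
  lua_pushfstring(L, "%sinstruction budget exhausted", lua_tostring(L, -1));
  lua_error(L);
}

static int run_with_budget(lua_State *L, const char *chunk)
{
  int status = luaL_loadstring(L, chunk);
  if (status != 0) return status;
  lua_sethook(L, budget_hook, LUA_MASKCOUNT, 10000000);
  status = lua_pcall(L, 0, 0, 0);
  lua_sethook(L, NULL, 0, 0);
  return status;
}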
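On the internal side, lj_vmevent_send in lj_vmevent.h is a statement-like macro: the args block only runs when a handler is registered under the _VMEVENTS registry table, and lj_vmevent_prepare caches the absence of a handler in g->vmevmask. A sketch of what a core-internal call site might look like follows; the function name and the "flush" payload are illustrative, with setstrV and lj_str_newlit taken from the internal lj_obj.h/lj_str.h headers:

/* Core-internal sketch (LUA_CORE build only): fire the TRACE VM event.
** lj_vmevent_prepare() has already reserved LUA_MINSTACK stack slots,
** so the argument push below is safe. */
#define LUA_CORE
#include "lj_obj.h"
#include "lj_str.h"
#include "lj_vmevent.h"

static void example_notify_flush(lua_State *L)
{
  /* Expands to: test vmevmask bit, look up handler, push args, pcall it. */
  lj_vmevent_send(L, TRACE,
    setstrV(L, L->top++, lj_str_newlit(L, "flush"));
  );
}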