Skip to content

Commit

Permalink
Rewrite header parser in Lua (#33)
Browse files Browse the repository at this point in the history
* Remove Python as dependency

* Delete Lua venv script, update docs

* Update version number and release date
  • Loading branch information
dargueta authored Apr 9, 2023
1 parent 80e3c71 commit 9338561
Show file tree
Hide file tree
Showing 9 changed files with 231 additions and 754 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
Changes
=======

2.1.0 (2023-04-08)
------------------

**The Python build dependency has been completely removed.** You now only need
Lua, Make, and a C++ compiler. The Python script for creating a virtual environment
was only a development utility, never required for installation; it has also been
removed (see below).

Other Changes
~~~~~~~~~~~~~

The virtual environment script has been removed. Use `lenv <https://github.com/mah0x211/lenv>`_
instead.

2.0.1 (2023-04-06)
------------------

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ $(TEST_EXECUTABLE): $(DOCTEST_HEADER) $(TEST_CPP_OBJECT_FILES) $(LIB_OBJECT_FILE


$(CONSTS_DIR)/%_const.cpp: $(UNICORN_INCDIR)/unicorn/%.h | $(CONSTS_DIR)
python3 tools/generate_constants.py $< $@
$(SET_SEARCH_PATHS); $(LUA) tools/generate_constants.lua $< $@


# We're deliberately omitting CXXFLAGS as provided by LuaRocks because it includes
Expand Down
22 changes: 3 additions & 19 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -199,30 +199,14 @@ directory of this repository, and run
Development
-----------

Configuration
~~~~~~~~~~~~~

Using a virtual environment for Lua is strongly recommended. You'll want to avoid
using your OS's real Lua, and using virtual environments allows you to test with
multiple versions of Lua.

To create a separate execution environment, you can use the ``lua_venv.py``
script.

.. code-block:: sh
python3 tools/lua_venv.py --luarocks 5.3 ~/my-virtualenvs/5.3/
This will download Lua 5.3 and install it in a directory named ``~/my-virtualenvs/5.3``.
Use ``~/my-virtualenvs/5.3/luarocks/bin/luarocks`` as your LuaRocks executable.
multiple versions of Lua. You can use `lenv <https://github.com/mah0x211/lenv>`_
for this.

If you're running macOS and encounter a linker error with LuaJIT, check out
`this ticket <https://github.com/LuaJIT/LuaJIT/issues/449>`_.

Using Your OS's Lua
^^^^^^^^^^^^^^^^^^^

Use your global installation of LuaRocks for operations.

Building and Testing
~~~~~~~~~~~~~~~~~~~~
Expand All @@ -236,7 +220,7 @@ Building and Testing
luarocks test
Examples
~~~~~~~~
--------

See the ``examples`` directory for examples of how you can use this library.

Expand Down
4 changes: 2 additions & 2 deletions include/unicornlua/unicornlua.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@
/**
* The minor version number of this Lua library (second part, x.1.x).
*/
#define UNICORNLUA_VERSION_MINOR 0
#define UNICORNLUA_VERSION_MINOR 1

/**
* The patch version number of this Lua library (third part, x.x.1).
*/
#define UNICORNLUA_VERSION_PATCH 1
#define UNICORNLUA_VERSION_PATCH 0

/**
* Create a 24-bit number from a release's major, minor, and patch numbers.
Expand Down
205 changes: 205 additions & 0 deletions tools/generate_constants.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
pl_file = require "pl.file"
pl_lexer = require "pl.lexer"
pl_path = require "pl.path"
pl_stringx = require "pl.stringx"
pl_tablex = require "pl.tablex"
pl_template = require "pl.template"
pl_utils = require "pl.utils"

pl_stringx.import()


-- Penlight (`pl.template`) template for the generated `<slug>_const.cpp` file.
--
-- Template syntax, as configured by `main()`:
--   * Lines beginning with `!` are Lua statements executed while rendering.
--   * `$(expr)` is replaced by the value of the Lua expression `expr`.
--
-- Variables the template reads:
--   * `header_file` - path of the header the constants were extracted from.
--   * `slug`        - header file name without its extension.
--   * `constants`   - table keyed by constant name; only the keys are emitted.
--
-- NOTE: entries are emitted in `pairs()` order, which Lua leaves unspecified,
-- so the order of the rows in `kConstants` may differ from run to run.
OUTPUT_CPP_TEMPLATE = [[
/** Autogenerated from installed Unicorn header files. DO NOT EDIT.
*
* Source: $(header_file)
*
* @file $(slug)_const.cpp
*/
#include <unicorn/unicorn.h>
! if slug ~= "unicorn" then
#include <unicorn/$(slug).h>
! end
#include "unicornlua/lua.h"
#include "unicornlua/utils.h"
static const struct NamedIntConst kConstants[] {
! for name, text in pairs(constants) do
{"$(name)", $(name)},
! end
{nullptr, 0}
};
extern "C" UNICORN_EXPORT int luaopen_unicorn_$(slug)_const(lua_State *L) {
lua_createtable(L, 0, $(pl_tablex.size(constants)));
load_int_constants(L, kConstants);
return 1;
}
]]


--- Script entry point: read a Unicorn header and emit the C++ constants file.
--
-- Command-line arguments (taken from the global `arg` table):
--   arg[1]  path of the C header to scan (required)
--   arg[2]  path of the file to write; if omitted or "-", the rendered text
--           is printed to stdout
--
-- Every failure (bad usage, unreadable header, template error, failed write)
-- is reported through `pl_utils.quit` with exit status 1.
function main()
    if #arg < 1 or #arg > 2 then
        pl_utils.quit(
            1,
            "USAGE: %s header_file [output_file]\nIf `output_file` isn't given"
            .. " or is \"-\", stdout is used.\n",
            -- arg[0] is the script name; arg[-1] would be the interpreter.
            arg[0]
        )
    end

    local source_header = arg[1]
    local output_file = arg[2]

    -- Read in the entire file so we can tack on a trailing newline at the end
    -- of the text.
    -- https://github.com/lunarmodules/Penlight/issues/450
    local raw_text, read_error = pl_file.read(source_header)
    if raw_text == nil then
        -- Without this check the concatenation below would fail with an
        -- unhelpful "attempt to concatenate a nil value".
        pl_utils.quit(
            1,
            "Can't read header file %s: %s",
            tostring(source_header),
            tostring(read_error)
        )
    end
    local source_text = raw_text .. "\n"

    local constants = extract_constants(source_text)
    local source_basename = pl_path.basename(source_header)
    -- Only the first return value (the name without extension) is needed.
    local stem = pl_path.splitext(source_basename)

    local text, render_error = pl_template.substitute(
        OUTPUT_CPP_TEMPLATE,
        {
            _chunk_name = "cpp_template",
            _escape = "!",
            -- The template calls globals such as `pairs` and `pl_tablex`.
            _parent = _G,
            constants = constants,
            header_file = source_header,
            slug = stem,
        }
    )

    if render_error ~= nil then
        pl_utils.quit(1, "%s\n", render_error)
    end

    if output_file == nil or output_file == "-" then
        print(text)
    else
        local written, write_error = pl_file.write(output_file, text)
        if not written then
            pl_utils.quit(
                1,
                "Can't write output file %s: %s",
                output_file,
                tostring(write_error)
            )
        end
    end
end


--- Scan C/C++ header text and gather the constants it declares.
--
-- The text is tokenized with `pl_lexer.cpp`. Preprocessor tokens are handed to
-- `maybe_extract_preprocessor`; an `enum` keyword hands the live tokenizer to
-- `maybe_extract_enum`, which consumes the enum body from the same stream.
--
-- @param source  header text to scan
-- @return table mapping each constant name to the text recorded for it
function extract_constants(source)
    local found = {}
    local next_token = pl_lexer.cpp(source)

    -- The tokenizer is an iterator function; the loop stops once it yields a
    -- nil token type.
    for kind, payload in next_token do
        local candidates = nil

        if kind == "prepro" then
            candidates = maybe_extract_preprocessor(payload)
        elseif kind == "keyword" and payload == "enum" then
            -- Enum declaration follows; its tokens are consumed by the helper.
            candidates = maybe_extract_enum(next_token)
        end

        if candidates ~= nil then
            for name, text in pairs(candidates) do
                -- First definition wins. A repeat most likely comes from an
                -- #if ... #elif ... block, which is not interpreted here.
                if found[name] == nil then
                    found[name] = text
                end
            end
        end
    end

    return found
end


--- Extract a constant from one preprocessor directive, if it defines one.
--
-- A directive qualifies when it is an object-like `#define` (whitespace
-- between `#` and `define` is allowed) whose name starts with `UC_` and which
-- has a non-empty expansion. Function-like macros, other directives, and
-- malformed input such as a bare `#define` are ignored.
--
-- @param text  full text of the preprocessor token, e.g. "#define UC_X 1\n"
-- @return a table `{[macro_name] = first_token_of_expansion}`, or an empty
--         table when the directive does not define a usable constant
function maybe_extract_preprocessor(text)
    -- Captures: the macro name, a "(" if one immediately follows the name
    -- (which marks a function-like macro), and the first whitespace-delimited
    -- token of the expansion.
    local macro_name, open_paren, macro_text = text:match(
        "^#%s*define%s+([%a_][%w_]*)(%(?)%s*(%S*)"
    )

    if macro_name == nil                  -- Not a well-formed #define
        or open_paren ~= ""               -- Ignore function macros
        or macro_name:sub(1, 3) ~= "UC_"
        or macro_text == ""
    then
        return {}
    end

    -- FIXME (dargueta): Ensure that `macro_text` can be evaluated as an integer
    return {[macro_name] = macro_text}
end


--- Consume an enum body from the token stream and collect its member names.
--
-- The tokenizer must be positioned immediately after the `enum` keyword. The
-- next token is either the name of the enum, or `{` if the declaration has
-- the form `typedef enum { ... } XYZ`. Tokens are discarded until the opening
-- `{`, then members are read until the closing `}`.
--
-- Aborts through `pl_utils.quit` (exit status 1) on end of input, or when a
-- member does not start with an identifier.
--
-- @param tokenizer  token iterator returning `type, value` pairs
-- @return table mapping each member name to itself
function maybe_extract_enum(tokenizer)
    local ttype, text

    local start_lineno = pl_lexer.lineno(tokenizer)
    -- Skip the optional enum name (and anything else) up to the opening `{`.
    repeat
        ttype, text = tokenizer()
        if ttype == nil then
            local current_line = pl_lexer.lineno(tokenizer)
            pl_utils.quit(
                1,
                "Unexpected EOF on line %d, expected `{` on or near line %d",
                current_line,
                start_lineno
            )
        end
    until ttype == "{"

    local constants = {}

    -- The general structure we're expecting is
    --      IDENTIFIER [expression] ("," | "}")
    -- For this application we can probably get away with ignoring
    -- `expression` entirely, i.e. consuming the identifier and then
    -- discarding tokens until we reach a comma. This'll misbehave if, for
    -- example, there's a macro call as the value, but this is unlikely.
    while ttype ~= "}" do
        local current_lineno = pl_lexer.lineno(tokenizer)

        ttype, text = tokenizer()
        if ttype == "}" then
            -- Closing brace directly after `{` or after a trailing comma.
            return constants
        elseif ttype ~= "iden" then
            pl_utils.quit(
                1,
                "Expected identifier on line %s",
                tostring(current_lineno)
            )
        end

        constants[text] = text

        -- Skip everything until we hit a comma that ends the current item
        -- definition, or "}" which indicates the end of the enum.
        while ttype ~= "," and ttype ~= "}" and ttype ~= nil do
            ttype, text = tokenizer()
        end
        if ttype == nil then
            -- `quit` lives in pl.utils; pl.lexer has no such function.
            pl_utils.quit(
                1,
                "Unexpected EOF while processing enum value starting line %d",
                current_lineno
            )
        end
    end
    return constants
end

-- Run the generator when this file is executed as a script.
main()
Loading

0 comments on commit 9338561

Please sign in to comment.