From 8d72f6a8b207f904030947d30699cd6e7fbcce17 Mon Sep 17 00:00:00 2001 From: Allen Short Date: Wed, 19 Apr 2017 21:26:59 +0000 Subject: [PATCH] rapidjson memory usage tracking --- rjson/rjson.cpp | 128 +++++++++++++++++++------------- rjson/rjson_allocator.h | 159 ++++++++++++++++++++++++++++++++++++++++ rjson/test_sandbox.c | 4 +- 3 files changed, 241 insertions(+), 50 deletions(-) create mode 100644 rjson/rjson_allocator.h diff --git a/rjson/rjson.cpp b/rjson/rjson.cpp index 69174a22a..3599d6dd7 100644 --- a/rjson/rjson.cpp +++ b/rjson/rjson.cpp @@ -33,8 +33,27 @@ int luaopen_rjson(lua_State *lua); #include "luasandbox_output.h" #endif +#include "rjson_allocator.h" namespace rj = rapidjson; +#ifdef LUA_SANDBOX +typedef SandboxMemoryAllocator Allocator; +typedef rj::GenericDocument, SandboxMemoryAllocator > Document; +typedef rj::GenericValue, SandboxMemoryAllocator > Value; +typedef rj::GenericSchemaDocument SchemaDocument; +typedef rj::GenericSchemaValidator, void>, SandboxMemoryAllocator> SchemaValidator; +typedef rj::GenericStringBuffer, SandboxMemoryAllocator > StringBuffer; +typedef rj::GenericPointer Pointer; +#else +typedef rj::MemoryPoolAllocator Allocator; +typedef rj::Document Document; +typedef rj::Value Value; +typedef rj::SchemaDocument SchemaDocument; +typedef rj::Validator SchemaValidator; +typedef rj::StringBuffer StringBuffer; +typedef rj::Pointer Pointer; +#endif + typedef struct rjson_buffer { unsigned char *buf; @@ -45,22 +64,22 @@ typedef struct rjson_buffer typedef struct rjson { - rj::MemoryPoolAllocator<> *mpa; - rj::Document *doc; - rj::Value *val; - std::unordered_map *refs; - rjson_buffer insitu; + Allocator *mpa; + Document *doc; + Value *val; + std::unordered_map *refs; + rjson_buffer insitu; } rjson; typedef struct rjson_schema { - rj::SchemaDocument *doc; + SchemaDocument *doc; } rjson_schema; typedef struct rjson_object_iterator { - rj::Value::MemberIterator *it; - rj::Value::MemberIterator *end; + Value::MemberIterator *it; + 
Value::MemberIterator *end; } rjson_object_iterator; static const char *mozsvc_rjson = "mozsvc.rjson"; @@ -75,23 +94,34 @@ static void init_rjson_buffer(rjson_buffer *b) b->capacity = 0; } +static Allocator* make_memory_allocator(lua_State *lua) +{ + #ifdef LUA_SANDBOX + lua_getfield(lua, LUA_REGISTRYINDEX, LSB_HEKA_THIS_PTR); + lsb_heka_sandbox *hsb = static_cast(lua_touserdata(lua, -1)); + lua_pop(lua, 1); // remove this ptr + return new Allocator(hsb); + #else + return new Allocator(); + #endif +} -static void init_rjson(rjson *j) +static void init_rjson(rjson *j, lua_State *lua) { - j->mpa = new rj::MemoryPoolAllocator<>; - j->doc = new rj::Document(j->mpa); + j->mpa = make_memory_allocator(lua); + j->doc = new Document(j->mpa); j->val = NULL; - j->refs = new std::unordered_map; + j->refs = new std::unordered_map; init_rjson_buffer(&j->insitu); } -static rj::Value* check_value(lua_State *lua) +static Value* check_value(lua_State *lua) { int n = lua_gettop(lua); luaL_argcheck(lua, n >= 1 && n <= 2, 0, "invalid number of arguments"); rjson *j = static_cast(luaL_checkudata(lua, 1, mozsvc_rjson)); - rj::Value *v = static_cast(lua_touserdata(lua, 2)); + Value *v = static_cast(lua_touserdata(lua, 2)); if (!v) { int t = lua_type(lua, 2); if (t == LUA_TNONE) { @@ -126,7 +156,7 @@ static int iter_gc(lua_State *lua) } -static void delete_owned_refs(std::unordered_map *refs) +static void delete_owned_refs(std::unordered_map *refs) { auto end = refs->end(); for (auto it = refs->begin(); it != end; ++it) { @@ -160,13 +190,14 @@ static int rjson_parse_schema(lua_State *lua) lua_setmetatable(lua, -2); { // allows doc to be destroyed before the longjmp - rj::Document doc; + Allocator *a = make_memory_allocator(lua); + Document doc (a); if (doc.Parse(json).HasParseError()) { lua_pushfstring(lua, "failed to parse offset:%f %s", (lua_Number)doc.GetErrorOffset(), rj::GetParseError_En(doc.GetParseError())); } else { - hs->doc = new rj::SchemaDocument(doc); + hs->doc = new 
SchemaDocument(doc, NULL, a); if (!hs->doc) { lua_pushstring(lua, "memory allocation failed"); } @@ -188,7 +219,7 @@ static int rjson_parse(lua_State *lua) luaL_typerror(lua, 2, "boolean"); } rjson *j = static_cast(lua_newuserdata(lua, sizeof*j)); - init_rjson(j); + init_rjson(j, lua); luaL_getmetatable(lua, mozsvc_rjson); lua_setmetatable(lua, -2); @@ -259,18 +290,17 @@ static int rjson_dparse(lua_State *lua) static int rjson_validate(lua_State *lua) { - rjson *j = static_cast - (luaL_checkudata(lua, 1, mozsvc_rjson)); + rjson *j = static_cast(luaL_checkudata(lua, 1, mozsvc_rjson)); rjson_schema *hs = static_cast (luaL_checkudata(lua, 2, mozsvc_rjson_schema)); - rj::SchemaValidator validator(*hs->doc); - rj::Value *v = j->doc ? j->doc : j->val; + SchemaValidator validator(*hs->doc, j->mpa); + Value *v = j->doc ? j->doc : j->val; if (!v->Accept(validator)) { lua_pushboolean(lua, false); luaL_Buffer b; luaL_buffinit(lua, &b); - rj::StringBuffer sb; + StringBuffer sb(j->mpa); validator.GetInvalidSchemaPointer().StringifyUriFragment(sb); luaL_addstring(&b, "SchemaURI: "); luaL_addstring(&b, sb.GetString()); @@ -290,7 +320,7 @@ static int rjson_find(lua_State *lua) { rjson *j = static_cast(luaL_checkudata(lua, 1, mozsvc_rjson)); int start = 3; - rj::Value *v = static_cast(lua_touserdata(lua, 2)); + Value *v = static_cast(lua_touserdata(lua, 2)); if (!v) { v = j->doc ? 
j->doc : j->val; start = 2; @@ -307,7 +337,7 @@ static int rjson_find(lua_State *lua) lua_pushnil(lua); return 1; } - rj::Value::MemberIterator itr = v->FindMember(lua_tostring(lua, i)); + Value::MemberIterator itr = v->FindMember(lua_tostring(lua, i)); if (itr == v->MemberEnd()) { lua_pushnil(lua); return 1; @@ -342,7 +372,7 @@ static int rjson_find(lua_State *lua) static int rjson_type(lua_State *lua) { - rj::Value *v = check_value(lua); + Value *v = check_value(lua); if (!v) { lua_pushnil(lua); return 1; @@ -375,7 +405,7 @@ static int rjson_type(lua_State *lua) static int rjson_size(lua_State *lua) { - rj::Value *v = check_value(lua); + Value *v = check_value(lua); if (!v) { lua_pushnil(lua); return 1; @@ -407,7 +437,7 @@ static int rjson_object_iter(lua_State *lua) { rjson_object_iterator *hoi = static_cast (lua_touserdata(lua, lua_upvalueindex(1))); - rj::Value *v = (rj::Value *)lua_touserdata(lua, lua_upvalueindex(2)); + Value *v = (Value *)lua_touserdata(lua, lua_upvalueindex(2)); rjson *j = (rjson *)lua_touserdata(lua, lua_upvalueindex(3)); if (j->refs->find(v) == j->refs->end()) { @@ -415,7 +445,7 @@ static int rjson_object_iter(lua_State *lua) } if (*hoi->it != *hoi->end) { - rj::Value *next = &(*hoi->it)->value; + Value *next = &(*hoi->it)->value; j->refs->insert(std::make_pair(next, false)); lua_pushlstring(lua, (*hoi->it)->name.GetString(), (size_t)(*hoi->it)->name.GetStringLength()); @@ -433,7 +463,7 @@ static int rjson_array_iter(lua_State *lua) { rj::SizeType it = (rj::SizeType)lua_tonumber(lua, lua_upvalueindex(1)); rj::SizeType end = (rj::SizeType)lua_tonumber(lua, lua_upvalueindex(2)); - rj::Value *v = (rj::Value *)lua_touserdata(lua, lua_upvalueindex(3)); + Value *v = (Value *)lua_touserdata(lua, lua_upvalueindex(3)); rjson *j = (rjson *)lua_touserdata(lua, lua_upvalueindex(4)); if (j->refs->find(v) == j->refs->end()) { @@ -441,7 +471,7 @@ static int rjson_array_iter(lua_State *lua) } if (it != end) { - rj::Value *next = &(*v)[it]; + Value *next 
= &(*v)[it]; j->refs->insert(std::make_pair(next, false)); lua_pushnumber(lua, (lua_Number)it); lua_pushlightuserdata(lua, next); @@ -459,7 +489,7 @@ static int rjson_array_iter(lua_State *lua) static int rjson_value(lua_State *lua) { - rj::Value *v = check_value(lua); + Value *v = check_value(lua); if (!v) { lua_pushnil(lua); return 1; @@ -492,7 +522,7 @@ static int rjson_value(lua_State *lua) static int rjson_iter(lua_State *lua) { - rj::Value *v = check_value(lua); + Value *v = check_value(lua); if (!v) { lua_pushnil(lua); return 1; @@ -503,8 +533,8 @@ static int rjson_iter(lua_State *lua) { rjson_object_iterator *hoi = static_cast (lua_newuserdata(lua, sizeof*hoi)); - hoi->it = new rj::Value::MemberIterator; - hoi->end = new rj::Value::MemberIterator; + hoi->it = new Value::MemberIterator; + hoi->end = new Value::MemberIterator; luaL_getmetatable(lua, mozsvc_rjson_object_iter); lua_setmetatable(lua, -2); if (!hoi->it || !hoi->end) { @@ -534,11 +564,11 @@ static int rjson_iter(lua_State *lua) } -static rj::Value* remove_value(lua_State *lua, bool shallow) +static Value* remove_value(lua_State *lua, bool shallow) { rjson *j = static_cast(luaL_checkudata(lua, 1, mozsvc_rjson)); - rj::Value *v = static_cast(lua_touserdata(lua, 2)); - rj::Value *rv = NULL; + Value *v = static_cast(lua_touserdata(lua, 2)); + Value *rv = NULL; int n = lua_gettop(lua); int start = 3; @@ -559,13 +589,13 @@ static rj::Value* remove_value(lua_State *lua, bool shallow) if (!v->IsObject()) { return rv; } - rj::Value::MemberIterator itr = v->FindMember(lua_tostring(lua, i)); + Value::MemberIterator itr = v->FindMember(lua_tostring(lua, i)); if (itr == v->MemberEnd()) { return rv; } if (i == n) { j->refs->erase(&itr->value); - rv = new rj::Value; + rv = new Value; if (shallow) { j->refs->insert(std::make_pair(rv, true)); } @@ -587,7 +617,7 @@ static rj::Value* remove_value(lua_State *lua, bool shallow) } if (i == n) { j->refs->erase(&(*v)[idx]); - rv = new rj::Value; + rv = new Value; if 
(shallow) { j->refs->insert(std::make_pair(rv, true)); } @@ -608,16 +638,16 @@ static rj::Value* remove_value(lua_State *lua, bool shallow) static int rjson_remove(lua_State *lua) { - rj::Value *v = remove_value(lua, false); + Value *v = remove_value(lua, false); if (!v) { lua_pushnil(lua); return 1; } rjson *nv = static_cast(lua_newuserdata(lua, sizeof*nv)); - nv->mpa = new rj::MemoryPoolAllocator<>; + nv->mpa = make_memory_allocator(lua); nv->doc = NULL; - nv->val = new rj::Value(*v, *nv->mpa); // deep copy - nv->refs = new std::unordered_map; + nv->val = new Value(*v, *nv->mpa); // deep copy + nv->refs = new std::unordered_map; init_rjson_buffer(&nv->insitu); luaL_getmetatable(lua, mozsvc_rjson); lua_setmetatable(lua, -2); @@ -634,7 +664,7 @@ static int rjson_remove(lua_State *lua) static int rjson_remove_shallow(lua_State *lua) { - rj::Value *v = remove_value(lua, true); + Value *v = remove_value(lua, true); if (!v) { lua_pushnil(lua); return 1; @@ -745,7 +775,7 @@ class OutputBufferWrapper { static int rjson_make_field(lua_State *lua) { - rj::Value *v = check_value(lua); + Value *v = check_value(lua); if (!v) { lua_pushnil(lua); return 1; @@ -767,7 +797,7 @@ static int output_rjson(lua_State *lua) lsb_output_buffer *ob = static_cast (lua_touserdata(lua, -1)); rjson *j = static_cast(lua_touserdata(lua, -2)); - rj::Value *v = static_cast(lua_touserdata(lua, -3)); + Value *v = static_cast(lua_touserdata(lua, -3)); if (!(ob && j)) { return 1; } @@ -907,7 +937,7 @@ static int rjson_parse_message(lua_State *lua) if (!json.s) return luaL_error(lua, "field not found"); rjson *j = static_cast(lua_newuserdata(lua, sizeof*j)); - init_rjson(j); + init_rjson(j, lua); luaL_getmetatable(lua, mozsvc_rjson); lua_setmetatable(lua, -2); diff --git a/rjson/rjson_allocator.h b/rjson/rjson_allocator.h new file mode 100644 index 000000000..d065321e3 --- /dev/null +++ b/rjson/rjson_allocator.h @@ -0,0 +1,159 @@ +#include "luasandbox/heka/sandbox.h" +#include + +class 
SandboxMemoryAllocator { +public: + size_t current_capacity = 0; + static const bool kNeedFree = false; //!< Tells users there is no need to call Free() with this allocator. (concept Allocator) + /*! No-args constructor. Invoked in some rapidjson internals, will result in untracked memory usage. + */ + SandboxMemoryAllocator() : + chunkHead_(0), chunk_capacity_(kDefaultChunkCapacity), baseAllocator_(0), ownBaseAllocator_(0), hsb_(0) + { + } + + //! Constructor with sandbox memory tracking. + /*! \param hsb Heka sandbox whose external memory usage is adjusted as chunks are added or cleared. + When null, no usage is reported to a sandbox. + */ + SandboxMemoryAllocator(lsb_heka_sandbox *hsb) : + chunkHead_(0), chunk_capacity_(kDefaultChunkCapacity), baseAllocator_(0), ownBaseAllocator_(0), hsb_(hsb) + { + } + + //! Destructor. + /*! This deallocates all memory chunks and the base allocator created by this object. + */ + ~SandboxMemoryAllocator() { + Clear(); + RAPIDJSON_DELETE(ownBaseAllocator_); + } + + //! Deallocates all memory chunks. + void Clear() { + while (chunkHead_) { + ChunkHeader* next = chunkHead_->next; + baseAllocator_->Free(chunkHead_); + chunkHead_ = next; + } + UpdateCapacity(0); + } + + //! Computes the total capacity of allocated memory chunks. + /*! \return total capacity in bytes. + */ + size_t Capacity() const { + return current_capacity; + } + + //! Computes the memory blocks allocated. + /*! \return total used bytes. + */ + size_t Size() const { + size_t size = 0; + for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) + size += c->size; + return size; + } + + //! Allocates a memory block. (concept Allocator) + void* Malloc(size_t size) { + if (!size) + return NULL; + + size = RAPIDJSON_ALIGN(size); + if (chunkHead_ == 0 || chunkHead_->size + size > chunkHead_->capacity) + if (!AddChunk(chunk_capacity_ > size ? 
chunk_capacity_ : size)) + return NULL; + + void *buffer = reinterpret_cast(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size; + chunkHead_->size += size; + return buffer; + } + + //! Resizes a memory block (concept Allocator) + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { + if (originalPtr == 0) + return Malloc(newSize); + + if (newSize == 0) + return NULL; + + originalSize = RAPIDJSON_ALIGN(originalSize); + newSize = RAPIDJSON_ALIGN(newSize); + + // Do not shrink if new size is smaller than original + if (originalSize >= newSize) + return originalPtr; + + // Simply expand it if it is the last allocation and there is sufficient space + if (originalPtr == reinterpret_cast(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size - originalSize) { + size_t increment = static_cast(newSize - originalSize); + if (chunkHead_->size + increment <= chunkHead_->capacity) { + chunkHead_->size += increment; + return originalPtr; + } + } + + // Realloc process: allocate and copy memory, do not free original buffer. + if (void* newBuffer = Malloc(newSize)) { + if (originalSize) + std::memcpy(newBuffer, originalPtr, originalSize); + return newBuffer; + } + else + return NULL; + } + + //! Frees a memory block (concept Allocator) + static void Free(void *ptr) { (void)ptr; } // Do nothing + +private: + //! Copy constructor is not permitted. + SandboxMemoryAllocator(const SandboxMemoryAllocator& rhs) /* = delete */; + //! Copy assignment operator is not permitted. + SandboxMemoryAllocator& operator=(const SandboxMemoryAllocator& rhs) /* = delete */; + + //! Creates a new chunk. + /*! \param capacity Capacity of the chunk in bytes. + \return true if success. 
+ */ + bool AddChunk(size_t capacity) { + if (!baseAllocator_) + ownBaseAllocator_ = baseAllocator_ = RAPIDJSON_NEW(rapidjson::CrtAllocator)(); + UpdateCapacity(current_capacity + capacity); + if (ChunkHeader* chunk = reinterpret_cast(baseAllocator_->Malloc(RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + capacity))) { + chunk->capacity = capacity; + chunk->size = 0; + chunk->next = chunkHead_; + chunkHead_ = chunk; + return true; + } + else + return false; + } + + void UpdateCapacity(size_t capacity) { + if (hsb_) { + lsb_heka_adjust_ext_memory_usage(hsb_, capacity - current_capacity); + current_capacity = capacity; + } + } + + static const int kDefaultChunkCapacity = 64 * 1024; //!< Default chunk capacity. + + //! Chunk header for prepending to each chunk. + /*! Chunks are stored as a singly linked list. + */ + struct ChunkHeader { + size_t capacity; //!< Capacity of the chunk in bytes (excluding the header itself). + size_t size; //!< Current size of allocated memory in bytes. + ChunkHeader *next; //!< Next chunk in the linked list. + }; + + ChunkHeader *chunkHead_; //!< Head of the chunk linked-list. Only the head chunk serves allocation. + size_t chunk_capacity_; //!< The minimum capacity of a chunk when it is allocated. + rapidjson::CrtAllocator* baseAllocator_; //!< base allocator for allocating memory chunks. + rapidjson::CrtAllocator* ownBaseAllocator_; //!< base allocator created by this object. + lsb_heka_sandbox *hsb_; +}; diff --git a/rjson/test_sandbox.c b/rjson/test_sandbox.c index 3ba5f1307..328cd320d 100644 --- a/rjson/test_sandbox.c +++ b/rjson/test_sandbox.c @@ -4,7 +4,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -/** @brief rjson luasandox tests @file */ +/** @brief rjson luasandbox tests @file */ #include #include @@ -66,6 +66,8 @@ static char* test_rjson_sandbox() "max_message_size = 8196\n" TEST_MODULE_PATH, &logger, iim); + lsb_heka_stats stats = lsb_heka_get_stats(hsb); + mu_assert(0 < stats.ext_mem_max, "received %llu", stats.ext_mem_max); mu_assert(hsb, "lsb_heka_create_input failed"); e = lsb_heka_destroy_sandbox(hsb); return NULL;