From d65e22d27ab305d38059046dae60d7a66ff4a4e0 Mon Sep 17 00:00:00 2001 From: Qiaowei Ren Date: Tue, 11 Sep 2018 10:54:20 +0800 Subject: ceph: shared persistent read-only rbd cache This patch introduces introduces RBD shared persistent RO cache which can provide client-side sharing cache for rbd clone/snapshot case. Change-Id: Icad8063f4f10b1ab4ce31920e90d5affa7d0abdc Signed-off-by: Qiaowei Ren Signed-off-by: Dehao Shang Signed-off-by: Tushar Gohad Signed-off-by: Jason Dillaman Signed-off-by: Yuan Zhou --- .../0006-librbd-LRU-policy-based-eviction.patch | 403 +++++++++++++++++++++ 1 file changed, 403 insertions(+) create mode 100644 src/ceph/0006-librbd-LRU-policy-based-eviction.patch (limited to 'src/ceph/0006-librbd-LRU-policy-based-eviction.patch') diff --git a/src/ceph/0006-librbd-LRU-policy-based-eviction.patch b/src/ceph/0006-librbd-LRU-policy-based-eviction.patch new file mode 100644 index 0000000..eb99e3a --- /dev/null +++ b/src/ceph/0006-librbd-LRU-policy-based-eviction.patch @@ -0,0 +1,403 @@ +From b233d6540160c8bc5cc25b870c2140fa48776fa6 Mon Sep 17 00:00:00 2001 +From: Dehao Shang +Date: Mon, 6 Aug 2018 22:42:38 +0800 +Subject: [PATCH 06/10] librbd: LRU policy based eviction + +Signed-off-by: Dehao Shang +Signed-off-by: Yuan Zhou +--- + src/tools/rbd_cache/ObjectCacheStore.cc | 73 ++++++++----- + src/tools/rbd_cache/ObjectCacheStore.h | 14 +-- + src/tools/rbd_cache/Policy.hpp | 22 ++++ + src/tools/rbd_cache/SimplePolicy.hpp | 180 ++++++++++++++++++++++++++++++++ + 4 files changed, 254 insertions(+), 35 deletions(-) + create mode 100644 src/tools/rbd_cache/Policy.hpp + create mode 100644 src/tools/rbd_cache/SimplePolicy.hpp + +diff --git a/src/tools/rbd_cache/ObjectCacheStore.cc b/src/tools/rbd_cache/ObjectCacheStore.cc +index 2a87469..b39fe66 100644 +--- a/src/tools/rbd_cache/ObjectCacheStore.cc ++++ b/src/tools/rbd_cache/ObjectCacheStore.cc +@@ -14,12 +14,12 @@ namespace cache { + + ObjectCacheStore::ObjectCacheStore(CephContext *cct, ContextWQ* work_queue) + : m_cct(cct), m_work_queue(work_queue), +- m_cache_table_lock("rbd::cache::ObjectCacheStore"), + m_rados(new librados::Rados()) { ++ m_policy = new SimplePolicy(4096, 0.9); // TODO + } + + ObjectCacheStore::~ObjectCacheStore() { +- ++ delete m_policy; + } + + int ObjectCacheStore::init(bool reset) { +@@ -43,6 +43,7 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name) + int ret = 0; + std::string cache_file_name = pool_name + object_name; + ++ //TODO(): lock on ioctx map + if (m_ioctxs.find(pool_name) == m_ioctxs.end()) { + librados::IoCtx* io_ctx = new librados::IoCtx(); + ret = m_rados->ioctx_create(pool_name.c_str(), *io_ctx); +@@ -58,10 +59,8 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name) + librados::IoCtx* ioctx = m_ioctxs[pool_name]; + + //promoting: update metadata +- { +- Mutex::Locker locker(m_cache_table_lock); +- m_cache_table.emplace(cache_file_name, PROMOTING); +- } ++ m_policy->update_status(cache_file_name, PROMOTING); ++ assert(PROMOTING == m_policy->get_status(cache_file_name)); + + librados::bufferlist* read_buf = new librados::bufferlist(); + int object_size = 4096*1024; //TODO(): read config from image metadata +@@ -83,42 +82,60 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name) + cache_file.open(); + ret = cache_file.write_object_to_file(*read_buf, object_size); + +- assert(m_cache_table.find(cache_file_name) != m_cache_table.end()); +- + // update metadata +- { +- Mutex::Locker locker(m_cache_table_lock); +- m_cache_table.emplace(cache_file_name, PROMOTED); +- } ++ assert(PROMOTING == m_policy->get_status(cache_file_name)); ++ m_policy->update_status(cache_file_name, PROMOTED); ++ assert(PROMOTED == m_policy->get_status(cache_file_name)); + + return ret; + + } + ++// return -1, client need to read data from cluster. ++// return 0, client directly read data from cache. + int ObjectCacheStore::lookup_object(std::string pool_name, std::string object_name) { + + std::string cache_file_name = pool_name + object_name; +- { +- Mutex::Locker locker(m_cache_table_lock); +- +- auto it = m_cache_table.find(cache_file_name); +- if (it != m_cache_table.end()) { +- +- if (it->second == PROMOTING) { +- return -1; +- } else if (it->second == PROMOTED) { +- return 0; +- } else { +- assert(0); +- } +- } ++ ++ // TODO lookup and return status; ++ ++ CACHESTATUS ret; ++ ret = m_policy->lookup_object(cache_file_name); ++ ++ switch(ret) { ++ case NONE: ++ return do_promote(pool_name, object_name); ++ case PROMOTING: ++ return -1; ++ case PROMOTED: ++ return 0; ++ default: ++ return -1; + } ++} + +- int ret = do_promote(pool_name, object_name); ++void ObjectCacheStore::evict_thread_body() { ++ int ret; ++ while(m_evict_go) { ++ std::string temp_cache_file; + +- return ret; ++ ret = m_policy->evict_object(temp_cache_file); ++ if(ret == 0) { ++ continue; ++ } ++ ++ // TODO ++ // delete temp_cache_file file. ++ ++ assert(EVICTING == m_policy->get_status(temp_cache_file)); ++ ++ m_policy->update_status(temp_cache_file, EVICTED); ++ ++ assert(NONE == m_policy->get_status(temp_cache_file)); ++ } + } + ++ + int ObjectCacheStore::shutdown() { + m_rados->shutdown(); + return 0; +diff --git a/src/tools/rbd_cache/ObjectCacheStore.h b/src/tools/rbd_cache/ObjectCacheStore.h +index db09efa..5118a73 100644 +--- a/src/tools/rbd_cache/ObjectCacheStore.h ++++ b/src/tools/rbd_cache/ObjectCacheStore.h +@@ -13,6 +13,7 @@ + #include "librbd/ImageCtx.h" + #include "librbd/ImageState.h" + #include "librbd/cache/SharedPersistentObjectCacherFile.h" ++#include "SimplePolicy.hpp" + + using librados::Rados; + using librados::IoCtx; +@@ -39,6 +40,8 @@ class ObjectCacheStore + + int lock_cache(std::string vol_name); + ++ void evict_thread_body(); ++ + private: + int _evict_object(); + +@@ -48,21 +51,18 @@ class ObjectCacheStore + librados::bufferlist* read_buf, + uint64_t length); + +- enum { +- PROMOTING = 0, +- PROMOTED, +- }; +- + CephContext *m_cct; + ContextWQ* m_work_queue; +- Mutex m_cache_table_lock; + RadosRef m_rados; + +- std::map m_cache_table; + + std::map m_ioctxs; + + librbd::cache::SyncFile *m_cache_file; ++ ++ Policy* m_policy; ++ ++ bool m_evict_go; + }; + + } // namespace rbd +diff --git a/src/tools/rbd_cache/Policy.hpp b/src/tools/rbd_cache/Policy.hpp +new file mode 100644 +index 0000000..575c294 +--- /dev/null ++++ b/src/tools/rbd_cache/Policy.hpp +@@ -0,0 +1,22 @@ ++#ifndef RBD_CACHE_POLICY_HPP ++#define RBD_CACHE_POLICY_HPP ++ ++enum CACHESTATUS { ++ NONE = 0, ++ PROMOTING, ++ PROMOTED, ++ EVICTING, ++ EVICTED, ++}; ++ ++ ++class Policy { ++public: ++ Policy(){} ++ virtual ~Policy(){}; ++ virtual CACHESTATUS lookup_object(std::string) = 0; ++ virtual int evict_object(std::string&) = 0; ++ virtual void update_status(std::string, CACHESTATUS) = 0; ++ virtual CACHESTATUS get_status(std::string) = 0; ++}; ++#endif +diff --git a/src/tools/rbd_cache/SimplePolicy.hpp b/src/tools/rbd_cache/SimplePolicy.hpp +new file mode 100644 +index 0000000..a0d8de7 +--- /dev/null ++++ b/src/tools/rbd_cache/SimplePolicy.hpp +@@ -0,0 +1,180 @@ ++#ifndef RBD_CACHE_SIMPLE_POLICY_HPP ++#define RBD_CACHE_SIMPLE_POLICY_HPP ++ ++#include "Policy.hpp" ++#include "include/lru.h" ++#include "common/Mutex.h" ++ ++#include ++#include ++#include ++ ++class SimplePolicy : public Policy { ++public: ++ SimplePolicy(uint64_t block_num, float level) ++ : m_level(level), ++ m_lock("SimplePolicy"), ++ m_entry_count(block_num) ++ { ++ ++ Entry m_entries[m_entry_count]; ++ ++ for(auto &entry : m_entries) { ++ m_free_lru.lru_insert_bot(&entry); ++ } ++ } ++ ++ ~SimplePolicy() {} ++ ++ CACHESTATUS lookup_object(std::string cache_file_name) { ++ Mutex::Locker locker(m_lock); ++ ++ auto entry_it = m_oid_to_entry.find(cache_file_name); ++ if(entry_it == m_oid_to_entry.end()) { ++ return NONE; ++ } ++ ++ Entry* entry = entry_it->second; ++ ++ LRU* lru; ++ if(entry->status == PROMOTED) { ++ lru = &m_promoted_lru; ++ } else { ++ lru = &m_handing_lru; ++ } ++ ++ // touch it ++ lru->lru_remove(entry); ++ lru->lru_insert_top(entry); ++ ++ return entry->status; ++ } ++ ++ int evict_object(std::string& out_cache_file_name) { ++ Mutex::Locker locker(m_lock); ++ ++ // still have enough free space, don't need to evict lru. ++ uint64_t temp_current_size = m_oid_to_entry.size(); ++ float temp_current_evict_level = temp_current_size / m_entry_count; ++ if(temp_current_evict_level < m_level) { ++ return 0; ++ } ++ ++ // when all entries are USING, PROMOTING or EVICTING, just busy waiting. ++ if(m_promoted_lru.lru_get_size() == 0) { ++ return 0; ++ } ++ ++ assert(m_promoted_lru.lru_get_size() != 0); ++ ++ // evict one item from promoted lru ++ Entry *entry = reinterpret_cast(m_promoted_lru.lru_get_next_expire()); ++ assert(entry != nullptr); ++ ++ assert(entry->status == PROMOTED); ++ ++ out_cache_file_name = entry->cache_file_name; ++ entry->status = EVICTING; ++ ++ m_promoted_lru.lru_remove(entry); ++ m_handing_lru.lru_insert_top(entry); ++ ++ return 1; ++ } ++ ++ // TODO(): simplify the logic ++ void update_status(std::string _file_name, CACHESTATUS _status) { ++ Mutex::Locker locker(m_lock); ++ ++ Entry* entry; ++ auto entry_it = m_oid_to_entry.find(_file_name); ++ ++ // just check. ++ if(_status == PROMOTING) { ++ assert(m_oid_to_entry.find(_file_name) == m_oid_to_entry.end()); ++ } ++ ++ // miss this object. ++ if(entry_it == m_oid_to_entry.end() && _status == PROMOTING) { ++ entry = reinterpret_cast(m_free_lru.lru_get_next_expire()); ++ if(entry == nullptr) { ++ assert(0); // namely evict thread have some problems. ++ } ++ ++ entry->status = PROMOTING; ++ ++ m_oid_to_entry[_file_name] = entry; ++ m_free_lru.lru_remove(entry); ++ m_handing_lru.lru_insert_top(entry); ++ ++ return; ++ } ++ ++ assert(entry_it != m_oid_to_entry.end()); ++ ++ entry = entry_it->second; ++ ++ // promoting action have been finished, so update it. ++ if(entry->status == PROMOTING && _status== PROMOTED) { ++ m_handing_lru.lru_remove(entry); ++ m_promoted_lru.lru_insert_top(entry); ++ entry->status = PROMOTED; ++ return; ++ } ++ ++ // will delete this cache file ++ if(entry->status == PROMOTED && _status == EVICTING) { ++ m_promoted_lru.lru_remove(entry); ++ m_handing_lru.lru_insert_top(entry); ++ entry->status = EVICTING; ++ return; ++ } ++ ++ ++ if(_status == EVICTED) { ++ m_oid_to_entry.erase(entry_it); ++ m_handing_lru.lru_remove(entry); ++ m_free_lru.lru_insert_bot(entry); ++ return; ++ } ++ ++ assert(0); ++ } ++ ++ // get entry status ++ CACHESTATUS get_status(std::string _file_name) { ++ Mutex::Locker locker(m_lock); ++ auto entry_it = m_oid_to_entry.find(_file_name); ++ if(entry_it == m_oid_to_entry.end()) { ++ return NONE; ++ } ++ ++ return entry_it->second->status; ++ } ++ ++ ++private: ++ ++ class Entry : public LRUObject { ++ public: ++ CACHESTATUS status; ++ Entry() : status(NONE){} ++ std::string cache_file_name; ++ void encode(bufferlist &bl){} ++ void decode(bufferlist::iterator &it){} ++ }; ++ ++ std::unordered_map m_oid_to_entry; ++ ++ LRU m_free_lru; ++ LRU m_handing_lru; // include promoting status or evicting status ++ LRU m_promoted_lru; // include promoted, using status. ++ ++ mutable Mutex m_lock; ++ ++ float m_level; ++ uint64_t m_entry_count; ++ ++}; ++ ++#endif +-- +2.7.4 + -- cgit 1.2.3-korg