diff options
Diffstat (limited to 'src/ceph/0007-librbd-cleanup-policy-based-promotion-eviction.patch')
-rw-r--r-- | src/ceph/0007-librbd-cleanup-policy-based-promotion-eviction.patch | 512 |
1 files changed, 512 insertions, 0 deletions
diff --git a/src/ceph/0007-librbd-cleanup-policy-based-promotion-eviction.patch b/src/ceph/0007-librbd-cleanup-policy-based-promotion-eviction.patch new file mode 100644 index 0000000..010407b --- /dev/null +++ b/src/ceph/0007-librbd-cleanup-policy-based-promotion-eviction.patch @@ -0,0 +1,512 @@ +From dd4804fb05ad8aca51516b0112975cc91ef85a6b Mon Sep 17 00:00:00 2001 +From: Yuan Zhou <yuan.zhou@intel.com> +Date: Wed, 8 Aug 2018 15:31:47 +0800 +Subject: [PATCH 07/10] librbd: cleanup policy based promotion/eviction + +Signed-off-by: Yuan Zhou <yuan.zhou@intel.com> +--- + src/common/options.cc | 4 + + .../rbd_cache/CacheControllerSocketClient.hpp | 3 +- + src/tools/rbd_cache/ObjectCacheStore.cc | 63 +++---- + src/tools/rbd_cache/ObjectCacheStore.h | 10 +- + src/tools/rbd_cache/Policy.hpp | 18 +- + src/tools/rbd_cache/SimplePolicy.hpp | 188 +++++++++------------ + 6 files changed, 141 insertions(+), 145 deletions(-) + +diff --git a/src/common/options.cc b/src/common/options.cc +index 7839a31..b334c1e 100644 +--- a/src/common/options.cc ++++ b/src/common/options.cc +@@ -6365,6 +6365,10 @@ static std::vector<Option> get_rbd_options() { + .set_default("/tmp") + .set_description("shared ssd caching data dir"), + ++ Option("rbd_shared_cache_entries", Option::TYPE_INT, Option::LEVEL_ADVANCED) ++ .set_default(4096) ++ .set_description("shared ssd caching data entries"), ++ + Option("rbd_non_blocking_aio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED) + .set_default(true) + .set_description("process AIO ops from a dispatch thread to prevent blocking"), +diff --git a/src/tools/rbd_cache/CacheControllerSocketClient.hpp b/src/tools/rbd_cache/CacheControllerSocketClient.hpp +index 4e1f36c..56b79ce 100644 +--- a/src/tools/rbd_cache/CacheControllerSocketClient.hpp ++++ b/src/tools/rbd_cache/CacheControllerSocketClient.hpp +@@ -90,7 +90,8 @@ public: + } + }); + std::unique_lock<std::mutex> lk(m); +- cv.wait(lk); ++ //cv.wait(lk); ++ cv.wait_for(lk, std::chrono::milliseconds(100)); + return 0; + } + +diff --git a/src/tools/rbd_cache/ObjectCacheStore.cc b/src/tools/rbd_cache/ObjectCacheStore.cc +index b39fe66..99f90d6 100644 +--- a/src/tools/rbd_cache/ObjectCacheStore.cc ++++ b/src/tools/rbd_cache/ObjectCacheStore.cc +@@ -15,7 +15,12 @@ namespace cache { + ObjectCacheStore::ObjectCacheStore(CephContext *cct, ContextWQ* work_queue) + : m_cct(cct), m_work_queue(work_queue), + m_rados(new librados::Rados()) { +- m_policy = new SimplePolicy(4096, 0.9); // TODO ++ ++ uint64_t object_cache_entries = ++ cct->_conf.get_val<int64_t>("rbd_shared_cache_entries"); ++ ++ //TODO(): allow to set level ++ m_policy = new SimplePolicy(object_cache_entries, 0.5); + } + + ObjectCacheStore::~ObjectCacheStore() { +@@ -35,7 +40,16 @@ int ObjectCacheStore::init(bool reset) { + lderr(m_cct) << "fail to conect to cluster" << dendl; + return ret; + } +- //TODO(): check existing cache objects ++ ++ std::string cache_path = m_cct->_conf.get_val<std::string>("rbd_shared_cache_path"); ++ //TODO(): check and reuse existing cache objects ++ if(reset) { ++ std::string cmd = "exec rm -rf " + cache_path + "/rbd_cache*; exec mkdir -p " + cache_path; ++ //TODO(): to use std::filesystem ++ int r = system(cmd.c_str()); ++ } ++ ++ evict_thd = new std::thread([this]{this->evict_thread_body();}); + return ret; + } + +@@ -58,10 +72,6 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name) + + librados::IoCtx* ioctx = m_ioctxs[pool_name]; + +- //promoting: update metadata +- m_policy->update_status(cache_file_name, PROMOTING); +- assert(PROMOTING == m_policy->get_status(cache_file_name)); +- + librados::bufferlist* read_buf = new librados::bufferlist(); + int object_size = 4096*1024; //TODO(): read config from image metadata + +@@ -83,9 +93,9 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name) + ret = cache_file.write_object_to_file(*read_buf, object_size); + + // update metadata +- assert(PROMOTING == m_policy->get_status(cache_file_name)); +- m_policy->update_status(cache_file_name, PROMOTED); +- assert(PROMOTED == m_policy->get_status(cache_file_name)); ++ assert(OBJ_CACHE_PROMOTING == m_policy->get_status(cache_file_name)); ++ m_policy->update_status(cache_file_name, OBJ_CACHE_PROMOTED); ++ assert(OBJ_CACHE_PROMOTED == m_policy->get_status(cache_file_name)); + + return ret; + +@@ -97,18 +107,15 @@ int ObjectCacheStore::lookup_object(std::string pool_name, std::string object_na + + std::string cache_file_name = pool_name + object_name; + +- // TODO lookup and return status; +- + CACHESTATUS ret; + ret = m_policy->lookup_object(cache_file_name); + + switch(ret) { +- case NONE: ++ case OBJ_CACHE_NONE: + return do_promote(pool_name, object_name); +- case PROMOTING: +- return -1; +- case PROMOTED: ++ case OBJ_CACHE_PROMOTED: + return 0; ++ case OBJ_CACHE_PROMOTING: + default: + return -1; + } +@@ -117,26 +124,14 @@ int ObjectCacheStore::lookup_object(std::string pool_name, std::string object_na + void ObjectCacheStore::evict_thread_body() { + int ret; + while(m_evict_go) { +- std::string temp_cache_file; +- +- ret = m_policy->evict_object(temp_cache_file); +- if(ret == 0) { +- continue; +- } +- +- // TODO +- // delete temp_cache_file file. +- +- assert(EVICTING == m_policy->get_status(temp_cache_file)); +- +- m_policy->update_status(temp_cache_file, EVICTED); +- +- assert(NONE == m_policy->get_status(temp_cache_file)); ++ ret = evict_objects(); + } + } + + + int ObjectCacheStore::shutdown() { ++ m_evict_go = false; ++ evict_thd->join(); + m_rados->shutdown(); + return 0; + } +@@ -165,5 +160,13 @@ int ObjectCacheStore::promote_object(librados::IoCtx* ioctx, std::string object_ + + } + ++int ObjectCacheStore::evict_objects() { ++ std::list<std::string> obj_list; ++ m_policy->get_evict_list(&obj_list); ++ for (auto& obj: obj_list) { ++ //do_evict(obj); ++ } ++} ++ + } // namespace cache + } // namespace rbd +diff --git a/src/tools/rbd_cache/ObjectCacheStore.h b/src/tools/rbd_cache/ObjectCacheStore.h +index 5118a73..ba0e1f1 100644 +--- a/src/tools/rbd_cache/ObjectCacheStore.h ++++ b/src/tools/rbd_cache/ObjectCacheStore.h +@@ -15,6 +15,7 @@ + #include "librbd/cache/SharedPersistentObjectCacherFile.h" + #include "SimplePolicy.hpp" + ++ + using librados::Rados; + using librados::IoCtx; + +@@ -40,10 +41,9 @@ class ObjectCacheStore + + int lock_cache(std::string vol_name); + +- void evict_thread_body(); +- + private: +- int _evict_object(); ++ void evict_thread_body(); ++ int evict_objects(); + + int do_promote(std::string pool_name, std::string object_name); + +@@ -61,8 +61,8 @@ class ObjectCacheStore + librbd::cache::SyncFile *m_cache_file; + + Policy* m_policy; +- +- bool m_evict_go; ++ std::thread* evict_thd; ++ bool m_evict_go = false; + }; + + } // namespace rbd +diff --git a/src/tools/rbd_cache/Policy.hpp b/src/tools/rbd_cache/Policy.hpp +index 575c294..711e3bd 100644 +--- a/src/tools/rbd_cache/Policy.hpp ++++ b/src/tools/rbd_cache/Policy.hpp +@@ -1,12 +1,16 @@ + #ifndef RBD_CACHE_POLICY_HPP + #define RBD_CACHE_POLICY_HPP + ++#include <list> ++#include <string> ++ ++namespace rbd { ++namespace cache { ++ + enum CACHESTATUS { +- NONE = 0, +- PROMOTING, +- PROMOTED, +- EVICTING, +- EVICTED, ++ OBJ_CACHE_NONE = 0, ++ OBJ_CACHE_PROMOTING, ++ OBJ_CACHE_PROMOTED, + }; + + +@@ -18,5 +22,9 @@ public: + virtual int evict_object(std::string&) = 0; + virtual void update_status(std::string, CACHESTATUS) = 0; + virtual CACHESTATUS get_status(std::string) = 0; ++ virtual void get_evict_list(std::list<std::string>* obj_list) = 0; + }; ++ ++} // namespace cache ++} // namespace rbd + #endif +diff --git a/src/tools/rbd_cache/SimplePolicy.hpp b/src/tools/rbd_cache/SimplePolicy.hpp +index a0d8de7..e785de1 100644 +--- a/src/tools/rbd_cache/SimplePolicy.hpp ++++ b/src/tools/rbd_cache/SimplePolicy.hpp +@@ -3,138 +3,93 @@ + + #include "Policy.hpp" + #include "include/lru.h" ++#include "common/RWLock.h" + #include "common/Mutex.h" + + #include <vector> + #include <unordered_map> + #include <string> + ++namespace rbd { ++namespace cache { ++ ++ + class SimplePolicy : public Policy { + public: +- SimplePolicy(uint64_t block_num, float level) +- : m_level(level), +- m_lock("SimplePolicy"), +- m_entry_count(block_num) ++ SimplePolicy(uint64_t block_num, float watermark) ++ : m_watermark(watermark), m_entry_count(block_num), ++ m_cache_map_lock("rbd::cache::SimplePolicy::m_cache_map_lock"), ++ m_free_list_lock("rbd::cache::SimplePolicy::m_free_list_lock") + { + +- Entry m_entries[m_entry_count]; +- +- for(auto &entry : m_entries) { +- m_free_lru.lru_insert_bot(&entry); ++ for(uint64_t i = 0; i < m_entry_count; i++) { ++ m_free_list.push_back(new Entry()); + } ++ + } + +- ~SimplePolicy() {} ++ ~SimplePolicy() { ++ for(uint64_t i = 0; i < m_entry_count; i++) { ++ Entry* entry = reinterpret_cast<Entry*>(m_free_list.front()); ++ delete entry; ++ m_free_list.pop_front(); ++ } ++ } + + CACHESTATUS lookup_object(std::string cache_file_name) { +- Mutex::Locker locker(m_lock); + +- auto entry_it = m_oid_to_entry.find(cache_file_name); +- if(entry_it == m_oid_to_entry.end()) { +- return NONE; ++ //TODO(): check race condition ++ RWLock::WLocker wlocker(m_cache_map_lock); ++ ++ auto entry_it = m_cache_map.find(cache_file_name); ++ if(entry_it == m_cache_map.end()) { ++ Mutex::Locker locker(m_free_list_lock); ++ Entry* entry = reinterpret_cast<Entry*>(m_free_list.front()); ++ assert(entry != nullptr); ++ m_free_list.pop_front(); ++ entry->status = OBJ_CACHE_PROMOTING; ++ ++ m_cache_map[cache_file_name] = entry; ++ ++ return OBJ_CACHE_NONE; + } + + Entry* entry = entry_it->second; + +- LRU* lru; +- if(entry->status == PROMOTED) { +- lru = &m_promoted_lru; +- } else { +- lru = &m_handing_lru; ++ if(entry->status == OBJ_CACHE_PROMOTED) { ++ // touch it ++ m_promoted_lru.lru_touch(entry); + } + +- // touch it +- lru->lru_remove(entry); +- lru->lru_insert_top(entry); +- + return entry->status; + } + + int evict_object(std::string& out_cache_file_name) { +- Mutex::Locker locker(m_lock); +- +- // still have enough free space, don't need to evict lru. +- uint64_t temp_current_size = m_oid_to_entry.size(); +- float temp_current_evict_level = temp_current_size / m_entry_count; +- if(temp_current_evict_level < m_level) { +- return 0; +- } +- +- // when all entries are USING, PROMOTING or EVICTING, just busy waiting. +- if(m_promoted_lru.lru_get_size() == 0) { +- return 0; +- } +- +- assert(m_promoted_lru.lru_get_size() != 0); +- +- // evict one item from promoted lru +- Entry *entry = reinterpret_cast<Entry*>(m_promoted_lru.lru_get_next_expire()); +- assert(entry != nullptr); +- +- assert(entry->status == PROMOTED); +- +- out_cache_file_name = entry->cache_file_name; +- entry->status = EVICTING; +- +- m_promoted_lru.lru_remove(entry); +- m_handing_lru.lru_insert_top(entry); ++ RWLock::WLocker locker(m_cache_map_lock); + + return 1; + } + + // TODO(): simplify the logic +- void update_status(std::string _file_name, CACHESTATUS _status) { +- Mutex::Locker locker(m_lock); ++ void update_status(std::string file_name, CACHESTATUS new_status) { ++ RWLock::WLocker locker(m_cache_map_lock); + + Entry* entry; +- auto entry_it = m_oid_to_entry.find(_file_name); ++ auto entry_it = m_cache_map.find(file_name); + + // just check. +- if(_status == PROMOTING) { +- assert(m_oid_to_entry.find(_file_name) == m_oid_to_entry.end()); ++ if(new_status == OBJ_CACHE_PROMOTING) { ++ assert(entry_it == m_cache_map.end()); + } + +- // miss this object. +- if(entry_it == m_oid_to_entry.end() && _status == PROMOTING) { +- entry = reinterpret_cast<Entry*>(m_free_lru.lru_get_next_expire()); +- if(entry == nullptr) { +- assert(0); // namely evict thread have some problems. +- } +- +- entry->status = PROMOTING; +- +- m_oid_to_entry[_file_name] = entry; +- m_free_lru.lru_remove(entry); +- m_handing_lru.lru_insert_top(entry); +- +- return; +- } +- +- assert(entry_it != m_oid_to_entry.end()); ++ assert(entry_it != m_cache_map.end()); + + entry = entry_it->second; + +- // promoting action have been finished, so update it. +- if(entry->status == PROMOTING && _status== PROMOTED) { +- m_handing_lru.lru_remove(entry); ++ // promoting is done, so update it. ++ if(entry->status == OBJ_CACHE_PROMOTING && new_status== OBJ_CACHE_PROMOTED) { + m_promoted_lru.lru_insert_top(entry); +- entry->status = PROMOTED; +- return; +- } +- +- // will delete this cache file +- if(entry->status == PROMOTED && _status == EVICTING) { +- m_promoted_lru.lru_remove(entry); +- m_handing_lru.lru_insert_top(entry); +- entry->status = EVICTING; +- return; +- } +- +- +- if(_status == EVICTED) { +- m_oid_to_entry.erase(entry_it); +- m_handing_lru.lru_remove(entry); +- m_free_lru.lru_insert_bot(entry); ++ entry->status = new_status; + return; + } + +@@ -142,39 +97,64 @@ public: + } + + // get entry status +- CACHESTATUS get_status(std::string _file_name) { +- Mutex::Locker locker(m_lock); +- auto entry_it = m_oid_to_entry.find(_file_name); +- if(entry_it == m_oid_to_entry.end()) { +- return NONE; ++ CACHESTATUS get_status(std::string file_name) { ++ RWLock::RLocker locker(m_cache_map_lock); ++ auto entry_it = m_cache_map.find(file_name); ++ if(entry_it == m_cache_map.end()) { ++ return OBJ_CACHE_NONE; + } + + return entry_it->second->status; + } + ++ void get_evict_list(std::list<std::string>* obj_list) { ++ RWLock::WLocker locker(m_cache_map_lock); ++ // check free ratio, pop entries from LRU ++ if (m_free_list.size() / m_entry_count < m_watermark) { ++ int evict_num = 10; //TODO(): make this configurable ++ for(int i = 0; i < evict_num; i++) { ++ Entry* entry = reinterpret_cast<Entry*>(m_promoted_lru.lru_expire()); ++ if (entry == nullptr) { ++ continue; ++ } ++ std::string file_name = entry->cache_file_name; ++ obj_list->push_back(file_name); ++ ++ auto entry_it = m_cache_map.find(file_name); ++ m_cache_map.erase(entry_it); ++ ++ //mark this entry as free ++ entry->status = OBJ_CACHE_NONE; ++ Mutex::Locker locker(m_free_list_lock); ++ m_free_list.push_back(entry); ++ } ++ } ++ } + + private: + + class Entry : public LRUObject { + public: + CACHESTATUS status; +- Entry() : status(NONE){} ++ Entry() : status(OBJ_CACHE_NONE){} + std::string cache_file_name; + void encode(bufferlist &bl){} + void decode(bufferlist::iterator &it){} + }; + +- std::unordered_map<std::string, Entry*> m_oid_to_entry; ++ float m_watermark; ++ uint64_t m_entry_count; + +- LRU m_free_lru; +- LRU m_handing_lru; // include promoting status or evicting status +- LRU m_promoted_lru; // include promoted, using status. ++ std::unordered_map<std::string, Entry*> m_cache_map; ++ RWLock m_cache_map_lock; + +- mutable Mutex m_lock; ++ std::deque<Entry*> m_free_list; ++ Mutex m_free_list_lock; + +- float m_level; +- uint64_t m_entry_count; ++ LRU m_promoted_lru; // include promoted, using status. + + }; + ++} // namespace cache ++} // namespace rbd + #endif +-- +2.7.4 + |