summaryrefslogtreecommitdiffstats
path: root/src/ceph/0006-librbd-LRU-policy-based-eviction.patch
diff options
context:
space:
mode:
authorQiaowei Ren <qiaowei.ren@intel.com>2018-09-11 10:54:20 +0800
committerQiaowei Ren <qiaowei.ren@intel.com>2018-09-11 10:54:20 +0800
commitd65e22d27ab305d38059046dae60d7a66ff4a4e0 (patch)
treed0bd06459534ef33d0b930f6c164c4501bd527d7 /src/ceph/0006-librbd-LRU-policy-based-eviction.patch
parent828acdd1d5c5c2aeef287aa69e473bf44fcbce70 (diff)
ceph: shared persistent read-only rbd cache
This patch introduces introduces RBD shared persistent RO cache which can provide client-side sharing cache for rbd clone/snapshot case. Change-Id: Icad8063f4f10b1ab4ce31920e90d5affa7d0abdc Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com> Signed-off-by: Dehao Shang <dehao.shang@intel.com> Signed-off-by: Tushar Gohad <tushar.gohad@intel.com> Signed-off-by: Jason Dillaman <dillaman@redhat.com> Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>
Diffstat (limited to 'src/ceph/0006-librbd-LRU-policy-based-eviction.patch')
-rw-r--r--src/ceph/0006-librbd-LRU-policy-based-eviction.patch403
1 files changed, 403 insertions, 0 deletions
diff --git a/src/ceph/0006-librbd-LRU-policy-based-eviction.patch b/src/ceph/0006-librbd-LRU-policy-based-eviction.patch
new file mode 100644
index 0000000..eb99e3a
--- /dev/null
+++ b/src/ceph/0006-librbd-LRU-policy-based-eviction.patch
@@ -0,0 +1,403 @@
+From b233d6540160c8bc5cc25b870c2140fa48776fa6 Mon Sep 17 00:00:00 2001
+From: Dehao Shang <dehao.shang@intel.com>
+Date: Mon, 6 Aug 2018 22:42:38 +0800
+Subject: [PATCH 06/10] librbd: LRU policy based eviction
+
+Signed-off-by: Dehao Shang <dehao.shang@intel.com>
+Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>
+---
+ src/tools/rbd_cache/ObjectCacheStore.cc | 73 ++++++++-----
+ src/tools/rbd_cache/ObjectCacheStore.h | 14 +--
+ src/tools/rbd_cache/Policy.hpp | 22 ++++
+ src/tools/rbd_cache/SimplePolicy.hpp | 180 ++++++++++++++++++++++++++++++++
+ 4 files changed, 254 insertions(+), 35 deletions(-)
+ create mode 100644 src/tools/rbd_cache/Policy.hpp
+ create mode 100644 src/tools/rbd_cache/SimplePolicy.hpp
+
+diff --git a/src/tools/rbd_cache/ObjectCacheStore.cc b/src/tools/rbd_cache/ObjectCacheStore.cc
+index 2a87469..b39fe66 100644
+--- a/src/tools/rbd_cache/ObjectCacheStore.cc
++++ b/src/tools/rbd_cache/ObjectCacheStore.cc
+@@ -14,12 +14,12 @@ namespace cache {
+
+ ObjectCacheStore::ObjectCacheStore(CephContext *cct, ContextWQ* work_queue)
+ : m_cct(cct), m_work_queue(work_queue),
+- m_cache_table_lock("rbd::cache::ObjectCacheStore"),
+ m_rados(new librados::Rados()) {
++ m_policy = new SimplePolicy(4096, 0.9); // TODO
+ }
+
+ ObjectCacheStore::~ObjectCacheStore() {
+-
++ delete m_policy;
+ }
+
+ int ObjectCacheStore::init(bool reset) {
+@@ -43,6 +43,7 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name)
+ int ret = 0;
+ std::string cache_file_name = pool_name + object_name;
+
++ //TODO(): lock on ioctx map
+ if (m_ioctxs.find(pool_name) == m_ioctxs.end()) {
+ librados::IoCtx* io_ctx = new librados::IoCtx();
+ ret = m_rados->ioctx_create(pool_name.c_str(), *io_ctx);
+@@ -58,10 +59,8 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name)
+ librados::IoCtx* ioctx = m_ioctxs[pool_name];
+
+ //promoting: update metadata
+- {
+- Mutex::Locker locker(m_cache_table_lock);
+- m_cache_table.emplace(cache_file_name, PROMOTING);
+- }
++ m_policy->update_status(cache_file_name, PROMOTING);
++ assert(PROMOTING == m_policy->get_status(cache_file_name));
+
+ librados::bufferlist* read_buf = new librados::bufferlist();
+ int object_size = 4096*1024; //TODO(): read config from image metadata
+@@ -83,42 +82,60 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name)
+ cache_file.open();
+ ret = cache_file.write_object_to_file(*read_buf, object_size);
+
+- assert(m_cache_table.find(cache_file_name) != m_cache_table.end());
+-
+ // update metadata
+- {
+- Mutex::Locker locker(m_cache_table_lock);
+- m_cache_table.emplace(cache_file_name, PROMOTED);
+- }
++ assert(PROMOTING == m_policy->get_status(cache_file_name));
++ m_policy->update_status(cache_file_name, PROMOTED);
++ assert(PROMOTED == m_policy->get_status(cache_file_name));
+
+ return ret;
+
+ }
+
++// return -1, client need to read data from cluster.
++// return 0, client directly read data from cache.
+ int ObjectCacheStore::lookup_object(std::string pool_name, std::string object_name) {
+
+ std::string cache_file_name = pool_name + object_name;
+- {
+- Mutex::Locker locker(m_cache_table_lock);
+-
+- auto it = m_cache_table.find(cache_file_name);
+- if (it != m_cache_table.end()) {
+-
+- if (it->second == PROMOTING) {
+- return -1;
+- } else if (it->second == PROMOTED) {
+- return 0;
+- } else {
+- assert(0);
+- }
+- }
++
++ // TODO lookup and return status;
++
++ CACHESTATUS ret;
++ ret = m_policy->lookup_object(cache_file_name);
++
++ switch(ret) {
++ case NONE:
++ return do_promote(pool_name, object_name);
++ case PROMOTING:
++ return -1;
++ case PROMOTED:
++ return 0;
++ default:
++ return -1;
+ }
++}
+
+- int ret = do_promote(pool_name, object_name);
++void ObjectCacheStore::evict_thread_body() {
++ int ret;
++ while(m_evict_go) {
++ std::string temp_cache_file;
+
+- return ret;
++ ret = m_policy->evict_object(temp_cache_file);
++ if(ret == 0) {
++ continue;
++ }
++
++ // TODO
++ // delete temp_cache_file file.
++
++ assert(EVICTING == m_policy->get_status(temp_cache_file));
++
++ m_policy->update_status(temp_cache_file, EVICTED);
++
++ assert(NONE == m_policy->get_status(temp_cache_file));
++ }
+ }
+
++
+ int ObjectCacheStore::shutdown() {
+ m_rados->shutdown();
+ return 0;
+diff --git a/src/tools/rbd_cache/ObjectCacheStore.h b/src/tools/rbd_cache/ObjectCacheStore.h
+index db09efa..5118a73 100644
+--- a/src/tools/rbd_cache/ObjectCacheStore.h
++++ b/src/tools/rbd_cache/ObjectCacheStore.h
+@@ -13,6 +13,7 @@
+ #include "librbd/ImageCtx.h"
+ #include "librbd/ImageState.h"
+ #include "librbd/cache/SharedPersistentObjectCacherFile.h"
++#include "SimplePolicy.hpp"
+
+ using librados::Rados;
+ using librados::IoCtx;
+@@ -39,6 +40,8 @@ class ObjectCacheStore
+
+ int lock_cache(std::string vol_name);
+
++ void evict_thread_body();
++
+ private:
+ int _evict_object();
+
+@@ -48,21 +51,18 @@ class ObjectCacheStore
+ librados::bufferlist* read_buf,
+ uint64_t length);
+
+- enum {
+- PROMOTING = 0,
+- PROMOTED,
+- };
+-
+ CephContext *m_cct;
+ ContextWQ* m_work_queue;
+- Mutex m_cache_table_lock;
+ RadosRef m_rados;
+
+- std::map<std::string, uint8_t> m_cache_table;
+
+ std::map<std::string, librados::IoCtx*> m_ioctxs;
+
+ librbd::cache::SyncFile *m_cache_file;
++
++ Policy* m_policy;
++
++ bool m_evict_go;
+ };
+
+ } // namespace rbd
+diff --git a/src/tools/rbd_cache/Policy.hpp b/src/tools/rbd_cache/Policy.hpp
+new file mode 100644
+index 0000000..575c294
+--- /dev/null
++++ b/src/tools/rbd_cache/Policy.hpp
+@@ -0,0 +1,22 @@
++#ifndef RBD_CACHE_POLICY_HPP
++#define RBD_CACHE_POLICY_HPP
++
++enum CACHESTATUS {
++ NONE = 0,
++ PROMOTING,
++ PROMOTED,
++ EVICTING,
++ EVICTED,
++};
++
++
++class Policy {
++public:
++ Policy(){}
++ virtual ~Policy(){};
++ virtual CACHESTATUS lookup_object(std::string) = 0;
++ virtual int evict_object(std::string&) = 0;
++ virtual void update_status(std::string, CACHESTATUS) = 0;
++ virtual CACHESTATUS get_status(std::string) = 0;
++};
++#endif
+diff --git a/src/tools/rbd_cache/SimplePolicy.hpp b/src/tools/rbd_cache/SimplePolicy.hpp
+new file mode 100644
+index 0000000..a0d8de7
+--- /dev/null
++++ b/src/tools/rbd_cache/SimplePolicy.hpp
+@@ -0,0 +1,180 @@
++#ifndef RBD_CACHE_SIMPLE_POLICY_HPP
++#define RBD_CACHE_SIMPLE_POLICY_HPP
++
++#include "Policy.hpp"
++#include "include/lru.h"
++#include "common/Mutex.h"
++
++#include <vector>
++#include <unordered_map>
++#include <string>
++
++class SimplePolicy : public Policy {
++public:
++ SimplePolicy(uint64_t block_num, float level)
++ : m_level(level),
++ m_lock("SimplePolicy"),
++ m_entry_count(block_num)
++ {
++
++ Entry m_entries[m_entry_count];
++
++ for(auto &entry : m_entries) {
++ m_free_lru.lru_insert_bot(&entry);
++ }
++ }
++
++ ~SimplePolicy() {}
++
++ CACHESTATUS lookup_object(std::string cache_file_name) {
++ Mutex::Locker locker(m_lock);
++
++ auto entry_it = m_oid_to_entry.find(cache_file_name);
++ if(entry_it == m_oid_to_entry.end()) {
++ return NONE;
++ }
++
++ Entry* entry = entry_it->second;
++
++ LRU* lru;
++ if(entry->status == PROMOTED) {
++ lru = &m_promoted_lru;
++ } else {
++ lru = &m_handing_lru;
++ }
++
++ // touch it
++ lru->lru_remove(entry);
++ lru->lru_insert_top(entry);
++
++ return entry->status;
++ }
++
++ int evict_object(std::string& out_cache_file_name) {
++ Mutex::Locker locker(m_lock);
++
++ // still have enough free space, don't need to evict lru.
++ uint64_t temp_current_size = m_oid_to_entry.size();
++ float temp_current_evict_level = temp_current_size / m_entry_count;
++ if(temp_current_evict_level < m_level) {
++ return 0;
++ }
++
++ // when all entries are USING, PROMOTING or EVICTING, just busy waiting.
++ if(m_promoted_lru.lru_get_size() == 0) {
++ return 0;
++ }
++
++ assert(m_promoted_lru.lru_get_size() != 0);
++
++ // evict one item from promoted lru
++ Entry *entry = reinterpret_cast<Entry*>(m_promoted_lru.lru_get_next_expire());
++ assert(entry != nullptr);
++
++ assert(entry->status == PROMOTED);
++
++ out_cache_file_name = entry->cache_file_name;
++ entry->status = EVICTING;
++
++ m_promoted_lru.lru_remove(entry);
++ m_handing_lru.lru_insert_top(entry);
++
++ return 1;
++ }
++
++ // TODO(): simplify the logic
++ void update_status(std::string _file_name, CACHESTATUS _status) {
++ Mutex::Locker locker(m_lock);
++
++ Entry* entry;
++ auto entry_it = m_oid_to_entry.find(_file_name);
++
++ // just check.
++ if(_status == PROMOTING) {
++ assert(m_oid_to_entry.find(_file_name) == m_oid_to_entry.end());
++ }
++
++ // miss this object.
++ if(entry_it == m_oid_to_entry.end() && _status == PROMOTING) {
++ entry = reinterpret_cast<Entry*>(m_free_lru.lru_get_next_expire());
++ if(entry == nullptr) {
++ assert(0); // namely evict thread have some problems.
++ }
++
++ entry->status = PROMOTING;
++
++ m_oid_to_entry[_file_name] = entry;
++ m_free_lru.lru_remove(entry);
++ m_handing_lru.lru_insert_top(entry);
++
++ return;
++ }
++
++ assert(entry_it != m_oid_to_entry.end());
++
++ entry = entry_it->second;
++
++ // promoting action have been finished, so update it.
++ if(entry->status == PROMOTING && _status== PROMOTED) {
++ m_handing_lru.lru_remove(entry);
++ m_promoted_lru.lru_insert_top(entry);
++ entry->status = PROMOTED;
++ return;
++ }
++
++ // will delete this cache file
++ if(entry->status == PROMOTED && _status == EVICTING) {
++ m_promoted_lru.lru_remove(entry);
++ m_handing_lru.lru_insert_top(entry);
++ entry->status = EVICTING;
++ return;
++ }
++
++
++ if(_status == EVICTED) {
++ m_oid_to_entry.erase(entry_it);
++ m_handing_lru.lru_remove(entry);
++ m_free_lru.lru_insert_bot(entry);
++ return;
++ }
++
++ assert(0);
++ }
++
++ // get entry status
++ CACHESTATUS get_status(std::string _file_name) {
++ Mutex::Locker locker(m_lock);
++ auto entry_it = m_oid_to_entry.find(_file_name);
++ if(entry_it == m_oid_to_entry.end()) {
++ return NONE;
++ }
++
++ return entry_it->second->status;
++ }
++
++
++private:
++
++ class Entry : public LRUObject {
++ public:
++ CACHESTATUS status;
++ Entry() : status(NONE){}
++ std::string cache_file_name;
++ void encode(bufferlist &bl){}
++ void decode(bufferlist::iterator &it){}
++ };
++
++ std::unordered_map<std::string, Entry*> m_oid_to_entry;
++
++ LRU m_free_lru;
++ LRU m_handing_lru; // include promoting status or evicting status
++ LRU m_promoted_lru; // include promoted, using status.
++
++ mutable Mutex m_lock;
++
++ float m_level;
++ uint64_t m_entry_count;
++
++};
++
++#endif
+--
+2.7.4
+