/*
 * fs/logfs/gc.c - garbage collection code
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel
 */
#include "logfs.h"
#include <linux/sched.h>
#include <linux/slab.h>

/*
 * Wear leveling needs to kick in when the difference between low erase
 * counts and high erase counts gets too big.  A good value for "too big"
 * may be somewhat below 10% of maximum erase count for the device.
 * Why not 397, to pick a nice round number with no specific meaning? :)
 *
 * WL_RATELIMIT is the minimum time between two wear level events.  A huge
 * number of segments may fulfil the requirements for wear leveling at the
 * same time.  If that happens we don't want to cause a latency from hell,
 * but just gently pick one segment every so often and minimize overhead.
 */
#define WL_DELTA 397
#define WL_RATELIMIT 100
#define MAX_OBJ_ALIASES	2600
#define SCAN_RATIO 512	/* number of scanned segments per gc'd segment */
#define LIST_SIZE 64	/* base size of candidate lists */
#define SCAN_ROUNDS 128	/* maximum number of complete medium scans */
#define SCAN_ROUNDS_HIGH 4 /* maximum number of higher-level scans */
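/*
 * Editor's sketch (not part of the original file): one way WL_DELTA and
 * WL_RATELIMIT could gate a wear-leveling pass.  The helper wl_pass_due()
 * and the last_wl_jiffies bookkeeping are assumptions made for this
 * illustration only; the actual logfs policy may differ.
 */
#if 0
static unsigned long last_wl_jiffies;	/* assumed: time of last WL event */

static int wl_pass_due(u32 min_ec, u32 max_ec)
{
	/* at most one wear-level event per WL_RATELIMIT jiffies */
	if (time_before(jiffies, last_wl_jiffies + WL_RATELIMIT))
		return 0;
	/* only act once the erase-count spread exceeds WL_DELTA */
	if (max_ec - min_ec < WL_DELTA)
		return 0;
	last_wl_jiffies = jiffies;
	return 1;
}
#endif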
static int no_free_segments(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);

	return super->s_free_list.count;
}

/* journal has distance -1, top-most ifile layer distance 0 */
static u8 root_distance(struct super_block *sb, gc_level_t __gc_level)
{
	struct logfs_super *super = logfs_super(sb);
	u8 gc_level = (__force u8)__gc_level;

	switch (gc_level) {
	case 0: /* fall through */
	case 1: /* fall through */
	case 2: /* fall through */
	case 3: /* file data or indirect blocks */
		return super->s_ifile_levels + super->s_iblock_levels - gc_level;
	case 6: /* fall through */
	case 7: /* fall through */
	case 8: /* fall through */
	case 9: /* inode file data or indirect blocks */
		return super->s_ifile_levels - (gc_level - 6);
	default:
		printk(KERN_ERR"LOGFS: segment of unknown level %x found\n",
				gc_level);
		WARN_ON(1);
		return super->s_ifile_levels + super->s_iblock_levels;
	}
}

static int segment_is_reserved(struct super_block *sb, u32 segno)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_area *area;
	void *reserved;
	int i;

	/* Some segments are reserved.  Just pretend they were all valid */
	reserved = btree_lookup32(&super->s_reserved_segments, segno);
	if (reserved)
		return 1;

	/* Currently open segments */
	for_each_area(i) {
		area = super->s_area[i];
		if (area->a_is_open && area->a_segno == segno)
			return 1;
	}

	return 0;
}

static void logfs_mark_segment_bad(struct super_block *sb, u32 segno)
{
	BUG();
}

/*
 * Returns the bytes consumed by valid objects in this segment.  Object headers
 * are counted, the segment header is not.
 */
static u32 logfs_valid_bytes(struct super_block *sb, u32 segno, u32 *ec,
		gc_level_t *gc_level)
{
	struct logfs_segment_entry se;
	u32 ec_level;

	logfs_get_segment_entry(sb, segno, &se);
	if (se.ec_level == cpu_to_be32(BADSEG) ||
			se.valid == cpu_to_be32(RESERVED))
		return RESERVED;

	ec_level = be32_to_cpu(se.ec_level);
	*ec = ec_level >> 4;
	*gc_level = GC_LEVEL(ec_level & 0xf);
	return be32_to_cpu(se.valid);
}

static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
		u64 bix, gc_level_t gc_level)
{
	struct inode *inode;
	int err, cookie;

	inode = logfs_safe_iget(sb, ino, &cookie);
	err = logfs_rewrite_block(inode, bix, ofs, gc_level, 0);
	BUG_ON(err);
	logfs_safe_iput(inode, cookie);
}

static u32 logfs_gc_segment(struct super_block *sb, u32 segno)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_segment_header sh;
	struct logfs_object_header oh;
	u64 ofs, ino, bix;
	u32 seg_ofs, logical_segno, cleaned = 0;
	int err, len, valid;
	gc_level_t gc_level;

	LOGFS_BUG_ON(segment_is_reserved(sb, segno), sb);

	btree_insert32(&super->s_reserved_segments, segno, (void *)1, GFP_NOFS);
	err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh);
	BUG_ON(err);
	gc_level = GC_LEVEL(sh.level);
	logical_segno = be32_to_cpu(sh.segno);
	if (sh.crc != logfs_crc32(&sh, sizeof(sh), 4)) {
		logfs_mark_segment_bad(sb, segno);
		cleaned = -1;
		goto out;
	}

	for (seg_ofs = LOGFS_SEGMENT_HEADERSIZE;
			seg_ofs + sizeof(oh) < super->s_segsize; ) {
		ofs = dev_ofs(sb, logical_segno, seg_ofs);
		err = wbuf_read(sb, dev_ofs(sb, segno, seg_ofs), sizeof(oh),
				&oh);
		BUG_ON(err);

		if (!memchr_inv(&oh, 0xff, sizeof(oh)))
			break;

		if (oh.crc != logfs_crc32(&oh, sizeof(oh) - 4, 4)) {
			logfs_mark_segment_bad(sb, segno);
			cleaned = super->s_segsize - 1;
			goto out;
		}

		ino = be64_to_cpu(oh.ino);
		bix = be64_to_cpu(oh.bix);
		len = sizeof(oh) + be16_to_cpu(oh.len);
		valid = logfs_is_valid_block(sb, ofs, ino, bix, gc_level);
		if (valid == 1) {
			logfs_cleanse_block(sb, ofs, ino, bix, gc_level);
			cleaned += len;
		} else if (valid == 2) {
			/* Will be invalid upon journal commit */
			cleaned += len;
		}
		seg_ofs += len;
	}
out:
	btree_remove32(&super->s_reserved_segments, segno);
	return cleaned;
}

static struct gc_candidate *add_list(struct gc_candidate *cand,
		struct candidate_list *list)
{
	struct rb_node **p = &list->rb_tree.rb_node;
	struct rb_node *parent = NULL;
	struct gc_candidate *cur;
	int comp;

	cand->list = list;
	while (*p) {
		parent = *p;
		cur = rb_entry(parent, struct gc_candidate, rb_node);

		if (list->sort_by_ec)
			comp = cand->erase_count < cur->erase_count;
		else
			comp = cand->valid < cur->valid;

		if (comp)
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}
	rb_link_node(&cand->rb_node, parent, p);
	rb_insert_color(&cand->rb_node, &list->rb_tree);

	if (list->count <= list->maxcount) {
		list->count++;
		return NULL;
	}
	cand = rb_entry(rb_last(&list->rb_tree), struct gc_candidate, rb_node);
	rb_erase(&cand->rb_node, &list->rb_tree);
	cand->list = NULL;
	return cand;
}

static void remove_from_list(struct gc_candidate *cand)
{
	struct candidate_list *list = cand->list;

	rb_erase(&cand->rb_node, &list->rb_tree);
	list->count--;
}

static void free_candidate(struct super_block *sb, struct gc_candidate *cand)
{
	struct logfs_super *super = logfs_super(sb);

	btree_remove32(&super->s_cand_tree, cand->segno);
	kfree(cand);
}

u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec)
{
	struct gc_candidate *cand;
	u32 segno;

	BUG_ON(list->count == 0);

	cand = rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
	remove_from_list(cand);
	segno = cand->segno;
	if (ec)
		*ec = cand->erase_count;
	free_candidate(sb, cand);
	return segno;
}

/*
 * We have several lists to manage segments with.  The reserve_list is used to
 * deal with bad blocks.  We try to keep the best (lowest ec) segments on this
 * list.
 * The free_list contains free segments for normal usage.  It usually gets the
 * second pick after the reserve_list.  But when the free_list is running
 * short it is more important to keep the free_list full than to keep a
 * reserve.
 *
 * Segments that are not free are put onto a per-level low_list.  If we have
 * to run garbage collection, we pick a candidate from there.  All segments
 * on those lists should have at least some free space so GC will make
 * progress.
 *
 * And last we have the ec_list, which is used to pick segments for wear
 * leveling.
 *
 * If all appropriate lists are full, we simply free the candidate and forget
 * about that segment for a while.  We have better candidates for each
 * purpose.
 */
static void __add_candidate(struct super_block *sb, struct gc_candidate *cand)
{
	struct logfs_super *super = logfs_super(sb);
	u32 full = super->s_segsize - LOGFS_SEGMENT_RESERVE;

	if (cand->valid == 0) {
		/* 100% free segments */
		log_gc_noisy("add reserve segment %x (ec %x) at %llx\n",
				cand->segno, cand->erase_count,
				dev_ofs(sb, cand->segno, 0));
		cand = add_list(cand, &super->s_reserve_list);
		if (cand) {
			log_gc_noisy("add free segment %x (ec %x) at %llx\n",
					cand->segno, cand->erase_count,
					dev_ofs(sb, cand->segno, 0));
			cand = add_list(cand, &super->s_free_list);
		}
	} else {
		/* good candidates for Garbage Collection */
		if (cand->valid < full)
			cand = add_list(cand, &super->s_low_list[cand->dist]);
		/* good candidates for wear leveling,
		 * segments that were recently written get ignored */
		if (cand)
			cand = add_list(cand, &super->s_ec_list);
	}
	if (cand)
		free_candidate(sb, cand);
}

static int add_candidate(struct super_block *sb, u32 segno, u32 valid, u32 ec,
		u8 dist)
{
	struct logfs_super *super = logfs_super(sb);
	struct gc_candidate *cand;

	cand = kmalloc(sizeof(*cand), GFP_NOFS);
	if (!cand)
		return -ENOMEM;

	cand->segno = segno;
	cand->valid = valid;
	cand->erase_count = ec;
	cand->dist = dist;

	btree_insert32(&super->s_cand_tree, segno, cand, GFP_NOFS);
	__add_candidate(sb, cand);
	return 0;
}

static void remove_segment_from_lists(struct super_block *sb, u32 segno)
{
	struct logfs_super *super = logfs_super(sb);
	struct gc_candidate *cand;

	cand = btree_lookup32(&super->s_cand_tree, segno);
	if (cand) {
		remove_from_list(cand);
		free_candidate(sb, cand);
	}
}

static void scan_segment(struct super_block *sb, u32 segno)
{
	u32 valid, ec = 0;
	gc_level_t gc_level = 0;
	u8 dist;

	if (segment_is_reserved(sb, segno))
		return;

	remove_segment_from_lists(sb, segno);
	valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
	if (valid == RESERVED)
		return;

	dist = root_distance(sb, gc_level);
	add_candidate(sb, segno, valid, ec, dist);
}

static struct gc_candidate *first_in_list(struct candidate_list *list)
{
	if (list->count == 0)
		return NULL;
	return rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
}

/*
 * Find the best segment for garbage collection.  Main criterion is
 * the segment requiring the least effort to clean.  Secondary
 * criterion is to GC on the
/*
 * arch/arm/plat-iop/time.c
 *
 * Timer code for IOP32x and IOP33x based systems
 *
 * Author: Deepak Saxena <dsaxena@mvista.com>
 *
 * Copyright 2002-2003 MontaVista Software Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 */

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/timex.h>
#include <linux/io.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/export.h>
#include <linux/sched_clock.h>
#include <mach/hardware.h>
#include <asm/irq.h>
#include <asm/uaccess.h>
#include <asm/mach/irq.h>
#include <asm/mach/time.h>
#include <mach/time.h>

/*
 * Minimum clocksource/clockevent timer range in seconds
 */
#define IOP_MIN_RANGE 4
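
/*
 * Editorial note (not in the original file): a 32-bit timer clocked at,
 * say, 200 MHz wraps after 2^32 / 200000000 ~= 21.5 s, so a 4 second
 * minimum range is easily met.  Values like this are typically passed as
 * the "minsec" argument of clocksource/clockevent mult/shift calculations.
 */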

/*
 * IOP clocksource (free-running timer 1).
 */
static cycle_t notrace iop_clocksource_read(struct clocksource *unused)
{
	return 0xffffffffu - read_tcr1();
}

static struct clocksource iop_clocksource = {
	.name 		= "iop_timer1",
	.rating		= 300,
	.read		= iop_clocksource_read,
	.mask		= CLOCKSOURCE_MASK(32),
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
};
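
/*
 * Editor's sketch (not part of the original file): how a free-running
 * clocksource like the one above is typically registered once the timer
 * tick rate is known.  "tick_rate" (the timer input clock in Hz) is an
 * assumed parameter; the platform's real init path may differ.
 */
#if 0
static void __init iop_example_clocksource_init(unsigned long tick_rate)
{
	/* timer 1 must already be running as a free-running counter */
	clocksource_register_hz(&iop_clocksource, tick_rate);
}
#endif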

/*
 * IOP sched_clock() implementation via its clocksource.
 */
static u64 notrace iop_read_sched_clock(void)
{
	return 0xffffffffu - read_tcr1();
}
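
/*
 * Editor's sketch (not part of the original file): a read callback like the
 * one above is typically handed to the generic sched_clock layer during
 * timer init.  "tick_rate" (timer frequency in Hz) is assumed here.
 */
#if 0
static void __init iop_example_sched_clock_init(unsigned long tick_rate)
{
	/* 32 bits wide, counting at tick_rate Hz */
	sched_clock_register(iop_read_sched_clock, 32, tick_rate);
}
#endif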

/*
 * IOP clockevents (interrupting timer 0).
 */
static int iop_set_next_event(unsigned long delta,
			      struct clock_event_device *unused)
{
	u32 tmr = IOP_TMR_PRIVILEGED | IOP_TMR_RATIO_1_1;

	BUG_ON(delta == 0);
	write_tmr0(tmr & ~(