summaryrefslogtreecommitdiffstats
path: root/kernel/drivers/s390/cio/css.c
blob: 489e703dc82d85b49d33990bd3d5c2b687d9eb0f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ras
#define TRACE_INCLUDE_FILE ras_event

#if !defined(_TRACE_HW_EVENT_MC_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_HW_EVENT_MC_H

#include <linux/tracepoint.h>
#include <linux/edac.h>
#include <linux/ktime.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/cper.h>
#include <linux/mm.h>

/*
 * MCE Extended Error Log trace event
 *
 * These events are generated when hardware detects a corrected or
 * uncorrected event.
 */

/* memory trace event */

#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE)
TRACE_EVENT(extlog_mem_event,
	TP_PROTO(struct cper_sec_mem_err *mem,
		 u32 err_seq,
		 const uuid_le *fru_id,
		 const char *fru_text,
		 u8 sev),

	TP_ARGS(mem, err_seq, fru_id, fru_text, sev),

	TP_STRUCT__entry(
		__field(u32, err_seq)
		__field(u8, etype)
		__field(u8, sev)
		__field(u64, pa)
		__field(u8, pa_mask_lsb)
		__field_struct(uuid_le, fru_id)
		__string(fru_text, fru_text)
		__field_struct(struct cper_mem_err_compact, data)
	),

	TP_fast_assign(
		__entry->err_seq = err_seq;
		if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
			__entry->etype = mem->error_type;
		else
			__entry->etype = ~0;
		__entry->sev = sev;
		if (mem->validation_bits & CPER_MEM_VALID_PA)
			__entry->pa = mem->physical_addr;
		else
			__entry->pa = ~0ull;

		if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
			__entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask);
		else
			__entry->pa_mask_lsb = ~0;
		__entry->fru_id = *fru_id;
		__assign_str(fru_text, fru_text);
		cper_mem_err_pack(mem, &__entry->data);
	),

	TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s",
		  __entry->err_seq,
		  cper_severity_str(__entry->sev),
		  cper_mem_err_type_str(__entry->etype),
		  __entry->pa,
		  __entry->pa_mask_lsb,
		  cper_mem_err_unpack(p, &__entry->data),
		  &__entry->fru_id,
		  __get_str(fru_text))
);
#endif

/*
 * Hardware Events Report
 *
 * Those events are generated when hardware detected a corrected or
 * uncorrected event, and are meant to replace the current API to report
 * errors defined on both EDAC and MCE subsystems.
 *
 * FIXME: Add events for handling memory errors originated from the
 *        MCE subsystem.
 */

/*
 * Hardware-independent Memory Controller specific events
 */

/*
 * Default error mechanisms for Memory Controller errors (CE and UE)
 */
TRACE_EVENT(mc_event,

	TP_PROTO(const unsigned int err_type,
		 const char *error_msg,
		 const char *label,
		 const int error_count,
		 const u8 mc_index,
		 const s8 top_layer,
		 const s8 mid_layer,
		 const s8 low_layer,
		 unsigned long address,
		 const u8 grain_bits,
		 unsigned long syndrome,
		 const char *driver_detail),

	TP_ARGS(err_type, error_msg, label, error_count, mc_index,
		top_layer, mid_layer, low_layer, address, grain_bits,
		syndrome, driver_detail),

	TP_STRUCT__entry(
		__field(	unsigned int,	error_type		)
		__string(	msg,		error_msg		)
		__string(	label,		label			)
		__field(	u16,		error_count		)
		__field(	u8,		mc_index		)
		__field(	s8,		top_layer		)
		__field(	s8,		middle_layer		)
		__field(	s8,		lower_layer		)
		__field(	long,		address			)
		__field(	u8,		grain_bits		)
		__field(	long,		syndrome		)
		__string(	driver_detail,	driver_detail		)
	),

	TP_fast_assign(
		__entry->error_type		= err_type;
		__assign_str(msg, error_msg);
		__assign_str(label, label);
		__entry->error_count		= error_count;
		__entry->mc_index		= mc_index;
		__entry->top_layer		= top_layer;
		__entry->middle_layer		= mid_layer;
		__entry->lower_layer		= low_layer;
		__entry->address		= address;
		__entry->grain_bits		= grain_bits;
		__entry->syndrome		= syndrome;
		__assign_str(driver_detail, driver_detail);
	),

	TP_printk("%d %s error%s:%s%s on %s (mc:%d location:%d:%d:%d address:0x%08lx grain:%d syndrome:0x%08lx%s%s)",
		  __entry->error_count,
		  mc_event_error_type(__entry->error_type),
		  __entry->error_count > 1 ? "s" : "",
		  ((char *)__get_str(msg))[0] ? " " : "",
		  __get_str(msg),
		  __get_str(label),
		  __entry->mc_index,
		  __entry->top_layer,
		  __entry->middle_layer,
		  __entry->lower_layer,
		  __entry->address,
		  1 << __entry->grain_bits,
		  __entry->syndrome,
		  ((char *)__get_str(driver_detail))[0] ? " " : "",
		  __get_str(driver_detail))
);

/*
 * PCIe AER Trace event
 *
 * These events are generated when hardware detects a corrected or
 * uncorrected event on a PCIe device. The event report has
 * the following structure:
 *
 * char * dev_name -	The name of the slot where the device resides
 *			([domain:]bus:device.function).
 * u32 status -		Either the correctable or uncorrectable register
 *			indicating what error or errors have been seen
 * u8 severity -	error severity 0:NONFATAL 1:FATAL 2:CORRECTED
 */

#define aer_correctable_errors					\
	{PCI_ERR_COR_RCVR,	"Receiver Error"},		\
	{PCI_ERR_COR_BAD_TLP,	"Bad TLP"},			\
	{PCI_ERR_COR_BAD_DLLP,	"Bad DLLP"},			\
	{PCI_ERR_COR_REP_ROLL,	"RELAY_NUM Rollover"},		\
	{PCI_ERR_COR_REP_TIMER,	"Replay Timer Timeout"},	\
	{PCI_ERR_COR_ADV_NFAT,	"Advisory Non-Fatal Error"},	\
	{PCI_ERR_COR_INTERNAL,	"Corrected Internal Error"},	\
	{PCI_ERR_COR_LOG_OVER,	"Header Log Overflow"}

#define aer_uncorrectable_errors				\
	{PCI_ERR_UNC_UND,	"Undefined"},			\
	{PCI_ERR_UNC_DLP,	"Data Link Protocol Error"},	\
	{PCI_ERR_UNC_SURPDN,	"Surprise Down Error"},		\
	{PCI_ERR_UNC_POISON_TLP,"Poisoned TLP"},		\
	{PCI_ERR_UNC_FCP,	"Flow Control Protocol Error"},	\
	{PCI_ERR_UNC_COMP_TIME,	"Completion Timeout"},		\
	{PCI_ERR_UNC_COMP_ABORT,"Completer Abort"},		\
	{PCI_ERR_UNC_UNX_COMP,	"Unexpected Completion"},	\
	{PCI_ERR_UNC_RX_OVER,	"Receiver Overflow"},		\
	{PCI_ERR_UNC_MALF_TLP,	"Malformed TLP"},		\
	{PCI_ERR_UNC_ECRC,	"ECRC Error"},			\
	{PCI_ERR_UNC_UNSUP,	"Unsupported Request Error"},	\
	{PCI_ERR_UNC_ACSV,	"ACS Violation"},		\
	{PCI_ERR_UNC_INTN,	"Uncorrectable Internal Error"},\
	{PCI_ERR_UNC_MCBTLP,	"MC Blocked TLP"},		\
	{PCI_ERR_UNC_ATOMEG,	"AtomicOp Egress Blocked"},	\
	{PCI_ERR_UNC_TLPPRE,	"TLP Prefix Blocked Error"}

TRACE_EVENT(aer_event,
	TP_PROTO(const char *dev_name,
		 const u32 status,
		 const u8 severity),

	TP_ARGS(dev_name, status, severity),

	TP_STRUCT__entry(
		__string(	dev_name,	dev_name	)
		__field(	u32,		status		)
		__field(	u8,		severity	)
	),

	TP_fast_assign(
		__assign_str(dev_name, dev_name);
		__entry->status		= status;
		__entry->severity	= severity;
	),

	TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
		__get_str(dev_name),
		__entry->severity == AER_CORRECTABLE ? "Corrected" :
			__entry->severity == AER_FATAL ?
			"Fatal" : "Uncorrected, non-fatal",
		__entry->severity == AER_CORRECTABLE ?
		__print_flags(__entry->status, "|", aer_correctable_errors) :
		__print_flags(__entry->status, "|", aer_uncorrectable_errors))
);

/*
 * memory-failure recovery action result event
 *
 * unsigned long pfn -	Page Frame Number of the corrupted page
 * int type	-	Page types of the corrupted page
 * int result	-	Result of recovery action
 */

#ifdef CONFIG_MEMORY_FAILURE
#define MF_ACTION_RESULT	\
	EM ( MF_IGNORED, "Ignored" )	\
	EM ( MF_FAILED,  "Failed" )	\
	EM ( MF_DELAYED, "Delayed" )	\
	EMe ( MF_RECOVERED, "Recovered" )

#define MF_PAGE_TYPE		\
	EM ( MF_MSG_KERNEL, "reserved kernel page" )			\
	EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" )	\
	EM ( MF_MSG_SLAB, "kernel slab page" )				\
	EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \
	EM ( MF_MSG_POISONED_HUGE, "huge page already hardware poisoned" )	\
	EM ( MF_MSG_HUGE, "huge page" )					\
	EM ( MF_MSG_FREE_HUGE, "free huge page" )			\
	EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" )		\
	EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" )		\
	EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" )		\
	EM ( MF_MSG_DIRTY_MLOCKED_LRU, "dirty mlocked LRU page" )	\
	EM ( MF_MSG_CLEAN_MLOCKED_LRU, "clean mlocked LRU page" )	\
	EM ( MF_MSG_DIRTY_UNEVICTABLE_LRU, "dirty unevictable LRU page" )	\
	EM ( MF_MSG_CLEAN_UNEVICTABLE_LRU, "clean unevictable LRU page" )	\
	EM ( MF_MSG_DIRTY_LRU, "dirty LRU page" )			\
	EM ( MF_MSG_CLEAN_LRU, "clean LRU page" )			\
	EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" )	\
	EM ( MF_MSG_BUDDY, "free buddy page" )				\
	EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" )		\
	EMe ( MF_MSG_UNKNOWN, "unknown page" )

/*
 * First define the enums in MM_ACTION_RESULT to be exported to userspace
 * via TRACE_DEFINE_ENUM().
 */
#undef EM
#undef EMe
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define EMe(a, b)	TRACE_DEFINE_ENUM(a);

MF_ACTION_RESULT
MF_PAGE_TYPE

/*
 * driver for channel subsystem
 *
 * Copyright IBM Corp. 2002, 2010
 *
 * Author(s): Arnd Bergmann (arndb@de.ibm.com)
 *	      Cornelia Huck (cornelia.huck@de.ibm.com)
 */

#define KMSG_COMPONENT "cio"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/reboot.h>
#include <linux/suspend.h>
#include <linux/proc_fs.h>
#include <asm/isc.h>
#include <asm/crw.h>

#include "css.h"
#include "cio.h"
#include "cio_debug.h"
#include "ioasm.h"
#include "chsc.h"
#include "device.h"
#include "idset.h"
#include "chp.h"

int css_init_done = 0;
int max_ssid;

struct channel_subsystem *channel_subsystems[__MAX_CSSID + 1];
static struct bus_type css_bus_type;

int
for_each_subchannel(int(*fn)(struct subchannel_id, void *), void *data)
{
	struct subchannel_id schid;
	int ret;

	init_subchannel_id(&schid);
	do {
		do {
			ret = fn(schid, data);
			if (ret)
				break;
		} while (schid.sch_no++ < __MAX_SUBCHANNEL);
		schid.sch_no = 0;
	} while (schid.ssid++ < max_ssid);
	return ret;
}

struct cb_data {
	void *data;
	struct idset *set;
	int (*fn_known_sch)(struct subchannel *, void *);
	int (*fn_unknown_sch)(struct subchannel_id, void *);
};

static int call_fn_known_sch(struct device *dev, void *data)
{
	struct subchannel *sch = to_subchannel(dev);
	struct cb_data *cb = data;
	int rc = 0;

	if (cb->set)
		idset_sch_del(cb->set, sch->schid);
	if (cb->fn_known_sch)
		rc = cb->fn_known_sch(sch, cb->data);
	return rc;
}

static int call_fn_unknown_sch(struct subchannel_id schid, void *data)
{
	struct cb_data *cb = data;
	int rc = 0;

	if (idset_sch_contains(cb->set, schid))
		rc = cb->fn_unknown_sch(schid, cb->data);
	return rc;
}

static int call_fn_all_sch(struct subchannel_id schid, void *data)
{
	struct cb_data *cb = data;
	struct subchannel *sch;
	int rc = 0;

	sch = get_subchannel_by_schid(schid);
	if (sch) {
		if (cb->fn_known_sch)
			rc = cb->fn_known_sch(sch, cb->data);
		put_device(&sch->dev);
	} else {
		if (cb->fn_unknown_sch)
			rc = cb->fn_unknown_sch(schid, cb->data);
	}

	return rc;
}

int for_each_subchannel_staged(int (*fn_known)(struct subchannel *, void *),
			       int (*fn_unknown)(struct subchannel_id,
			       void *), void *data)
{
	struct cb_data cb;
	int rc;

	cb.data = data;
	cb.fn_known_sch = fn_known;
	cb.fn_unknown_sch = fn_unknown;

	if (fn_known && !fn_unknown) {
		/* Skip idset allocation in case of known-only loop. */
		cb.set = NULL;
		return bus_for_each_dev(&css_bus_type, NULL, &cb,
					call_fn_known_sch);
	}

	cb.set = idset_sch_new();
	if (!cb.set)
		/* fall back to brute force scanning in case of oom */
		return for_each_subchannel(call_fn_all_sch, &cb);

	idset_fill(cb.set);

	/* Process registered subchannels. */
	rc = bus_for_each_dev(&css_bus_type, NULL, &cb, call_fn_known_sch);
	if (rc)
		goto out;
	/* Process unregistered subchannels. */
	if (fn_unknown)
		rc = for_each_subchannel(call_fn_unknown_sch, &cb);
out:
	idset_free(cb.set);

	return rc;
}

static void css_sch_todo(struct work_struct *work);

static int css_sch_create_locks(struct subchannel *sch)
{
	sch->lock = kmalloc(sizeof(*sch->lock), GFP_KERNEL);
	if (!sch->lock)
		return -ENOMEM;

	spin_lock_init(sch->lock);
	mutex_init(&sch->reg_mutex);

	return 0;
}

static void css_subchannel_release(struct device *dev)
{
	struct subchannel *sch = to_subchannel(dev);

	sch->config.intparm = 0;
	cio_commit_config(sch);
	kfree(sch->lock);
	kfree(sch);
}

struct subchannel *css_alloc_subchannel(struct subchannel_id schid)
{
	struct subchannel *sch;
	int ret;

	sch = kzalloc(sizeof(*sch), GFP_KERNEL | GFP_DMA);
	if (!sch)
		return ERR_PTR(-ENOMEM);

	ret = cio_validate_subchannel(sch, schid);
	if (ret < 0)
		goto err;

	ret = css_sch_create_locks(sch);
	if (ret)
		goto err;

	INIT_WORK(&sch->todo_work, css_sch_todo);
	sch->dev.release = &css_subchannel_release;
	device_initialize(&sch->dev);
	return sch;

err:
	kfree(sch);
	return ERR_PTR(ret);
}

static int css_sch_device_register(struct subchannel *sch)
{
	int ret;

	mutex_lock(&sch->reg_mutex);
	dev_set_name(&sch->dev, "0.%x.%04x", sch->schid.ssid,
		     sch->schid.sch_no);
	ret = device_add(&sch->dev);
	mutex_unlock(&sch->reg_mutex);
	return ret;
}

/**
 * css_sch_device_unregister - unregister a subchannel
 * @sch: subchannel to be unregistered
 */
void css_sch_device_unregister(struct subchannel *sch)
{
	mutex_lock(&sch->reg_mutex);
	if (device_is_registered(&sch->dev))
		device_unregister(&sch->dev);
	mutex_unlock(&sch->reg_mutex);
}
EXPORT_SYMBOL_GPL(css_sch_device_unregister);

static void ssd_from_pmcw(struct chsc_ssd_info *ssd, struct pmcw *pmcw)
{
	int i;
	int mask;

	memset(ssd, 0, sizeof(struct chsc_ssd_info));
	ssd->path_mask = pmcw->pim;
	for (i = 0; i < 8; i++) {
		mask = 0x80 >> i;
		if (pmcw->pim & mask) {
			chp_id_init(&ssd->chpid[i]);
			ssd->chpid[i].id = pmcw->chpid[i];
		}
	}
}

static void ssd_register_chpids(struct chsc_ssd_info *ssd)
{
	int i;
	int mask;

	for (i = 0; i < 8; i++) {
		mask = 0x80 >> i;
		if (ssd->path_mask & mask)
			if (!chp_is_registered(ssd->chpid[i]))
				chp_new(ssd->chpid[i]);
	}
}

void css_update_ssd_info(struct subchannel *sch)
{
	int ret;

	ret = chsc_get_ssd_info(sch->schid, &sch->ssd_info);
	if (ret)
		ssd_from_pmcw(&sch->ssd_info, &sch->schib.pmcw);

	ssd_register_chpids(&sch->ssd_info);
}

static ssize_t type_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct subchannel *sch = to_subchannel(dev);

	return sprintf(buf, "%01x\n", sch->st);
}

static DEVICE_ATTR(type, 0444, type_show, NULL);

static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct subchannel *sch = to_subchannel(dev);

	return sprintf(buf, "css:t%01X\n", sch->st);
}

static DEVICE_ATTR(modalias, 0444, modalias_show, NULL);

static struct attribute *subch_attrs[] = {
	&dev_attr_type.attr,
	&dev_attr_modalias.attr,
	NULL,
};

static struct attribute_group subch_attr_group = {
	.attrs = subch_attrs,
};

static const struct attribute_group *default_subch_attr_groups[] = {
	&subch_attr_group,
	NULL,
};

int css_register_subchannel(struct subchannel *sch)
{
	int ret;

	/* Initialize the subchannel structure */
	sch->dev.parent = &channel_subsystems[0]->device;
	sch->dev.bus = &css_bus_type;
	sch->dev.groups = default_subch_attr_groups;
	/*
	 * We don't want to generate uevents for I/O subchannels that don't
	 * have a working ccw device behind them since they will be
	 * unregistered before they can be used anyway, so we delay the add
	 * uevent until after device recognition was successful.
	 * Note that we suppress the uevent for all subchannel types;
	 * the subchannel driver can decide itself when it wants to inform
	 * userspace of its existence.
	 */
	dev_set_uevent_suppress(&sch->dev, 1);
	css_update_ssd_info(sch);
	/* make it known to the system */
	ret = css_sch_device_register(sch);
	if (ret) {
		CIO_MSG_EVENT(0, "Could not register sch 0.%x.%04x: %d\n",
			      sch->schid.ssid, sch->schid.sch_no, ret);
		return ret;
	}
	if (!sch->driver) {
		/*
		 * No driver matched. Generate the uevent now so that
		 * a fitting driver module may be loaded based on the
		 * modalias.
		 */
		dev_set_uevent_suppress(&sch->dev, 0);
		kobject_uevent(&sch->dev.kobj, KOBJ_ADD);
	}
	return ret;
}

static int css_probe_device(struct subchannel_id schid)
{
	struct subchannel *sch;
	int ret;

	sch = css_alloc_subchannel(schid);
	if (IS_ERR(sch))
		return PTR_ERR(sch);

	ret = css_register_subchannel(sch);
	if (ret)
		put_device(&sch->dev);

	return ret;
}

static int
check_subchannel(struct device * dev, void * data)
{
	struct subchannel *sch;
	struct subchannel_id *schid = data;

	sch = to_subchannel(dev);
	return schid_equal(&sch->schid, schid);
}

struct subchannel *
get_subchannel_by_schid(struct subchannel_id schid)
{
	struct device *dev;

	dev = bus_find_device(&css_bus_type, NULL,
			      &schid, check_subchannel);

	return dev ? to_subchannel(dev) : NULL;
}

/**
 * css_sch_is_valid() - check if a subchannel is valid
 * @schib: subchannel information block for the subchannel
 */
int css_sch_is_valid(struct schib *schib)
{
	if ((schib->pmcw.st == SUBCHANNEL_TYPE_IO) && !schib->pmcw.dnv)
		return 0;
	if ((schib->pmcw.st == SUBCHANNEL_TYPE_MSG) && !schib->pmcw.w)
		return 0;
	return 1;
}
EXPORT_SYMBOL_GPL(css_sch_is_valid);

static int css_evaluate_new_subchannel(struct subchannel_id schid, int slow)
{
	struct schib schib;

	if (!slow) {
		/* Will be done on the slow path. */
		return -EAGAIN;
	}
	if (stsch_err(schid, &schib)) {
		/* Subchannel is not provided. */
		return -ENXIO;
	}
	if (!css_sch_is_valid(&schib)) {
		/* Unusable - ignore. */
		return 0;
	}
	CIO_MSG_EVENT(4, "event: sch 0.%x.%04x, new\n", schid.ssid,
		      schid.sch_no);

	return css_probe_device(schid);
}

static int css_evaluate_known_subchannel(struct subchannel *sch, int slow)
{
	int ret = 0;

	if (sch->driver) {
		if (sch->driver->sch_event)
			ret = sch->driver->sch_event(sch, slow);
		else
			dev_dbg(&sch->dev,
				"Got subchannel machine check but "
				"no sch_event handler provided.\n");
	}
	if (ret != 0 && ret != -EAGAIN) {
		CIO_MSG_EVENT(2, "eval: sch 0.%x.%04x, rc=%d\n",
			      sch->schid.ssid, sch->schid.sch_no, ret);
	}
	return ret;
}

static void css_evaluate_subchannel(struct subchannel_id schid, int slow)
{
	struct subchannel *sch;
	int ret;

	sch = get_subchannel_by_schid(schid);
	if (sch) {
		ret = css_evaluate_known_subchannel(sch, slow);
		put_device(&sch->dev);
	} else
		ret = css_evaluate_new_subchannel(schid, slow);
	if (ret == -EAGAIN)
		css_schedule_eval(schid);
}

/**
 * css_sched_sch_todo - schedule a subchannel operation
 * @sch: subchannel
 * @todo: todo
 *
 * Schedule the operation identified by @todo to be performed on the slow path
 * workqueue. Do nothing if another operation with higher priority is already
 * scheduled. Needs to be called with subchannel lock held.
 */
void css_sched_sch_todo(struct subchannel *sch, enum sch_todo todo)
{
	CIO_MSG_EVENT(4, "sch_todo: sched sch=0.%x.%04x todo=%d\n",
		      sch->schid.ssid, sch->schid.sch_no, todo);
	if (sch->todo >= todo)
		return;
	/* Get workqueue ref. */
	if (!get_device(&sch->dev))
		return;
	sch->todo = todo;
	if (!queue_work(cio_work_q, &sch->todo_work)) {
		/* Already queued, release workqueue ref. */
		put_device(&sch->dev);
	}
}
EXPORT_SYMBOL_GPL(css_sched_sch_todo);

static void css_sch_todo(struct work_struct *work)
{
	struct subchannel *sch;
	enum sch_todo todo;
	int ret;

	sch = container_of(work, struct subchannel, todo_work);
	/* Find out todo. */
	spin_lock_irq(sch->lock);
	todo = sch->todo;
	CIO_MSG_EVENT(4, "sch_todo: sch=0.%x.%04x, todo=%d\n", sch->schid.ssid,
		      sch->schid.sch_no, todo);
	sch->todo = SCH_TODO_NOTHING;
	spin_unlock_irq(sch->lock);
	/* Perform todo. */
	switch (todo) {
	case SCH_TODO_NOTHING:
		break;
	case SCH_TODO_EVAL:
		ret = css_evaluate_known_subchannel(sch, 1);
		if (ret == -EAGAIN) {
			spin_lock_irq(sch->lock);
			css_sched_sch_todo(sch, todo);
			spin_unlock_irq(sch->lock);
		}
		break;
	case SCH_TODO_UNREG:
		css_sch_device_unregister(sch);
		break;
	}
	/* Release workqueue ref. */
	put_device(&sch->dev);
}

static struct idset *slow_subchannel_set;
static spinlock_t slow_subchannel_lock;
static wait_queue_head_t css_eval_wq;
static atomic_t css_eval_scheduled;

static int __init slow_subchannel_init(void)
{
	spin_lock_init(&slow_subchannel_lock);
	atomic_set(&css_eval_scheduled, 0);
	init_waitqueue_head(&css_eval_wq);
	slow_subchannel_set = idset_sch_new();
	if (!slow_subchannel_set) {
		CIO_MSG_EVENT(0, "could not allocate slow subchannel set\n");
		return -ENOMEM;
	}
	return 0;
}

static int slow_eval_known_fn(struct subchannel *sch, void *data)
{
	int eval;
	int rc;

	spin_lock_irq(&slow_subchannel_lock);
	eval = idset_sch_contains(slow_subchannel_set, sch->schid);
	idset_sch_del(slow_subchannel_set, sch->schid);
	spin_unlock_irq(&slow_subchannel_lock);
	if (eval) {
		rc = css_evaluate_known_subchannel(sch, 1);
		if (rc == -EAGAIN)
			css_schedule_eval(sch->schid);
	}
	return 0;
}

static int slow_eval_unknown_fn(struct subchannel_id schid, void *data)
{
	int eval;
	int rc = 0;

	spin_lock_irq(&slow_subchannel_lock);
	eval = idset_sch_contains(slow_subchannel_set, schid);
	idset_sch_del(slow_subchannel_set, schid);
	spin_unlock_irq(&slow_subchannel_lock);
	if (eval) {
		rc = css_evaluate_new_subchannel(schid, 1);
		switch (rc) {
		case -EAGAIN:
			css_schedule_eval(schid);
			rc = 0;
			break;
		case -ENXIO:
		case -ENOMEM:
		case -EIO:
			/* These should abort looping */
			spin_lock_irq(&slow_subchannel_lock);
			idset_sch_del_subseq(slow_subchannel_set, schid);
			spin_unlock_irq(&slow_subchannel_lock);
			break;
		default:
			rc = 0;
		}
		/* Allow scheduling here since the containing loop might
		 * take a while.  */
		cond_resched();
	}
	return rc;
}

static void css_slow_path_func(struct work_struct *unused)
{
	unsigned long flags;

	CIO_TRACE_EVENT(4, "slowpath");
	for_each_subchannel_staged(slow_eval_known_fn, slow_eval_unknown_fn,
				   NULL);
	spin_lock_irqsave(&slow_subchannel_lock, flags);
	if (idset_is_empty(slow_subchannel_set)) {
		atomic_set(&css_eval_scheduled, 0);
		wake_up(&css_eval_wq);
	}
	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
}

static DECLARE_DELAYED_WORK(slow_path_work, css_slow_path_func);
struct workqueue_struct *cio_work_q;

void css_schedule_eval(struct subchannel_id schid)
{
	unsigned long flags;

	spin_lock_irqsave(&slow_subchannel_lock, flags);
	idset_sch_add(slow_subchannel_set, schid);
	atomic_set(&css_eval_scheduled, 1);
	queue_delayed_work(cio_work_q, &slow_path_work, 0);
	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
}

void css_schedule_eval_all(void)
{
	unsigned long flags;

	spin_lock_irqsave(&slow_subchannel_lock, flags);
	idset_fill(slow_subchannel_set);
	atomic_set(&css_eval_scheduled, 1);
	queue_delayed_work(cio_work_q, &slow_path_work, 0);
	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
}

static int __unset_registered(struct device *dev, void *data)
{
	struct idset *set = data;
	struct subchannel *sch = to_subchannel(dev);

	idset_sch_del(set, sch->schid);
	return 0;
}

void css_schedule_eval_all_unreg(unsigned long delay)
{
	unsigned long flags;
	struct idset *unreg_set;

	/* Find unregistered subchannels. */
	unreg_set = idset_sch_new();
	if (!unreg_set) {
		/* Fallback. */
		css_schedule_eval_all();
		return;
	}
	idset_fill(unreg_set);
	bus_for_each_dev(&css_bus_type, NULL, unreg_set, __unset_registered);
	/* Apply to slow_subchannel_set. */
	spin_lock_irqsave(&slow_subchannel_lock, flags);
	idset_add_set(slow_subchannel_set, unreg_set);
	atomic_set(&css_eval_scheduled, 1);
	queue_delayed_work(cio_work_q, &slow_path_work, delay);
	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
	idset_free(unreg_set);
}

void css_wait_for_slow_path(void)
{
	flush_workqueue(cio_work_q);
}

/* Schedule reprobing of all unregistered subchannels. */
void css_schedule_reprobe(void)
{
	/* Schedule with a delay to allow merging of subsequent calls. */
	css_schedule_eval_all_unreg(1 * HZ);
}
EXPORT_SYMBOL_GPL(css_schedule_reprobe);

/*
 * Called from the machine check handler for subchannel report words.
 */
static void css_process_crw(struct crw *crw0, struct crw *crw1, int overflow)
{
	struct subchannel_id mchk_schid;
	struct subchannel *sch;

	if (overflow) {
		css_schedule_eval_all();
		return;
	}
	CIO_CRW_EVENT(2, "CRW0 reports slct=%d, oflw=%d, "
		      "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n",
		      crw0->slct, crw0->oflw, crw0->chn, crw0->rsc, crw0->anc,
		      crw0->erc, crw0->rsid);
	if (crw1)
		CIO_CRW_EVENT(2, "CRW1 reports slct=%d, oflw=%d, "
			      "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n",
			      crw1->slct, crw1->oflw, crw1->chn, crw1->rsc,
			      crw1->anc, crw1->erc, crw1->rsid);
	init_subchannel_id(&mchk_schid);
	mchk_schid.sch_no = crw0->rsid;
	if (crw1)
		mchk_schid.ssid = (crw1->rsid >> 4) & 3;

	if (crw0->erc == CRW_ERC_PMOD) {
		sch = get_subchannel_by_schid(mchk_schid);
		if (sch) {
			css_update_ssd_info(sch);
			put_device(&sch->dev);
		}
	}
	/*
	 * Since we are always presented with IPI in the CRW, we have to
	 * use stsch() to find out if the subchannel in question has come
	 * or gone.
	 */
	css_evaluate_subchannel(mchk_schid, 0);
}

static void __init
css_generate_pgid(struct channel_subsystem *css, u32 tod_high)
{
	struct cpuid cpu_id;

	if (css_general_characteristics.mcss) {
		css->global_pgid.pgid_high.ext_cssid.version = 0x80;
		css->global_pgid.pgid_high.ext_cssid.cssid = css->cssid;
	} else {
		css->global_pgid.pgid_high.cpu_addr = stap();
	}
	get_cpu_id(&cpu_id);
	css->global_pgid.cpu_id = cpu_id.ident;
	css->global_pgid.cpu_model = cpu_id.machine;
	css->global_pgid.tod_high = tod_high;
}

static void
channel_subsystem_release(struct device *dev)
{
	struct channel_subsystem *css;

	css = to_css(dev);
	mutex_destroy(&css->mutex);
	if (css->pseudo_subchannel) {
		/* Implies that it has been generated but never registered. */
		css_subchannel_release(&css->pseudo_subchannel->dev);
		css->pseudo_subchannel = NULL;
	}
	kfree(css);
}

static ssize_t
css_cm_enable_show(struct device *dev, struct device_attribute *attr,
		   char *buf)
{
	struct channel_subsystem *css = to_css(dev);
	int ret;

	if (!css)
		return 0;
	mutex_lock(&css->mutex);
	ret = sprintf(buf, "%x\n", css->cm_enabled);
	mutex_unlock(&css->mutex);
	return ret;
}

static ssize_t
css_cm_enable_store(struct device *dev, struct device_attribute *attr,
		    const char *buf, size_t count)
{
	struct channel_subsystem *css = to_css(dev);
	int ret;
	unsigned long val;

	ret = kstrtoul(buf, 16, &val);
	if (ret)
		return ret;
	mutex_lock(&css->mutex);
	switch (val) {
	case 0:
		ret = css->cm_enabled ? chsc_secm(css, 0) : 0;
		break;
	case 1:
		ret = css->cm_enabled ? 0 : chsc_secm(css, 1);
		break;
	default:
		ret = -EINVAL;
	}
	mutex_unlock(&css->mutex);
	return ret < 0 ? ret : count;
}

static DEVICE_ATTR(cm_enable, 0644, css_cm_enable_show, css_cm_enable_store);

static int __init setup_css(int nr)
{
	u32 tod_high;
	int ret;
	struct channel_subsystem *css;

	css = channel_subsystems[nr];
	memset(css, 0, sizeof(struct channel_subsystem));
	css->pseudo_subchannel =
		kzalloc(sizeof(*css->pseudo_subchannel), GFP_KERNEL);
	if (!css->pseudo_subchannel)
		return -ENOMEM;
	css->pseudo_subchannel->dev.parent = &css->device;
	css->pseudo_subchannel->dev.release = css_subchannel_release;
	dev_set_name(&css->pseudo_subchannel->dev, "defunct");
	mutex_init(&css->pseudo_subchannel->reg_mutex);
	ret = css_sch_create_locks(css->pseudo_subchannel);
	if (ret) {
		kfree(css->pseudo_subchannel);
		return ret;
	}
	mutex_init(&css->mutex);
	css->valid = 1;
	css->cssid = nr;
	dev_set_name(&css->device, "css%x", nr);
	css->device.release = channel_subsystem_release;
	tod_high = (u32) (get_tod_clock() >> 32);
	css_generate_pgid(css, tod_high);
	return 0;
}

static int css_reboot_event(struct notifier_block *this,
			    unsigned long event,
			    void *ptr)
{
	int ret, i;

	ret = NOTIFY_DONE;
	for (i = 0; i <= __MAX_CSSID; i++) {
		struct channel_subsystem *css;

		css = channel_subsystems[i];
		mutex_lock(&css->mutex);
		if (css->cm_enabled)
			if (chsc_secm(css, 0))
				ret = NOTIFY_BAD;
		mutex_unlock(&css->mutex);
	}

	return ret;
}

static struct notifier_block css_reboot_notifier = {
	.notifier_call = css_reboot_event,
};

/*
 * Since the css devices are neither on a bus nor have a class
 * nor have a special device type, we cannot stop/restart channel
 * path measurements via the normal suspend/resume callbacks, but have
 * to use notifiers.
 */
static int css_power_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	int ret, i;

	switch (event) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		ret = NOTIFY_DONE;
		for (i = 0; i <= __MAX_CSSID; i++) {
			struct channel_subsystem *css;

			css = channel_subsystems[i];
			mutex_lock(&css->mutex);
			if (!css->cm_enabled) {
				mutex_unlock(&css->mutex);
				continue;
			}
			ret = __chsc_do_secm(css, 0);
			ret = notifier_from_errno(ret);
			mutex_unlock(&css->mutex);
		}
		break;
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		ret = NOTIFY_DONE;
		for (i = 0; i <= __MAX_CSSID; i++) {
			struct channel_subsystem *css;

			css = channel_subsystems[i];
			mutex_lock(&css->mutex);
			if (!css->cm_enabled) {
				mutex_unlock(&css->mutex);
				continue;
			}
			ret = __chsc_do_secm(css, 1);
			ret = notifier_from_errno(ret);
			mutex_unlock(&css->mutex);
		}
		/* search for subchannels, which appeared during hibernation */
		css_schedule_reprobe();
		break;
	default:
		ret = NOTIFY_DONE;
	}
	return ret;

}
static struct notifier_block css_power_notifier = {
	.notifier_call = css_power_event,
};

/*
 * Now that the driver core is running, we can setup our channel subsystem.
 * The struct subchannel's are created during probing.
 */
static int __init css_bus_init(void)
{
	int ret, i;

	ret = chsc_init();
	if (ret)
		return ret;

	chsc_determine_css_characteristics();
	/* Try to enable MSS. */
	ret = chsc_enable_facility(CHSC_SDA_OC_MSS);
	if (ret)
		max_ssid = 0;
	else /* Success. */
		max_ssid = __MAX_SSID;

	ret = slow_subchannel_init();
	if (ret)
		goto out;

	ret = crw_register_handler(CRW_RSC_SCH, css_process_crw);
	if (ret)
		goto out;

	if ((ret = bus_register(&css_bus_type)))
		goto out;

	/* Setup css structure. */
	for (i = 0; i <= __MAX_CSSID; i++) {
		struct channel_subsystem *css;

		css = kmalloc(sizeof(struct channel_subsystem), GFP_KERNEL);
		if (!css) {
			ret = -ENOMEM;
			goto out_unregister;
		}
		channel_subsystems[i] = css;
		ret = setup_css(i);
		if (ret) {
			kfree(channel_subsystems[i]);
			goto out_unregister;
		}
		ret = device_register(&css->device);
		if (ret) {
			put_device(&css->device);
			goto out_unregister;
		}
		if (css_chsc_characteristics.secm) {
			ret = device_create_file(&css->device,
						 &dev_attr_cm_enable);
			if (ret)
				goto out_device;
		}
		ret = device_register(&css->pseudo_subchannel->dev);
		if (ret) {
			put_device(&css->pseudo_subchannel->dev);
			goto out_file;
		}
	}
	ret = register_reboot_notifier(&css_reboot_notifier);
	if (ret)
		goto out_unregister;
	ret = register_pm_notifier(&css_power_notifier);
	if (ret) {
		unregister_reboot_notifier(&css_reboot_notifier);
		goto out_unregister;
	}
	css_init_done = 1;

	/* Enable default isc for I/O subchannels. */
	isc_register(IO_SCH_ISC);

	return 0;
out_file:
	if (css_chsc_characteristics.secm)
		device_remove_file(&channel_subsystems[i]->device,
				   &dev_attr_cm_enable);
out_device:
	device_unregister(&channel_subsystems[i]->device);
out_unregister:
	while (i > 0) {
		struct channel_subsystem *css;

		i--;
		css = channel_subsystems[i];
		device_unregister(&css->pseudo_subchannel->dev);
		css->pseudo_subchannel = NULL;
		if (css_chsc_characteristics.secm)
			device_remove_file(&css->device,
					   &dev_attr_cm_enable);
		device_unregister(&css->device);
	}
	bus_unregister(&css_bus_type);
out:
	crw_unregister_handler(CRW_RSC_SCH);
	idset_free(slow_subchannel_set);
	chsc_init_cleanup();
	pr_alert("The CSS device driver initialization failed with "
		 "errno=%d\n", ret);
	return ret;
}

static void __init css_bus_cleanup(void)
{
	struct channel_subsystem *css;
	int i;

	for (i = 0; i <= __MAX_CSSID; i++) {
		css = channel_subsystems[i];
		device_unregister(&css->pseudo_subchannel->dev);
		css->pseudo_subchannel = NULL;
		if (css_chsc_characteristics.secm)
			device_remove_file(&css->device, &dev_attr_cm_enable);
		device_unregister(&css->device);
	}
	bus_unregister(&css_bus_type);
	crw_unregister_handler(CRW_RSC_SCH);
	idset_free(slow_subchannel_set);
	chsc_init_cleanup();
	isc_unregister(IO_SCH_ISC);
}

static int __init channel_subsystem_init(void)
{
	int ret;

	ret = css_bus_init();
	if (ret)
		return ret;
	cio_work_q = create_singlethread_workqueue("cio");
	if (!cio_work_q) {
		ret = -ENOMEM;
		goto out_bus;
	}
	ret = io_subchannel_init();
	if (ret)
		goto out_wq;

	return ret;
out_wq:
	destroy_workqueue(cio_work_q);
out_bus:
	css_bus_cleanup();
	return ret;
}
subsys_initcall(channel_subsystem_init);

static int css_settle(struct device_driver *drv, void *unused)
{
	struct css_driver *cssdrv = to_cssdriver(drv);

	if (cssdrv->settle)
		return cssdrv->settle();
	return 0;
}

int css_complete_work(void)
{
	int ret;

	/* Wait for the evaluation of subchannels to finish. */
	ret = wait_event_interruptible(css_eval_wq,
				       atomic_read(&css_eval_scheduled) == 0);
	if (ret)
		return -EINTR;
	flush_workqueue(cio_work_q);
	/* Wait for the subchannel type specific initialization to finish */
	return bus_for_each_drv(&css_bus_type, NULL, NULL, css_settle);
}


/*
 * Wait for the initialization of devices to finish, to make sure we are
 * done with our setup if the search for the root device starts.
 */
static int __init channel_subsystem_init_sync(void)
{
	/* Register subchannels which are already in use. */
	cio_register_early_subchannels();
	/* Start initial subchannel evaluation. */
	css_schedule_eval_all();
	css_complete_work();
	return 0;
}
subsys_initcall_sync(channel_subsystem_init_sync);

void channel_subsystem_reinit(void)
{
	struct channel_path *chp;
	struct chp_id chpid;

	chsc_enable_facility(CHSC_SDA_OC_MSS);
	chp_id_for_each(&chpid) {
		chp = chpid_to_chp(chpid);
		if (chp)
			chp_update_desc(chp);
	}
	cmf_reactivate();
}

#ifdef CONFIG_PROC_FS
static ssize_t cio_settle_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	int ret;

	/* Handle pending CRW's. */
	crw_wait_for_channel_report();
	ret = css_complete_work();

	return ret ? ret : count;
}

static const struct file_operations cio_settle_proc_fops = {
	.open = nonseekable_open,
	.write = cio_settle_write,
	.llseek = no_llseek,
};

static int __init cio_settle_init(void)
{
	struct proc_dir_entry *entry;

	entry = proc_create("cio_settle", S_IWUSR, NULL,
			    &cio_settle_proc_fops);
	if (!entry)
		return -ENOMEM;
	return 0;
}
device_initcall(cio_settle_init);
#endif /*CONFIG_PROC_FS*/

int sch_is_pseudo_sch(struct subchannel *sch)
{
	return sch == to_css(sch->dev.parent)->pseudo_subchannel;
}

static int css_bus_match(struct device *dev, struct device_driver *drv)
{
	struct subchannel *sch = to_subchannel(dev);
	struct css_driver *driver = to_cssdriver(drv);
	struct css_device_id *id;

	for (id = driver->subchannel_type; id->match_flags; id++) {
		if (sch->st == id->type)
			return 1;
	}

	return 0;
}

static int css_probe(struct device *dev)
{
	struct subchannel *sch;
	int ret;

	sch = to_subchannel(dev);
	sch->driver = to_cssdriver(dev->driver);
	ret = sch->driver->probe ? sch->driver->probe(sch) : 0;
	if (ret)
		sch->driver = NULL;
	return ret;
}

static int css_remove(struct device *dev)
{
	struct subchannel *sch;
	int ret;

	sch = to_subchannel(dev);
	ret = sch->driver->remove ? sch->driver->remove(sch) : 0;
	sch->driver = NULL;
	return ret;
}

static void css_shutdown(struct device *dev)
{
	struct subchannel *sch;

	sch = to_subchannel(dev);
	if (sch->driver && sch->driver->shutdown)
		sch->driver->shutdown(sch);
}

static int css_uevent(struct device *dev, struct kobj_uevent_env *env)
{
	struct subchannel *sch = to_subchannel(dev);
	int ret;

	ret = add_uevent_var(env, "ST=%01X", sch->st);
	if (ret)
		return ret;
	ret = add_uevent_var(env, "MODALIAS=css:t%01X", sch->st);
	return ret;
}

static int css_pm_prepare(struct device *dev)
{
	struct subchannel *sch = to_subchannel(dev);
	struct css_driver *drv;

	if (mutex_is_locked(&sch->reg_mutex))
		return -EAGAIN;
	if (!sch->dev.driver)
		return 0;
	drv = to_cssdriver(sch->dev.driver);
	/* Notify drivers that they may not register children. */
	return drv->prepare ? drv->prepare(sch) : 0;
}

static void css_pm_complete(struct device *dev)
{
	struct subchannel *sch = to_subchannel(dev);
	struct css_driver *drv;

	if (!sch->dev.driver)
		return;
	drv = to_cssdriver(sch->dev.driver);
	if (drv->complete)
		drv->complete(sch);
}

static int css_pm_freeze(struct device *dev)
{
	struct subchannel *sch = to_subchannel(dev);
	struct css_driver *drv;

	if (!sch->dev.driver)
		return 0;
	drv = to_cssdriver(sch->dev.driver);
	return drv->freeze ? drv->freeze(sch) : 0;
}

static int css_pm_thaw(struct device *dev)
{
	struct subchannel *sch = to_subchannel(dev);
	struct css_driver *drv;

	if (!sch->dev.driver)
		return 0;
	drv = to_cssdriver(sch->dev.driver);
	return drv->thaw ? drv->thaw(sch) : 0;
}

static int css_pm_restore(struct device *dev)
{
	struct subchannel *sch = to_subchannel(dev);
	struct css_driver *drv;

	css_update_ssd_info(sch);
	if (!sch->dev.driver)
		return 0;
	drv = to_cssdriver(sch->dev.driver);
	return drv->restore ? drv->restore(sch) : 0;
}

static const struct dev_pm_ops css_pm_ops = {
	.prepare = css_pm_prepare,
	.complete = css_pm_complete,
	.freeze = css_pm_freeze,
	.thaw = css_pm_thaw,
	.restore = css_pm_restore,
};

static struct bus_type css_bus_type = {
	.name     = "css",
	.match    = css_bus_match,
	.probe    = css_probe,
	.remove   = css_remove,
	.shutdown = css_shutdown,
	.uevent   = css_uevent,
	.pm = &css_pm_ops,
};

/**
 * css_driver_register - register a css driver
 * @cdrv: css driver to register
 *
 * This is mainly a wrapper around driver_register that sets name
 * and bus_type in the embedded struct device_driver correctly.
 */
int css_driver_register(struct css_driver *cdrv)
{
	cdrv->drv.bus = &css_bus_type;
	return driver_register(&cdrv->drv);
}
EXPORT_SYMBOL_GPL(css_driver_register);

/**
 * css_driver_unregister - unregister a css driver
 * @cdrv: css driver to unregister
 *
 * This is a wrapper around driver_unregister.
 */
void css_driver_unregister(struct css_driver *cdrv)
{
	driver_unregister(&cdrv->drv);
}
EXPORT_SYMBOL_GPL(css_driver_unregister);

MODULE_LICENSE("GPL");