From 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 Mon Sep 17 00:00:00 2001 From: Yunhong Jiang Date: Tue, 4 Aug 2015 12:17:53 -0700 Subject: Add the rt linux 4.1.3-rt3 as base Import the rt linux 4.1.3-rt3 as OPNFV kvm base. It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and the base is: commit 0917f823c59692d751951bf5ea699a2d1e2f26a2 Author: Sebastian Andrzej Siewior Date: Sat Jul 25 12:13:34 2015 +0200 Prepare v4.1.3-rt3 Signed-off-by: Sebastian Andrzej Siewior We lose all the git history this way and it's not good. We should apply another opnfv project repo in future. Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423 Signed-off-by: Yunhong Jiang --- kernel/arch/ia64/Kconfig | 638 ++ kernel/arch/ia64/Kconfig.debug | 64 + kernel/arch/ia64/Makefile | 102 + kernel/arch/ia64/configs/bigsur_defconfig | 109 + kernel/arch/ia64/configs/generic_defconfig | 220 + kernel/arch/ia64/configs/gensparse_defconfig | 199 + kernel/arch/ia64/configs/sim_defconfig | 52 + kernel/arch/ia64/configs/tiger_defconfig | 174 + kernel/arch/ia64/configs/zx1_defconfig | 154 + kernel/arch/ia64/dig/Makefile | 14 + kernel/arch/ia64/dig/machvec.c | 3 + kernel/arch/ia64/dig/machvec_vtd.c | 3 + kernel/arch/ia64/dig/setup.c | 70 + kernel/arch/ia64/hp/common/Makefile | 11 + kernel/arch/ia64/hp/common/aml_nfw.c | 235 + kernel/arch/ia64/hp/common/hwsw_iommu.c | 59 + kernel/arch/ia64/hp/common/sba_iommu.c | 2241 +++++++ kernel/arch/ia64/hp/sim/Kconfig | 22 + kernel/arch/ia64/hp/sim/Makefile | 16 + kernel/arch/ia64/hp/sim/boot/Makefile | 37 + kernel/arch/ia64/hp/sim/boot/boot_head.S | 164 + kernel/arch/ia64/hp/sim/boot/bootloader.c | 174 + kernel/arch/ia64/hp/sim/boot/bootloader.lds | 66 + kernel/arch/ia64/hp/sim/boot/fw-emu.c | 373 ++ kernel/arch/ia64/hp/sim/boot/ssc.h | 35 + kernel/arch/ia64/hp/sim/hpsim.S | 10 + kernel/arch/ia64/hp/sim/hpsim_console.c | 76 + kernel/arch/ia64/hp/sim/hpsim_irq.c | 75 + kernel/arch/ia64/hp/sim/hpsim_machvec.c | 3 + 
kernel/arch/ia64/hp/sim/hpsim_setup.c | 40 + kernel/arch/ia64/hp/sim/hpsim_ssc.h | 36 + kernel/arch/ia64/hp/sim/simeth.c | 509 ++ kernel/arch/ia64/hp/sim/simscsi.c | 380 ++ kernel/arch/ia64/hp/sim/simserial.c | 545 ++ kernel/arch/ia64/hp/zx1/Makefile | 8 + kernel/arch/ia64/hp/zx1/hpzx1_machvec.c | 3 + kernel/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c | 3 + kernel/arch/ia64/include/asm/Kbuild | 10 + kernel/arch/ia64/include/asm/acenv.h | 52 + kernel/arch/ia64/include/asm/acpi-ext.h | 20 + kernel/arch/ia64/include/asm/acpi.h | 147 + kernel/arch/ia64/include/asm/agp.h | 26 + kernel/arch/ia64/include/asm/asm-offsets.h | 1 + kernel/arch/ia64/include/asm/asmmacro.h | 135 + kernel/arch/ia64/include/asm/atomic.h | 196 + kernel/arch/ia64/include/asm/barrier.h | 93 + kernel/arch/ia64/include/asm/bitops.h | 456 ++ kernel/arch/ia64/include/asm/bug.h | 14 + kernel/arch/ia64/include/asm/bugs.h | 19 + kernel/arch/ia64/include/asm/cache.h | 29 + kernel/arch/ia64/include/asm/cacheflush.h | 54 + kernel/arch/ia64/include/asm/checksum.h | 79 + kernel/arch/ia64/include/asm/clocksource.h | 10 + kernel/arch/ia64/include/asm/cpu.h | 22 + kernel/arch/ia64/include/asm/cputime.h | 29 + kernel/arch/ia64/include/asm/current.h | 17 + kernel/arch/ia64/include/asm/cyclone.h | 15 + kernel/arch/ia64/include/asm/delay.h | 88 + kernel/arch/ia64/include/asm/device.h | 18 + kernel/arch/ia64/include/asm/div64.h | 1 + kernel/arch/ia64/include/asm/dma-mapping.h | 109 + kernel/arch/ia64/include/asm/dma.h | 24 + kernel/arch/ia64/include/asm/dmi.h | 14 + kernel/arch/ia64/include/asm/elf.h | 234 + kernel/arch/ia64/include/asm/emergency-restart.h | 6 + kernel/arch/ia64/include/asm/esi.h | 29 + kernel/arch/ia64/include/asm/fb.h | 23 + kernel/arch/ia64/include/asm/fpswa.h | 73 + kernel/arch/ia64/include/asm/ftrace.h | 27 + kernel/arch/ia64/include/asm/futex.h | 126 + kernel/arch/ia64/include/asm/gcc_intrin.h | 12 + kernel/arch/ia64/include/asm/gpio.h | 4 + kernel/arch/ia64/include/asm/hardirq.h | 26 + 
kernel/arch/ia64/include/asm/hpsim.h | 16 + kernel/arch/ia64/include/asm/hugetlb.h | 85 + kernel/arch/ia64/include/asm/hw_irq.h | 194 + kernel/arch/ia64/include/asm/idle.h | 7 + kernel/arch/ia64/include/asm/intrinsics.h | 25 + kernel/arch/ia64/include/asm/io.h | 449 ++ kernel/arch/ia64/include/asm/iommu.h | 22 + kernel/arch/ia64/include/asm/iommu_table.h | 6 + kernel/arch/ia64/include/asm/iosapic.h | 121 + kernel/arch/ia64/include/asm/irq.h | 37 + kernel/arch/ia64/include/asm/irq_regs.h | 1 + kernel/arch/ia64/include/asm/irq_remapping.h | 6 + kernel/arch/ia64/include/asm/irqflags.h | 98 + kernel/arch/ia64/include/asm/kdebug.h | 57 + kernel/arch/ia64/include/asm/kexec.h | 45 + kernel/arch/ia64/include/asm/kmap_types.h | 12 + kernel/arch/ia64/include/asm/kprobes.h | 127 + kernel/arch/ia64/include/asm/kregs.h | 165 + kernel/arch/ia64/include/asm/libata-portmap.h | 12 + kernel/arch/ia64/include/asm/linkage.h | 18 + kernel/arch/ia64/include/asm/local.h | 1 + kernel/arch/ia64/include/asm/local64.h | 1 + kernel/arch/ia64/include/asm/machvec.h | 367 ++ kernel/arch/ia64/include/asm/machvec_dig.h | 16 + kernel/arch/ia64/include/asm/machvec_dig_vtd.h | 18 + kernel/arch/ia64/include/asm/machvec_hpsim.h | 18 + kernel/arch/ia64/include/asm/machvec_hpzx1.h | 18 + .../arch/ia64/include/asm/machvec_hpzx1_swiotlb.h | 19 + kernel/arch/ia64/include/asm/machvec_init.h | 35 + kernel/arch/ia64/include/asm/machvec_sn2.h | 118 + kernel/arch/ia64/include/asm/machvec_uv.h | 26 + kernel/arch/ia64/include/asm/mc146818rtc.h | 10 + kernel/arch/ia64/include/asm/mca.h | 187 + kernel/arch/ia64/include/asm/mca_asm.h | 244 + kernel/arch/ia64/include/asm/meminit.h | 74 + kernel/arch/ia64/include/asm/mman.h | 17 + kernel/arch/ia64/include/asm/mmu.h | 13 + kernel/arch/ia64/include/asm/mmu_context.h | 198 + kernel/arch/ia64/include/asm/mmzone.h | 42 + kernel/arch/ia64/include/asm/module.h | 40 + kernel/arch/ia64/include/asm/msidef.h | 42 + kernel/arch/ia64/include/asm/mutex.h | 90 + 
kernel/arch/ia64/include/asm/native/inst.h | 194 + kernel/arch/ia64/include/asm/native/irq.h | 33 + kernel/arch/ia64/include/asm/native/patchlist.h | 38 + kernel/arch/ia64/include/asm/native/pvchk_inst.h | 271 + kernel/arch/ia64/include/asm/nodedata.h | 63 + kernel/arch/ia64/include/asm/numa.h | 79 + kernel/arch/ia64/include/asm/page.h | 236 + kernel/arch/ia64/include/asm/pal.h | 1825 ++++++ kernel/arch/ia64/include/asm/param.h | 17 + kernel/arch/ia64/include/asm/paravirt.h | 321 + kernel/arch/ia64/include/asm/paravirt_patch.h | 143 + kernel/arch/ia64/include/asm/paravirt_privop.h | 479 ++ kernel/arch/ia64/include/asm/parport.h | 19 + kernel/arch/ia64/include/asm/patch.h | 27 + kernel/arch/ia64/include/asm/pci.h | 133 + kernel/arch/ia64/include/asm/percpu.h | 54 + kernel/arch/ia64/include/asm/perfmon.h | 110 + kernel/arch/ia64/include/asm/pgalloc.h | 125 + kernel/arch/ia64/include/asm/pgtable.h | 594 ++ kernel/arch/ia64/include/asm/processor.h | 711 ++ kernel/arch/ia64/include/asm/ptrace.h | 151 + kernel/arch/ia64/include/asm/rwsem.h | 145 + kernel/arch/ia64/include/asm/sal.h | 917 +++ kernel/arch/ia64/include/asm/sections.h | 42 + kernel/arch/ia64/include/asm/segment.h | 6 + kernel/arch/ia64/include/asm/serial.h | 17 + kernel/arch/ia64/include/asm/shmparam.h | 12 + kernel/arch/ia64/include/asm/siginfo.h | 23 + kernel/arch/ia64/include/asm/signal.h | 32 + kernel/arch/ia64/include/asm/smp.h | 137 + kernel/arch/ia64/include/asm/sn/acpi.h | 15 + kernel/arch/ia64/include/asm/sn/addrs.h | 299 + kernel/arch/ia64/include/asm/sn/arch.h | 86 + kernel/arch/ia64/include/asm/sn/bte.h | 234 + kernel/arch/ia64/include/asm/sn/clksupport.h | 28 + kernel/arch/ia64/include/asm/sn/geo.h | 132 + kernel/arch/ia64/include/asm/sn/intr.h | 68 + kernel/arch/ia64/include/asm/sn/io.h | 274 + kernel/arch/ia64/include/asm/sn/ioc3.h | 241 + kernel/arch/ia64/include/asm/sn/klconfig.h | 246 + kernel/arch/ia64/include/asm/sn/l1.h | 51 + kernel/arch/ia64/include/asm/sn/leds.h | 33 + 
kernel/arch/ia64/include/asm/sn/module.h | 127 + kernel/arch/ia64/include/asm/sn/mspec.h | 59 + kernel/arch/ia64/include/asm/sn/nodepda.h | 82 + kernel/arch/ia64/include/asm/sn/pcibr_provider.h | 150 + .../ia64/include/asm/sn/pcibus_provider_defs.h | 68 + kernel/arch/ia64/include/asm/sn/pcidev.h | 85 + kernel/arch/ia64/include/asm/sn/pda.h | 68 + kernel/arch/ia64/include/asm/sn/pic.h | 261 + kernel/arch/ia64/include/asm/sn/rw_mmr.h | 28 + kernel/arch/ia64/include/asm/sn/shub_mmr.h | 502 ++ kernel/arch/ia64/include/asm/sn/shubio.h | 3358 ++++++++++ kernel/arch/ia64/include/asm/sn/simulator.h | 25 + kernel/arch/ia64/include/asm/sn/sn2/sn_hwperf.h | 242 + kernel/arch/ia64/include/asm/sn/sn_cpuid.h | 132 + kernel/arch/ia64/include/asm/sn/sn_feature_sets.h | 58 + kernel/arch/ia64/include/asm/sn/sn_sal.h | 1233 ++++ kernel/arch/ia64/include/asm/sn/tioca.h | 596 ++ kernel/arch/ia64/include/asm/sn/tioca_provider.h | 207 + kernel/arch/ia64/include/asm/sn/tioce.h | 760 +++ kernel/arch/ia64/include/asm/sn/tioce_provider.h | 63 + kernel/arch/ia64/include/asm/sn/tiocp.h | 257 + kernel/arch/ia64/include/asm/sn/tiocx.h | 72 + kernel/arch/ia64/include/asm/sn/types.h | 26 + kernel/arch/ia64/include/asm/sparsemem.h | 20 + kernel/arch/ia64/include/asm/spinlock.h | 296 + kernel/arch/ia64/include/asm/spinlock_types.h | 21 + kernel/arch/ia64/include/asm/string.h | 21 + kernel/arch/ia64/include/asm/swiotlb.h | 17 + kernel/arch/ia64/include/asm/switch_to.h | 79 + kernel/arch/ia64/include/asm/syscall.h | 88 + kernel/arch/ia64/include/asm/termios.h | 57 + kernel/arch/ia64/include/asm/thread_info.h | 152 + kernel/arch/ia64/include/asm/timex.h | 45 + kernel/arch/ia64/include/asm/tlb.h | 283 + kernel/arch/ia64/include/asm/tlbflush.h | 102 + kernel/arch/ia64/include/asm/topology.h | 67 + kernel/arch/ia64/include/asm/types.h | 31 + kernel/arch/ia64/include/asm/uaccess.h | 402 ++ kernel/arch/ia64/include/asm/unaligned.h | 11 + kernel/arch/ia64/include/asm/uncached.h | 12 + 
kernel/arch/ia64/include/asm/unistd.h | 50 + kernel/arch/ia64/include/asm/unwind.h | 233 + kernel/arch/ia64/include/asm/user.h | 58 + kernel/arch/ia64/include/asm/ustack.h | 11 + kernel/arch/ia64/include/asm/uv/uv.h | 12 + kernel/arch/ia64/include/asm/uv/uv_hub.h | 315 + kernel/arch/ia64/include/asm/uv/uv_mmrs.h | 825 +++ kernel/arch/ia64/include/asm/vga.h | 25 + kernel/arch/ia64/include/asm/xor.h | 31 + kernel/arch/ia64/include/uapi/asm/Kbuild | 49 + kernel/arch/ia64/include/uapi/asm/auxvec.h | 13 + kernel/arch/ia64/include/uapi/asm/bitsperlong.h | 8 + kernel/arch/ia64/include/uapi/asm/break.h | 23 + kernel/arch/ia64/include/uapi/asm/byteorder.h | 6 + kernel/arch/ia64/include/uapi/asm/cmpxchg.h | 156 + kernel/arch/ia64/include/uapi/asm/errno.h | 1 + kernel/arch/ia64/include/uapi/asm/fcntl.h | 14 + kernel/arch/ia64/include/uapi/asm/fpu.h | 66 + kernel/arch/ia64/include/uapi/asm/gcc_intrin.h | 618 ++ kernel/arch/ia64/include/uapi/asm/ia64regs.h | 100 + kernel/arch/ia64/include/uapi/asm/intel_intrin.h | 161 + kernel/arch/ia64/include/uapi/asm/intrinsics.h | 124 + kernel/arch/ia64/include/uapi/asm/ioctl.h | 1 + kernel/arch/ia64/include/uapi/asm/ioctls.h | 6 + kernel/arch/ia64/include/uapi/asm/ipcbuf.h | 1 + kernel/arch/ia64/include/uapi/asm/mman.h | 16 + kernel/arch/ia64/include/uapi/asm/msgbuf.h | 27 + kernel/arch/ia64/include/uapi/asm/param.h | 29 + kernel/arch/ia64/include/uapi/asm/perfmon.h | 177 + .../ia64/include/uapi/asm/perfmon_default_smpl.h | 83 + kernel/arch/ia64/include/uapi/asm/poll.h | 1 + kernel/arch/ia64/include/uapi/asm/posix_types.h | 8 + kernel/arch/ia64/include/uapi/asm/ptrace.h | 247 + kernel/arch/ia64/include/uapi/asm/ptrace_offsets.h | 268 + kernel/arch/ia64/include/uapi/asm/resource.h | 7 + kernel/arch/ia64/include/uapi/asm/rse.h | 66 + kernel/arch/ia64/include/uapi/asm/sembuf.h | 22 + kernel/arch/ia64/include/uapi/asm/setup.h | 24 + kernel/arch/ia64/include/uapi/asm/shmbuf.h | 38 + kernel/arch/ia64/include/uapi/asm/sigcontext.h | 70 + 
kernel/arch/ia64/include/uapi/asm/siginfo.h | 125 + kernel/arch/ia64/include/uapi/asm/signal.h | 121 + kernel/arch/ia64/include/uapi/asm/socket.h | 97 + kernel/arch/ia64/include/uapi/asm/sockios.h | 20 + kernel/arch/ia64/include/uapi/asm/stat.h | 51 + kernel/arch/ia64/include/uapi/asm/statfs.h | 20 + kernel/arch/ia64/include/uapi/asm/swab.h | 34 + kernel/arch/ia64/include/uapi/asm/termbits.h | 208 + kernel/arch/ia64/include/uapi/asm/termios.h | 50 + kernel/arch/ia64/include/uapi/asm/types.h | 31 + kernel/arch/ia64/include/uapi/asm/ucontext.h | 12 + kernel/arch/ia64/include/uapi/asm/unistd.h | 336 + kernel/arch/ia64/include/uapi/asm/ustack.h | 12 + kernel/arch/ia64/install.sh | 40 + kernel/arch/ia64/kernel/.gitignore | 2 + kernel/arch/ia64/kernel/Makefile | 113 + kernel/arch/ia64/kernel/Makefile.gate | 27 + kernel/arch/ia64/kernel/acpi-ext.c | 104 + kernel/arch/ia64/kernel/acpi.c | 994 +++ kernel/arch/ia64/kernel/asm-offsets.c | 290 + kernel/arch/ia64/kernel/audit.c | 60 + kernel/arch/ia64/kernel/brl_emu.c | 234 + kernel/arch/ia64/kernel/crash.c | 286 + kernel/arch/ia64/kernel/crash_dump.c | 50 + kernel/arch/ia64/kernel/cyclone.c | 124 + kernel/arch/ia64/kernel/dma-mapping.c | 24 + kernel/arch/ia64/kernel/efi.c | 1342 ++++ kernel/arch/ia64/kernel/efi_stub.S | 86 + kernel/arch/ia64/kernel/elfcore.c | 76 + kernel/arch/ia64/kernel/entry.S | 1785 ++++++ kernel/arch/ia64/kernel/entry.h | 82 + kernel/arch/ia64/kernel/err_inject.c | 314 + kernel/arch/ia64/kernel/esi.c | 205 + kernel/arch/ia64/kernel/esi_stub.S | 96 + kernel/arch/ia64/kernel/fsys.S | 836 +++ kernel/arch/ia64/kernel/fsyscall_gtod_data.h | 23 + kernel/arch/ia64/kernel/ftrace.c | 204 + kernel/arch/ia64/kernel/gate-data.S | 3 + kernel/arch/ia64/kernel/gate.S | 386 ++ kernel/arch/ia64/kernel/gate.lds.S | 108 + kernel/arch/ia64/kernel/head.S | 1212 ++++ kernel/arch/ia64/kernel/ia64_ksyms.c | 98 + kernel/arch/ia64/kernel/init_task.c | 42 + kernel/arch/ia64/kernel/iosapic.c | 1141 ++++ kernel/arch/ia64/kernel/irq.c 
| 202 + kernel/arch/ia64/kernel/irq_ia64.c | 670 ++ kernel/arch/ia64/kernel/irq_lsapic.c | 44 + kernel/arch/ia64/kernel/ivt.S | 1688 +++++ kernel/arch/ia64/kernel/jprobes.S | 90 + kernel/arch/ia64/kernel/kprobes.c | 1129 ++++ kernel/arch/ia64/kernel/machine_kexec.c | 170 + kernel/arch/ia64/kernel/machvec.c | 90 + kernel/arch/ia64/kernel/mca.c | 2166 +++++++ kernel/arch/ia64/kernel/mca_asm.S | 1122 ++++ kernel/arch/ia64/kernel/mca_drv.c | 795 +++ kernel/arch/ia64/kernel/mca_drv.h | 122 + kernel/arch/ia64/kernel/mca_drv_asm.S | 55 + kernel/arch/ia64/kernel/minstate.h | 250 + kernel/arch/ia64/kernel/module.c | 951 +++ kernel/arch/ia64/kernel/msi_ia64.c | 206 + kernel/arch/ia64/kernel/nr-irqs.c | 21 + kernel/arch/ia64/kernel/numa.c | 85 + kernel/arch/ia64/kernel/pal.S | 298 + kernel/arch/ia64/kernel/palinfo.c | 1022 +++ kernel/arch/ia64/kernel/paravirt.c | 902 +++ kernel/arch/ia64/kernel/paravirt_inst.h | 28 + kernel/arch/ia64/kernel/paravirt_patch.c | 514 ++ kernel/arch/ia64/kernel/paravirt_patchlist.c | 81 + kernel/arch/ia64/kernel/paravirt_patchlist.h | 24 + kernel/arch/ia64/kernel/paravirtentry.S | 121 + kernel/arch/ia64/kernel/patch.c | 256 + kernel/arch/ia64/kernel/pci-dma.c | 110 + kernel/arch/ia64/kernel/pci-swiotlb.c | 67 + kernel/arch/ia64/kernel/perfmon.c | 6782 ++++++++++++++++++++ kernel/arch/ia64/kernel/perfmon_default_smpl.c | 296 + kernel/arch/ia64/kernel/perfmon_generic.h | 45 + kernel/arch/ia64/kernel/perfmon_itanium.h | 115 + kernel/arch/ia64/kernel/perfmon_mckinley.h | 187 + kernel/arch/ia64/kernel/perfmon_montecito.h | 269 + kernel/arch/ia64/kernel/process.c | 682 ++ kernel/arch/ia64/kernel/ptrace.c | 2194 +++++++ kernel/arch/ia64/kernel/relocate_kernel.S | 325 + kernel/arch/ia64/kernel/sal.c | 405 ++ kernel/arch/ia64/kernel/salinfo.c | 704 ++ kernel/arch/ia64/kernel/setup.c | 1071 ++++ kernel/arch/ia64/kernel/sigframe.h | 25 + kernel/arch/ia64/kernel/signal.c | 496 ++ kernel/arch/ia64/kernel/smp.c | 342 + kernel/arch/ia64/kernel/smpboot.c | 861 
+++ kernel/arch/ia64/kernel/stacktrace.c | 39 + kernel/arch/ia64/kernel/sys_ia64.c | 183 + kernel/arch/ia64/kernel/time.c | 456 ++ kernel/arch/ia64/kernel/topology.c | 470 ++ kernel/arch/ia64/kernel/traps.c | 652 ++ kernel/arch/ia64/kernel/unaligned.c | 1542 +++++ kernel/arch/ia64/kernel/uncached.c | 281 + kernel/arch/ia64/kernel/unwind.c | 2319 +++++++ kernel/arch/ia64/kernel/unwind_decoder.c | 459 ++ kernel/arch/ia64/kernel/unwind_i.h | 164 + kernel/arch/ia64/kernel/vmlinux.lds.S | 248 + kernel/arch/ia64/lib/Makefile | 50 + kernel/arch/ia64/lib/carta_random.S | 54 + kernel/arch/ia64/lib/checksum.c | 101 + kernel/arch/ia64/lib/clear_page.S | 76 + kernel/arch/ia64/lib/clear_user.S | 209 + kernel/arch/ia64/lib/copy_page.S | 98 + kernel/arch/ia64/lib/copy_page_mck.S | 185 + kernel/arch/ia64/lib/copy_user.S | 610 ++ kernel/arch/ia64/lib/csum_partial_copy.c | 140 + kernel/arch/ia64/lib/do_csum.S | 323 + kernel/arch/ia64/lib/flush.S | 117 + kernel/arch/ia64/lib/idiv32.S | 83 + kernel/arch/ia64/lib/idiv64.S | 80 + kernel/arch/ia64/lib/io.c | 164 + kernel/arch/ia64/lib/ip_fast_csum.S | 144 + kernel/arch/ia64/lib/memcpy.S | 301 + kernel/arch/ia64/lib/memcpy_mck.S | 666 ++ kernel/arch/ia64/lib/memset.S | 362 ++ kernel/arch/ia64/lib/strlen.S | 192 + kernel/arch/ia64/lib/strlen_user.S | 198 + kernel/arch/ia64/lib/strncpy_from_user.S | 44 + kernel/arch/ia64/lib/strnlen_user.S | 45 + kernel/arch/ia64/lib/xor.S | 184 + kernel/arch/ia64/mm/Makefile | 11 + kernel/arch/ia64/mm/contig.c | 278 + kernel/arch/ia64/mm/discontig.c | 764 +++ kernel/arch/ia64/mm/extable.c | 115 + kernel/arch/ia64/mm/fault.c | 308 + kernel/arch/ia64/mm/hugetlbpage.c | 199 + kernel/arch/ia64/mm/init.c | 741 +++ kernel/arch/ia64/mm/ioremap.c | 125 + kernel/arch/ia64/mm/numa.c | 110 + kernel/arch/ia64/mm/tlb.c | 561 ++ kernel/arch/ia64/module.lds | 13 + kernel/arch/ia64/oprofile/Makefile | 10 + kernel/arch/ia64/oprofile/backtrace.c | 131 + kernel/arch/ia64/oprofile/init.c | 38 + 
kernel/arch/ia64/oprofile/perfmon.c | 99 + kernel/arch/ia64/pci/Makefile | 4 + kernel/arch/ia64/pci/fixup.c | 70 + kernel/arch/ia64/pci/pci.c | 824 +++ kernel/arch/ia64/scripts/check-gas | 15 + kernel/arch/ia64/scripts/check-gas-asm.S | 2 + kernel/arch/ia64/scripts/check-model.c | 1 + kernel/arch/ia64/scripts/check-segrel.S | 4 + kernel/arch/ia64/scripts/check-segrel.lds | 12 + kernel/arch/ia64/scripts/check-serialize.S | 2 + kernel/arch/ia64/scripts/check-text-align.S | 6 + kernel/arch/ia64/scripts/pvcheck.sed | 33 + kernel/arch/ia64/scripts/toolchain-flags | 53 + kernel/arch/ia64/scripts/unwcheck.py | 64 + kernel/arch/ia64/sn/Makefile | 12 + kernel/arch/ia64/sn/include/ioerror.h | 81 + kernel/arch/ia64/sn/include/tio.h | 41 + kernel/arch/ia64/sn/include/xtalk/hubdev.h | 91 + kernel/arch/ia64/sn/include/xtalk/xbow.h | 301 + kernel/arch/ia64/sn/include/xtalk/xwidgetdev.h | 70 + kernel/arch/ia64/sn/kernel/Makefile | 18 + kernel/arch/ia64/sn/kernel/bte.c | 471 ++ kernel/arch/ia64/sn/kernel/bte_error.c | 260 + kernel/arch/ia64/sn/kernel/huberror.c | 220 + kernel/arch/ia64/sn/kernel/idle.c | 30 + kernel/arch/ia64/sn/kernel/io_acpi_init.c | 510 ++ kernel/arch/ia64/sn/kernel/io_common.c | 564 ++ kernel/arch/ia64/sn/kernel/io_init.c | 321 + kernel/arch/ia64/sn/kernel/iomv.c | 82 + kernel/arch/ia64/sn/kernel/irq.c | 488 ++ kernel/arch/ia64/sn/kernel/klconflib.c | 107 + kernel/arch/ia64/sn/kernel/machvec.c | 11 + kernel/arch/ia64/sn/kernel/mca.c | 146 + kernel/arch/ia64/sn/kernel/msi_sn.c | 238 + kernel/arch/ia64/sn/kernel/pio_phys.S | 71 + kernel/arch/ia64/sn/kernel/setup.c | 775 +++ kernel/arch/ia64/sn/kernel/sn2/Makefile | 15 + kernel/arch/ia64/sn/kernel/sn2/cache.c | 41 + kernel/arch/ia64/sn/kernel/sn2/io.c | 101 + kernel/arch/ia64/sn/kernel/sn2/prominfo_proc.c | 231 + kernel/arch/ia64/sn/kernel/sn2/ptc_deadlock.S | 92 + kernel/arch/ia64/sn/kernel/sn2/sn2_smp.c | 572 ++ kernel/arch/ia64/sn/kernel/sn2/sn_hwperf.c | 1003 +++ kernel/arch/ia64/sn/kernel/sn2/sn_proc_fs.c | 
117 + kernel/arch/ia64/sn/kernel/sn2/timer.c | 60 + kernel/arch/ia64/sn/kernel/sn2/timer_interrupt.c | 60 + kernel/arch/ia64/sn/kernel/tiocx.c | 569 ++ kernel/arch/ia64/sn/pci/Makefile | 12 + kernel/arch/ia64/sn/pci/pci_dma.c | 487 ++ kernel/arch/ia64/sn/pci/pcibr/Makefile | 13 + kernel/arch/ia64/sn/pci/pcibr/pcibr_ate.c | 177 + kernel/arch/ia64/sn/pci/pcibr/pcibr_dma.c | 413 ++ kernel/arch/ia64/sn/pci/pcibr/pcibr_provider.c | 265 + kernel/arch/ia64/sn/pci/pcibr/pcibr_reg.c | 285 + kernel/arch/ia64/sn/pci/tioca_provider.c | 677 ++ kernel/arch/ia64/sn/pci/tioce_provider.c | 1062 +++ kernel/arch/ia64/uv/Makefile | 12 + kernel/arch/ia64/uv/kernel/Makefile | 13 + kernel/arch/ia64/uv/kernel/machvec.c | 11 + kernel/arch/ia64/uv/kernel/setup.c | 116 + 434 files changed, 101798 insertions(+) create mode 100644 kernel/arch/ia64/Kconfig create mode 100644 kernel/arch/ia64/Kconfig.debug create mode 100644 kernel/arch/ia64/Makefile create mode 100644 kernel/arch/ia64/configs/bigsur_defconfig create mode 100644 kernel/arch/ia64/configs/generic_defconfig create mode 100644 kernel/arch/ia64/configs/gensparse_defconfig create mode 100644 kernel/arch/ia64/configs/sim_defconfig create mode 100644 kernel/arch/ia64/configs/tiger_defconfig create mode 100644 kernel/arch/ia64/configs/zx1_defconfig create mode 100644 kernel/arch/ia64/dig/Makefile create mode 100644 kernel/arch/ia64/dig/machvec.c create mode 100644 kernel/arch/ia64/dig/machvec_vtd.c create mode 100644 kernel/arch/ia64/dig/setup.c create mode 100644 kernel/arch/ia64/hp/common/Makefile create mode 100644 kernel/arch/ia64/hp/common/aml_nfw.c create mode 100644 kernel/arch/ia64/hp/common/hwsw_iommu.c create mode 100644 kernel/arch/ia64/hp/common/sba_iommu.c create mode 100644 kernel/arch/ia64/hp/sim/Kconfig create mode 100644 kernel/arch/ia64/hp/sim/Makefile create mode 100644 kernel/arch/ia64/hp/sim/boot/Makefile create mode 100644 kernel/arch/ia64/hp/sim/boot/boot_head.S create mode 100644 
kernel/arch/ia64/hp/sim/boot/bootloader.c create mode 100644 kernel/arch/ia64/hp/sim/boot/bootloader.lds create mode 100644 kernel/arch/ia64/hp/sim/boot/fw-emu.c create mode 100644 kernel/arch/ia64/hp/sim/boot/ssc.h create mode 100644 kernel/arch/ia64/hp/sim/hpsim.S create mode 100644 kernel/arch/ia64/hp/sim/hpsim_console.c create mode 100644 kernel/arch/ia64/hp/sim/hpsim_irq.c create mode 100644 kernel/arch/ia64/hp/sim/hpsim_machvec.c create mode 100644 kernel/arch/ia64/hp/sim/hpsim_setup.c create mode 100644 kernel/arch/ia64/hp/sim/hpsim_ssc.h create mode 100644 kernel/arch/ia64/hp/sim/simeth.c create mode 100644 kernel/arch/ia64/hp/sim/simscsi.c create mode 100644 kernel/arch/ia64/hp/sim/simserial.c create mode 100644 kernel/arch/ia64/hp/zx1/Makefile create mode 100644 kernel/arch/ia64/hp/zx1/hpzx1_machvec.c create mode 100644 kernel/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c create mode 100644 kernel/arch/ia64/include/asm/Kbuild create mode 100644 kernel/arch/ia64/include/asm/acenv.h create mode 100644 kernel/arch/ia64/include/asm/acpi-ext.h create mode 100644 kernel/arch/ia64/include/asm/acpi.h create mode 100644 kernel/arch/ia64/include/asm/agp.h create mode 100644 kernel/arch/ia64/include/asm/asm-offsets.h create mode 100644 kernel/arch/ia64/include/asm/asmmacro.h create mode 100644 kernel/arch/ia64/include/asm/atomic.h create mode 100644 kernel/arch/ia64/include/asm/barrier.h create mode 100644 kernel/arch/ia64/include/asm/bitops.h create mode 100644 kernel/arch/ia64/include/asm/bug.h create mode 100644 kernel/arch/ia64/include/asm/bugs.h create mode 100644 kernel/arch/ia64/include/asm/cache.h create mode 100644 kernel/arch/ia64/include/asm/cacheflush.h create mode 100644 kernel/arch/ia64/include/asm/checksum.h create mode 100644 kernel/arch/ia64/include/asm/clocksource.h create mode 100644 kernel/arch/ia64/include/asm/cpu.h create mode 100644 kernel/arch/ia64/include/asm/cputime.h create mode 100644 kernel/arch/ia64/include/asm/current.h create mode 100644 
kernel/arch/ia64/include/asm/cyclone.h create mode 100644 kernel/arch/ia64/include/asm/delay.h create mode 100644 kernel/arch/ia64/include/asm/device.h create mode 100644 kernel/arch/ia64/include/asm/div64.h create mode 100644 kernel/arch/ia64/include/asm/dma-mapping.h create mode 100644 kernel/arch/ia64/include/asm/dma.h create mode 100644 kernel/arch/ia64/include/asm/dmi.h create mode 100644 kernel/arch/ia64/include/asm/elf.h create mode 100644 kernel/arch/ia64/include/asm/emergency-restart.h create mode 100644 kernel/arch/ia64/include/asm/esi.h create mode 100644 kernel/arch/ia64/include/asm/fb.h create mode 100644 kernel/arch/ia64/include/asm/fpswa.h create mode 100644 kernel/arch/ia64/include/asm/ftrace.h create mode 100644 kernel/arch/ia64/include/asm/futex.h create mode 100644 kernel/arch/ia64/include/asm/gcc_intrin.h create mode 100644 kernel/arch/ia64/include/asm/gpio.h create mode 100644 kernel/arch/ia64/include/asm/hardirq.h create mode 100644 kernel/arch/ia64/include/asm/hpsim.h create mode 100644 kernel/arch/ia64/include/asm/hugetlb.h create mode 100644 kernel/arch/ia64/include/asm/hw_irq.h create mode 100644 kernel/arch/ia64/include/asm/idle.h create mode 100644 kernel/arch/ia64/include/asm/intrinsics.h create mode 100644 kernel/arch/ia64/include/asm/io.h create mode 100644 kernel/arch/ia64/include/asm/iommu.h create mode 100644 kernel/arch/ia64/include/asm/iommu_table.h create mode 100644 kernel/arch/ia64/include/asm/iosapic.h create mode 100644 kernel/arch/ia64/include/asm/irq.h create mode 100644 kernel/arch/ia64/include/asm/irq_regs.h create mode 100644 kernel/arch/ia64/include/asm/irq_remapping.h create mode 100644 kernel/arch/ia64/include/asm/irqflags.h create mode 100644 kernel/arch/ia64/include/asm/kdebug.h create mode 100644 kernel/arch/ia64/include/asm/kexec.h create mode 100644 kernel/arch/ia64/include/asm/kmap_types.h create mode 100644 kernel/arch/ia64/include/asm/kprobes.h create mode 100644 kernel/arch/ia64/include/asm/kregs.h create 
mode 100644 kernel/arch/ia64/include/asm/libata-portmap.h create mode 100644 kernel/arch/ia64/include/asm/linkage.h create mode 100644 kernel/arch/ia64/include/asm/local.h create mode 100644 kernel/arch/ia64/include/asm/local64.h create mode 100644 kernel/arch/ia64/include/asm/machvec.h create mode 100644 kernel/arch/ia64/include/asm/machvec_dig.h create mode 100644 kernel/arch/ia64/include/asm/machvec_dig_vtd.h create mode 100644 kernel/arch/ia64/include/asm/machvec_hpsim.h create mode 100644 kernel/arch/ia64/include/asm/machvec_hpzx1.h create mode 100644 kernel/arch/ia64/include/asm/machvec_hpzx1_swiotlb.h create mode 100644 kernel/arch/ia64/include/asm/machvec_init.h create mode 100644 kernel/arch/ia64/include/asm/machvec_sn2.h create mode 100644 kernel/arch/ia64/include/asm/machvec_uv.h create mode 100644 kernel/arch/ia64/include/asm/mc146818rtc.h create mode 100644 kernel/arch/ia64/include/asm/mca.h create mode 100644 kernel/arch/ia64/include/asm/mca_asm.h create mode 100644 kernel/arch/ia64/include/asm/meminit.h create mode 100644 kernel/arch/ia64/include/asm/mman.h create mode 100644 kernel/arch/ia64/include/asm/mmu.h create mode 100644 kernel/arch/ia64/include/asm/mmu_context.h create mode 100644 kernel/arch/ia64/include/asm/mmzone.h create mode 100644 kernel/arch/ia64/include/asm/module.h create mode 100644 kernel/arch/ia64/include/asm/msidef.h create mode 100644 kernel/arch/ia64/include/asm/mutex.h create mode 100644 kernel/arch/ia64/include/asm/native/inst.h create mode 100644 kernel/arch/ia64/include/asm/native/irq.h create mode 100644 kernel/arch/ia64/include/asm/native/patchlist.h create mode 100644 kernel/arch/ia64/include/asm/native/pvchk_inst.h create mode 100644 kernel/arch/ia64/include/asm/nodedata.h create mode 100644 kernel/arch/ia64/include/asm/numa.h create mode 100644 kernel/arch/ia64/include/asm/page.h create mode 100644 kernel/arch/ia64/include/asm/pal.h create mode 100644 kernel/arch/ia64/include/asm/param.h create mode 100644 
kernel/arch/ia64/include/asm/paravirt.h create mode 100644 kernel/arch/ia64/include/asm/paravirt_patch.h create mode 100644 kernel/arch/ia64/include/asm/paravirt_privop.h create mode 100644 kernel/arch/ia64/include/asm/parport.h create mode 100644 kernel/arch/ia64/include/asm/patch.h create mode 100644 kernel/arch/ia64/include/asm/pci.h create mode 100644 kernel/arch/ia64/include/asm/percpu.h create mode 100644 kernel/arch/ia64/include/asm/perfmon.h create mode 100644 kernel/arch/ia64/include/asm/pgalloc.h create mode 100644 kernel/arch/ia64/include/asm/pgtable.h create mode 100644 kernel/arch/ia64/include/asm/processor.h create mode 100644 kernel/arch/ia64/include/asm/ptrace.h create mode 100644 kernel/arch/ia64/include/asm/rwsem.h create mode 100644 kernel/arch/ia64/include/asm/sal.h create mode 100644 kernel/arch/ia64/include/asm/sections.h create mode 100644 kernel/arch/ia64/include/asm/segment.h create mode 100644 kernel/arch/ia64/include/asm/serial.h create mode 100644 kernel/arch/ia64/include/asm/shmparam.h create mode 100644 kernel/arch/ia64/include/asm/siginfo.h create mode 100644 kernel/arch/ia64/include/asm/signal.h create mode 100644 kernel/arch/ia64/include/asm/smp.h create mode 100644 kernel/arch/ia64/include/asm/sn/acpi.h create mode 100644 kernel/arch/ia64/include/asm/sn/addrs.h create mode 100644 kernel/arch/ia64/include/asm/sn/arch.h create mode 100644 kernel/arch/ia64/include/asm/sn/bte.h create mode 100644 kernel/arch/ia64/include/asm/sn/clksupport.h create mode 100644 kernel/arch/ia64/include/asm/sn/geo.h create mode 100644 kernel/arch/ia64/include/asm/sn/intr.h create mode 100644 kernel/arch/ia64/include/asm/sn/io.h create mode 100644 kernel/arch/ia64/include/asm/sn/ioc3.h create mode 100644 kernel/arch/ia64/include/asm/sn/klconfig.h create mode 100644 kernel/arch/ia64/include/asm/sn/l1.h create mode 100644 kernel/arch/ia64/include/asm/sn/leds.h create mode 100644 kernel/arch/ia64/include/asm/sn/module.h create mode 100644 
kernel/arch/ia64/include/asm/sn/mspec.h create mode 100644 kernel/arch/ia64/include/asm/sn/nodepda.h create mode 100644 kernel/arch/ia64/include/asm/sn/pcibr_provider.h create mode 100644 kernel/arch/ia64/include/asm/sn/pcibus_provider_defs.h create mode 100644 kernel/arch/ia64/include/asm/sn/pcidev.h create mode 100644 kernel/arch/ia64/include/asm/sn/pda.h create mode 100644 kernel/arch/ia64/include/asm/sn/pic.h create mode 100644 kernel/arch/ia64/include/asm/sn/rw_mmr.h create mode 100644 kernel/arch/ia64/include/asm/sn/shub_mmr.h create mode 100644 kernel/arch/ia64/include/asm/sn/shubio.h create mode 100644 kernel/arch/ia64/include/asm/sn/simulator.h create mode 100644 kernel/arch/ia64/include/asm/sn/sn2/sn_hwperf.h create mode 100644 kernel/arch/ia64/include/asm/sn/sn_cpuid.h create mode 100644 kernel/arch/ia64/include/asm/sn/sn_feature_sets.h create mode 100644 kernel/arch/ia64/include/asm/sn/sn_sal.h create mode 100644 kernel/arch/ia64/include/asm/sn/tioca.h create mode 100644 kernel/arch/ia64/include/asm/sn/tioca_provider.h create mode 100644 kernel/arch/ia64/include/asm/sn/tioce.h create mode 100644 kernel/arch/ia64/include/asm/sn/tioce_provider.h create mode 100644 kernel/arch/ia64/include/asm/sn/tiocp.h create mode 100644 kernel/arch/ia64/include/asm/sn/tiocx.h create mode 100644 kernel/arch/ia64/include/asm/sn/types.h create mode 100644 kernel/arch/ia64/include/asm/sparsemem.h create mode 100644 kernel/arch/ia64/include/asm/spinlock.h create mode 100644 kernel/arch/ia64/include/asm/spinlock_types.h create mode 100644 kernel/arch/ia64/include/asm/string.h create mode 100644 kernel/arch/ia64/include/asm/swiotlb.h create mode 100644 kernel/arch/ia64/include/asm/switch_to.h create mode 100644 kernel/arch/ia64/include/asm/syscall.h create mode 100644 kernel/arch/ia64/include/asm/termios.h create mode 100644 kernel/arch/ia64/include/asm/thread_info.h create mode 100644 kernel/arch/ia64/include/asm/timex.h create mode 100644 kernel/arch/ia64/include/asm/tlb.h 
create mode 100644 kernel/arch/ia64/include/asm/tlbflush.h create mode 100644 kernel/arch/ia64/include/asm/topology.h create mode 100644 kernel/arch/ia64/include/asm/types.h create mode 100644 kernel/arch/ia64/include/asm/uaccess.h create mode 100644 kernel/arch/ia64/include/asm/unaligned.h create mode 100644 kernel/arch/ia64/include/asm/uncached.h create mode 100644 kernel/arch/ia64/include/asm/unistd.h create mode 100644 kernel/arch/ia64/include/asm/unwind.h create mode 100644 kernel/arch/ia64/include/asm/user.h create mode 100644 kernel/arch/ia64/include/asm/ustack.h create mode 100644 kernel/arch/ia64/include/asm/uv/uv.h create mode 100644 kernel/arch/ia64/include/asm/uv/uv_hub.h create mode 100644 kernel/arch/ia64/include/asm/uv/uv_mmrs.h create mode 100644 kernel/arch/ia64/include/asm/vga.h create mode 100644 kernel/arch/ia64/include/asm/xor.h create mode 100644 kernel/arch/ia64/include/uapi/asm/Kbuild create mode 100644 kernel/arch/ia64/include/uapi/asm/auxvec.h create mode 100644 kernel/arch/ia64/include/uapi/asm/bitsperlong.h create mode 100644 kernel/arch/ia64/include/uapi/asm/break.h create mode 100644 kernel/arch/ia64/include/uapi/asm/byteorder.h create mode 100644 kernel/arch/ia64/include/uapi/asm/cmpxchg.h create mode 100644 kernel/arch/ia64/include/uapi/asm/errno.h create mode 100644 kernel/arch/ia64/include/uapi/asm/fcntl.h create mode 100644 kernel/arch/ia64/include/uapi/asm/fpu.h create mode 100644 kernel/arch/ia64/include/uapi/asm/gcc_intrin.h create mode 100644 kernel/arch/ia64/include/uapi/asm/ia64regs.h create mode 100644 kernel/arch/ia64/include/uapi/asm/intel_intrin.h create mode 100644 kernel/arch/ia64/include/uapi/asm/intrinsics.h create mode 100644 kernel/arch/ia64/include/uapi/asm/ioctl.h create mode 100644 kernel/arch/ia64/include/uapi/asm/ioctls.h create mode 100644 kernel/arch/ia64/include/uapi/asm/ipcbuf.h create mode 100644 kernel/arch/ia64/include/uapi/asm/mman.h create mode 100644 kernel/arch/ia64/include/uapi/asm/msgbuf.h create 
mode 100644 kernel/arch/ia64/include/uapi/asm/param.h create mode 100644 kernel/arch/ia64/include/uapi/asm/perfmon.h create mode 100644 kernel/arch/ia64/include/uapi/asm/perfmon_default_smpl.h create mode 100644 kernel/arch/ia64/include/uapi/asm/poll.h create mode 100644 kernel/arch/ia64/include/uapi/asm/posix_types.h create mode 100644 kernel/arch/ia64/include/uapi/asm/ptrace.h create mode 100644 kernel/arch/ia64/include/uapi/asm/ptrace_offsets.h create mode 100644 kernel/arch/ia64/include/uapi/asm/resource.h create mode 100644 kernel/arch/ia64/include/uapi/asm/rse.h create mode 100644 kernel/arch/ia64/include/uapi/asm/sembuf.h create mode 100644 kernel/arch/ia64/include/uapi/asm/setup.h create mode 100644 kernel/arch/ia64/include/uapi/asm/shmbuf.h create mode 100644 kernel/arch/ia64/include/uapi/asm/sigcontext.h create mode 100644 kernel/arch/ia64/include/uapi/asm/siginfo.h create mode 100644 kernel/arch/ia64/include/uapi/asm/signal.h create mode 100644 kernel/arch/ia64/include/uapi/asm/socket.h create mode 100644 kernel/arch/ia64/include/uapi/asm/sockios.h create mode 100644 kernel/arch/ia64/include/uapi/asm/stat.h create mode 100644 kernel/arch/ia64/include/uapi/asm/statfs.h create mode 100644 kernel/arch/ia64/include/uapi/asm/swab.h create mode 100644 kernel/arch/ia64/include/uapi/asm/termbits.h create mode 100644 kernel/arch/ia64/include/uapi/asm/termios.h create mode 100644 kernel/arch/ia64/include/uapi/asm/types.h create mode 100644 kernel/arch/ia64/include/uapi/asm/ucontext.h create mode 100644 kernel/arch/ia64/include/uapi/asm/unistd.h create mode 100644 kernel/arch/ia64/include/uapi/asm/ustack.h create mode 100644 kernel/arch/ia64/install.sh create mode 100644 kernel/arch/ia64/kernel/.gitignore create mode 100644 kernel/arch/ia64/kernel/Makefile create mode 100644 kernel/arch/ia64/kernel/Makefile.gate create mode 100644 kernel/arch/ia64/kernel/acpi-ext.c create mode 100644 kernel/arch/ia64/kernel/acpi.c create mode 100644 
kernel/arch/ia64/kernel/asm-offsets.c create mode 100644 kernel/arch/ia64/kernel/audit.c create mode 100644 kernel/arch/ia64/kernel/brl_emu.c create mode 100644 kernel/arch/ia64/kernel/crash.c create mode 100644 kernel/arch/ia64/kernel/crash_dump.c create mode 100644 kernel/arch/ia64/kernel/cyclone.c create mode 100644 kernel/arch/ia64/kernel/dma-mapping.c create mode 100644 kernel/arch/ia64/kernel/efi.c create mode 100644 kernel/arch/ia64/kernel/efi_stub.S create mode 100644 kernel/arch/ia64/kernel/elfcore.c create mode 100644 kernel/arch/ia64/kernel/entry.S create mode 100644 kernel/arch/ia64/kernel/entry.h create mode 100644 kernel/arch/ia64/kernel/err_inject.c create mode 100644 kernel/arch/ia64/kernel/esi.c create mode 100644 kernel/arch/ia64/kernel/esi_stub.S create mode 100644 kernel/arch/ia64/kernel/fsys.S create mode 100644 kernel/arch/ia64/kernel/fsyscall_gtod_data.h create mode 100644 kernel/arch/ia64/kernel/ftrace.c create mode 100644 kernel/arch/ia64/kernel/gate-data.S create mode 100644 kernel/arch/ia64/kernel/gate.S create mode 100644 kernel/arch/ia64/kernel/gate.lds.S create mode 100644 kernel/arch/ia64/kernel/head.S create mode 100644 kernel/arch/ia64/kernel/ia64_ksyms.c create mode 100644 kernel/arch/ia64/kernel/init_task.c create mode 100644 kernel/arch/ia64/kernel/iosapic.c create mode 100644 kernel/arch/ia64/kernel/irq.c create mode 100644 kernel/arch/ia64/kernel/irq_ia64.c create mode 100644 kernel/arch/ia64/kernel/irq_lsapic.c create mode 100644 kernel/arch/ia64/kernel/ivt.S create mode 100644 kernel/arch/ia64/kernel/jprobes.S create mode 100644 kernel/arch/ia64/kernel/kprobes.c create mode 100644 kernel/arch/ia64/kernel/machine_kexec.c create mode 100644 kernel/arch/ia64/kernel/machvec.c create mode 100644 kernel/arch/ia64/kernel/mca.c create mode 100644 kernel/arch/ia64/kernel/mca_asm.S create mode 100644 kernel/arch/ia64/kernel/mca_drv.c create mode 100644 kernel/arch/ia64/kernel/mca_drv.h create mode 100644 
kernel/arch/ia64/kernel/mca_drv_asm.S create mode 100644 kernel/arch/ia64/kernel/minstate.h create mode 100644 kernel/arch/ia64/kernel/module.c create mode 100644 kernel/arch/ia64/kernel/msi_ia64.c create mode 100644 kernel/arch/ia64/kernel/nr-irqs.c create mode 100644 kernel/arch/ia64/kernel/numa.c create mode 100644 kernel/arch/ia64/kernel/pal.S create mode 100644 kernel/arch/ia64/kernel/palinfo.c create mode 100644 kernel/arch/ia64/kernel/paravirt.c create mode 100644 kernel/arch/ia64/kernel/paravirt_inst.h create mode 100644 kernel/arch/ia64/kernel/paravirt_patch.c create mode 100644 kernel/arch/ia64/kernel/paravirt_patchlist.c create mode 100644 kernel/arch/ia64/kernel/paravirt_patchlist.h create mode 100644 kernel/arch/ia64/kernel/paravirtentry.S create mode 100644 kernel/arch/ia64/kernel/patch.c create mode 100644 kernel/arch/ia64/kernel/pci-dma.c create mode 100644 kernel/arch/ia64/kernel/pci-swiotlb.c create mode 100644 kernel/arch/ia64/kernel/perfmon.c create mode 100644 kernel/arch/ia64/kernel/perfmon_default_smpl.c create mode 100644 kernel/arch/ia64/kernel/perfmon_generic.h create mode 100644 kernel/arch/ia64/kernel/perfmon_itanium.h create mode 100644 kernel/arch/ia64/kernel/perfmon_mckinley.h create mode 100644 kernel/arch/ia64/kernel/perfmon_montecito.h create mode 100644 kernel/arch/ia64/kernel/process.c create mode 100644 kernel/arch/ia64/kernel/ptrace.c create mode 100644 kernel/arch/ia64/kernel/relocate_kernel.S create mode 100644 kernel/arch/ia64/kernel/sal.c create mode 100644 kernel/arch/ia64/kernel/salinfo.c create mode 100644 kernel/arch/ia64/kernel/setup.c create mode 100644 kernel/arch/ia64/kernel/sigframe.h create mode 100644 kernel/arch/ia64/kernel/signal.c create mode 100644 kernel/arch/ia64/kernel/smp.c create mode 100644 kernel/arch/ia64/kernel/smpboot.c create mode 100644 kernel/arch/ia64/kernel/stacktrace.c create mode 100644 kernel/arch/ia64/kernel/sys_ia64.c create mode 100644 kernel/arch/ia64/kernel/time.c create mode 100644 
kernel/arch/ia64/kernel/topology.c create mode 100644 kernel/arch/ia64/kernel/traps.c create mode 100644 kernel/arch/ia64/kernel/unaligned.c create mode 100644 kernel/arch/ia64/kernel/uncached.c create mode 100644 kernel/arch/ia64/kernel/unwind.c create mode 100644 kernel/arch/ia64/kernel/unwind_decoder.c create mode 100644 kernel/arch/ia64/kernel/unwind_i.h create mode 100644 kernel/arch/ia64/kernel/vmlinux.lds.S create mode 100644 kernel/arch/ia64/lib/Makefile create mode 100644 kernel/arch/ia64/lib/carta_random.S create mode 100644 kernel/arch/ia64/lib/checksum.c create mode 100644 kernel/arch/ia64/lib/clear_page.S create mode 100644 kernel/arch/ia64/lib/clear_user.S create mode 100644 kernel/arch/ia64/lib/copy_page.S create mode 100644 kernel/arch/ia64/lib/copy_page_mck.S create mode 100644 kernel/arch/ia64/lib/copy_user.S create mode 100644 kernel/arch/ia64/lib/csum_partial_copy.c create mode 100644 kernel/arch/ia64/lib/do_csum.S create mode 100644 kernel/arch/ia64/lib/flush.S create mode 100644 kernel/arch/ia64/lib/idiv32.S create mode 100644 kernel/arch/ia64/lib/idiv64.S create mode 100644 kernel/arch/ia64/lib/io.c create mode 100644 kernel/arch/ia64/lib/ip_fast_csum.S create mode 100644 kernel/arch/ia64/lib/memcpy.S create mode 100644 kernel/arch/ia64/lib/memcpy_mck.S create mode 100644 kernel/arch/ia64/lib/memset.S create mode 100644 kernel/arch/ia64/lib/strlen.S create mode 100644 kernel/arch/ia64/lib/strlen_user.S create mode 100644 kernel/arch/ia64/lib/strncpy_from_user.S create mode 100644 kernel/arch/ia64/lib/strnlen_user.S create mode 100644 kernel/arch/ia64/lib/xor.S create mode 100644 kernel/arch/ia64/mm/Makefile create mode 100644 kernel/arch/ia64/mm/contig.c create mode 100644 kernel/arch/ia64/mm/discontig.c create mode 100644 kernel/arch/ia64/mm/extable.c create mode 100644 kernel/arch/ia64/mm/fault.c create mode 100644 kernel/arch/ia64/mm/hugetlbpage.c create mode 100644 kernel/arch/ia64/mm/init.c create mode 100644 
kernel/arch/ia64/mm/ioremap.c create mode 100644 kernel/arch/ia64/mm/numa.c create mode 100644 kernel/arch/ia64/mm/tlb.c create mode 100644 kernel/arch/ia64/module.lds create mode 100644 kernel/arch/ia64/oprofile/Makefile create mode 100644 kernel/arch/ia64/oprofile/backtrace.c create mode 100644 kernel/arch/ia64/oprofile/init.c create mode 100644 kernel/arch/ia64/oprofile/perfmon.c create mode 100644 kernel/arch/ia64/pci/Makefile create mode 100644 kernel/arch/ia64/pci/fixup.c create mode 100644 kernel/arch/ia64/pci/pci.c create mode 100755 kernel/arch/ia64/scripts/check-gas create mode 100644 kernel/arch/ia64/scripts/check-gas-asm.S create mode 100644 kernel/arch/ia64/scripts/check-model.c create mode 100644 kernel/arch/ia64/scripts/check-segrel.S create mode 100644 kernel/arch/ia64/scripts/check-segrel.lds create mode 100644 kernel/arch/ia64/scripts/check-serialize.S create mode 100644 kernel/arch/ia64/scripts/check-text-align.S create mode 100644 kernel/arch/ia64/scripts/pvcheck.sed create mode 100755 kernel/arch/ia64/scripts/toolchain-flags create mode 100644 kernel/arch/ia64/scripts/unwcheck.py create mode 100644 kernel/arch/ia64/sn/Makefile create mode 100644 kernel/arch/ia64/sn/include/ioerror.h create mode 100644 kernel/arch/ia64/sn/include/tio.h create mode 100644 kernel/arch/ia64/sn/include/xtalk/hubdev.h create mode 100644 kernel/arch/ia64/sn/include/xtalk/xbow.h create mode 100644 kernel/arch/ia64/sn/include/xtalk/xwidgetdev.h create mode 100644 kernel/arch/ia64/sn/kernel/Makefile create mode 100644 kernel/arch/ia64/sn/kernel/bte.c create mode 100644 kernel/arch/ia64/sn/kernel/bte_error.c create mode 100644 kernel/arch/ia64/sn/kernel/huberror.c create mode 100644 kernel/arch/ia64/sn/kernel/idle.c create mode 100644 kernel/arch/ia64/sn/kernel/io_acpi_init.c create mode 100644 kernel/arch/ia64/sn/kernel/io_common.c create mode 100644 kernel/arch/ia64/sn/kernel/io_init.c create mode 100644 kernel/arch/ia64/sn/kernel/iomv.c create mode 100644 
kernel/arch/ia64/sn/kernel/irq.c create mode 100644 kernel/arch/ia64/sn/kernel/klconflib.c create mode 100644 kernel/arch/ia64/sn/kernel/machvec.c create mode 100644 kernel/arch/ia64/sn/kernel/mca.c create mode 100644 kernel/arch/ia64/sn/kernel/msi_sn.c create mode 100644 kernel/arch/ia64/sn/kernel/pio_phys.S create mode 100644 kernel/arch/ia64/sn/kernel/setup.c create mode 100644 kernel/arch/ia64/sn/kernel/sn2/Makefile create mode 100644 kernel/arch/ia64/sn/kernel/sn2/cache.c create mode 100644 kernel/arch/ia64/sn/kernel/sn2/io.c create mode 100644 kernel/arch/ia64/sn/kernel/sn2/prominfo_proc.c create mode 100644 kernel/arch/ia64/sn/kernel/sn2/ptc_deadlock.S create mode 100644 kernel/arch/ia64/sn/kernel/sn2/sn2_smp.c create mode 100644 kernel/arch/ia64/sn/kernel/sn2/sn_hwperf.c create mode 100644 kernel/arch/ia64/sn/kernel/sn2/sn_proc_fs.c create mode 100644 kernel/arch/ia64/sn/kernel/sn2/timer.c create mode 100644 kernel/arch/ia64/sn/kernel/sn2/timer_interrupt.c create mode 100644 kernel/arch/ia64/sn/kernel/tiocx.c create mode 100644 kernel/arch/ia64/sn/pci/Makefile create mode 100644 kernel/arch/ia64/sn/pci/pci_dma.c create mode 100644 kernel/arch/ia64/sn/pci/pcibr/Makefile create mode 100644 kernel/arch/ia64/sn/pci/pcibr/pcibr_ate.c create mode 100644 kernel/arch/ia64/sn/pci/pcibr/pcibr_dma.c create mode 100644 kernel/arch/ia64/sn/pci/pcibr/pcibr_provider.c create mode 100644 kernel/arch/ia64/sn/pci/pcibr/pcibr_reg.c create mode 100644 kernel/arch/ia64/sn/pci/tioca_provider.c create mode 100644 kernel/arch/ia64/sn/pci/tioce_provider.c create mode 100644 kernel/arch/ia64/uv/Makefile create mode 100644 kernel/arch/ia64/uv/kernel/Makefile create mode 100644 kernel/arch/ia64/uv/kernel/machvec.c create mode 100644 kernel/arch/ia64/uv/kernel/setup.c (limited to 'kernel/arch/ia64') diff --git a/kernel/arch/ia64/Kconfig b/kernel/arch/ia64/Kconfig new file mode 100644 index 000000000..76d25b2cf --- /dev/null +++ b/kernel/arch/ia64/Kconfig @@ -0,0 +1,638 @@ +config 
PGTABLE_LEVELS + int "Page Table Levels" if !IA64_PAGE_SIZE_64KB + range 3 4 if !IA64_PAGE_SIZE_64KB + default 3 + +source "init/Kconfig" + +source "kernel/Kconfig.freezer" + +menu "Processor type and features" + +config IA64 + bool + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO + select PCI if (!IA64_HP_SIM) + select ACPI if (!IA64_HP_SIM) + select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI + select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI + select HAVE_UNSTABLE_SCHED_CLOCK + select HAVE_IDE + select HAVE_OPROFILE + select HAVE_KPROBES + select HAVE_KRETPROBES + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_DYNAMIC_FTRACE if (!ITANIUM) + select HAVE_FUNCTION_TRACER + select HAVE_DMA_ATTRS + select TTY + select HAVE_ARCH_TRACEHOOK + select HAVE_DMA_API_DEBUG + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select HAVE_VIRT_CPU_ACCOUNTING + select ARCH_HAS_SG_CHAIN + select VIRT_TO_BUS + select ARCH_DISCARD_MEMBLOCK + select GENERIC_IRQ_PROBE + select GENERIC_PENDING_IRQ if SMP + select GENERIC_IRQ_SHOW + select GENERIC_IRQ_LEGACY + select ARCH_WANT_OPTIONAL_GPIOLIB + select ARCH_HAVE_NMI_SAFE_CMPXCHG + select GENERIC_IOMAP + select GENERIC_SMP_IDLE_THREAD + select ARCH_INIT_TASK + select ARCH_TASK_STRUCT_ALLOCATOR + select ARCH_THREAD_INFO_ALLOCATOR + select ARCH_CLOCKSOURCE_DATA + select GENERIC_TIME_VSYSCALL_OLD + select SYSCTL_ARCH_UNALIGN_NO_WARN + select HAVE_MOD_ARCH_SPECIFIC + select MODULES_USE_ELF_RELA + select ARCH_USE_CMPXCHG_LOCKREF + select HAVE_ARCH_AUDITSYSCALL + default y + help + The Itanium Processor Family is Intel's 64-bit successor to + the 32-bit X86 line. The IA-64 Linux project has a home + page at <http://www.linuxia64.org/> and a mailing list at + <linux-ia64@vger.kernel.org>.
+ +config 64BIT + bool + select ATA_NONSTANDARD if ATA + default y + +config ZONE_DMA + def_bool y + depends on !IA64_SGI_SN2 + +config QUICKLIST + bool + default y + +config MMU + bool + default y + +config ARCH_DMA_ADDR_T_64BIT + def_bool y + +config NEED_DMA_MAP_STATE + def_bool y + +config NEED_SG_DMA_LENGTH + def_bool y + +config SWIOTLB + bool + +config STACKTRACE_SUPPORT + def_bool y + +config GENERIC_LOCKBREAK + def_bool n + +config RWSEM_XCHGADD_ALGORITHM + bool + default y + +config HUGETLB_PAGE_SIZE_VARIABLE + bool + depends on HUGETLB_PAGE + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config HAVE_SETUP_PER_CPU_AREA + def_bool y + +config DMI + bool + default y + select DMI_SCAN_MACHINE_NON_EFI_FALLBACK + +config EFI + bool + select UCS2_STRING + default y + +config SCHED_OMIT_FRAME_POINTER + bool + default y + +config IA64_UNCACHED_ALLOCATOR + bool + select GENERIC_ALLOCATOR + +config ARCH_USES_PG_UNCACHED + def_bool y + depends on IA64_UNCACHED_ALLOCATOR + +config AUDIT_ARCH + bool + default y + +menuconfig PARAVIRT_GUEST + bool "Paravirtualized guest support" + depends on BROKEN + help + Say Y here to get to see options related to running Linux under + various hypervisors. This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if PARAVIRT_GUEST + +config PARAVIRT + bool "Enable paravirtualization code" + depends on PARAVIRT_GUEST + default y + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. However, when run without a hypervisor + the kernel is theoretically slower and slightly larger. + +endif + +choice + prompt "System type" + default IA64_GENERIC + +config IA64_GENERIC + bool "generic" + select NUMA + select ACPI_NUMA + select SWIOTLB + select PCI_MSI + help + This selects the system type of your hardware. 
A "generic" kernel + will run on any supported IA-64 system. However, if you configure + a kernel for your specific system, it will be faster and smaller. + + generic For any supported IA-64 system + DIG-compliant For DIG ("Developer's Interface Guide") compliant systems + DIG+Intel+IOMMU For DIG systems with Intel IOMMU + HP-zx1/sx1000 For HP systems + HP-zx1/sx1000+swiotlb For HP systems with (broken) DMA-constrained devices. + SGI-SN2 For SGI Altix systems + SGI-UV For SGI UV systems + Ski-simulator For the HP simulator + + If you don't know what to do, choose "generic". + +config IA64_DIG + bool "DIG-compliant" + select SWIOTLB + +config IA64_DIG_VTD + bool "DIG+Intel+IOMMU" + select INTEL_IOMMU + select PCI_MSI + +config IA64_HP_ZX1 + bool "HP-zx1/sx1000" + help + Build a kernel that runs on HP zx1 and sx1000 systems. This adds + support for the HP I/O MMU. + +config IA64_HP_ZX1_SWIOTLB + bool "HP-zx1/sx1000 with software I/O TLB" + select SWIOTLB + help + Build a kernel that runs on HP zx1 and sx1000 systems even when they + have broken PCI devices which cannot DMA to full 32 bits. Apart + from support for the HP I/O MMU, this includes support for the software + I/O TLB, which allows supporting the broken devices at the expense of + wasting some kernel memory (about 2MB by default). + +config IA64_SGI_SN2 + bool "SGI-SN2" + select NUMA + select ACPI_NUMA + help + Selecting this option will optimize the kernel for use on sn2 based + systems, but the resulting kernel binary will not run on other + types of ia64 systems. If you have an SGI Altix system, it's safe + to select this option. If in doubt, select ia64 generic support + instead. + +config IA64_SGI_UV + bool "SGI-UV" + select NUMA + select ACPI_NUMA + select SWIOTLB + help + Selecting this option will optimize the kernel for use on UV based + systems, but the resulting kernel binary will not run on other + types of ia64 systems. If you have an SGI UV system, it's safe + to select this option. 
If in doubt, select ia64 generic support + instead. + +config IA64_HP_SIM + bool "Ski-simulator" + select SWIOTLB + depends on !PM + +endchoice + +choice + prompt "Processor type" + default ITANIUM + +config ITANIUM + bool "Itanium" + help + Select your IA-64 processor type. The default is Itanium. + This choice is safe for all IA-64 systems, but may not perform + optimally on systems with, say, Itanium 2 or newer processors. + +config MCKINLEY + bool "Itanium 2" + help + Select this to configure for an Itanium 2 (McKinley) processor. + +endchoice + +choice + prompt "Kernel page size" + default IA64_PAGE_SIZE_16KB + +config IA64_PAGE_SIZE_4KB + bool "4KB" + help + This lets you select the page size of the kernel. For best IA-64 + performance, a page size of 8KB or 16KB is recommended. For best + IA-32 compatibility, a page size of 4KB should be selected (the vast + majority of IA-32 binaries work perfectly fine with a larger page + size). For Itanium 2 or newer systems, a page size of 64KB can also + be selected. + + 4KB For best IA-32 compatibility + 8KB For best IA-64 performance + 16KB For best IA-64 performance + 64KB Requires Itanium 2 or newer processor. + + If you don't know what to do, choose 16KB. + +config IA64_PAGE_SIZE_8KB + bool "8KB" + +config IA64_PAGE_SIZE_16KB + bool "16KB" + +config IA64_PAGE_SIZE_64KB + depends on !ITANIUM + bool "64KB" + +endchoice + +if IA64_HP_SIM +config HZ + default 32 +endif + +if !IA64_HP_SIM +source kernel/Kconfig.hz +endif + +config IA64_BRL_EMU + bool + depends on ITANIUM + default y + +# align cache-sensitive data to 128 bytes +config IA64_L1_CACHE_SHIFT + int + default "7" if MCKINLEY + default "6" if ITANIUM + +config IA64_CYCLONE + bool "Cyclone (EXA) Time Source support" + help + Say Y here to enable support for IBM EXA Cyclone time source. + If you're unsure, answer N. 
+ +config IOSAPIC + bool + depends on !IA64_HP_SIM + default y + +config FORCE_MAX_ZONEORDER + int "MAX_ORDER (11 - 17)" if !HUGETLB_PAGE + range 11 17 if !HUGETLB_PAGE + default "17" if HUGETLB_PAGE + default "11" + +config SMP + bool "Symmetric multi-processing support" + help + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. + + If you say N here, the kernel will run on single and multiprocessor + systems, but will use only one CPU of a multiprocessor system. If + you say Y here, the kernel will run on many, but not all, + single processor systems. On a single processor system, the kernel + will run faster if you say N here. + + See also the SMP-HOWTO available at + <http://www.tldp.org/docs.html#howto>. + + If you don't know what to do here, say N. + +config NR_CPUS + int "Maximum number of CPUs (2-4096)" + range 2 4096 + depends on SMP + default "4096" + help + You should set this to the number of CPUs in your system, but + keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but + only use 2 CPUs on a >2 CPU system. Setting this to a value larger + than 64 will cause the use of a CPU mask array, causing a small + performance hit. + +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + depends on SMP + default n + ---help--- + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu/cpu#. + Say N if you want to disable CPU hotplug. + +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y + +config ARCH_ENABLE_MEMORY_HOTREMOVE + def_bool y + +config SCHED_SMT + bool "SMT scheduler support" + depends on SMP + help + Improves the CPU scheduler's decision making when dealing with + Intel IA64 chips with MultiThreading at a cost of slightly increased + overhead in some places. If unsure say N here.
+ +config PERMIT_BSP_REMOVE + bool "Support removal of Bootstrap Processor" + depends on HOTPLUG_CPU + default n + ---help--- + Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU + support. + +config FORCE_CPEI_RETARGET + bool "Force assumption that CPEI can be re-targeted" + depends on PERMIT_BSP_REMOVE + default n + ---help--- + Say Y if you need to force the assumption that CPEI can be re-targeted to + any cpu in the system. This hint is available via ACPI 3.0 specifications. + Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP. + This option is useful to enable this feature on older BIOS's as well. + You can also enable this by using boot command line option force_cpei=1. + +source "kernel/Kconfig.preempt" + +source "mm/Kconfig" + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + help + Say Y to support efficient handling of discontiguous physical memory, + for architectures which are either NUMA (Non-Uniform Memory Access) + or have huge holes in the physical address space for other reasons. + See <file:Documentation/vm/numa> for more. + +config ARCH_FLATMEM_ENABLE + def_bool y + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on ARCH_DISCONTIGMEM_ENABLE + select SPARSEMEM_VMEMMAP_ENABLE + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y if (IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB) + depends on ARCH_DISCONTIGMEM_ENABLE + +config NUMA + bool "NUMA support" + depends on !IA64_HP_SIM && !FLATMEM + default y if IA64_SGI_SN2 + select ACPI_NUMA if ACPI + help + Say Y to compile the kernel to support NUMA (Non-Uniform Memory + Access). This option is for configuring high-end multiprocessor + server systems. If in doubt, say N. + +config NODES_SHIFT + int "Max num nodes shift(3-10)" + range 3 10 + default "10" + depends on NEED_MULTIPLE_NODES + help + This option specifies the maximum number of nodes in your SSI system. + MAX_NUMNODES will be 2^(This value).
+ If in doubt, use the default. + +# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent. +# VIRTUAL_MEM_MAP has been retained for historical reasons. +config VIRTUAL_MEM_MAP + bool "Virtual mem map" + depends on !SPARSEMEM + default y if !IA64_HP_SIM + help + Say Y to compile the kernel with support for a virtual mem map. + This code also only takes effect if a memory hole of greater than + 1 Gb is found during boot. You must turn this option on if you + require the DISCONTIGMEM option for your machine. If you are + unsure, say Y. + +config HOLES_IN_ZONE + bool + default y if VIRTUAL_MEM_MAP + +config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool NUMA && SPARSEMEM + +config HAVE_ARCH_NODEDATA_EXTENSION + def_bool y + depends on NUMA + +config USE_PERCPU_NUMA_NODE_ID + def_bool y + depends on NUMA + +config HAVE_MEMORYLESS_NODES + def_bool NUMA + +config ARCH_PROC_KCORE_TEXT + def_bool y + depends on PROC_KCORE + +config IA64_MCA_RECOVERY + tristate "MCA recovery from errors other than TLB." + +config PERFMON + bool "Performance monitor support" + help + Selects whether support for the IA-64 performance monitor hardware + is included in the kernel. This makes some kernel data-structures a + little bigger and slows down execution a bit, but it is generally + a good idea to turn this on. If you're unsure, say Y. + +config IA64_PALINFO + tristate "/proc/pal support" + help + If you say Y here, you are able to get PAL (Processor Abstraction + Layer) information in /proc/pal. This contains useful information + about the processors in your systems, such as cache and TLB sizes + and the PAL firmware version in use. + + To use this option, you have to ensure that the "/proc file system + support" (CONFIG_PROC_FS) is enabled, too. + +config IA64_MC_ERR_INJECT + tristate "MC error injection support" + help + Adds support for MC error injection. 
If enabled, the kernel + will provide a sysfs interface for user applications to + call MC error injection PAL procedures to inject various errors. + This is a useful tool for MCA testing. + + If you're unsure, do not select this option. + +config SGI_SN + def_bool y if (IA64_SGI_SN2 || IA64_GENERIC) + +config IA64_ESI + bool "ESI (Extensible SAL Interface) support" + help + If you say Y here, support is built into the kernel to + make ESI calls. ESI calls are used to support vendor-specific + firmware extensions, such as the ability to inject memory-errors + for test-purposes. If you're unsure, say N. + +config IA64_HP_AML_NFW + bool "Support ACPI AML calls to native firmware" + help + This driver installs a global ACPI Operation Region handler for + region 0xA1. AML methods can use this OpRegion to call arbitrary + native firmware functions. The driver installs the OpRegion + handler if there is an HPQ5001 device or if the user supplies + the "force" module parameter, e.g., with the "aml_nfw.force" + kernel command line option. + +source "drivers/sn/Kconfig" + +config KEXEC + bool "kexec system call" + depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. As of this writing the exact hardware + interface is strongly in flux, so no good recommendation can be + made. + +config CRASH_DUMP + bool "kernel crash dumps" + depends on IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) + help + Generate crash dump after being started by kexec. 
+ +source "drivers/firmware/Kconfig" + +source "fs/Kconfig.binfmt" + +endmenu + +menu "Power management and ACPI options" + +source "kernel/power/Kconfig" + +source "drivers/acpi/Kconfig" + +if PM +menu "CPU Frequency scaling" +source "drivers/cpufreq/Kconfig" +endmenu +endif + +endmenu + +if !IA64_HP_SIM + +menu "Bus options (PCI, PCMCIA)" + +config PCI + bool "PCI support" + help + Real IA-64 machines all have PCI/PCI-X/PCI Express busses. Say Y + here unless you are using a simulator without PCI support. + +config PCI_DOMAINS + def_bool PCI + +config PCI_SYSCALL + def_bool PCI + +source "drivers/pci/pcie/Kconfig" + +source "drivers/pci/Kconfig" + +source "drivers/pci/hotplug/Kconfig" + +source "drivers/pcmcia/Kconfig" + +endmenu + +endif + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "arch/ia64/hp/sim/Kconfig" + +config MSPEC + tristate "Memory special operations driver" + depends on IA64 + select IA64_UNCACHED_ALLOCATOR + help + If you have an ia64 and you want to enable memory special + operations support (formerly known as fetchop), say Y here, + otherwise say N. + +source "fs/Kconfig" + +source "arch/ia64/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" + +config IOMMU_HELPER + def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB) diff --git a/kernel/arch/ia64/Kconfig.debug b/kernel/arch/ia64/Kconfig.debug new file mode 100644 index 000000000..de9d507ba --- /dev/null +++ b/kernel/arch/ia64/Kconfig.debug @@ -0,0 +1,64 @@ +menu "Kernel hacking" + +source "lib/Kconfig.debug" + +choice + prompt "Physical memory granularity" + default IA64_GRANULE_64MB + +config IA64_GRANULE_16MB + bool "16MB" + help + IA-64 identity-mapped regions use a large page size called "granules". + + Select "16MB" for a small granule size. + Select "64MB" for a large granule size. This is the current default. 
+ +config IA64_GRANULE_64MB + bool "64MB" + depends on !(IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_SGI_SN2) + +endchoice + +config IA64_PRINT_HAZARDS + bool "Print possible IA-64 dependency violations to console" + depends on DEBUG_KERNEL + help + Selecting this option prints more information for Illegal Dependency + Faults, that is, for Read-after-Write (RAW), Write-after-Write (WAW), + or Write-after-Read (WAR) violations. This option is ignored if you + are compiling for an Itanium A step processor + (CONFIG_ITANIUM_ASTEP_SPECIFIC). If you're unsure, select Y. + +config DISABLE_VHPT + bool "Disable VHPT" + depends on DEBUG_KERNEL + help + The Virtual Hash Page Table (VHPT) enhances virtual address + translation performance. Normally you want the VHPT active but you + can select this option to disable the VHPT for debugging. If you're + unsure, answer N. + +config IA64_DEBUG_CMPXCHG + bool "Turn on compare-and-exchange bug checking (slow!)" + depends on DEBUG_KERNEL + help + Selecting this option turns on bug checking for the IA-64 + compare-and-exchange instructions. This is slow! Itaniums + from step B3 or later don't have this problem. If you're unsure, + select N. + +config IA64_DEBUG_IRQ + bool "Turn on irq debug checks (slow!)" + depends on DEBUG_KERNEL + help + Selecting this option turns on bug checking for the IA-64 irq_save + and restore instructions. It's useful for tracking down spinlock + problems, but slow! If you're unsure, select N. + +config SYSVIPC_COMPAT + bool + depends on COMPAT && SYSVIPC + default y + +endmenu diff --git a/kernel/arch/ia64/Makefile b/kernel/arch/ia64/Makefile new file mode 100644 index 000000000..970d0bd99 --- /dev/null +++ b/kernel/arch/ia64/Makefile @@ -0,0 +1,102 @@ +# +# ia64/Makefile +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. +# +# This file is subject to the terms and conditions of the GNU General Public +# License. 
See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1998-2004 by David Mosberger-Tang +# + +KBUILD_DEFCONFIG := generic_defconfig + +NM := $(CROSS_COMPILE)nm -B +READELF := $(CROSS_COMPILE)readelf + +export AWK + +CHECKFLAGS += -m64 -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__ + +OBJCOPYFLAGS := --strip-all +LDFLAGS_vmlinux := -static +KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/ia64/module.lds +KBUILD_AFLAGS_KERNEL := -mconstant-gp +EXTRA := + +cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \ + -falign-functions=32 -frename-registers -fno-optimize-sibling-calls +KBUILD_CFLAGS_KERNEL := -mconstant-gp + +GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)") +KBUILD_CPPFLAGS += $(shell $(srctree)/arch/ia64/scripts/toolchain-flags "$(CC)" "$(OBJDUMP)" "$(READELF)") + +ifeq ($(GAS_STATUS),buggy) +$(error Sorry, you need a newer version of the assember, one that is built from \ + a source-tree that post-dates 18-Dec-2002. 
You can find a pre-compiled \ + static binary of such an assembler at: \ + \ + ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz) +endif + +KBUILD_CFLAGS += $(cflags-y) +head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o + +libs-y += arch/ia64/lib/ +core-y += arch/ia64/kernel/ arch/ia64/mm/ +core-$(CONFIG_IA64_DIG) += arch/ia64/dig/ +core-$(CONFIG_IA64_DIG_VTD) += arch/ia64/dig/ +core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/ +core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ +core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ +core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ +core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/ + +drivers-$(CONFIG_PCI) += arch/ia64/pci/ +drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ +drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ +drivers-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ +drivers-$(CONFIG_IA64_GENERIC) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/ arch/ia64/sn/ arch/ia64/uv/ +drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/ + +boot := arch/ia64/hp/sim/boot + +PHONY += boot compressed check + +all: compressed unwcheck + +compressed: vmlinux.gz + +vmlinuz: vmlinux.gz + +vmlinux.gz: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $@ + +unwcheck: vmlinux + -$(Q)READELF=$(READELF) $(PYTHON) $(srctree)/arch/ia64/scripts/unwcheck.py $< + +archclean: + $(Q)$(MAKE) $(clean)=$(boot) + +CLEAN_FILES += vmlinux.gz bootloader + +boot: lib/lib.a vmlinux + $(Q)$(MAKE) $(build)=$(boot) $@ + +install: vmlinux.gz + sh $(srctree)/arch/ia64/install.sh $(KERNELRELEASE) $< System.map "$(INSTALL_PATH)" + +define archhelp + echo '* compressed - Build compressed kernel image' + echo ' install - Install compressed kernel image' + echo ' boot - Build vmlinux and bootloader for Ski simulator' + echo '* unwcheck - Check vmlinux for invalid unwind info' +endef + +archprepare: make_nr_irqs_h FORCE +PHONY += make_nr_irqs_h FORCE + +make_nr_irqs_h: FORCE + $(Q)$(MAKE) 
$(build)=arch/ia64/kernel include/generated/nr-irqs.h diff --git a/kernel/arch/ia64/configs/bigsur_defconfig b/kernel/arch/ia64/configs/bigsur_defconfig new file mode 100644 index 000000000..b6bda1838 --- /dev/null +++ b/kernel/arch/ia64/configs/bigsur_defconfig @@ -0,0 +1,109 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_LOG_BUF_SHIFT=16 +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_SGI_PARTITION=y +CONFIG_IA64_DIG=y +CONFIG_SMP=y +CONFIG_NR_CPUS=2 +CONFIG_PREEMPT=y +# CONFIG_VIRTUAL_MEM_MAP is not set +CONFIG_PERFMON=y +CONFIG_IA64_PALINFO=y +CONFIG_EFI_VARS=y +CONFIG_BINFMT_MISC=m +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_IPV6 is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=m +CONFIG_IDE=m +CONFIG_BLK_DEV_IDECD=m +CONFIG_BLK_DEV_GENERIC=m +CONFIG_BLK_DEV_PIIX=m +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_QLOGIC_1280=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID10=m +CONFIG_MD_MULTIPATH=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_NETDEVICES=y +CONFIG_DUMMY=y +CONFIG_INPUT_EVDEV=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_HW_RANDOM is not set +CONFIG_EFI_RTC=y +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +CONFIG_AGP=m +CONFIG_AGP_I460=m +CONFIG_DRM=m +CONFIG_DRM_R128=m +CONFIG_SOUND=m +CONFIG_SND=m +CONFIG_SND_SEQUENCER=m +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m +CONFIG_SND_CS4281=m +CONFIG_USB_HIDDEV=y +CONFIG_USB=m +CONFIG_USB_MON=m +CONFIG_USB_UHCI_HCD=m +CONFIG_USB_ACM=m +CONFIG_USB_PRINTER=m +CONFIG_USB_STORAGE=m +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y 
+CONFIG_XFS_FS=y +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_AUTOFS4_FS=m +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_UDF_FS=m +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V4=m +CONFIG_NFSD=m +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=m +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_DES=y diff --git a/kernel/arch/ia64/configs/generic_defconfig b/kernel/arch/ia64/configs/generic_defconfig new file mode 100644 index 000000000..81f686dee --- /dev/null +++ b/kernel/arch/ia64/configs/generic_defconfig @@ -0,0 +1,220 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=20 +CONFIG_CGROUPS=y +CONFIG_CPUSETS=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_SGI_PARTITION=y +CONFIG_MCKINLEY=y +CONFIG_IA64_PAGE_SIZE_64KB=y +CONFIG_IA64_CYCLONE=y +CONFIG_SMP=y +CONFIG_HOTPLUG_CPU=y +CONFIG_IA64_MCA_RECOVERY=y +CONFIG_PERFMON=y +CONFIG_IA64_PALINFO=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +CONFIG_EFI_VARS=y +CONFIG_BINFMT_MISC=m +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_DOCK=y +CONFIG_ACPI_PROCESSOR=m +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_ACPI=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_SYN_COOKIES=y +# CONFIG_IPV6 is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_CONNECTOR=y +# CONFIG_PNP_DEBUG_MESSAGES is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_SGI_IOC4=y +CONFIG_SGI_XP=m +CONFIG_IDE=y +CONFIG_BLK_DEV_IDECD=y +CONFIG_BLK_DEV_GENERIC=y +CONFIG_BLK_DEV_CMD64X=y +CONFIG_BLK_DEV_PIIX=y +CONFIG_BLK_DEV_SGIIOC4=y +CONFIG_BLK_DEV_SD=y 
+CONFIG_CHR_DEV_ST=m +CONFIG_BLK_DEV_SR=m +CONFIG_CHR_DEV_SG=m +CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_QLOGIC_1280=y +CONFIG_ATA=y +CONFIG_ATA_PIIX=y +CONFIG_SATA_VITESSE=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_MULTIPATH=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y +CONFIG_FUSION_FC=m +CONFIG_FUSION_SAS=y +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +CONFIG_NETCONSOLE=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_TULIP=m +CONFIG_E100=m +CONFIG_E1000=y +CONFIG_IGB=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_GAMEPORT=m +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_SGI_SNSC=y +CONFIG_SGI_TIOCX=y +CONFIG_SGI_MBCS=m +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=6 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_SGI_L1_CONSOLE=y +CONFIG_SERIAL_SGI_IOC4=y +# CONFIG_HW_RANDOM is not set +CONFIG_EFI_RTC=y +CONFIG_RAW_DRIVER=m +CONFIG_HPET=y +CONFIG_AGP=m +CONFIG_AGP_I460=m +CONFIG_AGP_HP_ZX1=m +CONFIG_AGP_SGI_TIOCA=m +CONFIG_DRM=m +CONFIG_DRM_TDFX=m +CONFIG_DRM_R128=m +CONFIG_DRM_RADEON=m +CONFIG_DRM_MGA=m +CONFIG_DRM_SIS=m +CONFIG_SOUND=m +CONFIG_SND=m +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_VERBOSE_PRINTK=y +CONFIG_SND_DUMMY=m +CONFIG_SND_VIRMIDI=m +CONFIG_SND_MTPAV=m +CONFIG_SND_SERIAL_U16550=m +CONFIG_SND_MPU401=m +CONFIG_SND_CS4281=m +CONFIG_SND_CS46XX=m +CONFIG_SND_EMU10K1=m +CONFIG_SND_FM801=m +CONFIG_HID_GYRATION=m +CONFIG_HID_PANTHERLORD=m +CONFIG_HID_PETALYNX=m +CONFIG_HID_SAMSUNG=m +CONFIG_HID_SONY=m +CONFIG_HID_SUNPLUS=m +CONFIG_USB=m +CONFIG_USB_MON=m +CONFIG_USB_EHCI_HCD=m +CONFIG_USB_OHCI_HCD=m +CONFIG_USB_UHCI_HCD=m +CONFIG_USB_STORAGE=m +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_IPOIB=m 
+CONFIG_INTEL_IOMMU=y +CONFIG_MSPEC=m +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_REISERFS_FS=y +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_AUTOFS4_FS=m +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_UDF_FS=m +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=m +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V4=m +CONFIG_NFSD=m +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_CRC_T10DIF=y diff --git a/kernel/arch/ia64/configs/gensparse_defconfig b/kernel/arch/ia64/configs/gensparse_defconfig new file mode 100644 index 000000000..5b4fcdd51 --- /dev/null +++ b/kernel/arch/ia64/configs/gensparse_defconfig @@ -0,0 +1,199 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y 
+CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=20 +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_SGI_PARTITION=y +CONFIG_MCKINLEY=y +CONFIG_IA64_CYCLONE=y +CONFIG_SMP=y +CONFIG_NR_CPUS=512 +CONFIG_HOTPLUG_CPU=y +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_IA64_MCA_RECOVERY=y +CONFIG_PERFMON=y +CONFIG_IA64_PALINFO=y +CONFIG_SGI_IOC3=y +CONFIG_EFI_VARS=y +CONFIG_BINFMT_MISC=m +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +CONFIG_HOTPLUG_PCI=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_SYN_COOKIES=y +# CONFIG_IPV6 is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_SGI_IOC4=y +CONFIG_IDE=y +CONFIG_BLK_DEV_IDECD=y +CONFIG_IDE_GENERIC=y +CONFIG_BLK_DEV_GENERIC=y +CONFIG_BLK_DEV_CMD64X=y +CONFIG_BLK_DEV_PIIX=y +CONFIG_BLK_DEV_SGIIOC4=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +CONFIG_BLK_DEV_SR=m +CONFIG_CHR_DEV_SG=m +CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_QLOGIC_1280=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_MULTIPATH=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y +CONFIG_FUSION_FC=m +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +CONFIG_NETCONSOLE=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_TULIP=m +CONFIG_E100=m +CONFIG_E1000=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_GAMEPORT=m +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_SGI_SNSC=y +CONFIG_SGI_TIOCX=y +CONFIG_SGI_MBCS=m +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=6 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_SGI_L1_CONSOLE=y +CONFIG_SERIAL_SGI_IOC4=y +CONFIG_SERIAL_SGI_IOC3=y +# CONFIG_HW_RANDOM is not set 
+CONFIG_EFI_RTC=y +CONFIG_RAW_DRIVER=m +CONFIG_HPET=y +CONFIG_AGP=m +CONFIG_AGP_I460=m +CONFIG_AGP_HP_ZX1=m +CONFIG_AGP_SGI_TIOCA=m +CONFIG_DRM=m +CONFIG_DRM_TDFX=m +CONFIG_DRM_R128=m +CONFIG_DRM_RADEON=m +CONFIG_DRM_MGA=m +CONFIG_DRM_SIS=m +CONFIG_SOUND=m +CONFIG_SND=m +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_VERBOSE_PRINTK=y +CONFIG_SND_DUMMY=m +CONFIG_SND_VIRMIDI=m +CONFIG_SND_MTPAV=m +CONFIG_SND_SERIAL_U16550=m +CONFIG_SND_MPU401=m +CONFIG_SND_CS4281=m +CONFIG_SND_CS46XX=m +CONFIG_SND_EMU10K1=m +CONFIG_SND_FM801=m +CONFIG_USB=m +CONFIG_USB_MON=m +CONFIG_USB_EHCI_HCD=m +CONFIG_USB_OHCI_HCD=m +CONFIG_USB_UHCI_HCD=m +CONFIG_USB_STORAGE=m +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_IPOIB=m +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_REISERFS_FS=y +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_AUTOFS4_FS=y +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_UDF_FS=m +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=m +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V4=m +CONFIG_NFSD=m +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m 
+CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_CRYPTO_MD5=y diff --git a/kernel/arch/ia64/configs/sim_defconfig b/kernel/arch/ia64/configs/sim_defconfig new file mode 100644 index 000000000..f0f69fdbd --- /dev/null +++ b/kernel/arch/ia64/configs/sim_defconfig @@ -0,0 +1,52 @@ +CONFIG_SYSVIPC=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=16 +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_IA64_HP_SIM=y +CONFIG_MCKINLEY=y +CONFIG_IA64_PAGE_SIZE_64KB=y +CONFIG_SMP=y +CONFIG_NR_CPUS=64 +CONFIG_PREEMPT=y +CONFIG_IA64_PALINFO=m +CONFIG_EFI_VARS=y +CONFIG_BINFMT_MISC=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IPV6 is not set +# CONFIG_STANDALONE is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SPI_ATTRS=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO_I8042 is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_EFI_RTC=y +# CONFIG_VGA_CONSOLE is not set +CONFIG_HP_SIMETH=y +CONFIG_HP_SIMSERIAL=y +CONFIG_HP_SIMSERIAL_CONSOLE=y +CONFIG_HP_SIMSCSI=y +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_FS_XATTR is not set +CONFIG_PROC_KCORE=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=y +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_MUTEXES=y diff --git a/kernel/arch/ia64/configs/tiger_defconfig b/kernel/arch/ia64/configs/tiger_defconfig new file mode 100644 index 000000000..192ed157c --- /dev/null +++ 
b/kernel/arch/ia64/configs/tiger_defconfig @@ -0,0 +1,174 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=20 +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_SGI_PARTITION=y +CONFIG_IA64_DIG=y +CONFIG_MCKINLEY=y +CONFIG_IA64_PAGE_SIZE_64KB=y +CONFIG_IA64_CYCLONE=y +CONFIG_SMP=y +CONFIG_NR_CPUS=16 +CONFIG_HOTPLUG_CPU=y +CONFIG_PERMIT_BSP_REMOVE=y +CONFIG_FORCE_CPEI_RETARGET=y +CONFIG_IA64_MCA_RECOVERY=y +CONFIG_PERFMON=y +CONFIG_IA64_PALINFO=y +CONFIG_KEXEC=y +CONFIG_EFI_VARS=y +CONFIG_BINFMT_MISC=m +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +CONFIG_HOTPLUG_PCI=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_SYN_COOKIES=y +# CONFIG_IPV6 is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_IDE=y +CONFIG_BLK_DEV_IDECD=y +CONFIG_BLK_DEV_GENERIC=y +CONFIG_BLK_DEV_CMD64X=y +CONFIG_BLK_DEV_PIIX=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +CONFIG_BLK_DEV_SR=m +CONFIG_CHR_DEV_SG=m +CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_QLOGIC_1280=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_MULTIPATH=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y +CONFIG_FUSION_FC=y +CONFIG_FUSION_CTL=y +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +CONFIG_NETCONSOLE=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_TULIP=m +CONFIG_E100=m +CONFIG_E1000=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_GAMEPORT=m +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=6 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y 
+# CONFIG_HW_RANDOM is not set +CONFIG_EFI_RTC=y +CONFIG_RAW_DRIVER=m +CONFIG_HPET=y +CONFIG_AGP=m +CONFIG_AGP_I460=m +CONFIG_DRM=m +CONFIG_DRM_TDFX=m +CONFIG_DRM_R128=m +CONFIG_DRM_RADEON=m +CONFIG_DRM_MGA=m +CONFIG_DRM_SIS=m +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=m +CONFIG_USB_OHCI_HCD=m +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_STORAGE=m +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_REISERFS_FS=y +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_AUTOFS4_FS=y +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_UDF_FS=m +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=m +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V4=m +CONFIG_NFSD=m +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_IA64_GRANULE_16MB=y +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_MD5=y diff --git 
a/kernel/arch/ia64/configs/zx1_defconfig b/kernel/arch/ia64/configs/zx1_defconfig new file mode 100644 index 000000000..b504c8e2f --- /dev/null +++ b/kernel/arch/ia64/configs/zx1_defconfig @@ -0,0 +1,154 @@ +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KPROBES=y +CONFIG_MODULES=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_IA64_HP_ZX1=y +CONFIG_MCKINLEY=y +CONFIG_SMP=y +CONFIG_NR_CPUS=16 +CONFIG_HOTPLUG_CPU=y +CONFIG_FLATMEM_MANUAL=y +CONFIG_IA64_MCA_RECOVERY=y +CONFIG_PERFMON=y +CONFIG_IA64_PALINFO=y +CONFIG_CRASH_DUMP=y +CONFIG_EFI_VARS=y +CONFIG_BINFMT_MISC=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_ACPI=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IPV6 is not set +CONFIG_NETFILTER=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_IDE=y +CONFIG_BLK_DEV_IDECD=y +CONFIG_BLK_DEV_GENERIC=y +CONFIG_BLK_DEV_CMD64X=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=y +CONFIG_CHR_DEV_OSST=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_QLOGIC_1280=y +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y +CONFIG_FUSION_FC=y +CONFIG_FUSION_CTL=m +CONFIG_NETDEVICES=y +CONFIG_DUMMY=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_TULIP=y +CONFIG_TULIP_MWI=y +CONFIG_TULIP_MMIO=y +CONFIG_TULIP_NAPI=y +CONFIG_TULIP_NAPI_HW_MITIGATION=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_INPUT_JOYDEV=y +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO_I8042 is not set +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=8 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_HW_RANDOM is not set +CONFIG_EFI_RTC=y +CONFIG_I2C_CHARDEV=y +CONFIG_AGP=y +CONFIG_AGP_HP_ZX1=y +CONFIG_DRM=y +CONFIG_DRM_RADEON=y +CONFIG_FB_RADEON=y +CONFIG_FB_RADEON_DEBUG=y 
+CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_MIXER_OSS=y +CONFIG_SND_PCM_OSS=y +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_FM801=y +CONFIG_USB_HIDDEV=y +CONFIG_USB=y +CONFIG_USB_MON=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT3_FS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_UDF_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V4=y +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=y +CONFIG_NLS_CODEPAGE_775=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_CODEPAGE_852=y +CONFIG_NLS_CODEPAGE_855=y +CONFIG_NLS_CODEPAGE_857=y +CONFIG_NLS_CODEPAGE_860=y +CONFIG_NLS_CODEPAGE_861=y +CONFIG_NLS_CODEPAGE_862=y +CONFIG_NLS_CODEPAGE_863=y +CONFIG_NLS_CODEPAGE_864=y +CONFIG_NLS_CODEPAGE_865=y +CONFIG_NLS_CODEPAGE_866=y +CONFIG_NLS_CODEPAGE_869=y +CONFIG_NLS_CODEPAGE_936=y +CONFIG_NLS_CODEPAGE_950=y +CONFIG_NLS_CODEPAGE_932=y +CONFIG_NLS_CODEPAGE_949=y +CONFIG_NLS_CODEPAGE_874=y +CONFIG_NLS_ISO8859_8=y +CONFIG_NLS_CODEPAGE_1251=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=y +CONFIG_NLS_ISO8859_3=y +CONFIG_NLS_ISO8859_4=y +CONFIG_NLS_ISO8859_5=y +CONFIG_NLS_ISO8859_6=y +CONFIG_NLS_ISO8859_7=y +CONFIG_NLS_ISO8859_9=y +CONFIG_NLS_ISO8859_13=y +CONFIG_NLS_ISO8859_14=y +CONFIG_NLS_ISO8859_15=y +CONFIG_NLS_KOI8_R=y +CONFIG_NLS_KOI8_U=y +CONFIG_NLS_UTF8=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_IA64_PRINT_HAZARDS=y +CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_PCBC=m diff --git a/kernel/arch/ia64/dig/Makefile b/kernel/arch/ia64/dig/Makefile new file mode 100644 index 000000000..ae16ec4f6 --- /dev/null +++ b/kernel/arch/ia64/dig/Makefile @@ -0,0 +1,14 @@ +# +# ia64/platform/dig/Makefile +# +# Copyright (C) 1999 Silicon Graphics, Inc. 
+# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) +# + +obj-y := setup.o +ifeq ($(CONFIG_INTEL_IOMMU), y) +obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o +else +obj-$(CONFIG_IA64_GENERIC) += machvec.o +endif + diff --git a/kernel/arch/ia64/dig/machvec.c b/kernel/arch/ia64/dig/machvec.c new file mode 100644 index 000000000..0c55bdafb --- /dev/null +++ b/kernel/arch/ia64/dig/machvec.c @@ -0,0 +1,3 @@ +#define MACHVEC_PLATFORM_NAME dig +#define MACHVEC_PLATFORM_HEADER +#include diff --git a/kernel/arch/ia64/dig/machvec_vtd.c b/kernel/arch/ia64/dig/machvec_vtd.c new file mode 100644 index 000000000..7cd3eb471 --- /dev/null +++ b/kernel/arch/ia64/dig/machvec_vtd.c @@ -0,0 +1,3 @@ +#define MACHVEC_PLATFORM_NAME dig_vtd +#define MACHVEC_PLATFORM_HEADER +#include diff --git a/kernel/arch/ia64/dig/setup.c b/kernel/arch/ia64/dig/setup.c new file mode 100644 index 000000000..98131e1db --- /dev/null +++ b/kernel/arch/ia64/dig/setup.c @@ -0,0 +1,70 @@ +/* + * Platform dependent support for DIG64 platforms. + * + * Copyright (C) 1999 Intel Corp. + * Copyright (C) 1999, 2001 Hewlett-Packard Co + * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999 Vijay Chander + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +void __init +dig_setup (char **cmdline_p) +{ + unsigned int orig_x, orig_y, num_cols, num_rows, font_height; + + /* + * Default to /dev/sda2. This assumes that the EFI partition + * is physical disk 1 partition 1 and the Linux root disk is + * physical disk 1 partition 2. 
+ */ + ROOT_DEV = Root_SDA2; /* default to second partition on first drive */ + +#ifdef CONFIG_SMP + init_smp_config(); +#endif + + memset(&screen_info, 0, sizeof(screen_info)); + + if (!ia64_boot_param->console_info.num_rows + || !ia64_boot_param->console_info.num_cols) + { + printk(KERN_WARNING "dig_setup: warning: invalid screen-info, guessing 80x25\n"); + orig_x = 0; + orig_y = 0; + num_cols = 80; + num_rows = 25; + font_height = 16; + } else { + orig_x = ia64_boot_param->console_info.orig_x; + orig_y = ia64_boot_param->console_info.orig_y; + num_cols = ia64_boot_param->console_info.num_cols; + num_rows = ia64_boot_param->console_info.num_rows; + font_height = 400 / num_rows; + } + + screen_info.orig_x = orig_x; + screen_info.orig_y = orig_y; + screen_info.orig_video_cols = num_cols; + screen_info.orig_video_lines = num_rows; + screen_info.orig_video_points = font_height; + screen_info.orig_video_mode = 3; /* XXX fake */ + screen_info.orig_video_isVGA = 1; /* XXX fake */ + screen_info.orig_video_ega_bx = 3; /* XXX fake */ +} diff --git a/kernel/arch/ia64/hp/common/Makefile b/kernel/arch/ia64/hp/common/Makefile new file mode 100644 index 000000000..9e179dd06 --- /dev/null +++ b/kernel/arch/ia64/hp/common/Makefile @@ -0,0 +1,11 @@ +# +# ia64/platform/hp/common/Makefile +# +# Copyright (C) 2002 Hewlett Packard +# Copyright (C) Alex Williamson (alex_williamson@hp.com) +# + +obj-y := sba_iommu.o +obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += hwsw_iommu.o +obj-$(CONFIG_IA64_GENERIC) += hwsw_iommu.o +obj-$(CONFIG_IA64_HP_AML_NFW) += aml_nfw.o diff --git a/kernel/arch/ia64/hp/common/aml_nfw.c b/kernel/arch/ia64/hp/common/aml_nfw.c new file mode 100644 index 000000000..84715fcbb --- /dev/null +++ b/kernel/arch/ia64/hp/common/aml_nfw.c @@ -0,0 +1,235 @@ +/* + * OpRegion handler to allow AML to call native firmware + * + * (c) Copyright 2007 Hewlett-Packard Development Company, L.P. 
+ * Bjorn Helgaas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This driver implements HP Open Source Review Board proposal 1842, + * which was approved on 9/20/2006. + * + * For technical documentation, see the HP SPPA Firmware EAS, Appendix F. + * + * ACPI does not define a mechanism for AML methods to call native firmware + * interfaces such as PAL or SAL. This OpRegion handler adds such a mechanism. + * After the handler is installed, an AML method can call native firmware by + * storing the arguments and firmware entry point to specific offsets in the + * OpRegion. When AML reads the "return value" offset from the OpRegion, this + * handler loads up the arguments, makes the firmware call, and returns the + * result. + */ + +#include +#include +#include + +MODULE_AUTHOR("Bjorn Helgaas "); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ACPI opregion handler for native firmware calls"); + +static bool force_register; +module_param_named(force, force_register, bool, 0); +MODULE_PARM_DESC(force, "Install opregion handler even without HPQ5001 device"); + +#define AML_NFW_SPACE 0xA1 + +struct ia64_pdesc { + void *ip; + void *gp; +}; + +/* + * N.B. The layout of this structure is defined in the HP SPPA FW EAS, and + * the member offsets are embedded in AML methods. 
+ */
+struct ia64_nfw_context {
+	u64 arg[8];
+	struct ia64_sal_retval ret;
+	u64 ip;
+	u64 gp;
+	u64 pad[2];
+};
+
+/*
+ * Turn an address supplied by AML into a kernel pointer.  Addresses with
+ * bit 63 set are OR-ed with __IA64_UNCACHED_OFFSET; everything else goes
+ * through __va().
+ */
+static void *virt_map(u64 address)
+{
+	if (address & (1UL << 63))
+		return (void *) (__IA64_UNCACHED_OFFSET | address);
+
+	return __va(address);
+}
+
+/*
+ * Perform the firmware call described by @c: build an {ip, gp} descriptor
+ * on the stack from c->ip and c->gp, then invoke it through IA64_FW_CALL
+ * with the eight stored arguments.  The result is placed in c->ret.
+ */
+static void aml_nfw_execute(struct ia64_nfw_context *c)
+{
+	struct ia64_pdesc virt_entry;
+	ia64_sal_handler entry;
+
+	virt_entry.ip = virt_map(c->ip);
+	virt_entry.gp = virt_map(c->gp);
+
+	entry = (ia64_sal_handler) &virt_entry;
+
+	IA64_FW_CALL(entry, c->ret,
+		     c->arg[0], c->arg[1], c->arg[2], c->arg[3],
+		     c->arg[4], c->arg[5], c->arg[6], c->arg[7]);
+}
+
+/*
+ * Load a @bit_width-wide value from @offset into *@value, zero-extended.
+ * Widths other than 8/16/32/64 leave *@value untouched; the caller is
+ * expected to have rejected them already.
+ */
+static void aml_nfw_read_arg(u8 *offset, u32 bit_width, u64 *value)
+{
+	switch (bit_width) {
+	case 8:
+		*value = *(u8 *)offset;
+		break;
+	case 16:
+		*value = *(u16 *)offset;
+		break;
+	case 32:
+		*value = *(u32 *)offset;
+		break;
+	case 64:
+		*value = *(u64 *)offset;
+		break;
+	}
+}
+
+/*
+ * Store the low @bit_width bits of *@value at @offset.  Widths other than
+ * 8/16/32/64 store nothing.
+ */
+static void aml_nfw_write_arg(u8 *offset, u32 bit_width, u64 *value)
+{
+	switch (bit_width) {
+	case 8:
+		*(u8 *) offset = *value;
+		break;
+	case 16:
+		*(u16 *) offset = *value;
+		break;
+	case 32:
+		*(u32 *) offset = *value;
+		break;
+	case 64:
+		*(u64 *) offset = *value;
+		break;
+	}
+}
+
+/*
+ * Address space handler for the native firmware OpRegion.
+ *
+ * @address is a byte offset into the struct ia64_nfw_context passed as
+ * @handler_context.  Writes store into the context.  Reads return data
+ * from it, and a read that starts exactly at the "ret" member first runs
+ * the firmware call.
+ *
+ * Returns AE_BAD_PARAMETER for an unsupported width or an access that
+ * does not lie entirely inside the context, AE_OK otherwise (including
+ * for a @function that is neither ACPI_READ nor ACPI_WRITE, which is
+ * ignored).
+ */
+static acpi_status aml_nfw_handler(u32 function, acpi_physical_address address,
+	u32 bit_width, u64 *value, void *handler_context,
+	void *region_context)
+{
+	struct ia64_nfw_context *context = handler_context;
+	u32 nbytes;
+	u8 *offset;
+
+	if (bit_width != 8 && bit_width != 16 &&
+	    bit_width != 32 && bit_width != 64)
+		return AE_BAD_PARAMETER;
+
+	/*
+	 * nbytes is at most 8 and the context is larger than that, so the
+	 * subtraction cannot underflow.  Comparing this way, rather than
+	 * testing "address + nbytes", keeps a huge address from wrapping
+	 * around and slipping past the check.
+	 */
+	nbytes = bit_width >> 3;
+	if (address > sizeof(struct ia64_nfw_context) - nbytes)
+		return AE_BAD_PARAMETER;
+
+	/* Only form the pointer once the offset is known to be in range. */
+	offset = (u8 *) context + address;
+
+	switch (function) {
+	case ACPI_READ:
+		if (address == offsetof(struct ia64_nfw_context, ret))
+			aml_nfw_execute(context);
+		aml_nfw_read_arg(offset, bit_width, value);
+		break;
+	case ACPI_WRITE:
+		aml_nfw_write_arg(offset, bit_width, value);
+		break;
+	}
+
+	return AE_OK;
+}
+
+static struct ia64_nfw_context
global_context; +static int global_handler_registered; + +static int aml_nfw_add_global_handler(void) +{ + acpi_status status; + + if (global_handler_registered) + return 0; + + status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT, + AML_NFW_SPACE, aml_nfw_handler, NULL, &global_context); + if (ACPI_FAILURE(status)) + return -ENODEV; + + global_handler_registered = 1; + printk(KERN_INFO "Global 0x%02X opregion handler registered\n", + AML_NFW_SPACE); + return 0; +} + +static int aml_nfw_remove_global_handler(void) +{ + acpi_status status; + + if (!global_handler_registered) + return 0; + + status = acpi_remove_address_space_handler(ACPI_ROOT_OBJECT, + AML_NFW_SPACE, aml_nfw_handler); + if (ACPI_FAILURE(status)) + return -ENODEV; + + global_handler_registered = 0; + printk(KERN_INFO "Global 0x%02X opregion handler removed\n", + AML_NFW_SPACE); + return 0; +} + +static int aml_nfw_add(struct acpi_device *device) +{ + /* + * We would normally allocate a new context structure and install + * the address space handler for the specific device we found. + * But the HP-UX implementation shares a single global context + * and always puts the handler at the root, so we'll do the same. 
+ */ + return aml_nfw_add_global_handler(); +} + +static int aml_nfw_remove(struct acpi_device *device) +{ + return aml_nfw_remove_global_handler(); +} + +static const struct acpi_device_id aml_nfw_ids[] = { + {"HPQ5001", 0}, + {"", 0} +}; + +static struct acpi_driver acpi_aml_nfw_driver = { + .name = "native firmware", + .ids = aml_nfw_ids, + .ops = { + .add = aml_nfw_add, + .remove = aml_nfw_remove, + }, +}; + +static int __init aml_nfw_init(void) +{ + int result; + + if (force_register) + aml_nfw_add_global_handler(); + + result = acpi_bus_register_driver(&acpi_aml_nfw_driver); + if (result < 0) { + aml_nfw_remove_global_handler(); + return result; + } + + return 0; +} + +static void __exit aml_nfw_exit(void) +{ + acpi_bus_unregister_driver(&acpi_aml_nfw_driver); + aml_nfw_remove_global_handler(); +} + +module_init(aml_nfw_init); +module_exit(aml_nfw_exit); diff --git a/kernel/arch/ia64/hp/common/hwsw_iommu.c b/kernel/arch/ia64/hp/common/hwsw_iommu.c new file mode 100644 index 000000000..1e4cae5ae --- /dev/null +++ b/kernel/arch/ia64/hp/common/hwsw_iommu.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2004 Hewlett-Packard Development Company, L.P. + * Contributed by David Mosberger-Tang + * + * This is a pseudo I/O MMU which dispatches to the hardware I/O MMU + * whenever possible. We assume that the hardware I/O MMU requires + * full 32-bit addressability, as is the case, e.g., for HP zx1-based + * systems (there, the I/O MMU window is mapped at 3-4GB). If a + * device doesn't provide full 32-bit addressability, we fall back on + * the sw I/O TLB. This is good enough to let us support broken + * hardware such as soundcards which have a DMA engine that can + * address only 28 bits. 
+ */ + +#include +#include +#include +#include +#include + +extern struct dma_map_ops sba_dma_ops, swiotlb_dma_ops; + +/* swiotlb declarations & definitions: */ +extern int swiotlb_late_init_with_default_size (size_t size); + +/* + * Note: we need to make the determination of whether or not to use + * the sw I/O TLB based purely on the device structure. Anything else + * would be unreliable or would be too intrusive. + */ +static inline int use_swiotlb(struct device *dev) +{ + return dev && dev->dma_mask && + !sba_dma_ops.dma_supported(dev, *dev->dma_mask); +} + +struct dma_map_ops *hwsw_dma_get_ops(struct device *dev) +{ + if (use_swiotlb(dev)) + return &swiotlb_dma_ops; + return &sba_dma_ops; +} +EXPORT_SYMBOL(hwsw_dma_get_ops); + +void __init +hwsw_init (void) +{ + /* default to a smallish 2MB sw I/O TLB */ + if (swiotlb_late_init_with_default_size (2 * (1<<20)) != 0) { +#ifdef CONFIG_IA64_GENERIC + /* Better to have normal DMA than panic */ + printk(KERN_WARNING "%s: Failed to initialize software I/O TLB," + " reverting to hpzx1 platform vector\n", __func__); + machvec_init("hpzx1"); +#else + panic("Unable to initialize software I/O TLB services"); +#endif + } +} diff --git a/kernel/arch/ia64/hp/common/sba_iommu.c b/kernel/arch/ia64/hp/common/sba_iommu.c new file mode 100644 index 000000000..344387a55 --- /dev/null +++ b/kernel/arch/ia64/hp/common/sba_iommu.c @@ -0,0 +1,2241 @@ +/* +** IA64 System Bus Adapter (SBA) I/O MMU manager +** +** (c) Copyright 2002-2005 Alex Williamson +** (c) Copyright 2002-2003 Grant Grundler +** (c) Copyright 2002-2005 Hewlett-Packard Company +** +** Portions (c) 2000 Grant Grundler (from parisc I/O MMU code) +** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code) +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. 
+** +** +** This module initializes the IOC (I/O Controller) found on HP +** McKinley machines and their successors. +** +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* hweight64() */ +#include +#include +#include +#include + +#include /* ia64_get_itc() */ +#include +#include /* PAGE_OFFSET */ +#include + +#include + +extern int swiotlb_late_init_with_default_size (size_t size); + +#define PFX "IOC: " + +/* +** Enabling timing search of the pdir resource map. Output in /proc. +** Disabled by default to optimize performance. +*/ +#undef PDIR_SEARCH_TIMING + +/* +** This option allows cards capable of 64bit DMA to bypass the IOMMU. If +** not defined, all DMA will be 32bit and go through the TLB. +** There's potentially a conflict in the bio merge code with us +** advertising an iommu, but then bypassing it. Since I/O MMU bypassing +** appears to give more performance than bio-level virtual merging, we'll +** do the former for now. NOTE: BYPASS_SG also needs to be undef'd to +** completely restrict DMA to the IOMMU. +*/ +#define ALLOW_IOV_BYPASS + +/* +** This option specifically allows/disallows bypassing scatterlists with +** multiple entries. Coalescing these entries can allow better DMA streaming +** and in some cases shows better performance than entirely bypassing the +** IOMMU. Performance increase on the order of 1-2% sequential output/input +** using bonnie++ on a RAID0 MD device (sym2 & mpt). +*/ +#undef ALLOW_IOV_BYPASS_SG + +/* +** If a device prefetches beyond the end of a valid pdir entry, it will cause +** a hard failure, ie. MCA. Version 3.0 and later of the zx1 LBA should +** disconnect on 4k boundaries and prevent such issues. If the device is +** particularly aggressive, this option will keep the entire pdir valid such +** that prefetching will hit a valid address. 
This could severely impact +** error containment, and is therefore off by default. The page that is +** used for spill-over is poisoned, so that should help debugging somewhat. +*/ +#undef FULL_VALID_PDIR + +#define ENABLE_MARK_CLEAN + +/* +** The number of debug flags is a clue - this code is fragile. NOTE: since +** tightening the use of res_lock the resource bitmap and actual pdir are no +** longer guaranteed to stay in sync. The sanity checking code isn't going to +** like that. +*/ +#undef DEBUG_SBA_INIT +#undef DEBUG_SBA_RUN +#undef DEBUG_SBA_RUN_SG +#undef DEBUG_SBA_RESOURCE +#undef ASSERT_PDIR_SANITY +#undef DEBUG_LARGE_SG_ENTRIES +#undef DEBUG_BYPASS + +#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY) +#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive +#endif + +#define SBA_INLINE __inline__ +/* #define SBA_INLINE */ + +#ifdef DEBUG_SBA_INIT +#define DBG_INIT(x...) printk(x) +#else +#define DBG_INIT(x...) +#endif + +#ifdef DEBUG_SBA_RUN +#define DBG_RUN(x...) printk(x) +#else +#define DBG_RUN(x...) +#endif + +#ifdef DEBUG_SBA_RUN_SG +#define DBG_RUN_SG(x...) printk(x) +#else +#define DBG_RUN_SG(x...) +#endif + + +#ifdef DEBUG_SBA_RESOURCE +#define DBG_RES(x...) printk(x) +#else +#define DBG_RES(x...) +#endif + +#ifdef DEBUG_BYPASS +#define DBG_BYPASS(x...) printk(x) +#else +#define DBG_BYPASS(x...) +#endif + +#ifdef ASSERT_PDIR_SANITY +#define ASSERT(expr) \ + if(!(expr)) { \ + printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \ + panic(#expr); \ + } +#else +#define ASSERT(expr) +#endif + +/* +** The number of pdir entries to "free" before issuing +** a read to PCOM register to flush out PCOM writes. +** Interacts with allocation granularity (ie 4 or 8 entries +** allocated and free'd/purged at a time might make this +** less interesting). 
+*/ +#define DELAYED_RESOURCE_CNT 64 + +#define PCI_DEVICE_ID_HP_SX2000_IOC 0x12ec + +#define ZX1_IOC_ID ((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP) +#define ZX2_IOC_ID ((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP) +#define REO_IOC_ID ((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP) +#define SX1000_IOC_ID ((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP) +#define SX2000_IOC_ID ((PCI_DEVICE_ID_HP_SX2000_IOC << 16) | PCI_VENDOR_ID_HP) + +#define ZX1_IOC_OFFSET 0x1000 /* ACPI reports SBA, we want IOC */ + +#define IOC_FUNC_ID 0x000 +#define IOC_FCLASS 0x008 /* function class, bist, header, rev... */ +#define IOC_IBASE 0x300 /* IO TLB */ +#define IOC_IMASK 0x308 +#define IOC_PCOM 0x310 +#define IOC_TCNFG 0x318 +#define IOC_PDIR_BASE 0x320 + +#define IOC_ROPE0_CFG 0x500 +#define IOC_ROPE_AO 0x10 /* Allow "Relaxed Ordering" */ + + +/* AGP GART driver looks for this */ +#define ZX1_SBA_IOMMU_COOKIE 0x0000badbadc0ffeeUL + +/* +** The zx1 IOC supports 4/8/16/64KB page sizes (see TCNFG register) +** +** Some IOCs (sx1000) can run at the above pages sizes, but are +** really only supported using the IOC at a 4k page size. +** +** iovp_size could only be greater than PAGE_SIZE if we are +** confident the drivers really only touch the next physical +** page iff that driver instance owns it. +*/ +static unsigned long iovp_size; +static unsigned long iovp_shift; +static unsigned long iovp_mask; + +struct ioc { + void __iomem *ioc_hpa; /* I/O MMU base address */ + char *res_map; /* resource map, bit == pdir entry */ + u64 *pdir_base; /* physical base address */ + unsigned long ibase; /* pdir IOV Space base */ + unsigned long imask; /* pdir IOV Space mask */ + + unsigned long *res_hint; /* next avail IOVP - circular search */ + unsigned long dma_mask; + spinlock_t res_lock; /* protects the resource bitmap, but must be held when */ + /* clearing pdir to prevent races with allocations. */ + unsigned int res_bitshift; /* from the RIGHT! 
*/ + unsigned int res_size; /* size of resource map in bytes */ +#ifdef CONFIG_NUMA + unsigned int node; /* node where this IOC lives */ +#endif +#if DELAYED_RESOURCE_CNT > 0 + spinlock_t saved_lock; /* may want to try to get this on a separate cacheline */ + /* than res_lock for bigger systems. */ + int saved_cnt; + struct sba_dma_pair { + dma_addr_t iova; + size_t size; + } saved[DELAYED_RESOURCE_CNT]; +#endif + +#ifdef PDIR_SEARCH_TIMING +#define SBA_SEARCH_SAMPLE 0x100 + unsigned long avg_search[SBA_SEARCH_SAMPLE]; + unsigned long avg_idx; /* current index into avg_search */ +#endif + + /* Stuff we don't need in performance path */ + struct ioc *next; /* list of IOC's in system */ + acpi_handle handle; /* for multiple IOC's */ + const char *name; + unsigned int func_id; + unsigned int rev; /* HW revision of chip */ + u32 iov_size; + unsigned int pdir_size; /* in bytes, determined by IOV Space size */ + struct pci_dev *sac_only_dev; +}; + +static struct ioc *ioc_list, *ioc_found; +static int reserve_sba_gart = 1; + +static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t); +static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t); + +#define sba_sg_address(sg) sg_virt((sg)) + +#ifdef FULL_VALID_PDIR +static u64 prefetch_spill_page; +#endif + +#ifdef CONFIG_PCI +# define GET_IOC(dev) ((dev_is_pci(dev)) \ + ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL) +#else +# define GET_IOC(dev) NULL +#endif + +/* +** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up +** (or rather not merge) DMAs into manageable chunks. +** On parisc, this is more of the software/tuning constraint +** rather than the HW. I/O MMU allocation algorithms can be +** faster with smaller sizes (to some degree). +*/ +#define DMA_CHUNK_SIZE (BITS_PER_LONG*iovp_size) + +#define ROUNDUP(x,y) ((x + ((y)-1)) & ~((y)-1)) + +/************************************ +** SBA register read and write support +** +** BE WARNED: register writes are posted. 
+** (ie follow writes which must reach HW with a read) +** +*/ +#define READ_REG(addr) __raw_readq(addr) +#define WRITE_REG(val, addr) __raw_writeq(val, addr) + +#ifdef DEBUG_SBA_INIT + +/** + * sba_dump_tlb - debugging only - print IOMMU operating parameters + * @hpa: base address of the IOMMU + * + * Print the size/location of the IO MMU PDIR. + */ +static void +sba_dump_tlb(char *hpa) +{ + DBG_INIT("IO TLB at 0x%p\n", (void *)hpa); + DBG_INIT("IOC_IBASE : %016lx\n", READ_REG(hpa+IOC_IBASE)); + DBG_INIT("IOC_IMASK : %016lx\n", READ_REG(hpa+IOC_IMASK)); + DBG_INIT("IOC_TCNFG : %016lx\n", READ_REG(hpa+IOC_TCNFG)); + DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE)); + DBG_INIT("\n"); +} +#endif + + +#ifdef ASSERT_PDIR_SANITY + +/** + * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @msg: text to print ont the output line. + * @pide: pdir index. + * + * Print one entry of the IO MMU PDIR in human readable form. + */ +static void +sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide) +{ + /* start printing from lowest pde in rval */ + u64 *ptr = &ioc->pdir_base[pide & ~(BITS_PER_LONG - 1)]; + unsigned long *rptr = (unsigned long *) &ioc->res_map[(pide >>3) & -sizeof(unsigned long)]; + uint rcnt; + + printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n", + msg, rptr, pide & (BITS_PER_LONG - 1), *rptr); + + rcnt = 0; + while (rcnt < BITS_PER_LONG) { + printk(KERN_DEBUG "%s %2d %p %016Lx\n", + (rcnt == (pide & (BITS_PER_LONG - 1))) + ? " -->" : " ", + rcnt, ptr, (unsigned long long) *ptr ); + rcnt++; + ptr++; + } + printk(KERN_DEBUG "%s", msg); +} + + +/** + * sba_check_pdir - debugging only - consistency checker + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @msg: text to print ont the output line. 
+ * + * Verify the resource map and pdir state is consistent + */ +static int +sba_check_pdir(struct ioc *ioc, char *msg) +{ + u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]); + u64 *rptr = (u64 *) ioc->res_map; /* resource map ptr */ + u64 *pptr = ioc->pdir_base; /* pdir ptr */ + uint pide = 0; + + while (rptr < rptr_end) { + u64 rval; + int rcnt; /* number of bits we might check */ + + rval = *rptr; + rcnt = 64; + + while (rcnt) { + /* Get last byte and highest bit from that */ + u32 pde = ((u32)((*pptr >> (63)) & 0x1)); + if ((rval & 0x1) ^ pde) + { + /* + ** BUMMER! -- res_map != pdir -- + ** Dump rval and matching pdir entries + */ + sba_dump_pdir_entry(ioc, msg, pide); + return(1); + } + rcnt--; + rval >>= 1; /* try the next bit */ + pptr++; + pide++; + } + rptr++; /* look at next word of res_map */ + } + /* It'd be nice if we always got here :^) */ + return 0; +} + + +/** + * sba_dump_sg - debugging only - print Scatter-Gather list + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @startsg: head of the SG list + * @nents: number of entries in SG list + * + * print the SG list so we can verify it's correct by hand. + */ +static void +sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents) +{ + while (nents-- > 0) { + printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents, + startsg->dma_address, startsg->dma_length, + sba_sg_address(startsg)); + startsg = sg_next(startsg); + } +} + +static void +sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents) +{ + struct scatterlist *the_sg = startsg; + int the_nents = nents; + + while (the_nents-- > 0) { + if (sba_sg_address(the_sg) == 0x0UL) + sba_dump_sg(NULL, startsg, nents); + the_sg = sg_next(the_sg); + } +} + +#endif /* ASSERT_PDIR_SANITY */ + + + + +/************************************************************** +* +* I/O Pdir Resource Management +* +* Bits set in the resource map are in use. +* Each bit can represent a number of pages. 
+* LSbs represent lower addresses (IOVA's). +* +***************************************************************/ +#define PAGES_PER_RANGE 1 /* could increase this to 4 or 8 if needed */ + +/* Convert from IOVP to IOVA and vice versa. */ +#define SBA_IOVA(ioc,iovp,offset) ((ioc->ibase) | (iovp) | (offset)) +#define SBA_IOVP(ioc,iova) ((iova) & ~(ioc->ibase)) + +#define PDIR_ENTRY_SIZE sizeof(u64) + +#define PDIR_INDEX(iovp) ((iovp)>>iovp_shift) + +#define RESMAP_MASK(n) ~(~0UL << (n)) +#define RESMAP_IDX_MASK (sizeof(unsigned long) - 1) + + +/** + * For most cases the normal get_order is sufficient, however it limits us + * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity. + * It only incurs about 1 clock cycle to use this one with the static variable + * and makes the code more intuitive. + */ +static SBA_INLINE int +get_iovp_order (unsigned long size) +{ + long double d = size - 1; + long order; + + order = ia64_getf_exp(d); + order = order - iovp_shift - 0xffff + 1; + if (order < 0) + order = 0; + return order; +} + +static unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr, + unsigned int bitshiftcnt) +{ + return (((unsigned long)res_ptr - (unsigned long)ioc->res_map) << 3) + + bitshiftcnt; +} + +/** + * sba_search_bitmap - find free space in IO PDIR resource bitmap + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @bits_wanted: number of entries we need. + * @use_hint: use res_hint to indicate where to start looking + * + * Find consecutive free bits in resource bitmap. + * Each bit represents one entry in the IO Pdir. + * Cool perf optimization: search for log2(size) bits at a time. 
+ */
+static SBA_INLINE unsigned long
+sba_search_bitmap(struct ioc *ioc, struct device *dev,
+		  unsigned long bits_wanted, int use_hint)
+{
+	unsigned long *res_ptr;
+	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
+	unsigned long flags, pide = ~0UL, tpide;
+	unsigned long boundary_size;
+	unsigned long shift;
+	int ret;
+
+	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
+
+	/* Segment boundary of the device, expressed in IOMMU pages */
+	boundary_size = (unsigned long long)dma_get_seg_boundary(dev) + 1;
+	boundary_size = ALIGN(boundary_size, 1ULL << iovp_shift) >> iovp_shift;
+
+	BUG_ON(ioc->ibase & ~iovp_mask);
+	shift = ioc->ibase >> iovp_shift;
+
+	spin_lock_irqsave(&ioc->res_lock, flags);
+
+	/* Allow caller to force a search through the entire resource space */
+	if (likely(use_hint)) {
+		res_ptr = ioc->res_hint;
+	} else {
+		res_ptr = (ulong *)ioc->res_map;
+		ioc->res_bitshift = 0;
+	}
+
+	/*
+	 * Check the starting point only now that res_ptr has a value; doing
+	 * this before the assignment read an uninitialized pointer whenever
+	 * ASSERT() was compiled in.
+	 */
+	ASSERT(res_ptr < res_end);
+
+	/*
+	 * N.B. REO/Grande defect AR2305 can cause TLB fetch timeouts
+	 * if a TLB entry is purged while in use. sba_mark_invalid()
+	 * purges IOTLB entries in power-of-two sizes, so we also
+	 * allocate IOVA space in power-of-two sizes.
+	 */
+	bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift);
+
+	if (likely(bits_wanted == 1)) {
+		unsigned int bitshiftcnt;
+		for(; res_ptr < res_end ; res_ptr++) {
+			if (likely(*res_ptr != ~0UL)) {
+				bitshiftcnt = ffz(*res_ptr);
+				*res_ptr |= (1UL << bitshiftcnt);
+				pide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
+				ioc->res_bitshift = bitshiftcnt + bits_wanted;
+				goto found_it;
+			}
+		}
+		goto not_found;
+
+	}
+
+	if (likely(bits_wanted <= BITS_PER_LONG/2)) {
+		/*
+		** Search the resource bit map on well-aligned values.
+		** "o" is the alignment.
+		** We need the alignment to invalidate I/O TLB using
+		** SBA HW features in the unmap path.
+ */ + unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift); + uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o); + unsigned long mask, base_mask; + + base_mask = RESMAP_MASK(bits_wanted); + mask = base_mask << bitshiftcnt; + + DBG_RES("%s() o %ld %p", __func__, o, res_ptr); + for(; res_ptr < res_end ; res_ptr++) + { + DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr); + ASSERT(0 != mask); + for (; mask ; mask <<= o, bitshiftcnt += o) { + tpide = ptr_to_pide(ioc, res_ptr, bitshiftcnt); + ret = iommu_is_span_boundary(tpide, bits_wanted, + shift, + boundary_size); + if ((0 == ((*res_ptr) & mask)) && !ret) { + *res_ptr |= mask; /* mark resources busy! */ + pide = tpide; + ioc->res_bitshift = bitshiftcnt + bits_wanted; + goto found_it; + } + } + + bitshiftcnt = 0; + mask = base_mask; + + } + + } else { + int qwords, bits, i; + unsigned long *end; + + qwords = bits_wanted >> 6; /* /64 */ + bits = bits_wanted - (qwords * BITS_PER_LONG); + + end = res_end - qwords; + + for (; res_ptr < end; res_ptr++) { + tpide = ptr_to_pide(ioc, res_ptr, 0); + ret = iommu_is_span_boundary(tpide, bits_wanted, + shift, boundary_size); + if (ret) + goto next_ptr; + for (i = 0 ; i < qwords ; i++) { + if (res_ptr[i] != 0) + goto next_ptr; + } + if (bits && res_ptr[i] && (__ffs(res_ptr[i]) < bits)) + continue; + + /* Found it, mark it */ + for (i = 0 ; i < qwords ; i++) + res_ptr[i] = ~0UL; + res_ptr[i] |= RESMAP_MASK(bits); + + pide = tpide; + res_ptr += qwords; + ioc->res_bitshift = bits; + goto found_it; +next_ptr: + ; + } + } + +not_found: + prefetch(ioc->res_map); + ioc->res_hint = (unsigned long *) ioc->res_map; + ioc->res_bitshift = 0; + spin_unlock_irqrestore(&ioc->res_lock, flags); + return (pide); + +found_it: + ioc->res_hint = res_ptr; + spin_unlock_irqrestore(&ioc->res_lock, flags); + return (pide); +} + + +/** + * sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap + * @ioc: IO MMU structure which owns the pdir we are interested in. 
+ * @size: number of bytes to create a mapping for + * + * Given a size, find consecutive unmarked and then mark those bits in the + * resource bit map. + */ +static int +sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size) +{ + unsigned int pages_needed = size >> iovp_shift; +#ifdef PDIR_SEARCH_TIMING + unsigned long itc_start; +#endif + unsigned long pide; + + ASSERT(pages_needed); + ASSERT(0 == (size & ~iovp_mask)); + +#ifdef PDIR_SEARCH_TIMING + itc_start = ia64_get_itc(); +#endif + /* + ** "seek and ye shall find"...praying never hurts either... + */ + pide = sba_search_bitmap(ioc, dev, pages_needed, 1); + if (unlikely(pide >= (ioc->res_size << 3))) { + pide = sba_search_bitmap(ioc, dev, pages_needed, 0); + if (unlikely(pide >= (ioc->res_size << 3))) { +#if DELAYED_RESOURCE_CNT > 0 + unsigned long flags; + + /* + ** With delayed resource freeing, we can give this one more shot. We're + ** getting close to being in trouble here, so do what we can to make this + ** one count. 
+			*/
+			spin_lock_irqsave(&ioc->saved_lock, flags);
+			if (ioc->saved_cnt > 0) {
+				struct sba_dma_pair *d;
+				int cnt = ioc->saved_cnt;
+
+				d = &(ioc->saved[ioc->saved_cnt - 1]);
+
+				/* Purge and release every deferred unmap, newest first */
+				spin_lock(&ioc->res_lock);
+				while (cnt--) {
+					sba_mark_invalid(ioc, d->iova, d->size);
+					sba_free_range(ioc, d->iova, d->size);
+					d--;
+				}
+				ioc->saved_cnt = 0;
+				READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
+				spin_unlock(&ioc->res_lock);
+			}
+			spin_unlock_irqrestore(&ioc->saved_lock, flags);
+
+			pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
+			if (unlikely(pide >= (ioc->res_size << 3))) {
+				/* adjacent literals are concatenated: keep the space after "is" */
+				printk(KERN_WARNING "%s: I/O MMU @ %p is "
+				       "out of mapping resources, %u %u %lx\n",
+				       __func__, ioc->ioc_hpa, ioc->res_size,
+				       pages_needed, dma_get_seg_boundary(dev));
+				return -1;
+			}
+#else
+			/* adjacent literals are concatenated: keep the space after "is" */
+			printk(KERN_WARNING "%s: I/O MMU @ %p is "
+			       "out of mapping resources, %u %u %lx\n",
+			       __func__, ioc->ioc_hpa, ioc->res_size,
+			       pages_needed, dma_get_seg_boundary(dev));
+			return -1;
+#endif
+		}
+	}
+
+#ifdef PDIR_SEARCH_TIMING
+	ioc->avg_search[ioc->avg_idx++] = (ia64_get_itc() - itc_start) / pages_needed;
+	ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1;
+#endif
+
+	prefetchw(&(ioc->pdir_base[pide]));
+
+#ifdef ASSERT_PDIR_SANITY
+	/* verify the first enable bit is clear */
+	if(0x00 != ((u8 *) ioc->pdir_base)[pide*PDIR_ENTRY_SIZE + 7]) {
+		sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide);
+	}
+#endif
+
+	DBG_RES("%s(%x) %d -> %lx hint %x/%x\n",
+		__func__, size, pages_needed, pide,
+		(uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map),
+		ioc->res_bitshift );
+
+	return (pide);
+}
+
+
+/**
+ * sba_free_range - unmark bits in IO PDIR resource bitmap
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @iova: IO virtual address which was previously allocated.
+ * @size: number of bytes covered by the mapping being released
+ *
+ * Clear bits in the ioc's resource map.  The bit count is rounded up to a
+ * power of two, mirroring the rounding done at allocation time.
+ */
+static SBA_INLINE void
+sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size)
+{
+	unsigned long iovp = SBA_IOVP(ioc, iova);
+	unsigned int pide = PDIR_INDEX(iovp);
+	unsigned int ridx = pide >> 3;	/* convert bit to byte address */
+	unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]);
+	int bits_not_wanted = size >> iovp_shift;
+	unsigned long m;
+
+	/* Round up to power-of-two size: see AR2305 note above */
+	bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift);
+	for (; bits_not_wanted > 0 ; res_ptr++) {
+
+		/*
+		 * ">=" rather than ">": a range of exactly BITS_PER_LONG
+		 * entries would otherwise reach RESMAP_MASK() with a shift
+		 * count equal to the word width, which is undefined in C.
+		 * Clearing the whole word gives the same result.
+		 */
+		if (unlikely(bits_not_wanted >= BITS_PER_LONG)) {
+
+			/* these mappings start 64bit aligned */
+			*res_ptr = 0UL;
+			bits_not_wanted -= BITS_PER_LONG;
+			pide += BITS_PER_LONG;
+
+		} else {
+
+			/* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */
+			m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1));
+
+			DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __func__, (uint) iova, size,
+				bits_not_wanted, m, pide, res_ptr, *res_ptr);
+
+			ASSERT(m != 0);
+			ASSERT(bits_not_wanted);
+			ASSERT((*res_ptr & m) == m); /* verify same bits are set */
+			*res_ptr &= ~m;
+
+			/*
+			 * Terminate the loop only after the debug output and
+			 * sanity checks above have seen the real bit count;
+			 * zeroing it first made ASSERT(bits_not_wanted) fail
+			 * unconditionally.
+			 */
+			bits_not_wanted = 0;
+		}
+	}
+}
+
+
+/**************************************************************
+*
+*   "Dynamic DMA Mapping" support (aka "Coherent I/O")
+*
+***************************************************************/
+
+/**
+ * sba_io_pdir_entry - fill in one IO PDIR entry
+ * @pdir_ptr: pointer to IO PDIR entry
+ * @vba: Virtual CPU address of buffer to map
+ *
+ * SBA Mapping Routine
+ *
+ * Given a virtual address (vba, arg1) sba_io_pdir_entry()
+ * loads the I/O PDIR entry pointed to by pdir_ptr (arg0).
+ * Each IO Pdir entry consists of 8 bytes as shown below
+ * (LSB == bit 0):
+ *
+ *  63                    40                                 11    7        0
+ * +-+---------------------+----------------------------------+----+--------+
+ * |V|        U            |            PPN[39:12]            | U  |   FF   |
+ * +-+---------------------+----------------------------------+----+--------+
+ *
+ *  V  == Valid Bit
+ *  U  == Unused
+ * PPN == Physical Page Number
+ *
+ * The physical address fields are filled with the results of virt_to_phys()
+ * on the vba.
+ */
+
+#if 1
+/*
+ * Arguments and the whole expansion are parenthesized so that the macro
+ * behaves like a single expression whatever is passed in.
+ */
+#define sba_io_pdir_entry(pdir_ptr, vba) \
+	(*(pdir_ptr) = (((vba) & ~0xE000000000000FFFULL) \
+			| 0x8000000000000000ULL))
+#else
+void SBA_INLINE
+sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba)
+{
+	*pdir_ptr = ((vba & ~0xE000000000000FFFULL) | 0x80000000000000FFULL);
+}
+#endif
+
+#ifdef ENABLE_MARK_CLEAN
+/**
+ * Since DMA is i-cache coherent, any (complete) pages that were written via
+ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
+ * flush them when they get mapped into an executable vm-area.
+ */
+static void
+mark_clean (void *addr, size_t size)
+{
+	unsigned long pg_addr, end;
+
+	/* Only pages lying entirely inside [addr, addr + size) are marked */
+	pg_addr = PAGE_ALIGN((unsigned long) addr);
+	end = (unsigned long) addr + size;
+	while (pg_addr + PAGE_SIZE <= end) {
+		struct page *page = virt_to_page((void *)pg_addr);
+		set_bit(PG_arch_1, &page->flags);
+		pg_addr += PAGE_SIZE;
+	}
+}
+#endif
+
+/**
+ * sba_mark_invalid - invalidate one or more IO PDIR entries
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @iova: IO Virtual Address mapped earlier
+ * @byte_cnt: number of bytes this mapping covers.
+ *
+ * Mark the IO PDIR entry(ies) as invalid and invalidate the
+ * corresponding IO TLB entry. The PCOM (Purge Command Register)
+ * is used to purge stale entries in the IO TLB when unmapping entries.
+ *
+ * The PCOM register supports purging of multiple pages, with a minimum
+ * of 1 page and a maximum of 2GB. Hardware requires the address be
+ * aligned to the size of the range being purged.
The size of the range + * must be a power of 2. The "Cool perf optimization" in the + * allocation routine helps keep that true. + */ +static SBA_INLINE void +sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) +{ + u32 iovp = (u32) SBA_IOVP(ioc,iova); + + int off = PDIR_INDEX(iovp); + + /* Must be non-zero and rounded up */ + ASSERT(byte_cnt > 0); + ASSERT(0 == (byte_cnt & ~iovp_mask)); + +#ifdef ASSERT_PDIR_SANITY + /* Assert first pdir entry is set */ + if (!(ioc->pdir_base[off] >> 60)) { + sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp)); + } +#endif + + if (byte_cnt <= iovp_size) + { + ASSERT(off < ioc->pdir_size); + + iovp |= iovp_shift; /* set "size" field for PCOM */ + +#ifndef FULL_VALID_PDIR + /* + ** clear I/O PDIR entry "valid" bit + ** Do NOT clear the rest - save it for debugging. + ** We should only clear bits that have previously + ** been enabled. + */ + ioc->pdir_base[off] &= ~(0x80000000000000FFULL); +#else + /* + ** If we want to maintain the PDIR as valid, put in + ** the spill page so devices prefetching won't + ** cause a hard fail. + */ + ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page); +#endif + } else { + u32 t = get_iovp_order(byte_cnt) + iovp_shift; + + iovp |= t; + ASSERT(t <= 31); /* 2GB! Max value of "size" field */ + + do { + /* verify this pdir entry is enabled */ + ASSERT(ioc->pdir_base[off] >> 63); +#ifndef FULL_VALID_PDIR + /* clear I/O Pdir entry "valid" bit first */ + ioc->pdir_base[off] &= ~(0x80000000000000FFULL); +#else + ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page); +#endif + off++; + byte_cnt -= iovp_size; + } while (byte_cnt > 0); + } + + WRITE_REG(iovp | ioc->ibase, ioc->ioc_hpa+IOC_PCOM); +} + +/** + * sba_map_single_attrs - map one buffer and return IOVA for DMA + * @dev: instance of PCI owned by the driver that's asking. + * @addr: driver buffer to map. + * @size: number of bytes to map in driver buffer. + * @dir: R/W or both. 
+ * @attrs: optional dma attributes + * + * See Documentation/DMA-API-HOWTO.txt + */ +static dma_addr_t sba_map_page(struct device *dev, struct page *page, + unsigned long poff, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct ioc *ioc; + void *addr = page_address(page) + poff; + dma_addr_t iovp; + dma_addr_t offset; + u64 *pdir_start; + int pide; +#ifdef ASSERT_PDIR_SANITY + unsigned long flags; +#endif +#ifdef ALLOW_IOV_BYPASS + unsigned long pci_addr = virt_to_phys(addr); +#endif + +#ifdef ALLOW_IOV_BYPASS + ASSERT(to_pci_dev(dev)->dma_mask); + /* + ** Check if the PCI device can DMA to ptr... if so, just return ptr + */ + if (likely((pci_addr & ~to_pci_dev(dev)->dma_mask) == 0)) { + /* + ** Device is bit capable of DMA'ing to the buffer... + ** just return the PCI address of ptr + */ + DBG_BYPASS("sba_map_single_attrs() bypass mask/addr: " + "0x%lx/0x%lx\n", + to_pci_dev(dev)->dma_mask, pci_addr); + return pci_addr; + } +#endif + ioc = GET_IOC(dev); + ASSERT(ioc); + + prefetch(ioc->res_hint); + + ASSERT(size > 0); + ASSERT(size <= DMA_CHUNK_SIZE); + + /* save offset bits */ + offset = ((dma_addr_t) (long) addr) & ~iovp_mask; + + /* round up to nearest iovp_size */ + size = (size + offset + ~iovp_mask) & iovp_mask; + +#ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); + if (sba_check_pdir(ioc,"Check before sba_map_single_attrs()")) + panic("Sanity check failed"); + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + + pide = sba_alloc_range(ioc, dev, size); + if (pide < 0) + return 0; + + iovp = (dma_addr_t) pide << iovp_shift; + + DBG_RUN("%s() 0x%p -> 0x%lx\n", __func__, addr, (long) iovp | offset); + + pdir_start = &(ioc->pdir_base[pide]); + + while (size > 0) { + ASSERT(((u8 *)pdir_start)[7] == 0); /* verify availability */ + sba_io_pdir_entry(pdir_start, (unsigned long) addr); + + DBG_RUN(" pdir 0x%p %lx\n", pdir_start, *pdir_start); + + addr += iovp_size; + size -= iovp_size; + pdir_start++; + } + 
/* force pdir update */ + wmb(); + + /* form complete address */ +#ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); + sba_check_pdir(ioc,"Check after sba_map_single_attrs()"); + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + return SBA_IOVA(ioc, iovp, offset); +} + +static dma_addr_t sba_map_single_attrs(struct device *dev, void *addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return sba_map_page(dev, virt_to_page(addr), + (unsigned long)addr & ~PAGE_MASK, size, dir, attrs); +} + +#ifdef ENABLE_MARK_CLEAN +static SBA_INLINE void +sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) +{ + u32 iovp = (u32) SBA_IOVP(ioc,iova); + int off = PDIR_INDEX(iovp); + void *addr; + + if (size <= iovp_size) { + addr = phys_to_virt(ioc->pdir_base[off] & + ~0xE000000000000FFFULL); + mark_clean(addr, size); + } else { + do { + addr = phys_to_virt(ioc->pdir_base[off] & + ~0xE000000000000FFFULL); + mark_clean(addr, min(size, iovp_size)); + off++; + size -= iovp_size; + } while (size > 0); + } +} +#endif + +/** + * sba_unmap_single_attrs - unmap one IOVA and free resources + * @dev: instance of PCI owned by the driver that's asking. + * @iova: IOVA of driver buffer previously mapped. + * @size: number of bytes mapped in driver buffer. + * @dir: R/W or both. 
+ * @attrs: optional dma attributes
+ *
+ * See Documentation/DMA-API-HOWTO.txt
+ */
+static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size,
+			   enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	struct ioc *ioc;
+#if DELAYED_RESOURCE_CNT > 0
+	struct sba_dma_pair *d;
+#endif
+	unsigned long flags;
+	dma_addr_t offset;
+
+	ioc = GET_IOC(dev);
+	ASSERT(ioc);
+
+#ifdef ALLOW_IOV_BYPASS
+	if (likely((iova & ioc->imask) != ioc->ibase)) {
+		/*
+		** Address does not fall w/in IOVA, must be bypassing
+		*/
+		DBG_BYPASS("sba_unmap_single_attrs() bypass addr: 0x%lx\n",
+			   iova);
+
+#ifdef ENABLE_MARK_CLEAN
+		if (dir == DMA_FROM_DEVICE) {
+			mark_clean(phys_to_virt(iova), size);
+		}
+#endif
+		return;
+	}
+#endif
+	offset = iova & ~iovp_mask;
+
+	DBG_RUN("%s() iovp 0x%lx/%x\n", __func__, (long) iova, size);
+
+	iova ^= offset;        /* clear offset bits */
+	size += offset;
+	size = ROUNDUP(size, iovp_size);
+
+#ifdef ENABLE_MARK_CLEAN
+	if (dir == DMA_FROM_DEVICE)
+		sba_mark_clean(ioc, iova, size);
+#endif
+
+#if DELAYED_RESOURCE_CNT > 0
+	/* Queue the range; the whole queue is purged once it fills up. */
+	spin_lock_irqsave(&ioc->saved_lock, flags);
+	d = &(ioc->saved[ioc->saved_cnt]);
+	d->iova = iova;
+	d->size = size;
+	if (unlikely(++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT)) {
+		int cnt = ioc->saved_cnt;
+		spin_lock(&ioc->res_lock);
+		/* d points at the newest entry; walk back to the oldest */
+		while (cnt--) {
+			sba_mark_invalid(ioc, d->iova, d->size);
+			sba_free_range(ioc, d->iova, d->size);
+			d--;
+		}
+		ioc->saved_cnt = 0;
+		READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
+		spin_unlock(&ioc->res_lock);
+	}
+	spin_unlock_irqrestore(&ioc->saved_lock, flags);
+#else /* DELAYED_RESOURCE_CNT == 0 */
+	spin_lock_irqsave(&ioc->res_lock, flags);
+	sba_mark_invalid(ioc, iova, size);
+	sba_free_range(ioc, iova, size);
+	READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif /* DELAYED_RESOURCE_CNT == 0 */
+}
+
+void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
+			    enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	sba_unmap_page(dev, iova, size, dir, attrs);
+}
+
+/**
+ * sba_alloc_coherent - allocate/map shared mem for DMA
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @size:  number of bytes mapped in driver buffer.
+ * @dma_handle:  IOVA of new buffer.
+ * @flags: page allocator flags used for the backing pages.
+ * @attrs: optional dma attributes (not used here).
+ *
+ * Returns the zeroed kernel virtual address of the buffer and stores the
+ * matching DMA address in @dma_handle, or returns NULL when either the
+ * page allocation or the IOMMU mapping fails.
+ *
+ * See Documentation/DMA-API-HOWTO.txt
+ */
+static void *
+sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		   gfp_t flags, struct dma_attrs *attrs)
+{
+	struct ioc *ioc;
+	void *addr;
+
+	ioc = GET_IOC(dev);
+	ASSERT(ioc);
+
+#ifdef CONFIG_NUMA
+	{
+		int node = ioc->node;
+		struct page *page;
+
+		if (node == NUMA_NO_NODE)
+			node = numa_node_id();
+
+		page = alloc_pages_exact_node(node, flags, get_order(size));
+		if (unlikely(!page))
+			return NULL;
+
+		addr = page_address(page);
+	}
+#else
+	addr = (void *) __get_free_pages(flags, get_order(size));
+#endif
+	if (unlikely(!addr))
+		return NULL;
+
+	memset(addr, 0, size);
+	*dma_handle = virt_to_phys(addr);
+
+#ifdef ALLOW_IOV_BYPASS
+	ASSERT(dev->coherent_dma_mask);
+	/*
+	** Check if the PCI device can DMA to ptr... if so, just return ptr
+	*/
+	if (likely((*dma_handle & ~dev->coherent_dma_mask) == 0)) {
+		DBG_BYPASS("sba_alloc_coherent() bypass mask/addr: 0x%lx/0x%lx\n",
+			   dev->coherent_dma_mask, *dma_handle);
+
+		return addr;
+	}
+#endif
+
+	/*
+	 * If device can't bypass or bypass is disabled, pass the 32bit fake
+	 * device to map single to get an iova mapping.
+	 */
+	*dma_handle = sba_map_single_attrs(&ioc->sac_only_dev->dev, addr,
+					   size, 0, NULL);
+	/*
+	 * The map routine hands back 0 when no IOVA range could be
+	 * allocated.  Don't return a buffer the device cannot reach, and
+	 * don't leak the pages backing it.
+	 */
+	if (unlikely(!*dma_handle)) {
+		free_pages((unsigned long) addr, get_order(size));
+		return NULL;
+	}
+
+	return addr;
+}
+
+
+/**
+ * sba_free_coherent - free/unmap shared mem for DMA
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @size:  number of bytes mapped in driver buffer.
+ * @vaddr:  virtual address IOVA of "consistent" buffer.
+ * @dma_handler:  IO virtual address of "consistent" buffer.
+ *
+ * Unmaps @dma_handle and then releases the pages backing @vaddr.
+ *
+ * See Documentation/DMA-API-HOWTO.txt
+ */
+static void sba_free_coherent(struct device *dev, size_t size, void *vaddr,
+			      dma_addr_t dma_handle, struct dma_attrs *attrs)
+{
+	sba_unmap_single_attrs(dev, dma_handle, size, 0, NULL);
+	free_pages((unsigned long) vaddr, get_order(size));
+}
+
+
+/*
+** Since 0 is a valid pdir_base index value, can't use that
+** to determine if a value is valid or not.  Use a flag to indicate
+** the SG list entry contains a valid pdir index.
+*/
+#define PIDE_FLAG  0x1UL
+
+#ifdef DEBUG_LARGE_SG_ENTRIES
+int dump_run_sg = 0;
+#endif
+
+
+/**
+ * sba_fill_pdir - write allocated SG entries into IO PDIR
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @startsg:  list of IOVA/size pairs
+ * @nents: number of entries in startsg list
+ *
+ * Take preprocessed SG list and write corresponding entries
+ * in the IO PDIR.
+ *
+ * Entries whose dma_address carries PIDE_FLAG start a new DMA stream;
+ * the stream's final dma_address/dma_length are stored in successive
+ * entries from the head of the list.  Returns the number of streams.
+ */
+
+static SBA_INLINE int
+sba_fill_pdir(
+	struct ioc *ioc,
+	struct scatterlist *startsg,
+	int nents)
+{
+	struct scatterlist *dma_sg = startsg;	/* pointer to current DMA */
+	int n_mappings = 0;
+	u64 *pdirp = NULL;
+	unsigned long dma_offset = 0;
+
+	while (nents-- > 0) {
+		int     cnt = startsg->dma_length;	/* chunk length stored by the earlier pass */
+		startsg->dma_length = 0;	/* stream totals are accumulated into dma_sg below */
+
+#ifdef DEBUG_LARGE_SG_ENTRIES
+		if (dump_run_sg)
+			printk(" %2d : %08lx/%05x %p\n",
+				nents, startsg->dma_address, cnt,
+				sba_sg_address(startsg));
+#else
+		DBG_RUN_SG(" %d : %08lx/%05x %p\n",
+				nents, startsg->dma_address, cnt,
+				sba_sg_address(startsg));
+#endif
+		/*
+		** Look for the start of a new DMA stream
+		*/
+		if (startsg->dma_address & PIDE_FLAG) {
+			u32 pide = startsg->dma_address & ~PIDE_FLAG;
+			dma_offset = (unsigned long) pide & ~iovp_mask;
+			startsg->dma_address = 0;
+			if (n_mappings)
+				dma_sg = sg_next(dma_sg);	/* first stream reuses the list head */
+			dma_sg->dma_address = pide | ioc->ibase;
+			pdirp = &(ioc->pdir_base[pide >> iovp_shift]);
+			n_mappings++;
+		}
+
+		/*
+		** Look for a VCONTIG chunk
+		*/
+		if (cnt) {
+			unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
+			ASSERT(pdirp);
+
+			/* Since multiple Vcontig blocks could make up
+			** one DMA stream, *add* cnt to dma_len.
+			*/
+			dma_sg->dma_length += cnt;
+			cnt += dma_offset;
+			dma_offset=0;	/* only want offset on first chunk */
+			cnt = ROUNDUP(cnt, iovp_size);
+			do {	/* one PDIR entry per IOV page of the chunk */
+				sba_io_pdir_entry(pdirp, vaddr);
+				vaddr += iovp_size;
+				cnt -= iovp_size;
+				pdirp++;
+			} while (cnt > 0);
+		}
+		startsg = sg_next(startsg);
+	}
+	/* force pdir update */
+	wmb();
+
+#ifdef DEBUG_LARGE_SG_ENTRIES
+	dump_run_sg = 0;
+#endif
+	return(n_mappings);
+}
+
+
+/*
+** Two address ranges are DMA contiguous *iff* "end of prev" and
+** "start of next" are both on an IOV page boundary.
+**
+** (shift left is a quick trick to mask off upper bits)
+*/
+#define DMA_CONTIG(__X, __Y) \
+	(((((unsigned long) __X) | ((unsigned long) __Y)) << (BITS_PER_LONG - iovp_shift)) == 0UL)
+
+
+/**
+ * sba_coalesce_chunks - preprocess the SG list
+ * @ioc: IO MMU structure which owns the pdir we are interested in.
+ * @startsg:  list of IOVA/size pairs
+ * @nents: number of entries in startsg list
+ *
+ * First pass is to walk the SG list and determine where the breaks are
+ * in the DMA stream. Allocates PDIR entries but does not fill them.
+ * Returns the number of DMA chunks.
+ *
+ * Doing the fill separate from the coalescing/allocation keeps the
+ * code simpler. Future enhancement could make one pass through
+ * the sglist do both.
+ *
+ * If an IOVA range cannot be allocated, the dma_length of the current
+ * stream head is cleared and -1 is returned.
+ */
+static SBA_INLINE int
+sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
+	struct scatterlist *startsg,
+	int nents)
+{
+	struct scatterlist *vcontig_sg;    /* VCONTIG chunk head */
+	unsigned long vcontig_len;         /* len of VCONTIG chunk */
+	unsigned long vcontig_end;
+	struct scatterlist *dma_sg;        /* next DMA stream head */
+	unsigned long dma_offset, dma_len; /* start/len of DMA stream */
+	int n_mappings = 0;
+	unsigned int max_seg_size = dma_get_max_seg_size(dev);
+	int idx;
+
+	while (nents > 0) {
+		unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
+
+		/*
+		** Prepare for first/next DMA stream
+		*/
+		dma_sg = vcontig_sg = startsg;
+		dma_len = vcontig_len = vcontig_end = startsg->length;
+		vcontig_end +=  vaddr;
+		dma_offset = vaddr & ~iovp_mask;
+
+		/* PARANOID: clear entries */
+		startsg->dma_address = startsg->dma_length = 0;
+
+		/*
+		** This loop terminates one iteration "early" since
+		** it's always looking one "ahead".
+		*/
+		while (--nents > 0) {
+			unsigned long vaddr;	/* tmp */
+
+			startsg = sg_next(startsg);
+
+			/* PARANOID */
+			startsg->dma_address = startsg->dma_length = 0;
+
+			/* catch brokenness in SCSI layer */
+			ASSERT(startsg->length <= DMA_CHUNK_SIZE);
+
+			/*
+			** First make sure current dma stream won't
+			** exceed DMA_CHUNK_SIZE if we coalesce the
+			** next entry.
+			*/
+			if (((dma_len + dma_offset + startsg->length + ~iovp_mask) & iovp_mask)
+			    > DMA_CHUNK_SIZE)
+				break;
+
+			if (dma_len + startsg->length > max_seg_size)
+				break;
+
+			/*
+			** Then look for virtually contiguous blocks.
+			**
+			** append the next transaction?
+			*/
+			vaddr = (unsigned long) sba_sg_address(startsg);
+			if  (vcontig_end == vaddr)
+			{
+				vcontig_len += startsg->length;
+				vcontig_end += startsg->length;
+				dma_len     += startsg->length;
+				continue;
+			}
+
+#ifdef DEBUG_LARGE_SG_ENTRIES
+			dump_run_sg = (vcontig_len > iovp_size);
+#endif
+
+			/*
+			** Not virtually contiguous.
+			** Terminate prev chunk.
+			** Start a new chunk.
+			**
+			** Once we start a new VCONTIG chunk, dma_offset
+			** can't change. And we need the offset from the first
+			** chunk - not the last one. Ergo Successive chunks
+			** must start on page boundaries and dove tail
+			** with it's predecessor.
+			*/
+			vcontig_sg->dma_length = vcontig_len;
+
+			vcontig_sg = startsg;
+			vcontig_len = startsg->length;
+
+			/*
+			** 3) do the entries end/start on page boundaries?
+			**    Don't update vcontig_end until we've checked.
+			*/
+			if (DMA_CONTIG(vcontig_end, vaddr))
+			{
+				vcontig_end = vcontig_len + vaddr;
+				dma_len += vcontig_len;
+				continue;
+			} else {
+				break;
+			}
+		}
+
+		/*
+		** End of DMA Stream
+		** Terminate last VCONTIG block.
+		** Allocate space for DMA stream.
+		*/
+		vcontig_sg->dma_length = vcontig_len;
+		dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
+		ASSERT(dma_len <= DMA_CHUNK_SIZE);
+		idx = sba_alloc_range(ioc, dev, dma_len);
+		if (idx < 0) {
+			dma_sg->dma_length = 0;
+			return -1;
+		}
+		/*
+		 * Widen the pdir index before shifting: shifting the plain
+		 * int would overflow for IOVA offsets of 2GB and above.
+		 */
+		dma_sg->dma_address = (dma_addr_t)(PIDE_FLAG
+						   | ((dma_addr_t) idx << iovp_shift)
+						   | dma_offset);
+		n_mappings++;
+	}
+
+	return n_mappings;
+}
+
+static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
+			       int nents, enum dma_data_direction dir,
+			       struct dma_attrs *attrs);
+/**
+ * sba_map_sg - map Scatter/Gather list
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @sglist:  array of buffer/length pairs
+ * @nents:  number of entries in list
+ * @dir:  R/W or both.
+ * @attrs: optional dma attributes
+ *
+ * Returns the number of DMA mappings set up in @sglist, or 0 when the
+ * list could not be mapped.
+ *
+ * See Documentation/DMA-API-HOWTO.txt
+ */
+static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist,
+			    int nents, enum dma_data_direction dir,
+			    struct dma_attrs *attrs)
+{
+	struct ioc *ioc;
+	int coalesced, filled = 0;
+#ifdef ASSERT_PDIR_SANITY
+	unsigned long flags;
+#endif
+#ifdef ALLOW_IOV_BYPASS_SG
+	struct scatterlist *sg;
+#endif
+
+	DBG_RUN_SG("%s() START %d entries\n", __func__, nents);
+	ioc = GET_IOC(dev);
+	ASSERT(ioc);
+
+#ifdef ALLOW_IOV_BYPASS_SG
+	ASSERT(to_pci_dev(dev)->dma_mask);
+	if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) {
+		for_each_sg(sglist, sg, nents, filled) {
+			sg->dma_length = sg->length;
+			sg->dma_address = virt_to_phys(sba_sg_address(sg));
+		}
+		return filled;
+	}
+#endif
+	/* Fast path single entry scatterlists. */
+	if (nents == 1) {
+		sglist->dma_address = sba_map_single_attrs(dev, sba_sg_address(sglist), sglist->length, dir, attrs);
+		/*
+		 * The single-buffer map routine returns 0 when it could not
+		 * allocate an IOVA range; report that as "nothing mapped"
+		 * instead of claiming one valid entry.
+		 */
+		if (unlikely(!sglist->dma_address)) {
+			sglist->dma_length = 0;
+			return 0;
+		}
+		sglist->dma_length = sglist->length;
+		return 1;
+	}
+
+#ifdef ASSERT_PDIR_SANITY
+	spin_lock_irqsave(&ioc->res_lock, flags);
+	if (sba_check_pdir(ioc,"Check before sba_map_sg_attrs()"))
+	{
+		sba_dump_sg(ioc, sglist, nents);
+		panic("Check before sba_map_sg_attrs()");
+	}
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif
+
+	prefetch(ioc->res_hint);
+
+	/*
+	** First coalesce the chunks and allocate I/O pdir space
+	**
+	** If this is one DMA stream, we can properly map using the
+	** correct virtual address associated with each DMA page.
+	** w/o this association, we wouldn't have coherent DMA!
+	** Access to the virtual address is what forces a two pass algorithm.
+	*/
+	coalesced = sba_coalesce_chunks(ioc, dev, sglist, nents);
+	if (coalesced < 0) {
+		sba_unmap_sg_attrs(dev, sglist, nents, dir, attrs);
+		return 0;
+	}
+
+	/*
+	** Program the I/O Pdir
+	**
+	** map the virtual addresses to the I/O Pdir
+	** o dma_address will contain the pdir index
+	** o dma_len will contain the number of bytes to map
+	** o address contains the virtual address.
+	*/
+	filled = sba_fill_pdir(ioc, sglist, nents);
+
+#ifdef ASSERT_PDIR_SANITY
+	spin_lock_irqsave(&ioc->res_lock, flags);
+	if (sba_check_pdir(ioc,"Check after sba_map_sg_attrs()"))
+	{
+		sba_dump_sg(ioc, sglist, nents);
+		panic("Check after sba_map_sg_attrs()\n");
+	}
+	spin_unlock_irqrestore(&ioc->res_lock, flags);
+#endif
+
+	ASSERT(coalesced == filled);
+	DBG_RUN_SG("%s() DONE %d mappings\n", __func__, filled);
+
+	return filled;
+}
+
+/**
+ * sba_unmap_sg_attrs - unmap Scatter/Gather list
+ * @dev: instance of PCI owned by the driver that's asking.
+ * @sglist:  array of buffer/length pairs
+ * @nents:  number of entries in list
+ * @dir:  R/W or both.
+ * @attrs: optional dma attributes + * + * See Documentation/DMA-API-HOWTO.txt + */ +static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ +#ifdef ASSERT_PDIR_SANITY + struct ioc *ioc; + unsigned long flags; +#endif + + DBG_RUN_SG("%s() START %d entries, %p,%x\n", + __func__, nents, sba_sg_address(sglist), sglist->length); + +#ifdef ASSERT_PDIR_SANITY + ioc = GET_IOC(dev); + ASSERT(ioc); + + spin_lock_irqsave(&ioc->res_lock, flags); + sba_check_pdir(ioc,"Check before sba_unmap_sg_attrs()"); + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + + while (nents && sglist->dma_length) { + + sba_unmap_single_attrs(dev, sglist->dma_address, + sglist->dma_length, dir, attrs); + sglist = sg_next(sglist); + nents--; + } + + DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents); + +#ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); + sba_check_pdir(ioc,"Check after sba_unmap_sg_attrs()"); + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + +} + +/************************************************************** +* +* Initialization and claim +* +***************************************************************/ + +static void +ioc_iova_init(struct ioc *ioc) +{ + int tcnfg; + int agp_found = 0; + struct pci_dev *device = NULL; +#ifdef FULL_VALID_PDIR + unsigned long index; +#endif + + /* + ** Firmware programs the base and size of a "safe IOVA space" + ** (one that doesn't overlap memory or LMMIO space) in the + ** IBASE and IMASK registers. 
+ */ + ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1UL; + ioc->imask = READ_REG(ioc->ioc_hpa + IOC_IMASK) | 0xFFFFFFFF00000000UL; + + ioc->iov_size = ~ioc->imask + 1; + + DBG_INIT("%s() hpa %p IOV base 0x%lx mask 0x%lx (%dMB)\n", + __func__, ioc->ioc_hpa, ioc->ibase, ioc->imask, + ioc->iov_size >> 20); + + switch (iovp_size) { + case 4*1024: tcnfg = 0; break; + case 8*1024: tcnfg = 1; break; + case 16*1024: tcnfg = 2; break; + case 64*1024: tcnfg = 3; break; + default: + panic(PFX "Unsupported IOTLB page size %ldK", + iovp_size >> 10); + break; + } + WRITE_REG(tcnfg, ioc->ioc_hpa + IOC_TCNFG); + + ioc->pdir_size = (ioc->iov_size / iovp_size) * PDIR_ENTRY_SIZE; + ioc->pdir_base = (void *) __get_free_pages(GFP_KERNEL, + get_order(ioc->pdir_size)); + if (!ioc->pdir_base) + panic(PFX "Couldn't allocate I/O Page Table\n"); + + memset(ioc->pdir_base, 0, ioc->pdir_size); + + DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __func__, + iovp_size >> 10, ioc->pdir_base, ioc->pdir_size); + + ASSERT(ALIGN((unsigned long) ioc->pdir_base, 4*1024) == (unsigned long) ioc->pdir_base); + WRITE_REG(virt_to_phys(ioc->pdir_base), ioc->ioc_hpa + IOC_PDIR_BASE); + + /* + ** If an AGP device is present, only use half of the IOV space + ** for PCI DMA. Unfortunately we can't know ahead of time + ** whether GART support will actually be used, for now we + ** can just key on an AGP device found in the system. + ** We program the next pdir index after we stop w/ a key for + ** the GART code to handshake on. 
+ */ + for_each_pci_dev(device) + agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP); + + if (agp_found && reserve_sba_gart) { + printk(KERN_INFO PFX "reserving %dMb of IOVA space at 0x%lx for agpgart\n", + ioc->iov_size/2 >> 20, ioc->ibase + ioc->iov_size/2); + ioc->pdir_size /= 2; + ((u64 *)ioc->pdir_base)[PDIR_INDEX(ioc->iov_size/2)] = ZX1_SBA_IOMMU_COOKIE; + } +#ifdef FULL_VALID_PDIR + /* + ** Check to see if the spill page has been allocated, we don't need more than + ** one across multiple SBAs. + */ + if (!prefetch_spill_page) { + char *spill_poison = "SBAIOMMU POISON"; + int poison_size = 16; + void *poison_addr, *addr; + + addr = (void *)__get_free_pages(GFP_KERNEL, get_order(iovp_size)); + if (!addr) + panic(PFX "Couldn't allocate PDIR spill page\n"); + + poison_addr = addr; + for ( ; (u64) poison_addr < addr + iovp_size; poison_addr += poison_size) + memcpy(poison_addr, spill_poison, poison_size); + + prefetch_spill_page = virt_to_phys(addr); + + DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __func__, prefetch_spill_page); + } + /* + ** Set all the PDIR entries valid w/ the spill page as the target + */ + for (index = 0 ; index < (ioc->pdir_size / PDIR_ENTRY_SIZE) ; index++) + ((u64 *)ioc->pdir_base)[index] = (0x80000000000000FF | prefetch_spill_page); +#endif + + /* Clear I/O TLB of any possible entries */ + WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM); + READ_REG(ioc->ioc_hpa + IOC_PCOM); + + /* Enable IOVA translation */ + WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa + IOC_IBASE); + READ_REG(ioc->ioc_hpa + IOC_IBASE); +} + +static void __init +ioc_resource_init(struct ioc *ioc) +{ + spin_lock_init(&ioc->res_lock); +#if DELAYED_RESOURCE_CNT > 0 + spin_lock_init(&ioc->saved_lock); +#endif + + /* resource map size dictated by pdir_size */ + ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE; /* entries */ + ioc->res_size >>= 3; /* convert bit count to byte count */ + DBG_INIT("%s() res_size 0x%x\n", 
__func__, ioc->res_size); + + ioc->res_map = (char *) __get_free_pages(GFP_KERNEL, + get_order(ioc->res_size)); + if (!ioc->res_map) + panic(PFX "Couldn't allocate resource map\n"); + + memset(ioc->res_map, 0, ioc->res_size); + /* next available IOVP - circular search */ + ioc->res_hint = (unsigned long *) ioc->res_map; + +#ifdef ASSERT_PDIR_SANITY + /* Mark first bit busy - ie no IOVA 0 */ + ioc->res_map[0] = 0x1; + ioc->pdir_base[0] = 0x8000000000000000ULL | ZX1_SBA_IOMMU_COOKIE; +#endif +#ifdef FULL_VALID_PDIR + /* Mark the last resource used so we don't prefetch beyond IOVA space */ + ioc->res_map[ioc->res_size - 1] |= 0x80UL; /* res_map is chars */ + ioc->pdir_base[(ioc->pdir_size / PDIR_ENTRY_SIZE) - 1] = (0x80000000000000FF + | prefetch_spill_page); +#endif + + DBG_INIT("%s() res_map %x %p\n", __func__, + ioc->res_size, (void *) ioc->res_map); +} + +static void __init +ioc_sac_init(struct ioc *ioc) +{ + struct pci_dev *sac = NULL; + struct pci_controller *controller = NULL; + + /* + * pci_alloc_coherent() must return a DMA address which is + * SAC (single address cycle) addressable, so allocate a + * pseudo-device to enforce that. + */ + sac = kzalloc(sizeof(*sac), GFP_KERNEL); + if (!sac) + panic(PFX "Couldn't allocate struct pci_dev"); + + controller = kzalloc(sizeof(*controller), GFP_KERNEL); + if (!controller) + panic(PFX "Couldn't allocate struct pci_controller"); + + controller->iommu = ioc; + sac->sysdata = controller; + sac->dma_mask = 0xFFFFFFFFUL; +#ifdef CONFIG_PCI + sac->dev.bus = &pci_bus_type; +#endif + ioc->sac_only_dev = sac; +} + +static void __init +ioc_zx1_init(struct ioc *ioc) +{ + unsigned long rope_config; + unsigned int i; + + if (ioc->rev < 0x20) + panic(PFX "IOC 2.0 or later required for IOMMU support\n"); + + /* 38 bit memory controller + extra bit for range displaced by MMIO */ + ioc->dma_mask = (0x1UL << 39) - 1; + + /* + ** Clear ROPE(N)_CONFIG AO bit. 
+ ** Disables "NT Ordering" (~= !"Relaxed Ordering") + ** Overrides bit 1 in DMA Hint Sets. + ** Improves netperf UDP_STREAM by ~10% for tg3 on bcm5701. + */ + for (i=0; i<(8*8); i+=8) { + rope_config = READ_REG(ioc->ioc_hpa + IOC_ROPE0_CFG + i); + rope_config &= ~IOC_ROPE_AO; + WRITE_REG(rope_config, ioc->ioc_hpa + IOC_ROPE0_CFG + i); + } +} + +typedef void (initfunc)(struct ioc *); + +struct ioc_iommu { + u32 func_id; + char *name; + initfunc *init; +}; + +static struct ioc_iommu ioc_iommu_info[] __initdata = { + { ZX1_IOC_ID, "zx1", ioc_zx1_init }, + { ZX2_IOC_ID, "zx2", NULL }, + { SX1000_IOC_ID, "sx1000", NULL }, + { SX2000_IOC_ID, "sx2000", NULL }, +}; + +static void ioc_init(unsigned long hpa, struct ioc *ioc) +{ + struct ioc_iommu *info; + + ioc->next = ioc_list; + ioc_list = ioc; + + ioc->ioc_hpa = ioremap(hpa, 0x1000); + + ioc->func_id = READ_REG(ioc->ioc_hpa + IOC_FUNC_ID); + ioc->rev = READ_REG(ioc->ioc_hpa + IOC_FCLASS) & 0xFFUL; + ioc->dma_mask = 0xFFFFFFFFFFFFFFFFUL; /* conservative */ + + for (info = ioc_iommu_info; info < ioc_iommu_info + ARRAY_SIZE(ioc_iommu_info); info++) { + if (ioc->func_id == info->func_id) { + ioc->name = info->name; + if (info->init) + (info->init)(ioc); + } + } + + iovp_size = (1 << iovp_shift); + iovp_mask = ~(iovp_size - 1); + + DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __func__, + PAGE_SIZE >> 10, iovp_size >> 10); + + if (!ioc->name) { + ioc->name = kmalloc(24, GFP_KERNEL); + if (ioc->name) + sprintf((char *) ioc->name, "Unknown (%04x:%04x)", + ioc->func_id & 0xFFFF, (ioc->func_id >> 16) & 0xFFFF); + else + ioc->name = "Unknown"; + } + + ioc_iova_init(ioc); + ioc_resource_init(ioc); + ioc_sac_init(ioc); + + if ((long) ~iovp_mask > (long) ia64_max_iommu_merge_mask) + ia64_max_iommu_merge_mask = ~iovp_mask; + + printk(KERN_INFO PFX + "%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n", + ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF, + hpa, ioc->iov_size >> 20, ioc->ibase); +} + + + 
+/**************************************************************************
+**
+**   SBA initialization code (HW and SW)
+**
+**   o identify SBA chip itself
+**   o FIXME: initialize DMA hints for reasonable defaults
+**
+**************************************************************************/
+
+#ifdef CONFIG_PROC_FS
+/* seq_file start: hand back the *pos'th entry of ioc_list, or NULL. */
+static void *
+ioc_start(struct seq_file *s, loff_t *pos)
+{
+	struct ioc *cur = ioc_list;
+	loff_t remaining = *pos;
+
+	while (cur) {
+		if (remaining-- == 0)
+			return cur;
+		cur = cur->next;
+	}
+
+	return NULL;
+}
+
+/* seq_file next: bump the position and step to the following IOC. */
+static void *
+ioc_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	*pos += 1;
+	return ((struct ioc *) v)->next;
+}
+
+/* seq_file stop: nothing was acquired in ioc_start(), so nothing to undo. */
+static void
+ioc_stop(struct seq_file *s, void *v)
+{
+}
+
+/* seq_file show: print name/revision, IOVA geometry and PDIR usage of one IOC. */
+static int
+ioc_show(struct seq_file *s, void *v)
+{
+	struct ioc *ioc = v;
+	unsigned long *map_words = (unsigned long *)ioc->res_map;
+	int word, used = 0;
+
+	seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n",
+		ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF));
+#ifdef CONFIG_NUMA
+	if (ioc->node != NUMA_NO_NODE)
+		seq_printf(s, "NUMA node : %d\n", ioc->node);
+#endif
+	seq_printf(s, "IOVA size : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024));
+	seq_printf(s, "IOVA page size : %ld kb\n", iovp_size/1024);
+
+	/* every set bit in the resource map is one PDIR entry in use */
+	for (word = 0; word < (ioc->res_size / sizeof(unsigned long)); word++)
+		used += hweight64(map_words[word]);
+
+	seq_printf(s, "PDIR size : %d entries\n", ioc->pdir_size >> 3);
+	seq_printf(s, "PDIR used : %d entries\n", used);
+
+#ifdef PDIR_SEARCH_TIMING
+	{
+		unsigned long n, avg = 0, min, max;
+
+		min = ioc->avg_search[0];
+		max = min;
+		for (n = 0; n < SBA_SEARCH_SAMPLE; n++) {
+			unsigned long sample = ioc->avg_search[n];
+
+			avg += sample;
+			if (sample > max)
+				max = sample;
+			if (sample < min)
+				min = sample;
+		}
+		avg /= SBA_SEARCH_SAMPLE;
+		seq_printf(s, "Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles/IOVA page)\n",
+			   min, avg, max);
+	}
+#endif
+#ifndef ALLOW_IOV_BYPASS
+	seq_printf(s, "IOVA bypass disabled\n");
+#endif
+	return 0;
+}
+
+static const struct seq_operations ioc_seq_ops = { + .start = ioc_start, + .next = ioc_next, + .stop = ioc_stop, + .show = ioc_show +}; + +static int +ioc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &ioc_seq_ops); +} + +static const struct file_operations ioc_fops = { + .open = ioc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +static void __init +ioc_proc_init(void) +{ + struct proc_dir_entry *dir; + + dir = proc_mkdir("bus/mckinley", NULL); + if (!dir) + return; + + proc_create(ioc_list->name, 0, dir, &ioc_fops); +} +#endif + +static void +sba_connect_bus(struct pci_bus *bus) +{ + acpi_handle handle, parent; + acpi_status status; + struct ioc *ioc; + + if (!PCI_CONTROLLER(bus)) + panic(PFX "no sysdata on bus %d!\n", bus->number); + + if (PCI_CONTROLLER(bus)->iommu) + return; + + handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion); + if (!handle) + return; + + /* + * The IOC scope encloses PCI root bridges in the ACPI + * namespace, so work our way out until we find an IOC we + * claimed previously. 
+ */ + do { + for (ioc = ioc_list; ioc; ioc = ioc->next) + if (ioc->handle == handle) { + PCI_CONTROLLER(bus)->iommu = ioc; + return; + } + + status = acpi_get_parent(handle, &parent); + handle = parent; + } while (ACPI_SUCCESS(status)); + + printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number); +} + +static void __init +sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) +{ +#ifdef CONFIG_NUMA + unsigned int node; + + node = acpi_get_node(handle); + if (node != NUMA_NO_NODE && !node_online(node)) + node = NUMA_NO_NODE; + + ioc->node = node; +#endif +} + +static void acpi_sba_ioc_add(struct ioc *ioc) +{ + acpi_handle handle = ioc->handle; + acpi_status status; + u64 hpa, length; + struct acpi_device_info *adi; + + ioc_found = ioc->next; + status = hp_acpi_csr_space(handle, &hpa, &length); + if (ACPI_FAILURE(status)) + goto err; + + status = acpi_get_object_info(handle, &adi); + if (ACPI_FAILURE(status)) + goto err; + + /* + * For HWP0001, only SBA appears in ACPI namespace. It encloses the PCI + * root bridges, and its CSR space includes the IOC function. 
+ */ + if (strncmp("HWP0001", adi->hardware_id.string, 7) == 0) { + hpa += ZX1_IOC_OFFSET; + /* zx1 based systems default to kernel page size iommu pages */ + if (!iovp_shift) + iovp_shift = min(PAGE_SHIFT, 16); + } + kfree(adi); + + /* + * default anything not caught above or specified on cmdline to 4k + * iommu page size + */ + if (!iovp_shift) + iovp_shift = 12; + + ioc_init(hpa, ioc); + /* setup NUMA node association */ + sba_map_ioc_to_node(ioc, handle); + return; + + err: + kfree(ioc); +} + +static const struct acpi_device_id hp_ioc_iommu_device_ids[] = { + {"HWP0001", 0}, + {"HWP0004", 0}, + {"", 0}, +}; + +static int acpi_sba_ioc_attach(struct acpi_device *device, + const struct acpi_device_id *not_used) +{ + struct ioc *ioc; + + ioc = kzalloc(sizeof(*ioc), GFP_KERNEL); + if (!ioc) + return -ENOMEM; + + ioc->next = ioc_found; + ioc_found = ioc; + ioc->handle = device->handle; + return 1; +} + + +static struct acpi_scan_handler acpi_sba_ioc_handler = { + .ids = hp_ioc_iommu_device_ids, + .attach = acpi_sba_ioc_attach, +}; + +static int __init acpi_sba_ioc_init_acpi(void) +{ + return acpi_scan_add_handler(&acpi_sba_ioc_handler); +} +/* This has to run before acpi_scan_init(). */ +arch_initcall(acpi_sba_ioc_init_acpi); + +extern struct dma_map_ops swiotlb_dma_ops; + +static int __init +sba_init(void) +{ + if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb")) + return 0; + +#if defined(CONFIG_IA64_GENERIC) + /* If we are booting a kdump kernel, the sba_iommu will + * cause devices that were not shutdown properly to MCA + * as soon as they are turned back on. Our only option for + * a successful kdump kernel boot is to use the swiotlb. 
+ */ + if (is_kdump_kernel()) { + dma_ops = &swiotlb_dma_ops; + if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0) + panic("Unable to initialize software I/O TLB:" + " Try machvec=dig boot option"); + machvec_init("dig"); + return 0; + } +#endif + + /* + * ioc_found should be populated by the acpi_sba_ioc_handler's .attach() + * routine, but that only happens if acpi_scan_init() has already run. + */ + while (ioc_found) + acpi_sba_ioc_add(ioc_found); + + if (!ioc_list) { +#ifdef CONFIG_IA64_GENERIC + /* + * If we didn't find something sba_iommu can claim, we + * need to setup the swiotlb and switch to the dig machvec. + */ + dma_ops = &swiotlb_dma_ops; + if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0) + panic("Unable to find SBA IOMMU or initialize " + "software I/O TLB: Try machvec=dig boot option"); + machvec_init("dig"); +#else + panic("Unable to find SBA IOMMU: Try a generic or DIG kernel"); +#endif + return 0; + } + +#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_HP_ZX1_SWIOTLB) + /* + * hpzx1_swiotlb needs to have a fairly small swiotlb bounce + * buffer setup to support devices with smaller DMA masks than + * sba_iommu can handle. + */ + if (ia64_platform_is("hpzx1_swiotlb")) { + extern void hwsw_init(void); + + hwsw_init(); + } +#endif + +#ifdef CONFIG_PCI + { + struct pci_bus *b = NULL; + while ((b = pci_find_next_bus(b)) != NULL) + sba_connect_bus(b); + } +#endif + +#ifdef CONFIG_PROC_FS + ioc_proc_init(); +#endif + return 0; +} + +subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... 
*/ + +static int __init +nosbagart(char *str) +{ + reserve_sba_gart = 0; + return 1; +} + +static int sba_dma_supported (struct device *dev, u64 mask) +{ + /* make sure it's at least 32bit capable */ + return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL); +} + +static int sba_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return 0; +} + +__setup("nosbagart", nosbagart); + +static int __init +sba_page_override(char *str) +{ + unsigned long page_size; + + page_size = memparse(str, &str); + switch (page_size) { + case 4096: + case 8192: + case 16384: + case 65536: + iovp_shift = ffs(page_size) - 1; + break; + default: + printk("%s: unknown/unsupported iommu page size %ld\n", + __func__, page_size); + } + + return 1; +} + +__setup("sbapagesize=",sba_page_override); + +struct dma_map_ops sba_dma_ops = { + .alloc = sba_alloc_coherent, + .free = sba_free_coherent, + .map_page = sba_map_page, + .unmap_page = sba_unmap_page, + .map_sg = sba_map_sg_attrs, + .unmap_sg = sba_unmap_sg_attrs, + .sync_single_for_cpu = machvec_dma_sync_single, + .sync_sg_for_cpu = machvec_dma_sync_sg, + .sync_single_for_device = machvec_dma_sync_single, + .sync_sg_for_device = machvec_dma_sync_sg, + .dma_supported = sba_dma_supported, + .mapping_error = sba_dma_mapping_error, +}; + +void sba_dma_init(void) +{ + dma_ops = &sba_dma_ops; +} diff --git a/kernel/arch/ia64/hp/sim/Kconfig b/kernel/arch/ia64/hp/sim/Kconfig new file mode 100644 index 000000000..d84707d55 --- /dev/null +++ b/kernel/arch/ia64/hp/sim/Kconfig @@ -0,0 +1,22 @@ + +menu "HP Simulator drivers" + depends on IA64_HP_SIM || IA64_GENERIC + +config HP_SIMETH + bool "Simulated Ethernet " + depends on NET + +config HP_SIMSERIAL + bool "Simulated serial driver support" + depends on TTY + +config HP_SIMSERIAL_CONSOLE + bool "Console for HP simulator" + depends on HP_SIMSERIAL + +config HP_SIMSCSI + bool "Simulated SCSI disk" + depends on SCSI=y + +endmenu + diff --git a/kernel/arch/ia64/hp/sim/Makefile 
b/kernel/arch/ia64/hp/sim/Makefile new file mode 100644 index 000000000..d10da4793 --- /dev/null +++ b/kernel/arch/ia64/hp/sim/Makefile @@ -0,0 +1,16 @@ +# +# ia64/platform/hp/sim/Makefile +# +# Copyright (C) 2002 Hewlett-Packard Co. +# David Mosberger-Tang +# Copyright (C) 1999 Silicon Graphics, Inc. +# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) +# + +obj-y := hpsim_irq.o hpsim_setup.o hpsim.o +obj-$(CONFIG_IA64_GENERIC) += hpsim_machvec.o + +obj-$(CONFIG_HP_SIMETH) += simeth.o +obj-$(CONFIG_HP_SIMSERIAL) += simserial.o +obj-$(CONFIG_HP_SIMSERIAL_CONSOLE) += hpsim_console.o +obj-$(CONFIG_HP_SIMSCSI) += simscsi.o diff --git a/kernel/arch/ia64/hp/sim/boot/Makefile b/kernel/arch/ia64/hp/sim/boot/Makefile new file mode 100644 index 000000000..2e805e0cc --- /dev/null +++ b/kernel/arch/ia64/hp/sim/boot/Makefile @@ -0,0 +1,37 @@ +# +# ia64/boot/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. 
+# +# Copyright (C) 1998, 2003 by David Mosberger-Tang +# + +targets-$(CONFIG_IA64_HP_SIM) += bootloader +targets := vmlinux.bin vmlinux.gz $(targets-y) + +quiet_cmd_cptotop = LN $@ + cmd_cptotop = ln -f $< $@ + +vmlinux.gz: $(obj)/vmlinux.gz $(addprefix $(obj)/,$(targets-y)) + $(call cmd,cptotop) + @echo ' Kernel: $@ is ready' + +boot: bootloader + +bootloader: $(obj)/bootloader + $(call cmd,cptotop) + +$(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) + +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + + +LDFLAGS_bootloader = -static -T + +$(obj)/bootloader: $(src)/bootloader.lds $(obj)/bootloader.o $(obj)/boot_head.o $(obj)/fw-emu.o \ + lib/lib.a arch/ia64/lib/built-in.o arch/ia64/lib/lib.a FORCE + $(call if_changed,ld) diff --git a/kernel/arch/ia64/hp/sim/boot/boot_head.S b/kernel/arch/ia64/hp/sim/boot/boot_head.S new file mode 100644 index 000000000..880856549 --- /dev/null +++ b/kernel/arch/ia64/hp/sim/boot/boot_head.S @@ -0,0 +1,164 @@ +/* + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include +#include + + .bss + .align 16 +stack_mem: + .skip 16384 /* 16KB boot stack; must match the stack_mem+16384-16 loaded into sp by _start */ + + .text + +/* This needs to be defined because lib/string.c:strlcat() calls it in case of error... */ +GLOBAL_ENTRY(printk) + break 0 +END(printk) + +GLOBAL_ENTRY(_start) + .prologue + .save rp, r0 + .body + movl gp = __gp + movl sp = stack_mem+16384-16 + bsw.1 + br.call.sptk.many rp=start_bootloader +0: nop 0 /* dummy nop to make unwinding work */ +END(_start) + +/* + * Set a break point on this function so that symbols are available to set breakpoints in + * the kernel being debugged.
+ */ +GLOBAL_ENTRY(debug_break) + br.ret.sptk.many b0 +END(debug_break) + +GLOBAL_ENTRY(ssc) + .regstk 5,0,0,0 + mov r15=in4 + break 0x80001 + br.ret.sptk.many b0 +END(ssc) + +GLOBAL_ENTRY(jmp_to_kernel) + .regstk 2,0,0,0 + mov r28=in0 + mov b7=in1 + br.sptk.few b7 +END(jmp_to_kernel) + +/* + * r28 contains the index of the PAL function + * r29--31 the args + * Return values in ret0--3 (r8--11) + */ +GLOBAL_ENTRY(pal_emulator_static) + mov r8=-1 + mov r9=256 + ;; + cmp.gtu p6,p7=r9,r28 /* r28 <= 255? */ +(p6) br.cond.sptk.few static + ;; + mov r9=512 + ;; + cmp.gtu p6,p7=r9,r28 +(p6) br.cond.sptk.few stacked + ;; +static: cmp.eq p6,p7=PAL_PTCE_INFO,r28 +(p7) br.cond.sptk.few 1f + ;; + mov r8=0 /* status = 0 */ + movl r9=0x100000000 /* tc.base */ + movl r10=0x0000000200000003 /* count[0], count[1] */ + movl r11=0x1000000000002000 /* stride[0], stride[1] */ + br.cond.sptk.few rp +1: cmp.eq p6,p7=PAL_FREQ_RATIOS,r28 +(p7) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + movl r9 =0x100000064 /* proc_ratio (1/100) */ + movl r10=0x100000100 /* bus_ratio<<32 (1/256) */ + movl r11=0x100000064 /* itc_ratio<<32 (1/100) */ + ;; +1: cmp.eq p6,p7=PAL_RSE_INFO,r28 +(p7) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + mov r9=96 /* num phys stacked */ + mov r10=0 /* hints */ + mov r11=0 + br.cond.sptk.few rp +1: cmp.eq p6,p7=PAL_CACHE_FLUSH,r28 /* PAL_CACHE_FLUSH */ +(p7) br.cond.sptk.few 1f + mov r9=ar.lc /* save ar.lc; restored after the flush loop */ + movl r8=524288 /* flush 512k cache lines of 32 bytes each (16MB) */ + ;; + mov ar.lc=r8 + movl r8=0xe000000000000000 + ;; +.loop: fc r8 + add r8=32,r8 + br.cloop.sptk.few .loop + sync.i + ;; + srlz.i + ;; + mov ar.lc=r9 /* restore the caller's ar.lc */ + mov r8=r0 /* status = 0 */ + ;; +1: cmp.eq p6,p7=PAL_PERF_MON_INFO,r28 +(p7) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + movl r9 =0x08122f04 /* generic=4 width=47 retired=8 cycles=18 */ + mov r10=0 /* reserved */ + mov r11=0 /* reserved */ + mov r16=0xffff /* implemented PMC */ + mov r17=0x3ffff /* implemented PMD */ + add r18=8,r29 /* second index */ + ;; + st8 [r29]=r16,16
/* store implemented PMC */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r0,16 /* clear remaining bits */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r17,16 /* store implemented PMD */ + st8 [r18]=r0,16 /* clear remaining bits */ + mov r16=0xf0 /* cycles count capable PMC */ + ;; + st8 [r29]=r0,16 /* clear remaining bits */ + st8 [r18]=r0,16 /* clear remaining bits */ + mov r17=0xf0 /* retired bundles capable PMC */ + ;; + st8 [r29]=r16,16 /* store cycles capable */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r0,16 /* clear remaining bits */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r17,16 /* store retired bundle capable */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r0,16 /* clear remaining bits */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; +1: cmp.eq p6,p7=PAL_VM_SUMMARY,r28 +(p7) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + movl r9=0x2044040020F1865 /* num_tc_levels=2, num_unique_tcs=4 */ + /* max_itr_entry=64, max_dtr_entry=64 */ + /* hash_tag_id=2, max_pkr=15 */ + /* key_size=24, phys_add_size=50, vw=1 */ + movl r10=0x183C /* rid_size=24, impl_va_msb=60 */ + ;; +1: cmp.eq p6,p7=PAL_MEM_ATTRIB,r28 +(p7) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + mov r9=0x80|0x01 /* NatPage|WB */ + ;; +1: br.cond.sptk.few rp +stacked: + br.ret.sptk.few rp +END(pal_emulator_static) diff --git a/kernel/arch/ia64/hp/sim/boot/bootloader.c b/kernel/arch/ia64/hp/sim/boot/bootloader.c new file mode 100644 index 000000000..28f4b230b --- /dev/null +++ b/kernel/arch/ia64/hp/sim/boot/bootloader.c @@ -0,0 +1,174 @@ +/* + * arch/ia64/hp/sim/boot/bootloader.c + * + * Loads an ELF kernel. 
+ * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * + * 01/07/99 S.Eranian modified to pass command line arguments to kernel + */ +struct task_struct; /* forward declaration for elf.h */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "ssc.h" + +struct disk_req { + unsigned long addr; + unsigned len; +}; + +struct disk_stat { + int fd; + unsigned count; +}; + +extern void jmp_to_kernel (unsigned long bp, unsigned long e_entry); +extern struct ia64_boot_param *sys_fw_init (const char *args, int arglen); +extern void debug_break (void); + +static void +cons_write (const char *buf) +{ + unsigned long ch; + + while ((ch = *buf++) != '\0') { + ssc(ch, 0, 0, 0, SSC_PUTCHAR); + if (ch == '\n') + ssc('\r', 0, 0, 0, SSC_PUTCHAR); + } +} + +#define MAX_ARGS 32 + +void +start_bootloader (void) +{ + static char mem[4096]; + static char buffer[1024]; + unsigned long off; + int fd, i; + struct disk_req req; + struct disk_stat stat; + struct elfhdr *elf; + struct elf_phdr *elf_phdr; /* program header */ + unsigned long e_entry, e_phoff, e_phnum; + register struct ia64_boot_param *bp; + char *kpath, *args; + long arglen = 0; + + ssc(0, 0, 0, 0, SSC_CONSOLE_INIT); + + /* + * S.Eranian: extract the commandline argument from the simulator + * + * The expected format is as follows: + * + * kernelname args... + * + * Both are optional but you can't have the second one without the first. 
+ */ + arglen = ssc((long) buffer, 0, 0, 0, SSC_GET_ARGS); + + kpath = "vmlinux"; + args = buffer; + if (arglen > 0) { + kpath = buffer; + while (*args != ' ' && *args != '\0') + ++args, --arglen; + if (*args == ' ') + *args++ = '\0', --arglen; + } + + if (arglen <= 0) { + args = ""; + arglen = 1; + } + + fd = ssc((long) kpath, 1, 0, 0, SSC_OPEN); + + if (fd < 0) { + cons_write(kpath); + cons_write(": file not found, reboot now\n"); + for(;;); + } + stat.fd = fd; + off = 0; + + req.len = sizeof(mem); + req.addr = (long) mem; + ssc(fd, 1, (long) &req, off, SSC_READ); + ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION); + + elf = (struct elfhdr *) mem; + if (elf->e_ident[0] != 0x7f || strncmp(elf->e_ident + 1, "ELF", 3) != 0) { /* require the full "\177ELF" magic */ + cons_write("not an ELF file\n"); + return; + } + if (elf->e_type != ET_EXEC) { + cons_write("not an ELF executable\n"); + return; + } + if (!elf_check_arch(elf)) { + cons_write("kernel not for this processor\n"); + return; + } + + e_entry = elf->e_entry; + e_phnum = elf->e_phnum; + e_phoff = elf->e_phoff; + + cons_write("loading "); + cons_write(kpath); + cons_write("...\n"); + + for (i = 0; i < e_phnum; ++i) { + req.len = sizeof(*elf_phdr); + req.addr = (long) mem; + ssc(fd, 1, (long) &req, e_phoff, SSC_READ); + ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION); + if (stat.count != sizeof(*elf_phdr)) { + cons_write("failed to read phdr\n"); + return; + } + e_phoff += sizeof(*elf_phdr); + + elf_phdr = (struct elf_phdr *) mem; + + if (elf_phdr->p_type != PT_LOAD) + continue; + + req.len = elf_phdr->p_filesz; + req.addr = __pa(elf_phdr->p_paddr); + ssc(fd, 1, (long) &req, elf_phdr->p_offset, SSC_READ); + ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION); + memset((char *)__pa(elf_phdr->p_paddr) + elf_phdr->p_filesz, 0, + elf_phdr->p_memsz - elf_phdr->p_filesz); + } + ssc(fd, 0, 0, 0, SSC_CLOSE); + + cons_write("starting kernel...\n"); + + /* fake an I/O base address: */ + ia64_setreg(_IA64_REG_AR_KR0, 0xffffc000000UL); + + bp = sys_fw_init(args,
arglen); + + ssc(0, (long) kpath, 0, 0, SSC_LOAD_SYMBOLS); + + debug_break(); + jmp_to_kernel((unsigned long) bp, e_entry); + + cons_write("kernel returned!\n"); + ssc(-1, 0, 0, 0, SSC_EXIT); +} diff --git a/kernel/arch/ia64/hp/sim/boot/bootloader.lds b/kernel/arch/ia64/hp/sim/boot/bootloader.lds new file mode 100644 index 000000000..3977f25a1 --- /dev/null +++ b/kernel/arch/ia64/hp/sim/boot/bootloader.lds @@ -0,0 +1,66 @@ +OUTPUT_FORMAT("elf64-ia64-little") +OUTPUT_ARCH(ia64) +ENTRY(_start) +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + . = 0x100000; + + _text = .; + .text : { *(__ivt_section) *(.text) } + _etext = .; + + /* Global data */ + _data = .; + .rodata : { *(.rodata) *(.rodata.*) } + .data : { *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS } + __gp = ALIGN (8) + 0x200000; + .got : { *(.got.plt) *(.got) } + /* We want the small data sections together, so single-instruction offsets + can access them all, and initialized data all before uninitialized, so + we can shorten the on-disk segment size. */ + .sdata : { *(.sdata) } + _edata = .; + + __bss_start = .; + .sbss : { *(.sbss) *(.scommon) } + .bss : { *(.bss) *(COMMON) } + . = ALIGN(64 / 8); + __bss_stop = .; + _end = . ; + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. 
*/ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* These must appear regardless of . */ +} diff --git a/kernel/arch/ia64/hp/sim/boot/fw-emu.c b/kernel/arch/ia64/hp/sim/boot/fw-emu.c new file mode 100644 index 000000000..87bf9ad8c --- /dev/null +++ b/kernel/arch/ia64/hp/sim/boot/fw-emu.c @@ -0,0 +1,373 @@ +/* + * PAL & SAL emulation. + * + * Copyright (C) 1998-2001 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#ifdef CONFIG_PCI +# include +#endif + +#include +#include +#include +#include +#include + +#include "ssc.h" + +#define MB (1024*1024UL) + +#define SIMPLE_MEMMAP 1 + +#if SIMPLE_MEMMAP +# define NUM_MEM_DESCS 4 +#else +# define NUM_MEM_DESCS 16 +#endif + +static char fw_mem[( sizeof(struct ia64_boot_param) + + sizeof(efi_system_table_t) + + sizeof(efi_runtime_services_t) + + 1*sizeof(efi_config_table_t) + + sizeof(struct ia64_sal_systab) + + sizeof(struct ia64_sal_desc_entry_point) + + NUM_MEM_DESCS*(sizeof(efi_memory_desc_t)) + + 1024)] __attribute__ ((aligned (8))); + +#define SECS_PER_HOUR (60 * 60) +#define SECS_PER_DAY (SECS_PER_HOUR * 24) + +/* Compute the `struct tm' representation of *T, + offset OFFSET seconds east of UTC, + and store year, yday, mon, mday, wday, hour, min, sec into *TP. 
+ Return nonzero if successful. */ +int +offtime (unsigned long t, efi_time_t *tp) +{ + const unsigned short int __mon_yday[2][13] = + { + /* Normal years. */ + { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, + /* Leap years. */ + { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } + }; + long int days, rem, y; + const unsigned short int *ip; + + days = t / SECS_PER_DAY; + rem = t % SECS_PER_DAY; + while (rem < 0) { + rem += SECS_PER_DAY; + --days; + } + while (rem >= SECS_PER_DAY) { + rem -= SECS_PER_DAY; + ++days; + } + tp->hour = rem / SECS_PER_HOUR; + rem %= SECS_PER_HOUR; + tp->minute = rem / 60; + tp->second = rem % 60; + /* January 1, 1970 was a Thursday. */ + y = 1970; + +# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0)) +# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400)) +# define __isleap(year) \ + ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0)) + + while (days < 0 || days >= (__isleap (y) ? 366 : 365)) { + /* Guess a corrected year, assuming 365 days per year. */ + long int yg = y + days / 365 - (days % 365 < 0); + + /* Adjust DAYS and Y to match the guessed year. */ + days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1) + - LEAPS_THRU_END_OF (y - 1)); + y = yg; + } + tp->year = y; + ip = __mon_yday[__isleap(y)]; + for (y = 11; days < (long int) ip[y]; --y) + continue; + days -= ip[y]; + tp->month = y + 1; + tp->day = days + 1; + return 1; +} + +extern void pal_emulator_static (void); + +/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. 
*/ + +#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3) + +#define REG_OFFSET(addr) (0x00000000000000FF & (addr)) +#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr)) +#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr)) + +static efi_status_t +fw_efi_get_time (efi_time_t *tm, efi_time_cap_t *tc) +{ +#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC) + struct { + int tv_sec; /* must be 32bits to work */ + int tv_usec; + } tv32bits; + + ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD); + + memset(tm, 0, sizeof(*tm)); + offtime(tv32bits.tv_sec, tm); + + if (tc) + memset(tc, 0, sizeof(*tc)); +#else +# error Not implemented yet... +#endif + return EFI_SUCCESS; +} + +static void +efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data) +{ +#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC) + ssc(status, 0, 0, 0, SSC_EXIT); +#else +# error Not implemented yet... +#endif +} + +static efi_status_t +efi_unimplemented (void) +{ + return EFI_UNSUPPORTED; +} + +static struct sal_ret_values +sal_emulator (long index, unsigned long in1, unsigned long in2, + unsigned long in3, unsigned long in4, unsigned long in5, + unsigned long in6, unsigned long in7) +{ + long r9 = 0; + long r10 = 0; + long r11 = 0; + long status; + + /* + * Don't do a "switch" here since that gives us code that + * isn't self-relocatable. + */ + status = 0; + if (index == SAL_FREQ_BASE) { + if (in1 == SAL_FREQ_BASE_PLATFORM) + r9 = 200000000; + else if (in1 == SAL_FREQ_BASE_INTERVAL_TIMER) { + /* + * Is this supposed to be the cr.itc frequency + * or something platform specific? The SAL + * doc ain't exactly clear on this... 
+ */ + r9 = 700000000; + } else if (in1 == SAL_FREQ_BASE_REALTIME_CLOCK) + r9 = 1; + else + status = -1; + } else if (index == SAL_SET_VECTORS) { + ; + } else if (index == SAL_GET_STATE_INFO) { + ; + } else if (index == SAL_GET_STATE_INFO_SIZE) { + ; + } else if (index == SAL_CLEAR_STATE_INFO) { + ; + } else if (index == SAL_MC_RENDEZ) { + ; + } else if (index == SAL_MC_SET_PARAMS) { + ; + } else if (index == SAL_CACHE_FLUSH) { + ; + } else if (index == SAL_CACHE_INIT) { + ; +#ifdef CONFIG_PCI + } else if (index == SAL_PCI_CONFIG_READ) { + /* + * in1 contains the PCI configuration address and in2 + * the size of the read. The value that is read is + * returned via the general register r9. + */ + outl(BUILD_CMD(in1), 0xCF8); + if (in2 == 1) /* Reading byte */ + r9 = inb(0xCFC + ((REG_OFFSET(in1) & 3))); + else if (in2 == 2) /* Reading word */ + r9 = inw(0xCFC + ((REG_OFFSET(in1) & 2))); + else /* Reading dword */ + r9 = inl(0xCFC); + status = PCIBIOS_SUCCESSFUL; + } else if (index == SAL_PCI_CONFIG_WRITE) { + /* + * in1 contains the PCI configuration address, in2 the + * size of the write, and in3 the actual value to be + * written out. 
+ */ + outl(BUILD_CMD(in1), 0xCF8); + if (in2 == 1) /* Writing byte */ + outb(in3, 0xCFC + ((REG_OFFSET(in1) & 3))); + else if (in2 == 2) /* Writing word */ + outw(in3, 0xCFC + ((REG_OFFSET(in1) & 2))); + else /* Writing dword */ + outl(in3, 0xCFC); + status = PCIBIOS_SUCCESSFUL; +#endif /* CONFIG_PCI */ + } else if (index == SAL_UPDATE_PAL) { + ; + } else { + status = -1; + } + return ((struct sal_ret_values) {status, r9, r10, r11}); +} + +struct ia64_boot_param * +sys_fw_init (const char *args, int arglen) +{ + efi_system_table_t *efi_systab; + efi_runtime_services_t *efi_runtime; + efi_config_table_t *efi_tables; + struct ia64_sal_systab *sal_systab; + efi_memory_desc_t *efi_memmap, *md; + unsigned long *pal_desc, *sal_desc; + struct ia64_sal_desc_entry_point *sal_ed; + struct ia64_boot_param *bp; + unsigned char checksum = 0; + char *cp, *cmd_line; + int i = 0; +# define MAKE_MD(typ, attr, start, end) \ + do { \ + md = efi_memmap + i++; \ + md->type = typ; \ + md->pad = 0; \ + md->phys_addr = start; \ + md->virt_addr = 0; \ + md->num_pages = (end - start) >> 12; \ + md->attribute = attr; \ + } while (0) + + memset(fw_mem, 0, sizeof(fw_mem)); + + pal_desc = (unsigned long *) &pal_emulator_static; + sal_desc = (unsigned long *) &sal_emulator; + + cp = fw_mem; + efi_systab = (void *) cp; cp += sizeof(*efi_systab); + efi_runtime = (void *) cp; cp += sizeof(*efi_runtime); + efi_tables = (void *) cp; cp += sizeof(*efi_tables); + sal_systab = (void *) cp; cp += sizeof(*sal_systab); + sal_ed = (void *) cp; cp += sizeof(*sal_ed); + efi_memmap = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap); + bp = (void *) cp; cp += sizeof(*bp); + cmd_line = (void *) cp; + + if (args) { + if (arglen >= 1024) + arglen = 1023; + memcpy(cmd_line, args, arglen); + } else { + arglen = 0; + } + cmd_line[arglen] = '\0'; + + memset(efi_systab, 0, sizeof(*efi_systab)); + efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE; + efi_systab->hdr.revision = ((1 << 16) | 00); + 
efi_systab->hdr.headersize = sizeof(efi_systab->hdr); + efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0"); + efi_systab->fw_revision = 1; + efi_systab->runtime = (void *) __pa(efi_runtime); + efi_systab->nr_tables = 1; + efi_systab->tables = __pa(efi_tables); + + efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE; + efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION; + efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr); + efi_runtime->get_time = (void *)__pa(&fw_efi_get_time); + efi_runtime->set_time = (void *)__pa(&efi_unimplemented); + efi_runtime->get_wakeup_time = (void *)__pa(&efi_unimplemented); + efi_runtime->set_wakeup_time = (void *)__pa(&efi_unimplemented); + efi_runtime->set_virtual_address_map = (void *)__pa(&efi_unimplemented); + efi_runtime->get_variable = (void *)__pa(&efi_unimplemented); + efi_runtime->get_next_variable = (void *)__pa(&efi_unimplemented); + efi_runtime->set_variable = (void *)__pa(&efi_unimplemented); + efi_runtime->get_next_high_mono_count = (void *)__pa(&efi_unimplemented); + efi_runtime->reset_system = (void *)__pa(&efi_reset_system); + + efi_tables->guid = SAL_SYSTEM_TABLE_GUID; + efi_tables->table = __pa(sal_systab); + + /* fill in the SAL system table: */ + memcpy(sal_systab->signature, "SST_", 4); + sal_systab->size = sizeof(*sal_systab); + sal_systab->sal_rev_minor = 1; + sal_systab->sal_rev_major = 0; + sal_systab->entry_count = 1; + +#ifdef CONFIG_IA64_GENERIC + strcpy(sal_systab->oem_id, "Generic"); + strcpy(sal_systab->product_id, "IA-64 system"); +#endif + +#ifdef CONFIG_IA64_HP_SIM + strcpy(sal_systab->oem_id, "Hewlett-Packard"); + strcpy(sal_systab->product_id, "HP-simulator"); +#endif + + /* fill in an entry point: */ + sal_ed->type = SAL_DESC_ENTRY_POINT; + sal_ed->pal_proc = __pa(pal_desc[0]); + sal_ed->sal_proc = __pa(sal_desc[0]); + sal_ed->gp = __pa(sal_desc[1]); + + for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp) + checksum += *cp; + + 
sal_systab->checksum = -checksum; + +#if SIMPLE_MEMMAP + /* simulate free memory at physical address zero */ + MAKE_MD(EFI_BOOT_SERVICES_DATA, EFI_MEMORY_WB, 0*MB, 1*MB); + MAKE_MD(EFI_PAL_CODE, EFI_MEMORY_WB, 1*MB, 2*MB); + MAKE_MD(EFI_CONVENTIONAL_MEMORY, EFI_MEMORY_WB, 2*MB, 130*MB); + MAKE_MD(EFI_CONVENTIONAL_MEMORY, EFI_MEMORY_WB, 4096*MB, 4128*MB); +#else + MAKE_MD( 4, 0x9, 0x0000000000000000, 0x0000000000001000); + MAKE_MD( 7, 0x9, 0x0000000000001000, 0x000000000008a000); + MAKE_MD( 4, 0x9, 0x000000000008a000, 0x00000000000a0000); + MAKE_MD( 5, 0x8000000000000009, 0x00000000000c0000, 0x0000000000100000); + MAKE_MD( 7, 0x9, 0x0000000000100000, 0x0000000004400000); + MAKE_MD( 2, 0x9, 0x0000000004400000, 0x0000000004be5000); + MAKE_MD( 7, 0x9, 0x0000000004be5000, 0x000000007f77e000); + MAKE_MD( 6, 0x8000000000000009, 0x000000007f77e000, 0x000000007fb94000); + MAKE_MD( 6, 0x8000000000000009, 0x000000007fb94000, 0x000000007fb95000); + MAKE_MD( 6, 0x8000000000000009, 0x000000007fb95000, 0x000000007fc00000); + MAKE_MD(13, 0x8000000000000009, 0x000000007fc00000, 0x000000007fc3a000); + MAKE_MD( 7, 0x9, 0x000000007fc3a000, 0x000000007fea0000); + MAKE_MD( 5, 0x8000000000000009, 0x000000007fea0000, 0x000000007fea8000); + MAKE_MD( 7, 0x9, 0x000000007fea8000, 0x000000007feab000); + MAKE_MD( 5, 0x8000000000000009, 0x000000007feab000, 0x000000007ffff000); + MAKE_MD( 7, 0x9, 0x00000000ff400000, 0x0000000104000000); +#endif + + bp->efi_systab = __pa(&fw_mem); + bp->efi_memmap = __pa(efi_memmap); + bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t); + bp->efi_memdesc_size = sizeof(efi_memory_desc_t); + bp->efi_memdesc_version = 1; + bp->command_line = __pa(cmd_line); + bp->console_info.num_cols = 80; + bp->console_info.num_rows = 25; + bp->console_info.orig_x = 0; + bp->console_info.orig_y = 24; + bp->fpswa = 0; + + return bp; +} diff --git a/kernel/arch/ia64/hp/sim/boot/ssc.h b/kernel/arch/ia64/hp/sim/boot/ssc.h new file mode 100644 index 000000000..3b94c03e4 --- 
/dev/null +++ b/kernel/arch/ia64/hp/sim/boot/ssc.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + */ +#ifndef ssc_h +#define ssc_h + +/* Simulator system calls: */ + +#define SSC_CONSOLE_INIT 20 +#define SSC_GETCHAR 21 +#define SSC_PUTCHAR 31 +#define SSC_OPEN 50 +#define SSC_CLOSE 51 +#define SSC_READ 52 +#define SSC_WRITE 53 +#define SSC_GET_COMPLETION 54 +#define SSC_WAIT_COMPLETION 55 +#define SSC_CONNECT_INTERRUPT 58 +#define SSC_GENERATE_INTERRUPT 59 +#define SSC_SET_PERIODIC_INTERRUPT 60 +#define SSC_GET_RTC 65 +#define SSC_EXIT 66 +#define SSC_LOAD_SYMBOLS 69 +#define SSC_GET_TOD 74 + +#define SSC_GET_ARGS 75 + +/* + * Simulator system call. + */ +extern long ssc (long arg0, long arg1, long arg2, long arg3, int nr); + +#endif /* ssc_h */ diff --git a/kernel/arch/ia64/hp/sim/hpsim.S b/kernel/arch/ia64/hp/sim/hpsim.S new file mode 100644 index 000000000..ff16e8a85 --- /dev/null +++ b/kernel/arch/ia64/hp/sim/hpsim.S @@ -0,0 +1,10 @@ +#include + +/* + * Simulator system call. + */ +GLOBAL_ENTRY(ia64_ssc) + mov r15=r36 + break 0x80001 + br.ret.sptk.many rp +END(ia64_ssc) diff --git a/kernel/arch/ia64/hp/sim/hpsim_console.c b/kernel/arch/ia64/hp/sim/hpsim_console.c new file mode 100644 index 000000000..01663bc42 --- /dev/null +++ b/kernel/arch/ia64/hp/sim/hpsim_console.c @@ -0,0 +1,76 @@ +/* + * Platform dependent support for HP simulator. 
+ * + * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Vijay Chander + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "hpsim_ssc.h" + +static int simcons_init (struct console *, char *); +static void simcons_write (struct console *, const char *, unsigned); +static struct tty_driver *simcons_console_device (struct console *, int *); + +static struct console hpsim_cons = { + .name = "simcons", + .write = simcons_write, + .device = simcons_console_device, + .setup = simcons_init, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + +static int +simcons_init (struct console *cons, char *options) +{ + return 0; +} + +static void +simcons_write (struct console *cons, const char *buf, unsigned count) +{ + unsigned long ch; + + while (count-- > 0) { + ch = *buf++; + ia64_ssc(ch, 0, 0, 0, SSC_PUTCHAR); + if (ch == '\n') + ia64_ssc('\r', 0, 0, 0, SSC_PUTCHAR); + } +} + +static struct tty_driver *simcons_console_device (struct console *c, int *index) +{ + *index = c->index; + return hp_simserial_driver; +} + +int simcons_register(void) +{ + if (!ia64_platform_is("hpsim")) + return 1; + + if (hpsim_cons.flags & CON_ENABLED) + return 1; + + register_console(&hpsim_cons); + return 0; +} diff --git a/kernel/arch/ia64/hp/sim/hpsim_irq.c b/kernel/arch/ia64/hp/sim/hpsim_irq.c new file mode 100644 index 000000000..0aa70ebda --- /dev/null +++ b/kernel/arch/ia64/hp/sim/hpsim_irq.c @@ -0,0 +1,75 @@ +/* + * Platform dependent support for HP simulator. 
+ * + * Copyright (C) 1998-2001 Hewlett-Packard Co + * Copyright (C) 1998-2001 David Mosberger-Tang + */ + +#include +#include +#include +#include + +#include "hpsim_ssc.h" + +static unsigned int +hpsim_irq_startup(struct irq_data *data) +{ + return 0; +} + +static void +hpsim_irq_noop(struct irq_data *data) +{ +} + +static int +hpsim_set_affinity_noop(struct irq_data *d, const struct cpumask *b, bool f) +{ + return 0; +} + +static struct irq_chip irq_type_hp_sim = { + .name = "hpsim", + .irq_startup = hpsim_irq_startup, + .irq_shutdown = hpsim_irq_noop, + .irq_enable = hpsim_irq_noop, + .irq_disable = hpsim_irq_noop, + .irq_ack = hpsim_irq_noop, + .irq_set_affinity = hpsim_set_affinity_noop, +}; + +static void hpsim_irq_set_chip(int irq) +{ + struct irq_chip *chip = irq_get_chip(irq); + + if (chip == &no_irq_chip) + irq_set_chip(irq, &irq_type_hp_sim); +} + +static void hpsim_connect_irq(int intr, int irq) +{ + ia64_ssc(intr, irq, 0, 0, SSC_CONNECT_INTERRUPT); +} + +int hpsim_get_irq(int intr) +{ + int irq = assign_irq_vector(AUTO_ASSIGN); + + if (irq >= 0) { + hpsim_irq_set_chip(irq); + irq_set_handler(irq, handle_simple_irq); + hpsim_connect_irq(intr, irq); + } + + return irq; +} + +void __init +hpsim_irq_init (void) +{ + int i; + + for_each_active_irq(i) + hpsim_irq_set_chip(i); +} diff --git a/kernel/arch/ia64/hp/sim/hpsim_machvec.c b/kernel/arch/ia64/hp/sim/hpsim_machvec.c new file mode 100644 index 000000000..c21419359 --- /dev/null +++ b/kernel/arch/ia64/hp/sim/hpsim_machvec.c @@ -0,0 +1,3 @@ +#define MACHVEC_PLATFORM_NAME hpsim +#define MACHVEC_PLATFORM_HEADER +#include diff --git a/kernel/arch/ia64/hp/sim/hpsim_setup.c b/kernel/arch/ia64/hp/sim/hpsim_setup.c new file mode 100644 index 000000000..664a5402a --- /dev/null +++ b/kernel/arch/ia64/hp/sim/hpsim_setup.c @@ -0,0 +1,40 @@ +/* + * Platform dependent support for HP simulator. 
+ * + * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Vijay Chander + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "hpsim_ssc.h" + +void +ia64_ctl_trace (long on) +{ + ia64_ssc(on, 0, 0, 0, SSC_CTL_TRACE); +} + +void __init +hpsim_setup (char **cmdline_p) +{ + ROOT_DEV = Root_SDA1; /* default to first SCSI drive */ + + simcons_register(); +} diff --git a/kernel/arch/ia64/hp/sim/hpsim_ssc.h b/kernel/arch/ia64/hp/sim/hpsim_ssc.h new file mode 100644 index 000000000..bfa390627 --- /dev/null +++ b/kernel/arch/ia64/hp/sim/hpsim_ssc.h @@ -0,0 +1,36 @@ +/* + * Platform dependent support for HP simulator. + * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang + * Copyright (C) 1999 Vijay Chander + */ +#ifndef _IA64_PLATFORM_HPSIM_SSC_H +#define _IA64_PLATFORM_HPSIM_SSC_H + +/* Simulator system calls: */ + +#define SSC_CONSOLE_INIT 20 +#define SSC_GETCHAR 21 +#define SSC_PUTCHAR 31 +#define SSC_CONNECT_INTERRUPT 58 +#define SSC_GENERATE_INTERRUPT 59 +#define SSC_SET_PERIODIC_INTERRUPT 60 +#define SSC_GET_RTC 65 +#define SSC_EXIT 66 +#define SSC_LOAD_SYMBOLS 69 +#define SSC_GET_TOD 74 +#define SSC_CTL_TRACE 76 + +#define SSC_NETDEV_PROBE 100 +#define SSC_NETDEV_SEND 101 +#define SSC_NETDEV_RECV 102 +#define SSC_NETDEV_ATTACH 103 +#define SSC_NETDEV_DETACH 104 + +/* + * Simulator system call. 
+ */ +extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr); + +#endif /* _IA64_PLATFORM_HPSIM_SSC_H */ diff --git a/kernel/arch/ia64/hp/sim/simeth.c b/kernel/arch/ia64/hp/sim/simeth.c new file mode 100644 index 000000000..d1b04c4c9 --- /dev/null +++ b/kernel/arch/ia64/hp/sim/simeth.c @@ -0,0 +1,509 @@ +/* + * Simulated Ethernet Driver + * + * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co + * Stephane Eranian + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hpsim_ssc.h" + +#define SIMETH_RECV_MAX 10 + +/* + * Maximum possible received frame for Ethernet. + * We preallocate an sk_buff of that size to avoid costly + * memcpy for temporary buffer into sk_buff. We do basically + * what's done in other drivers, like eepro with a ring. + * The difference is, of course, that we don't have real DMA !!! + */ +#define SIMETH_FRAME_SIZE ETH_FRAME_LEN + + +#define NETWORK_INTR 8 + +struct simeth_local { + struct net_device_stats stats; + int simfd; /* descriptor in the simulator */ +}; + +static int simeth_probe1(void); +static int simeth_open(struct net_device *dev); +static int simeth_close(struct net_device *dev); +static int simeth_tx(struct sk_buff *skb, struct net_device *dev); +static int simeth_rx(struct net_device *dev); +static struct net_device_stats *simeth_get_stats(struct net_device *dev); +static irqreturn_t simeth_interrupt(int irq, void *dev_id); +static void set_multicast_list(struct net_device *dev); +static int simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr); + +static char *simeth_version="0.3"; + +/* + * This variable is used to establish a mapping between the Linux/ia64 kernel + * and the host linux kernel. + * + * As of today, we support only one card, even though most of the code + * is ready for many more. 
The mapping is then: + * linux/ia64 -> linux/x86 + * eth0 -> eth1 + * + * In the future, with some string operations, we could easily support up + * to 10 cards (0-9). + * + * The default mapping can be changed on the kernel command line by + * specifying simeth=ethX (or whatever string you want). + */ +static char *simeth_device="eth0"; /* default host interface to use */ + + + +static volatile unsigned int card_count; /* how many cards "found" so far */ +static int simeth_debug; /* set to 1 to get debug information */ + +/* + * Used to catch IFF_UP & IFF_DOWN events + */ +static struct notifier_block simeth_dev_notifier = { + simeth_device_event, + NULL +}; + + +/* + * Function used when using a kernel command line option. + * + * Format: simeth=interface_name (like eth0) + */ +static int __init +simeth_setup(char *str) +{ + simeth_device = str; + return 1; +} + +__setup("simeth=", simeth_setup); + +/* + * Function used to probe for simeth devices when not installed + * as a loadable module + */ + +int __init +simeth_probe (void) +{ + int r; + + printk(KERN_INFO "simeth: v%s\n", simeth_version); + + r = simeth_probe1(); + + if (r == 0) register_netdevice_notifier(&simeth_dev_notifier); + + return r; +} + +static inline int +netdev_probe(char *name, unsigned char *ether) +{ + return ia64_ssc(__pa(name), __pa(ether), 0,0, SSC_NETDEV_PROBE); +} + + +static inline int +netdev_attach(int fd, int irq, unsigned int ipaddr) +{ + /* this puts the host interface in the right mode (start interrupting) */ + return ia64_ssc(fd, ipaddr, 0,0, SSC_NETDEV_ATTACH); +} + + +static inline int +netdev_detach(int fd) +{ + /* + * inactivate the host interface (don't interrupt anymore) */ + return ia64_ssc(fd, 0,0,0, SSC_NETDEV_DETACH); +} + +static inline int +netdev_send(int fd, unsigned char *buf, unsigned int len) +{ + return ia64_ssc(fd, __pa(buf), len, 0, SSC_NETDEV_SEND); +} + +static inline int +netdev_read(int fd, unsigned char *buf, unsigned int len) +{ + return ia64_ssc(fd,
__pa(buf), len, 0, SSC_NETDEV_RECV); +} + +static const struct net_device_ops simeth_netdev_ops = { + .ndo_open = simeth_open, + .ndo_stop = simeth_close, + .ndo_start_xmit = simeth_tx, + .ndo_get_stats = simeth_get_stats, + .ndo_set_rx_mode = set_multicast_list, /* not yet used */ + +}; + +/* + * Function shared with module code, so cannot be in init section + * + * So far this function "detects" only one card (test_&_set) but could + * be extended easily. + * + * Return: + * - -ENODEV is no device found + * - -ENOMEM is no more memory + * - 0 otherwise + */ +static int +simeth_probe1(void) +{ + unsigned char mac_addr[ETH_ALEN]; + struct simeth_local *local; + struct net_device *dev; + int fd, err, rc; + + /* + * XXX Fix me + * let's support just one card for now + */ + if (test_and_set_bit(0, &card_count)) + return -ENODEV; + + /* + * check with the simulator for the device + */ + fd = netdev_probe(simeth_device, mac_addr); + if (fd == -1) + return -ENODEV; + + dev = alloc_etherdev(sizeof(struct simeth_local)); + if (!dev) + return -ENOMEM; + + memcpy(dev->dev_addr, mac_addr, sizeof(mac_addr)); + + local = netdev_priv(dev); + local->simfd = fd; /* keep track of underlying file descriptor */ + + dev->netdev_ops = &simeth_netdev_ops; + + err = register_netdev(dev); + if (err) { + free_netdev(dev); + return err; + } + + /* + * attach the interrupt in the simulator, this does enable interrupts + * until a netdev_attach() is called + */ + if ((rc = hpsim_get_irq(NETWORK_INTR)) < 0) + panic("%s: out of interrupt vectors!\n", __func__); + dev->irq = rc; + + printk(KERN_INFO "%s: hosteth=%s simfd=%d, HwAddr=%pm, IRQ %d\n", + dev->name, simeth_device, local->simfd, dev->dev_addr, dev->irq); + + return 0; +} + +/* + * actually binds the device to an interrupt vector + */ +static int +simeth_open(struct net_device *dev) +{ + if (request_irq(dev->irq, simeth_interrupt, 0, "simeth", dev)) { + printk(KERN_WARNING "simeth: unable to get IRQ %d.\n", dev->irq); + return -EAGAIN; 
+ } + + netif_start_queue(dev); + + return 0; +} + +/* copied from lapbether.c */ +static __inline__ int dev_is_ethdev(struct net_device *dev) +{ + return ( dev->type == ARPHRD_ETHER && strncmp(dev->name, "dummy", 5)); +} + + +/* + * Handler for IFF_UP or IFF_DOWN + * + * The reason for that is that we don't want to be interrupted when the + * interface is down. There is no way to unconnect in the simualtor. Instead + * we use this function to shutdown packet processing in the frame filter + * in the simulator. Thus no interrupts are generated + * + * + * That's also the place where we pass the IP address of this device to the + * simulator so that that we can start filtering packets for it + * + * There may be a better way of doing this, but I don't know which yet. + */ +static int +simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct simeth_local *local; + struct in_device *in_dev; + struct in_ifaddr **ifap = NULL; + struct in_ifaddr *ifa = NULL; + int r; + + + if ( ! dev ) { + printk(KERN_WARNING "simeth_device_event dev=0\n"); + return NOTIFY_DONE; + } + + if (dev_net(dev) != &init_net) + return NOTIFY_DONE; + + if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE; + + /* + * Check whether or not it's for an ethernet device + * + * XXX Fixme: This works only as long as we support one + * type of ethernet device. 
+ */ + if ( !dev_is_ethdev(dev) ) return NOTIFY_DONE; + + if ((in_dev=dev->ip_ptr) != NULL) { + for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next) + if (strcmp(dev->name, ifa->ifa_label) == 0) break; + } + if ( ifa == NULL ) { + printk(KERN_ERR "simeth_open: can't find device %s's ifa\n", dev->name); + return NOTIFY_DONE; + } + + printk(KERN_INFO "simeth_device_event: %s ipaddr=0x%x\n", + dev->name, ntohl(ifa->ifa_local)); + + /* + * XXX Fix me + * if the device was up, and we're simply reconfiguring it, not sure + * we get DOWN then UP. + */ + + local = netdev_priv(dev); + /* now do it for real */ + r = event == NETDEV_UP ? + netdev_attach(local->simfd, dev->irq, ntohl(ifa->ifa_local)): + netdev_detach(local->simfd); + + printk(KERN_INFO "simeth: netdev_attach/detach: event=%s ->%d\n", + event == NETDEV_UP ? "attach":"detach", r); + + return NOTIFY_DONE; +} + +static int +simeth_close(struct net_device *dev) +{ + netif_stop_queue(dev); + + free_irq(dev->irq, dev); + + return 0; +} + +/* + * Only used for debug + */ +static void +frame_print(unsigned char *from, unsigned char *frame, int len) +{ + int i; + + printk("%s: (%d) %02x", from, len, frame[0] & 0xff); + for(i=1; i < 6; i++ ) { + printk(":%02x", frame[i] &0xff); + } + printk(" %2x", frame[6] &0xff); + for(i=7; i < 12; i++ ) { + printk(":%02x", frame[i] &0xff); + } + printk(" [%02x%02x]\n", frame[12], frame[13]); + + for(i=14; i < len; i++ ) { + printk("%02x ", frame[i] &0xff); + if ( (i%10)==0) printk("\n"); + } + printk("\n"); +} + + +/* + * Function used to transmit of frame, very last one on the path before + * going to the simulator. + */ +static int +simeth_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct simeth_local *local = netdev_priv(dev); + +#if 0 + /* ensure we have at least ETH_ZLEN bytes (min frame size) */ + unsigned int length = ETH_ZLEN < skb->len ? skb->len : ETH_ZLEN; + /* Where do the extra padding bytes comes from inthe skbuff ? 
*/ +#else + /* the real driver in the host system is going to take care of that + * or maybe it's the NIC itself. + */ + unsigned int length = skb->len; +#endif + + local->stats.tx_bytes += skb->len; + local->stats.tx_packets++; + + + if (simeth_debug > 5) frame_print("simeth_tx", skb->data, length); + + netdev_send(local->simfd, skb->data, length); + + /* + * we are synchronous on write, so we don't simulate a + * trasnmit complete interrupt, thus we don't need to arm a tx + */ + + dev_kfree_skb(skb); + return NETDEV_TX_OK; +}

/*
 * make_new_skb - allocate a receive buffer for one simulated frame
 * @dev: device the buffer is for; only dev->name is used, in the
 *       allocation-failure message
 *
 * Returns an skb with 2 bytes of headroom reserved and SIMETH_FRAME_SIZE
 * bytes already claimed with skb_put(), or NULL if dev_alloc_skb() fails.
 */
static inline struct sk_buff *
make_new_skb(struct net_device *dev)
{
	struct sk_buff *nskb;

	/*
	 * The +2 is used to make sure that the IP header is nicely
	 * aligned (on 4byte boundary I assume 14+2=16)
	 */
	nskb = dev_alloc_skb(SIMETH_FRAME_SIZE + 2);
	if ( nskb == NULL ) {
		printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name);
		return NULL;
	}

	skb_reserve(nskb, 2);	/* Align IP on 16 byte boundaries */

	skb_put(nskb,SIMETH_FRAME_SIZE);

	return nskb;
}

/*
 * simeth_rx - pull pending frames out of the simulator and hand them upstream
 * @dev: simeth device whose simulator descriptor (local->simfd) is read
 *
 * Called from simeth_interrupt().  Reads at most SIMETH_RECV_MAX frames per
 * call, one frame per netdev_read(), and passes each one to netif_rx().
 *
 * Return: 0 when the simulator has nothing left to read or no skb could be
 * allocated; otherwise the length of the last frame read, which tells the
 * caller the per-call quota was used up and it can call again.
 *
 * NOTE(review): the skb keeps the full SIMETH_FRAME_SIZE length claimed by
 * make_new_skb(); it is not trimmed to the number of bytes actually read.
 */
static int
simeth_rx(struct net_device *dev)
{
	struct simeth_local	*local = netdev_priv(dev);
	struct sk_buff		*skb;
	int			len = 0;
	int			rcv_count = SIMETH_RECV_MAX;

	/*
	 * the loop concept has been borrowed from other drivers
	 * looks to me like it's a throttling thing to avoid pushing to many
	 * packets at one time into the stack. Making sure we can process them
	 * upstream and make forward progress overall
	 */
	do {
		skb = make_new_skb(dev);
		if (skb == NULL) {
			printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name);
			local->stats.rx_dropped++;
			return 0;
		}

		/*
		 * Read only one frame at a time
		 */
		len = netdev_read(local->simfd, skb->data, SIMETH_FRAME_SIZE);
		if (len <= 0) {
			if (simeth_debug > 0)
				printk(KERN_WARNING "%s: count=%d netdev_read=0\n",
				       dev->name, SIMETH_RECV_MAX-rcv_count);
			/*
			 * No frame was stored in this skb, so release it here;
			 * it used to be leaked on every "nothing left" exit.
			 * A negative result is handled like "nothing left" so
			 * the interrupt loop stops instead of retrying forever
			 * (presumably the simulator reports errors as negative
			 * values, as the probe call does with -1 -- confirm).
			 * The _any variant is used because this runs from the
			 * interrupt handler.
			 */
			dev_kfree_skb_any(skb);
			len = 0;
			break;
		}

		/*
		 * XXX Fix me
		 * Should really do a csum+copy here
		 */

		/*
		 * Dump the frame before eth_type_trans(): frame_print()
		 * decodes the first 14 bytes as the Ethernet header, and
		 * eth_type_trans() advances skb->data past that header.
		 */
		if (simeth_debug > 6)
			frame_print("simeth_rx", skb->data, len);

		skb->protocol = eth_type_trans(skb, dev);

		/*
		 * push the packet up & trigger software interrupt
		 */
		netif_rx(skb);

		local->stats.rx_packets++;
		local->stats.rx_bytes += len;

	} while ( --rcv_count );

	return len; /* 0 = nothing left to read, otherwise, we can try again */
}

/*
 * Interrupt handler (Yes, we can do it too !!!)
 *
 * dev_id is the net_device that was passed to request_irq() in simeth_open().
 */
static irqreturn_t
simeth_interrupt(int irq, void *dev_id)
{
	struct net_device *dev = dev_id;

	/*
	 * very simple loop because we get interrupts only when receiving;
	 * keep draining until simeth_rx() reports nothing left (returns 0)
	 */
	while (simeth_rx(dev))
		;
	return IRQ_HANDLED;
}

/* Return the counters kept in the device's private simeth_local area. */
static struct net_device_stats *
simeth_get_stats(struct net_device *dev)
{
	struct simeth_local *local = netdev_priv(dev);

	return &local->stats;
}

/* fake multicast ability: only logs that it was called */
static void
set_multicast_list(struct net_device *dev)
{
	printk(KERN_WARNING "%s: set_multicast_list called\n", dev->name);
}

__initcall(simeth_probe);
diff --git a/kernel/arch/ia64/hp/sim/simscsi.c b/kernel/arch/ia64/hp/sim/simscsi.c new file mode 100644 index 000000000..3a428f19a --- /dev/null +++ b/kernel/arch/ia64/hp/sim/simscsi.c @@ -0,0 +1,380 @@ +/* + * Simulated SCSI driver.
+ * + * Copyright (C) 1999, 2001-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * + * 02/01/15 David Mosberger Updated for v2.5.1 + * 99/12/18 David Mosberger Added support for READ10/WRITE10 needed by linux v2.3.33 + */ +#include +#include +#include +#include +#include +#include +#include "hpsim_ssc.h" + +#include +#include +#include +#include + +#define DEBUG_SIMSCSI 0 + +#define SIMSCSI_REQ_QUEUE_LEN 64 +#define DEFAULT_SIMSCSI_ROOT "/var/ski-disks/sd" + +/* Simulator system calls: */ + +#define SSC_OPEN 50 +#define SSC_CLOSE 51 +#define SSC_READ 52 +#define SSC_WRITE 53 +#define SSC_GET_COMPLETION 54 +#define SSC_WAIT_COMPLETION 55 + +#define SSC_WRITE_ACCESS 2 +#define SSC_READ_ACCESS 1 + +#if DEBUG_SIMSCSI + int simscsi_debug; +# define DBG simscsi_debug +#else +# define DBG 0 +#endif + +static struct Scsi_Host *host; + +static void simscsi_interrupt (unsigned long val); +static DECLARE_TASKLET(simscsi_tasklet, simscsi_interrupt, 0); + +struct disk_req { + unsigned long addr; + unsigned len; +}; + +struct disk_stat { + int fd; + unsigned count; +}; + +static int desc[16] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 +}; + +static struct queue_entry { + struct scsi_cmnd *sc; +} queue[SIMSCSI_REQ_QUEUE_LEN]; + +static int rd, wr; +static atomic_t num_reqs = ATOMIC_INIT(0); + +/* base name for default disks */ +static char *simscsi_root = DEFAULT_SIMSCSI_ROOT; + +#define MAX_ROOT_LEN 128 + +/* + * used to setup a new base for disk images + * to use /foo/bar/disk[a-z] as disk images + * you have to specify simscsi=/foo/bar/disk on the command line + */ +static int __init +simscsi_setup (char *s) +{ + /* XXX Fix me we may need to strcpy() ? 
*/ + if (strlen(s) > MAX_ROOT_LEN) { + printk(KERN_ERR "simscsi_setup: prefix too long---using default %s\n", + simscsi_root); + } else + simscsi_root = s; + return 1; +} + +__setup("simscsi=", simscsi_setup); + +static void +simscsi_interrupt (unsigned long val) +{ + struct scsi_cmnd *sc; + + while ((sc = queue[rd].sc) != NULL) { + atomic_dec(&num_reqs); + queue[rd].sc = NULL; + if (DBG) + printk("simscsi_interrupt: done with %ld\n", sc->serial_number); + (*sc->scsi_done)(sc); + rd = (rd + 1) % SIMSCSI_REQ_QUEUE_LEN; + } +} + +static int +simscsi_biosparam (struct scsi_device *sdev, struct block_device *n, + sector_t capacity, int ip[]) +{ + ip[0] = 64; /* heads */ + ip[1] = 32; /* sectors */ + ip[2] = capacity >> 11; /* cylinders */ + return 0; +} + +static void +simscsi_sg_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset) +{ + int i; + struct scatterlist *sl; + struct disk_stat stat; + struct disk_req req; + + stat.fd = desc[sc->device->id]; + + scsi_for_each_sg(sc, sl, scsi_sg_count(sc), i) { + req.addr = __pa(sg_virt(sl)); + req.len = sl->length; + if (DBG) + printk("simscsi_sg_%s @ %lx (off %lx) use_sg=%d len=%d\n", + mode == SSC_READ ? "read":"write", req.addr, offset, + scsi_sg_count(sc) - i, sl->length); + ia64_ssc(stat.fd, 1, __pa(&req), offset, mode); + ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION); + + /* should not happen in our case */ + if (stat.count != req.len) { + sc->result = DID_ERROR << 16; + return; + } + offset += sl->length; + } + sc->result = GOOD; +} + +/* + * function handling both READ_6/WRITE_6 (non-scatter/gather mode) + * commands. 
+ * Added 02/26/99 S.Eranian + */ +static void +simscsi_readwrite6 (struct scsi_cmnd *sc, int mode) +{ + unsigned long offset; + + offset = (((sc->cmnd[1] & 0x1f) << 16) | (sc->cmnd[2] << 8) | sc->cmnd[3])*512; + simscsi_sg_readwrite(sc, mode, offset); +} + +static size_t +simscsi_get_disk_size (int fd) +{ + struct disk_stat stat; + size_t bit, sectors = 0; + struct disk_req req; + char buf[512]; + + /* + * This is a bit kludgey: the simulator doesn't provide a + * direct way of determining the disk size, so we do a binary + * search, assuming a maximum disk size of 128GB. + */ + for (bit = (128UL << 30)/512; bit != 0; bit >>= 1) { + req.addr = __pa(&buf); + req.len = sizeof(buf); + ia64_ssc(fd, 1, __pa(&req), ((sectors | bit) - 1)*512, SSC_READ); + stat.fd = fd; + ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION); + if (stat.count == sizeof(buf)) + sectors |= bit; + } + return sectors - 1; /* return last valid sector number */ +} + +static void +simscsi_readwrite10 (struct scsi_cmnd *sc, int mode) +{ + unsigned long offset; + + offset = (((unsigned long)sc->cmnd[2] << 24) + | ((unsigned long)sc->cmnd[3] << 16) + | ((unsigned long)sc->cmnd[4] << 8) + | ((unsigned long)sc->cmnd[5] << 0))*512UL; + simscsi_sg_readwrite(sc, mode, offset); +} + +static int +simscsi_queuecommand_lck (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) +{ + unsigned int target_id = sc->device->id; + char fname[MAX_ROOT_LEN+16]; + size_t disk_size; + char *buf; + char localbuf[36]; +#if DEBUG_SIMSCSI + register long sp asm ("sp"); + + if (DBG) + printk("simscsi_queuecommand: target=%d,cmnd=%u,sc=%lu,sp=%lx,done=%p\n", + target_id, sc->cmnd[0], sc->serial_number, sp, done); +#endif + + sc->result = DID_BAD_TARGET << 16; + sc->scsi_done = done; + if (target_id <= 15 && sc->device->lun == 0) { + switch (sc->cmnd[0]) { + case INQUIRY: + if (scsi_bufflen(sc) < 35) { + break; + } + sprintf (fname, "%s%c", simscsi_root, 'a' + target_id); + desc[target_id] = ia64_ssc(__pa(fname), 
SSC_READ_ACCESS|SSC_WRITE_ACCESS, + 0, 0, SSC_OPEN); + if (desc[target_id] < 0) { + /* disk doesn't exist... */ + break; + } + buf = localbuf; + buf[0] = 0; /* magnetic disk */ + buf[1] = 0; /* not a removable medium */ + buf[2] = 2; /* SCSI-2 compliant device */ + buf[3] = 2; /* SCSI-2 response data format */ + buf[4] = 31; /* additional length (bytes) */ + buf[5] = 0; /* reserved */ + buf[6] = 0; /* reserved */ + buf[7] = 0; /* various flags */ + memcpy(buf + 8, "HP SIMULATED DISK 0.00", 28); + scsi_sg_copy_from_buffer(sc, buf, 36); + sc->result = GOOD; + break; + + case TEST_UNIT_READY: + sc->result = GOOD; + break; + + case READ_6: + if (desc[target_id] < 0 ) + break; + simscsi_readwrite6(sc, SSC_READ); + break; + + case READ_10: + if (desc[target_id] < 0 ) + break; + simscsi_readwrite10(sc, SSC_READ); + break; + + case WRITE_6: + if (desc[target_id] < 0) + break; + simscsi_readwrite6(sc, SSC_WRITE); + break; + + case WRITE_10: + if (desc[target_id] < 0) + break; + simscsi_readwrite10(sc, SSC_WRITE); + break; + + case READ_CAPACITY: + if (desc[target_id] < 0 || scsi_bufflen(sc) < 8) { + break; + } + buf = localbuf; + disk_size = simscsi_get_disk_size(desc[target_id]); + + buf[0] = (disk_size >> 24) & 0xff; + buf[1] = (disk_size >> 16) & 0xff; + buf[2] = (disk_size >> 8) & 0xff; + buf[3] = (disk_size >> 0) & 0xff; + /* set block size of 512 bytes: */ + buf[4] = 0; + buf[5] = 0; + buf[6] = 2; + buf[7] = 0; + scsi_sg_copy_from_buffer(sc, buf, 8); + sc->result = GOOD; + break; + + case MODE_SENSE: + case MODE_SENSE_10: + /* sd.c uses this to determine whether disk does write-caching. 
*/ + scsi_sg_copy_from_buffer(sc, (char *)empty_zero_page, + PAGE_SIZE); + sc->result = GOOD; + break; + + case START_STOP: + printk(KERN_ERR "START_STOP\n"); + break; + + default: + panic("simscsi: unknown SCSI command %u\n", sc->cmnd[0]); + } + } + if (sc->result == DID_BAD_TARGET) { + sc->result |= DRIVER_SENSE << 24; + sc->sense_buffer[0] = 0x70; + sc->sense_buffer[2] = 0x00; + } + if (atomic_read(&num_reqs) >= SIMSCSI_REQ_QUEUE_LEN) { + panic("Attempt to queue command while command is pending!!"); + } + atomic_inc(&num_reqs); + queue[wr].sc = sc; + wr = (wr + 1) % SIMSCSI_REQ_QUEUE_LEN; + + tasklet_schedule(&simscsi_tasklet); + return 0; +} + +static DEF_SCSI_QCMD(simscsi_queuecommand) + +static int +simscsi_host_reset (struct scsi_cmnd *sc) +{ + printk(KERN_ERR "simscsi_host_reset: not implemented\n"); + return 0; +} + +static struct scsi_host_template driver_template = { + .name = "simulated SCSI host adapter", + .proc_name = "simscsi", + .queuecommand = simscsi_queuecommand, + .eh_host_reset_handler = simscsi_host_reset, + .bios_param = simscsi_biosparam, + .can_queue = SIMSCSI_REQ_QUEUE_LEN, + .this_id = -1, + .sg_tablesize = SG_ALL, + .max_sectors = 1024, + .cmd_per_lun = SIMSCSI_REQ_QUEUE_LEN, + .use_clustering = DISABLE_CLUSTERING, +}; + +static int __init +simscsi_init(void) +{ + int error; + + host = scsi_host_alloc(&driver_template, 0); + if (!host) + return -ENOMEM; + + error = scsi_add_host(host, NULL); + if (error) + goto free_host; + scsi_scan_host(host); + return 0; + + free_host: + scsi_host_put(host); + return error; +} + +static void __exit +simscsi_exit(void) +{ + scsi_remove_host(host); + scsi_host_put(host); +} + +module_init(simscsi_init); +module_exit(simscsi_exit); diff --git a/kernel/arch/ia64/hp/sim/simserial.c b/kernel/arch/ia64/hp/sim/simserial.c new file mode 100644 index 000000000..e70cadec7 --- /dev/null +++ b/kernel/arch/ia64/hp/sim/simserial.c @@ -0,0 +1,545 @@ +/* + * Simulated Serial Driver (fake serial) + * + * This driver 
is mostly used for bringup purposes and will go away. + * It has a strong dependency on the system console. All outputs + * are rerouted to the same facility as the one used by printk which, in our + * case means sys_sim.c console (goes via the simulator). + * + * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger-Tang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "hpsim_ssc.h" + +#undef SIMSERIAL_DEBUG /* define this to get some debug information */ + +#define KEYBOARD_INTR 3 /* must match with simulator! */ + +#define NR_PORTS 1 /* only one port for now */ + +struct serial_state { + struct tty_port port; + struct circ_buf xmit; + int irq; + int x_char; +}; + +static struct serial_state rs_table[NR_PORTS]; + +struct tty_driver *hp_simserial_driver; + +static struct console *console; + +static void receive_chars(struct tty_port *port) +{ + unsigned char ch; + static unsigned char seen_esc = 0; + + while ( (ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR)) ) { + if (ch == 27 && seen_esc == 0) { + seen_esc = 1; + continue; + } else if (seen_esc == 1 && ch == 'O') { + seen_esc = 2; + continue; + } else if (seen_esc == 2) { + if (ch == 'P') /* F1 */ + show_state(); +#ifdef CONFIG_MAGIC_SYSRQ + if (ch == 'S') { /* F4 */ + do { + ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR); + } while (!ch); + handle_sysrq(ch); + } +#endif + seen_esc = 0; + continue; + } + seen_esc = 0; + + if (tty_insert_flip_char(port, ch, TTY_NORMAL) == 0) + break; + } + tty_flip_buffer_push(port); +} + +/* + * This is the serial driver's interrupt routine for a single port + */ +static irqreturn_t rs_interrupt_single(int irq, void *dev_id) +{ + struct serial_state *info = dev_id; + + receive_chars(&info->port); + + return IRQ_HANDLED; +} + +/* + * 
------------------------------------------------------------------- + * Here ends the serial interrupt routines. + * ------------------------------------------------------------------- + */ + +static int rs_put_char(struct tty_struct *tty, unsigned char ch) +{ + struct serial_state *info = tty->driver_data; + unsigned long flags; + + if (!info->xmit.buf) + return 0; + + local_irq_save(flags); + if (CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) == 0) { + local_irq_restore(flags); + return 0; + } + info->xmit.buf[info->xmit.head] = ch; + info->xmit.head = (info->xmit.head + 1) & (SERIAL_XMIT_SIZE-1); + local_irq_restore(flags); + return 1; +} + +static void transmit_chars(struct tty_struct *tty, struct serial_state *info, + int *intr_done) +{ + int count; + unsigned long flags; + + local_irq_save(flags); + + if (info->x_char) { + char c = info->x_char; + + console->write(console, &c, 1); + + info->x_char = 0; + + goto out; + } + + if (info->xmit.head == info->xmit.tail || tty->stopped) { +#ifdef SIMSERIAL_DEBUG + printk("transmit_chars: head=%d, tail=%d, stopped=%d\n", + info->xmit.head, info->xmit.tail, tty->stopped); +#endif + goto out; + } + /* + * We removed the loop and try to do it in to chunks. We need + * 2 operations maximum because it's a ring buffer. + * + * First from current to tail if possible. 
+ * Then from the beginning of the buffer until necessary + */ + + count = min(CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE), + SERIAL_XMIT_SIZE - info->xmit.tail); + console->write(console, info->xmit.buf+info->xmit.tail, count); + + info->xmit.tail = (info->xmit.tail+count) & (SERIAL_XMIT_SIZE-1); + + /* + * We have more at the beginning of the buffer + */ + count = CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); + if (count) { + console->write(console, info->xmit.buf, count); + info->xmit.tail += count; + } +out: + local_irq_restore(flags); +} + +static void rs_flush_chars(struct tty_struct *tty) +{ + struct serial_state *info = tty->driver_data; + + if (info->xmit.head == info->xmit.tail || tty->stopped || + !info->xmit.buf) + return; + + transmit_chars(tty, info, NULL); +} + +static int rs_write(struct tty_struct * tty, + const unsigned char *buf, int count) +{ + struct serial_state *info = tty->driver_data; + int c, ret = 0; + unsigned long flags; + + if (!info->xmit.buf) + return 0; + + local_irq_save(flags); + while (1) { + c = CIRC_SPACE_TO_END(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); + if (count < c) + c = count; + if (c <= 0) { + break; + } + memcpy(info->xmit.buf + info->xmit.head, buf, c); + info->xmit.head = ((info->xmit.head + c) & + (SERIAL_XMIT_SIZE-1)); + buf += c; + count -= c; + ret += c; + } + local_irq_restore(flags); + /* + * Hey, we transmit directly from here in our case + */ + if (CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) && + !tty->stopped) + transmit_chars(tty, info, NULL); + + return ret; +} + +static int rs_write_room(struct tty_struct *tty) +{ + struct serial_state *info = tty->driver_data; + + return CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); +} + +static int rs_chars_in_buffer(struct tty_struct *tty) +{ + struct serial_state *info = tty->driver_data; + + return CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); +} + +static void 
rs_flush_buffer(struct tty_struct *tty) +{ + struct serial_state *info = tty->driver_data; + unsigned long flags; + + local_irq_save(flags); + info->xmit.head = info->xmit.tail = 0; + local_irq_restore(flags); + + tty_wakeup(tty); +} + +/* + * This function is used to send a high-priority XON/XOFF character to + * the device + */ +static void rs_send_xchar(struct tty_struct *tty, char ch) +{ + struct serial_state *info = tty->driver_data; + + info->x_char = ch; + if (ch) { + /* + * I guess we could call console->write() directly but + * let's do that for now. + */ + transmit_chars(tty, info, NULL); + } +} + +/* + * ------------------------------------------------------------ + * rs_throttle() + * + * This routine is called by the upper-layer tty layer to signal that + * incoming characters should be throttled. + * ------------------------------------------------------------ + */ +static void rs_throttle(struct tty_struct * tty) +{ + if (I_IXOFF(tty)) + rs_send_xchar(tty, STOP_CHAR(tty)); + + printk(KERN_INFO "simrs_throttle called\n"); +} + +static void rs_unthrottle(struct tty_struct * tty) +{ + struct serial_state *info = tty->driver_data; + + if (I_IXOFF(tty)) { + if (info->x_char) + info->x_char = 0; + else + rs_send_xchar(tty, START_CHAR(tty)); + } + printk(KERN_INFO "simrs_unthrottle called\n"); +} + +static int rs_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) +{ + if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) && + (cmd != TIOCSERCONFIG) && (cmd != TIOCSERGSTRUCT) && + (cmd != TIOCMIWAIT)) { + if (tty->flags & (1 << TTY_IO_ERROR)) + return -EIO; + } + + switch (cmd) { + case TIOCGSERIAL: + case TIOCSSERIAL: + case TIOCSERGSTRUCT: + case TIOCMIWAIT: + return 0; + case TIOCSERCONFIG: + case TIOCSERGETLSR: /* Get line status register */ + return -EINVAL; + case TIOCSERGWILD: + case TIOCSERSWILD: + /* "setserial -W" is called in Debian boot */ + printk (KERN_INFO "TIOCSER?WILD ioctl obsolete, ignored.\n"); + return 0; + } + return 
-ENOIOCTLCMD; +} + +#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) + +/* + * This routine will shutdown a serial port; interrupts are disabled, and + * DTR is dropped if the hangup on close termio flag is on. + */ +static void shutdown(struct tty_port *port) +{ + struct serial_state *info = container_of(port, struct serial_state, + port); + unsigned long flags; + + local_irq_save(flags); + if (info->irq) + free_irq(info->irq, info); + + if (info->xmit.buf) { + free_page((unsigned long) info->xmit.buf); + info->xmit.buf = NULL; + } + local_irq_restore(flags); +} + +static void rs_close(struct tty_struct *tty, struct file * filp) +{ + struct serial_state *info = tty->driver_data; + + tty_port_close(&info->port, tty, filp); +} + +static void rs_hangup(struct tty_struct *tty) +{ + struct serial_state *info = tty->driver_data; + + rs_flush_buffer(tty); + tty_port_hangup(&info->port); +} + +static int activate(struct tty_port *port, struct tty_struct *tty) +{ + struct serial_state *state = container_of(port, struct serial_state, + port); + unsigned long flags, page; + int retval = 0; + + page = get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + local_irq_save(flags); + + if (state->xmit.buf) + free_page(page); + else + state->xmit.buf = (unsigned char *) page; + + if (state->irq) { + retval = request_irq(state->irq, rs_interrupt_single, 0, + "simserial", state); + if (retval) + goto errout; + } + + state->xmit.head = state->xmit.tail = 0; + + /* + * Set up the tty->alt_speed kludge + */ + if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI) + tty->alt_speed = 57600; + if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI) + tty->alt_speed = 115200; + if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI) + tty->alt_speed = 230400; + if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP) + tty->alt_speed = 460800; + +errout: + local_irq_restore(flags); + return retval; +} + + +/* + * This routine is called whenever a serial port is 
opened. It + * enables interrupts for a serial port, linking in its async structure into + * the IRQ chain. It also performs the serial-specific + * initialization for the tty structure. + */ +static int rs_open(struct tty_struct *tty, struct file * filp) +{ + struct serial_state *info = rs_table + tty->index; + struct tty_port *port = &info->port; + + tty->driver_data = info; + port->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0; + + /* + * figure out which console to use (should be one already) + */ + console = console_drivers; + while (console) { + if ((console->flags & CON_ENABLED) && console->write) break; + console = console->next; + } + + return tty_port_open(port, tty, filp); +} + +/* + * /proc fs routines.... + */ + +static int rs_proc_show(struct seq_file *m, void *v) +{ + int i; + + seq_printf(m, "simserinfo:1.0\n"); + for (i = 0; i < NR_PORTS; i++) + seq_printf(m, "%d: uart:16550 port:3F8 irq:%d\n", + i, rs_table[i].irq); + return 0; +} + +static int rs_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, rs_proc_show, NULL); +} + +static const struct file_operations rs_proc_fops = { + .owner = THIS_MODULE, + .open = rs_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct tty_operations hp_ops = { + .open = rs_open, + .close = rs_close, + .write = rs_write, + .put_char = rs_put_char, + .flush_chars = rs_flush_chars, + .write_room = rs_write_room, + .chars_in_buffer = rs_chars_in_buffer, + .flush_buffer = rs_flush_buffer, + .ioctl = rs_ioctl, + .throttle = rs_throttle, + .unthrottle = rs_unthrottle, + .send_xchar = rs_send_xchar, + .hangup = rs_hangup, + .proc_fops = &rs_proc_fops, +}; + +static const struct tty_port_operations hp_port_ops = { + .activate = activate, + .shutdown = shutdown, +}; + +static int __init simrs_init(void) +{ + struct serial_state *state; + int retval; + + if (!ia64_platform_is("hpsim")) + return -ENODEV; + + hp_simserial_driver = 
alloc_tty_driver(NR_PORTS); + if (!hp_simserial_driver) + return -ENOMEM; + + printk(KERN_INFO "SimSerial driver with no serial options enabled\n"); + + /* Initialize the tty_driver structure */ + + hp_simserial_driver->driver_name = "simserial"; + hp_simserial_driver->name = "ttyS"; + hp_simserial_driver->major = TTY_MAJOR; + hp_simserial_driver->minor_start = 64; + hp_simserial_driver->type = TTY_DRIVER_TYPE_SERIAL; + hp_simserial_driver->subtype = SERIAL_TYPE_NORMAL; + hp_simserial_driver->init_termios = tty_std_termios; + hp_simserial_driver->init_termios.c_cflag = + B9600 | CS8 | CREAD | HUPCL | CLOCAL; + hp_simserial_driver->flags = TTY_DRIVER_REAL_RAW; + tty_set_operations(hp_simserial_driver, &hp_ops); + + state = rs_table; + tty_port_init(&state->port); + state->port.ops = &hp_port_ops; + state->port.close_delay = 0; /* XXX really 0? */ + + retval = hpsim_get_irq(KEYBOARD_INTR); + if (retval < 0) { + printk(KERN_ERR "%s: out of interrupt vectors!\n", + __func__); + goto err_free_tty; + } + + state->irq = retval; + + /* the port is imaginary */ + printk(KERN_INFO "ttyS0 at 0x03f8 (irq = %d) is a 16550\n", state->irq); + + tty_port_link_device(&state->port, hp_simserial_driver, 0); + retval = tty_register_driver(hp_simserial_driver); + if (retval) { + printk(KERN_ERR "Couldn't register simserial driver\n"); + goto err_free_tty; + } + + return 0; +err_free_tty: + put_tty_driver(hp_simserial_driver); + tty_port_destroy(&state->port); + return retval; +} + +#ifndef MODULE +__initcall(simrs_init); +#endif diff --git a/kernel/arch/ia64/hp/zx1/Makefile b/kernel/arch/ia64/hp/zx1/Makefile new file mode 100644 index 000000000..61e878729 --- /dev/null +++ b/kernel/arch/ia64/hp/zx1/Makefile @@ -0,0 +1,8 @@ +# +# ia64/hp/zx1/Makefile +# +# Copyright (C) 2002 Hewlett Packard +# Copyright (C) Alex Williamson (alex_williamson@hp.com) +# + +obj-$(CONFIG_IA64_GENERIC) += hpzx1_machvec.o hpzx1_swiotlb_machvec.o diff --git a/kernel/arch/ia64/hp/zx1/hpzx1_machvec.c 
b/kernel/arch/ia64/hp/zx1/hpzx1_machvec.c new file mode 100644 index 000000000..32518b0f9 --- /dev/null +++ b/kernel/arch/ia64/hp/zx1/hpzx1_machvec.c @@ -0,0 +1,3 @@ +#define MACHVEC_PLATFORM_NAME hpzx1 +#define MACHVEC_PLATFORM_HEADER +#include diff --git a/kernel/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c b/kernel/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c new file mode 100644 index 000000000..4392a96b3 --- /dev/null +++ b/kernel/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c @@ -0,0 +1,3 @@ +#define MACHVEC_PLATFORM_NAME hpzx1_swiotlb +#define MACHVEC_PLATFORM_HEADER +#include diff --git a/kernel/arch/ia64/include/asm/Kbuild b/kernel/arch/ia64/include/asm/Kbuild new file mode 100644 index 000000000..9b41b4bcc --- /dev/null +++ b/kernel/arch/ia64/include/asm/Kbuild @@ -0,0 +1,10 @@ + +generic-y += clkdev.h +generic-y += exec.h +generic-y += irq_work.h +generic-y += kvm_para.h +generic-y += mcs_spinlock.h +generic-y += preempt.h +generic-y += scatterlist.h +generic-y += trace_clock.h +generic-y += vtime.h diff --git a/kernel/arch/ia64/include/asm/acenv.h b/kernel/arch/ia64/include/asm/acenv.h new file mode 100644 index 000000000..35ff13afb --- /dev/null +++ b/kernel/arch/ia64/include/asm/acenv.h @@ -0,0 +1,52 @@ +/* + * IA64 specific ACPICA environments and implementation + * + * Copyright (C) 2014, Intel Corporation + * Author: Lv Zheng + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _ASM_IA64_ACENV_H +#define _ASM_IA64_ACENV_H + +#include + +#define COMPILER_DEPENDENT_INT64 long +#define COMPILER_DEPENDENT_UINT64 unsigned long + +/* Asm macros: cmpxchg retry loops that update the low two bits of the ACPI global lock word */ + +static inline int +ia64_acpi_acquire_global_lock(unsigned int *lock) +{ + unsigned int old, new, val; + do { + old = *lock; + new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); /* set bit 1; also set bit 0 if bit 1 was already set in old */ + val = ia64_cmpxchg4_acq(lock, new, old); + } while (unlikely (val != old)); /* retry until the word was swapped unchanged */ + return (new < 3) ? -1 : 0; /* -1 only when new == 2, i.e. bit 1 was clear in old and no higher bits are set */ +} + +static inline int +ia64_acpi_release_global_lock(unsigned int *lock) +{ + unsigned int old, new, val; + do { + old = *lock; + new = old & ~0x3; /* clear both low bits */ + val = ia64_cmpxchg4_acq(lock, new, old); /* NOTE(review): release path uses the _acq cmpxchg variant - confirm this ordering is intended */ + } while (unlikely (val != old)); + return old & 0x1; /* bit 0 of the value that was replaced */ +} + +#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = ia64_acpi_acquire_global_lock(&facs->global_lock)) + +#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = ia64_acpi_release_global_lock(&facs->global_lock)) + +#endif /* _ASM_IA64_ACENV_H */ diff --git a/kernel/arch/ia64/include/asm/acpi-ext.h b/kernel/arch/ia64/include/asm/acpi-ext.h new file mode 100644 index 000000000..7f8362b37 --- /dev/null +++ b/kernel/arch/ia64/include/asm/acpi-ext.h @@ -0,0 +1,20 @@ +/* + * (c) Copyright 2003, 2006 Hewlett-Packard Development Company, L.P. + * Alex Williamson + * Bjorn Helgaas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Vendor specific extensions to ACPI. 
+ */ + +#ifndef _ASM_IA64_ACPI_EXT_H +#define _ASM_IA64_ACPI_EXT_H + +#include + +extern acpi_status hp_acpi_csr_space (acpi_handle, u64 *base, u64 *length); + +#endif /* _ASM_IA64_ACPI_EXT_H */ diff --git a/kernel/arch/ia64/include/asm/acpi.h b/kernel/arch/ia64/include/asm/acpi.h new file mode 100644 index 000000000..aa0fdf125 --- /dev/null +++ b/kernel/arch/ia64/include/asm/acpi.h @@ -0,0 +1,147 @@ +/* + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 2000,2001 J.I. Lee + * Copyright (C) 2001,2002 Paul Diefenbaugh + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#ifndef _ASM_ACPI_H +#define _ASM_ACPI_H + +#ifdef __KERNEL__ + +#include + +#include +#include +#include + +#ifdef CONFIG_ACPI +extern int acpi_lapic; +#define acpi_disabled 0 /* ACPI always enabled on IA64 */ +#define acpi_noirq 0 /* ACPI always enabled on IA64 */ +#define acpi_pci_disabled 0 /* ACPI PCI always enabled on IA64 */ +#define acpi_strict 1 /* no ACPI spec workarounds on IA64 */ + +static inline bool acpi_has_cpu_in_madt(void) +{ + return !!acpi_lapic; +} +#endif +#define acpi_processor_cstate_check(x) (x) /* no idle limits on IA64 :) */ +static inline void disable_acpi(void) { } + +#ifdef CONFIG_IA64_GENERIC +const char *acpi_get_sysname (void); +#else +static inline const char *acpi_get_sysname (void) +{ +# if defined (CONFIG_IA64_HP_SIM) + return "hpsim"; +# elif defined (CONFIG_IA64_HP_ZX1) + return "hpzx1"; +# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB) + return "hpzx1_swiotlb"; +# elif defined (CONFIG_IA64_SGI_SN2) + return "sn2"; +# elif defined (CONFIG_IA64_SGI_UV) + return "uv"; +# elif defined (CONFIG_IA64_DIG) + return "dig"; +# elif defined(CONFIG_IA64_DIG_VTD) + return "dig_vtd"; +# else +# error Unknown platform. Fix acpi.c. +# endif +} +#endif +int acpi_request_vector (u32 int_type); +int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); + +/* Low-level suspend routine. */ +extern int acpi_suspend_lowlevel(void); + +extern unsigned long acpi_wakeup_address; + +/* + * Record the cpei override flag and current logical cpu. This is + * useful for CPU removal. 
+ */ +extern unsigned int can_cpei_retarget(void); +extern unsigned int is_cpu_cpei_target(unsigned int cpu); +extern void set_cpei_target_cpu(unsigned int cpu); +extern unsigned int get_cpei_target_cpu(void); +extern void prefill_possible_map(void); +#ifdef CONFIG_ACPI_HOTPLUG_CPU +extern int additional_cpus; +#else +#define additional_cpus 0 +#endif + +#ifdef CONFIG_ACPI_NUMA +#if MAX_NUMNODES > 256 +#define MAX_PXM_DOMAINS MAX_NUMNODES +#else +#define MAX_PXM_DOMAINS (256) +#endif +extern int pxm_to_nid_map[MAX_PXM_DOMAINS]; +extern int __initdata nid_to_pxm_map[MAX_NUMNODES]; +#endif + +static inline bool arch_has_acpi_pdc(void) { return true; } +static inline void arch_acpi_set_pdc_bits(u32 *buf) +{ + buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; +} + +#define acpi_unlazy_tlb(x) + +#ifdef CONFIG_ACPI_NUMA +extern cpumask_t early_cpu_possible_map; +#define for_each_possible_early_cpu(cpu) \ + for_each_cpu((cpu), &early_cpu_possible_map) + +static inline void per_cpu_scan_finalize(int min_cpus, int reserve_cpus) +{ + int low_cpu, high_cpu; + int cpu; + int next_nid = 0; + + low_cpu = cpumask_weight(&early_cpu_possible_map); + + high_cpu = max(low_cpu, min_cpus); + high_cpu = min(high_cpu + reserve_cpus, NR_CPUS); + + for (cpu = low_cpu; cpu < high_cpu; cpu++) { + cpumask_set_cpu(cpu, &early_cpu_possible_map); + if (node_cpuid[cpu].nid == NUMA_NO_NODE) { + node_cpuid[cpu].nid = next_nid; + next_nid++; + if (next_nid >= num_online_nodes()) + next_nid = 0; + } + } +} +#endif /* CONFIG_ACPI_NUMA */ + +#endif /*__KERNEL__*/ + +#endif /*_ASM_ACPI_H*/ diff --git a/kernel/arch/ia64/include/asm/agp.h b/kernel/arch/ia64/include/asm/agp.h new file mode 100644 index 000000000..01d09c401 --- /dev/null +++ b/kernel/arch/ia64/include/asm/agp.h @@ -0,0 +1,26 @@ +#ifndef _ASM_IA64_AGP_H +#define _ASM_IA64_AGP_H + +/* + * IA-64 specific AGP definitions. 
+ * + * Copyright (C) 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +/* + * To avoid memory-attribute aliasing issues, we require that the AGPGART engine operate + * in coherent mode, which lets us map the AGP memory as normal (write-back) memory + * (unlike x86, where it gets mapped "write-coalescing"). + */ +#define map_page_into_agp(page) /* nothing */ +#define unmap_page_from_agp(page) /* nothing */ +#define flush_agp_cache() mb() + +/* GATT allocation. Returns/accepts GATT kernel virtual address. */ +#define alloc_gatt_pages(order) \ + ((char *)__get_free_pages(GFP_KERNEL, (order))) +#define free_gatt_pages(table, order) \ + free_pages((unsigned long)(table), (order)) + +#endif /* _ASM_IA64_AGP_H */ diff --git a/kernel/arch/ia64/include/asm/asm-offsets.h b/kernel/arch/ia64/include/asm/asm-offsets.h new file mode 100644 index 000000000..d370ee36a --- /dev/null +++ b/kernel/arch/ia64/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/kernel/arch/ia64/include/asm/asmmacro.h b/kernel/arch/ia64/include/asm/asmmacro.h new file mode 100644 index 000000000..3ab6d75aa --- /dev/null +++ b/kernel/arch/ia64/include/asm/asmmacro.h @@ -0,0 +1,135 @@ +#ifndef _ASM_IA64_ASMMACRO_H +#define _ASM_IA64_ASMMACRO_H + +/* + * Copyright (C) 2000-2001, 2003-2004 Hewlett-Packard Co + * David Mosberger-Tang + */ + + +#define ENTRY(name) \ + .align 32; \ + .proc name; \ +name: + +#define ENTRY_MIN_ALIGN(name) \ + .align 16; \ + .proc name; \ +name: + +#define GLOBAL_ENTRY(name) \ + .global name; \ + ENTRY(name) + +#define END(name) \ + .endp name + +/* + * Helper macros to make unwind directives more readable: + */ + +/* prologue_gr: */ +#define ASM_UNW_PRLG_RP 0x8 +#define ASM_UNW_PRLG_PFS 0x4 +#define ASM_UNW_PRLG_PSP 0x2 +#define ASM_UNW_PRLG_PR 0x1 +#define ASM_UNW_PRLG_GRSAVE(ninputs) (32+(ninputs)) + +/* + * Helper macros for accessing user memory. 
+ * + * When adding any new .section/.previous entries here, make sure to + * also add it to the DISCARD section in arch/ia64/kernel/gate.lds.S or + * unpleasant things will happen. + */ + + .section "__ex_table", "a" // declare section & section attributes + .previous + +# define EX(y,x...) \ + .xdata4 "__ex_table", 99f-., y-.; \ + [99:] x +# define EXCLR(y,x...) \ + .xdata4 "__ex_table", 99f-., y-.+4; \ + [99:] x + +/* + * Tag MCA recoverable instruction ranges. + */ + + .section "__mca_table", "a" // declare section & section attributes + .previous + +# define MCA_RECOVER_RANGE(y) \ + .xdata4 "__mca_table", y-., 99f-.; \ + [99:] + +/* + * Mark instructions that need a load of a virtual address patched to be + * a load of a physical address. We use this either in critical performance + * path (ivt.S - TLB miss processing) or in places where it might not be + * safe to use a "tpa" instruction (mca_asm.S - error recovery). + */ + .section ".data..patch.vtop", "a" // declare section & section attributes + .previous + +#define LOAD_PHYSICAL(pr, reg, obj) \ +[1:](pr)movl reg = obj; \ + .xdata4 ".data..patch.vtop", 1b-. + +/* + * For now, we always put in the McKinley E9 workaround. On CPUs that don't need it, + * we'll patch out the work-around bundles with NOPs, so their impact is minimal. + */ +#define DO_MCKINLEY_E9_WORKAROUND + +#ifdef DO_MCKINLEY_E9_WORKAROUND + .section ".data..patch.mckinley_e9", "a" + .previous +/* workaround for Itanium 2 Errata 9: */ +# define FSYS_RETURN \ + .xdata4 ".data..patch.mckinley_e9", 1f-.; \ +1:{ .mib; \ + nop.m 0; \ + mov r16=ar.pfs; \ + br.call.sptk.many b7=2f;; \ + }; \ +2:{ .mib; \ + nop.m 0; \ + mov ar.pfs=r16; \ + br.ret.sptk.many b6;; \ + } +#else +# define FSYS_RETURN br.ret.sptk.many b6 +#endif + +/* + * If physical stack register size is different from DEF_NUM_STACK_REG, + * dynamically patch the kernel for correct size. 
+ */ + .section ".data..patch.phys_stack_reg", "a" + .previous +#define LOAD_PHYS_STACK_REG_SIZE(reg) \ +[1:] adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0; \ + .xdata4 ".data..patch.phys_stack_reg", 1b-. + +/* + * Up until early 2004, use of .align within a function caused bad unwind info. + * TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into nothing + * otherwise. + */ +#ifdef HAVE_WORKING_TEXT_ALIGN +# define TEXT_ALIGN(n) .align n +#else +# define TEXT_ALIGN(n) +#endif + +#ifdef HAVE_SERIALIZE_DIRECTIVE +# define dv_serialize_data .serialize.data +# define dv_serialize_instruction .serialize.instruction +#else +# define dv_serialize_data +# define dv_serialize_instruction +#endif + +#endif /* _ASM_IA64_ASMMACRO_H */ diff --git a/kernel/arch/ia64/include/asm/atomic.h b/kernel/arch/ia64/include/asm/atomic.h new file mode 100644 index 000000000..0bf03501f --- /dev/null +++ b/kernel/arch/ia64/include/asm/atomic.h @@ -0,0 +1,196 @@ +#ifndef _ASM_IA64_ATOMIC_H +#define _ASM_IA64_ATOMIC_H + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + * + * NOTE: don't mess with the types below! The "unsigned long" and + * "int" types were carefully placed so as to ensure proper operation + * of the macros. 
+ * + * Copyright (C) 1998, 1999, 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +#include + +#include +#include + + +#define ATOMIC_INIT(i) { (i) } +#define ATOMIC64_INIT(i) { (i) } + +#define atomic_read(v) ACCESS_ONCE((v)->counter) +#define atomic64_read(v) ACCESS_ONCE((v)->counter) + +#define atomic_set(v,i) (((v)->counter) = (i)) +#define atomic64_set(v,i) (((v)->counter) = (i)) + +#define ATOMIC_OP(op, c_op) \ +static __inline__ int \ +ia64_atomic_##op (int i, atomic_t *v) \ +{ \ + __s32 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \ + return new; \ +} + +ATOMIC_OP(add, +) +ATOMIC_OP(sub, -) + +#undef ATOMIC_OP + +#define atomic_add_return(i,v) \ +({ \ + int __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ + : ia64_atomic_add(__ia64_aar_i, v); \ +}) + +#define atomic_sub_return(i,v) \ +({ \ + int __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? 
ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ + : ia64_atomic_sub(__ia64_asr_i, v); \ +}) + +#define ATOMIC64_OP(op, c_op) \ +static __inline__ long \ +ia64_atomic64_##op (__s64 i, atomic64_t *v) \ +{ \ + __s64 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic64_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \ + return new; \ +} + +ATOMIC64_OP(add, +) +ATOMIC64_OP(sub, -) + +#undef ATOMIC64_OP + +#define atomic64_add_return(i,v) \ +({ \ + long __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ + : ia64_atomic64_add(__ia64_aar_i, v); \ +}) + +#define atomic64_sub_return(i,v) \ +({ \ + long __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? 
ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ + : ia64_atomic64_sub(__ia64_asr_i, v); \ +}) + +#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new)) +#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) + +#define atomic64_cmpxchg(v, old, new) \ + (cmpxchg(&((v)->counter), old, new)) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int c, old; + c = atomic_read(v); + for (;;) { + if (unlikely(c == (u))) + break; /* counter holds u: leave it untouched */ + old = atomic_cmpxchg((v), c, c + (a)); + if (likely(old == c)) + break; /* cmpxchg returned the value we expected: add is done */ + c = old; /* otherwise retry with the value cmpxchg returned */ + } + return c; /* last value read from v (compare with u to learn whether the add happened), not a boolean */ +} + + +static __inline__ long atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long c, old; + c = atomic64_read(v); + for (;;) { + if (unlikely(c == (u))) + break; /* counter holds u: leave it untouched */ + old = atomic64_cmpxchg((v), c, c + (a)); + if (likely(old == c)) + break; /* cmpxchg returned the value we expected: add is done */ + c = old; /* otherwise retry with the value cmpxchg returned */ + } + return c != (u); /* unlike __atomic_add_unless(): nonzero if the add happened, 0 if v held u */ +} + +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + +/* + * Atomically add I to V and return TRUE if the resulting value is + * negative. 
+ */ +static __inline__ int +atomic_add_negative (int i, atomic_t *v) +{ + return atomic_add_return(i, v) < 0; +} + +static __inline__ long +atomic64_add_negative (__s64 i, atomic64_t *v) +{ + return atomic64_add_return(i, v) < 0; +} + +#define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_inc_return(v) atomic_add_return(1, (v)) +#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) +#define atomic64_inc_return(v) atomic64_add_return(1, (v)) + +#define atomic_sub_and_test(i,v) (atomic_sub_return((i), (v)) == 0) +#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) +#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0) +#define atomic64_sub_and_test(i,v) (atomic64_sub_return((i), (v)) == 0) +#define atomic64_dec_and_test(v) (atomic64_sub_return(1, (v)) == 0) +#define atomic64_inc_and_test(v) (atomic64_add_return(1, (v)) == 0) + +#define atomic_add(i,v) (void)atomic_add_return((i), (v)) +#define atomic_sub(i,v) (void)atomic_sub_return((i), (v)) +#define atomic_inc(v) atomic_add(1, (v)) +#define atomic_dec(v) atomic_sub(1, (v)) + +#define atomic64_add(i,v) (void)atomic64_add_return((i), (v)) +#define atomic64_sub(i,v) (void)atomic64_sub_return((i), (v)) +#define atomic64_inc(v) atomic64_add(1, (v)) +#define atomic64_dec(v) atomic64_sub(1, (v)) + +#endif /* _ASM_IA64_ATOMIC_H */ diff --git a/kernel/arch/ia64/include/asm/barrier.h b/kernel/arch/ia64/include/asm/barrier.h new file mode 100644 index 000000000..f6769eb2b --- /dev/null +++ b/kernel/arch/ia64/include/asm/barrier.h @@ -0,0 +1,93 @@ +/* + * Memory barrier definitions. This is based on information published + * in the Processor Abstraction Layer and the System Abstraction Layer + * manual. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + */ +#ifndef _ASM_IA64_BARRIER_H +#define _ASM_IA64_BARRIER_H + +#include + +/* + * Macros to force memory ordering. 
In these descriptions, "previous" + * and "subsequent" refer to program order; "visible" means that all + * architecturally visible effects of a memory access have occurred + * (at a minimum, this means the memory has been read or written). + * + * wmb(): Guarantees that all preceding stores to memory- + * like regions are visible before any subsequent + * stores and that all following stores will be + * visible only after all previous stores. + * rmb(): Like wmb(), but for reads. + * mb(): wmb()/rmb() combo, i.e., all previous memory + * accesses are visible before all subsequent + * accesses and vice versa. This is also known as + * a "fence." + * + * Note: "mb()" and its variants cannot be used as a fence to order + * accesses to memory mapped I/O registers. For that, mf.a needs to + * be used. However, we don't want to always use mf.a because (a) + * it's (presumably) much slower than mf and (b) mf.a is supported for + * sequential memory pages only. + */ +#define mb() ia64_mf() +#define rmb() mb() +#define wmb() mb() + +#define dma_rmb() mb() +#define dma_wmb() mb() + +#ifdef CONFIG_SMP +# define smp_mb() mb() +#else +# define smp_mb() barrier() +#endif + +#define smp_rmb() smp_mb() +#define smp_wmb() smp_mb() + +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + +#define smp_mb__before_atomic() barrier() +#define smp_mb__after_atomic() barrier() + +/* + * IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq no + * need for asm trickery! + */ + +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ___p1; \ +}) + +/* + * XXX check on this ---I suspect what Linus really wants here is + * acquire vs release semantics but we can't discuss this stuff with + * Linus just yet. 
Grrr... + */ +#define set_mb(var, value) do { (var) = (value); mb(); } while (0) + +/* + * The group barrier in front of the rsm & ssm are necessary to ensure + * that none of the previous instructions in the same group are + * affected by the rsm/ssm. + */ + +#endif /* _ASM_IA64_BARRIER_H */ diff --git a/kernel/arch/ia64/include/asm/bitops.h b/kernel/arch/ia64/include/asm/bitops.h new file mode 100644 index 000000000..71e814524 --- /dev/null +++ b/kernel/arch/ia64/include/asm/bitops.h @@ -0,0 +1,456 @@ +#ifndef _ASM_IA64_BITOPS_H +#define _ASM_IA64_BITOPS_H + +/* + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64 + * O(1) scheduler patch + */ + +#ifndef _LINUX_BITOPS_H +#error only can be included directly +#endif + +#include +#include +#include +#include + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + * + * The address must be (at least) "long" aligned. + * Note that there are driver (e.g., eepro100) which use these operations to + * operate on hw-defined data-structures, so we can't easily change these + * operations to force a bigger alignment. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). 
+ */ +static __inline__ void +set_bit (int nr, volatile void *addr) +{ + __u32 bit, old, new; + volatile __u32 *m; + CMPXCHG_BUGCHECK_DECL + + m = (volatile __u32 *) addr + (nr >> 5); + bit = 1U << (nr & 31); /* 1U: a signed 1 << 31 overflows int (undefined behaviour) */ + do { + CMPXCHG_BUGCHECK(m); + old = *m; + new = old | bit; + } while (cmpxchg_acq(m, old, new) != old); +} + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static __inline__ void +__set_bit (int nr, volatile void *addr) +{ + *((__u32 *) addr + (nr >> 5)) |= (1U << (nr & 31)); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic() + * in order to ensure changes are visible on other processors. + */ +static __inline__ void +clear_bit (int nr, volatile void *addr) +{ + __u32 mask, old, new; + volatile __u32 *m; + CMPXCHG_BUGCHECK_DECL + + m = (volatile __u32 *) addr + (nr >> 5); + mask = ~(1U << (nr & 31)); + do { + CMPXCHG_BUGCHECK(m); + old = *m; + new = old & mask; + } while (cmpxchg_acq(m, old, new) != old); +} + +/** + * clear_bit_unlock - Clears a bit in memory with release + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit_unlock() is atomic and may not be reordered. It does + * contain a memory barrier suitable for unlock type operations. 
+ */ +static __inline__ void +clear_bit_unlock (int nr, volatile void *addr) +{ + __u32 mask, old, new; + volatile __u32 *m; + CMPXCHG_BUGCHECK_DECL + + m = (volatile __u32 *) addr + (nr >> 5); + mask = ~(1U << (nr & 31)); /* 1U: a signed 1 << 31 overflows int (undefined behaviour) */ + do { + CMPXCHG_BUGCHECK(m); + old = *m; + new = old & mask; + } while (cmpxchg_rel(m, old, new) != old); +} + +/** + * __clear_bit_unlock - Non-atomically clears a bit in memory with release + * @nr: Bit to clear + * @addr: Address to start counting from + * + * Similarly to clear_bit_unlock, the implementation uses a store + * with release semantics. See also arch_spin_unlock(). + */ +static __inline__ void +__clear_bit_unlock(int nr, void *addr) +{ + __u32 * const m = (__u32 *) addr + (nr >> 5); + __u32 const new = *m & ~(1U << (nr & 31)); + + ia64_st4_rel_nta(m, new); +} + +/** + * __clear_bit - Clears a bit in memory (non-atomic version) + * @nr: the bit to clear + * @addr: the address to start counting from + * + * Unlike clear_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static __inline__ void +__clear_bit (int nr, volatile void *addr) +{ + *((__u32 *) addr + (nr >> 5)) &= ~(1U << (nr & 31)); +} + +/** + * change_bit - Toggle a bit in memory + * @nr: Bit to toggle + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. 
+ */ +static __inline__ void +change_bit (int nr, volatile void *addr) +{ + __u32 bit, old, new; + volatile __u32 *m; + CMPXCHG_BUGCHECK_DECL + + m = (volatile __u32 *) addr + (nr >> 5); + bit = (1U << (nr & 31)); /* 1U: a signed 1 << 31 overflows int (undefined behaviour) */ + do { + CMPXCHG_BUGCHECK(m); + old = *m; + new = old ^ bit; + } while (cmpxchg_acq(m, old, new) != old); +} + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to toggle + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static __inline__ void +__change_bit (int nr, volatile void *addr) +{ + *((__u32 *) addr + (nr >> 5)) ^= (1U << (nr & 31)); +} + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies the acquisition side of the memory barrier. + */ +static __inline__ int +test_and_set_bit (int nr, volatile void *addr) +{ + __u32 bit, old, new; + volatile __u32 *m; + CMPXCHG_BUGCHECK_DECL + + m = (volatile __u32 *) addr + (nr >> 5); + bit = 1U << (nr & 31); + do { + CMPXCHG_BUGCHECK(m); + old = *m; + new = old | bit; + } while (cmpxchg_acq(m, old, new) != old); + return (old & bit) != 0; +} + +/** + * test_and_set_bit_lock - Set a bit and return its old value for lock + * @nr: Bit to set + * @addr: Address to count from + * + * This is the same as test_and_set_bit on ia64 + */ +#define test_and_set_bit_lock test_and_set_bit + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two instances of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. 
+ */ +static __inline__ int +__test_and_set_bit (int nr, volatile void *addr) +{ + __u32 *p = (__u32 *) addr + (nr >> 5); + __u32 m = 1U << (nr & 31); /* 1U: a signed 1 << 31 overflows int (undefined behaviour) */ + int oldbitset = (*p & m) != 0; + + *p |= m; + return oldbitset; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies the acquisition side of the memory barrier. + */ +static __inline__ int +test_and_clear_bit (int nr, volatile void *addr) +{ + __u32 mask, old, new; + volatile __u32 *m; + CMPXCHG_BUGCHECK_DECL + + m = (volatile __u32 *) addr + (nr >> 5); + mask = ~(1U << (nr & 31)); + do { + CMPXCHG_BUGCHECK(m); + old = *m; + new = old & mask; + } while (cmpxchg_acq(m, old, new) != old); + return (old & ~mask) != 0; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two instances of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static __inline__ int +__test_and_clear_bit(int nr, volatile void * addr) +{ + __u32 *p = (__u32 *) addr + (nr >> 5); + __u32 m = 1U << (nr & 31); + int oldbitset = (*p & m) != 0; + + *p &= ~m; + return oldbitset; +} + +/** + * test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies the acquisition side of the memory barrier. 
+ */ +static __inline__ int +test_and_change_bit (int nr, volatile void *addr) +{ + __u32 bit, old, new; + volatile __u32 *m; + CMPXCHG_BUGCHECK_DECL + + m = (volatile __u32 *) addr + (nr >> 5); + bit = (1U << (nr & 31)); /* 1U: a signed 1 << 31 overflows int (undefined behaviour) */ + do { + CMPXCHG_BUGCHECK(m); + old = *m; + new = old ^ bit; + } while (cmpxchg_acq(m, old, new) != old); + return (old & bit) != 0; +} + +/** + * __test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + */ +static __inline__ int +__test_and_change_bit (int nr, void *addr) +{ + __u32 old, bit = (1U << (nr & 31)); + __u32 *m = (__u32 *) addr + (nr >> 5); + + old = *m; + *m = old ^ bit; + return (old & bit) != 0; +} + +static __inline__ int +test_bit (int nr, const volatile void *addr) +{ + return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); +} + +/** + * ffz - find the first zero bit in a long word + * @x: The long word to find the bit in + * + * Returns the bit-number (0..63) of the first (least significant) zero bit. + * Undefined if no zero exists, so code should check against ~0UL first... + */ +static inline unsigned long +ffz (unsigned long x) +{ + unsigned long result; + + result = ia64_popcnt(x & (~x - 1)); + return result; +} + +/** + * __ffs - find first bit in word. + * @x: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static __inline__ unsigned long +__ffs (unsigned long x) +{ + unsigned long result; + + result = ia64_popcnt((x-1) & ~x); + return result; +} + +#ifdef __KERNEL__ + +/* + * Return bit number of last (most-significant) bit set. Undefined + * for x==0. Bits are numbered from 0..63 (e.g., ia64_fls(9) == 3). + */ +static inline unsigned long +ia64_fls (unsigned long x) +{ + long double d = x; + long exp; + + exp = ia64_getf_exp(d); + return exp - 0xffff; +} + +/* + * Find the last (most significant) bit set. 
Returns 0 for x==0 and + * bits are numbered from 1..32 (e.g., fls(9) == 4). + */ +static inline int +fls (int t) +{ + unsigned long x = t & 0xffffffffu; + + if (!x) + return 0; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return ia64_popcnt(x); +} + +/* + * Find the last (most significant) bit set. Undefined for x==0. + * Bits are numbered from 0..63 (e.g., __fls(9) == 3). + */ +static inline unsigned long +__fls (unsigned long x) +{ + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x |= x >> 32; + return ia64_popcnt(x) - 1; +} + +#include + +#include + +/* + * hweightN: returns the hamming weight (i.e. the number + * of bits set) of a N-bit word + */ +static __inline__ unsigned long __arch_hweight64(unsigned long x) +{ + unsigned long result; + result = ia64_popcnt(x); + return result; +} + +#define __arch_hweight32(x) ((unsigned int) __arch_hweight64((x) & 0xfffffffful)) +#define __arch_hweight16(x) ((unsigned int) __arch_hweight64((x) & 0xfffful)) +#define __arch_hweight8(x) ((unsigned int) __arch_hweight64((x) & 0xfful)) + +#include + +#endif /* __KERNEL__ */ + +#include + +#ifdef __KERNEL__ + +#include + +#include + +#include + +#endif /* __KERNEL__ */ + +#endif /* _ASM_IA64_BITOPS_H */ diff --git a/kernel/arch/ia64/include/asm/bug.h b/kernel/arch/ia64/include/asm/bug.h new file mode 100644 index 000000000..823616b50 --- /dev/null +++ b/kernel/arch/ia64/include/asm/bug.h @@ -0,0 +1,14 @@ +#ifndef _ASM_IA64_BUG_H +#define _ASM_IA64_BUG_H + +#ifdef CONFIG_BUG +#define ia64_abort() __builtin_trap() +#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0) + +/* should this BUG be made generic? 
*/ +#define HAVE_ARCH_BUG +#endif + +#include + +#endif diff --git a/kernel/arch/ia64/include/asm/bugs.h b/kernel/arch/ia64/include/asm/bugs.h new file mode 100644 index 000000000..433523e3b --- /dev/null +++ b/kernel/arch/ia64/include/asm/bugs.h @@ -0,0 +1,19 @@ +/* + * This is included by init/main.c to check for architecture-dependent bugs. + * + * Needs: + * void check_bugs(void); + * + * Based on . + * + * Modified 1998, 1999, 2003 + * David Mosberger-Tang , Hewlett-Packard Co. + */ +#ifndef _ASM_IA64_BUGS_H +#define _ASM_IA64_BUGS_H + +#include + +extern void check_bugs (void); + +#endif /* _ASM_IA64_BUGS_H */ diff --git a/kernel/arch/ia64/include/asm/cache.h b/kernel/arch/ia64/include/asm/cache.h new file mode 100644 index 000000000..988254a7d --- /dev/null +++ b/kernel/arch/ia64/include/asm/cache.h @@ -0,0 +1,29 @@ +#ifndef _ASM_IA64_CACHE_H +#define _ASM_IA64_CACHE_H + + +/* + * Copyright (C) 1998-2000 Hewlett-Packard Co + * David Mosberger-Tang + */ + +/* Bytes per L1 (data) cache line. 
*/ +#define L1_CACHE_SHIFT CONFIG_IA64_L1_CACHE_SHIFT +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#ifdef CONFIG_SMP +# define SMP_CACHE_SHIFT L1_CACHE_SHIFT +# define SMP_CACHE_BYTES L1_CACHE_BYTES +#else + /* + * The "aligned" directive can only _increase_ alignment, so this is + * safe and provides an easy way to avoid wasting space on a + * uni-processor: + */ +# define SMP_CACHE_SHIFT 3 +# define SMP_CACHE_BYTES (1 << 3) +#endif + +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) + +#endif /* _ASM_IA64_CACHE_H */ diff --git a/kernel/arch/ia64/include/asm/cacheflush.h b/kernel/arch/ia64/include/asm/cacheflush.h new file mode 100644 index 000000000..429eefc93 --- /dev/null +++ b/kernel/arch/ia64/include/asm/cacheflush.h @@ -0,0 +1,54 @@ +#ifndef _ASM_IA64_CACHEFLUSH_H +#define _ASM_IA64_CACHEFLUSH_H + +/* + * Copyright (C) 2002 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include +#include + +#include + +/* + * Cache flushing routines. This is the kind of stuff that can be very expensive, so try + * to avoid them whenever possible. 
+ */ + +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_icache_page(vma,page) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 +#define flush_dcache_page(page) \ +do { \ + clear_bit(PG_arch_1, &(page)->flags); \ +} while (0) + +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) + +extern void flush_icache_range (unsigned long start, unsigned long end); +extern void clflush_cache_range(void *addr, int size); + + +#define flush_icache_user_range(vma, page, user_addr, len) \ +do { \ + unsigned long _addr = (unsigned long) page_address(page) + ((user_addr) & ~PAGE_MASK); \ + flush_icache_range(_addr, _addr + (len)); \ +} while (0) + +#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ +do { memcpy(dst, src, len); \ + flush_icache_user_range(vma, page, vaddr, len); \ +} while (0) +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ + memcpy(dst, src, len) + +#endif /* _ASM_IA64_CACHEFLUSH_H */ diff --git a/kernel/arch/ia64/include/asm/checksum.h b/kernel/arch/ia64/include/asm/checksum.h new file mode 100644 index 000000000..97af15505 --- /dev/null +++ b/kernel/arch/ia64/include/asm/checksum.h @@ -0,0 +1,79 @@ +#ifndef _ASM_IA64_CHECKSUM_H +#define _ASM_IA64_CHECKSUM_H + +/* + * Modified 1998, 1999 + * David Mosberger-Tang , Hewlett-Packard Co + */ + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. 
+ */ +extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); + +/* + * Computes the checksum of the TCP/UDP pseudo-header returns a 16-bit + * checksum, already complemented + */ +extern __sum16 csum_tcpudp_magic (__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum); + +extern __wsum csum_tcpudp_nofold (__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum); + +/* + * Computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * Same as csum_partial, but copies from src while it checksums. + * + * Here it is even more important to align src and dst on a 32-bit (or + * even better 64-bit) boundary. + */ +extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, + int *errp); + +extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum); + +/* + * This routine is used for miscellaneous IP-like checksums, mainly in + * icmp.c + */ +extern __sum16 ip_compute_csum(const void *buff, int len); + +/* + * Fold a partial checksum without adding pseudo headers. 
+ */ +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __sum16)~sum; +} + +#define _HAVE_ARCH_IPV6_CSUM 1 +struct in6_addr; +extern __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, __u32 len, unsigned short proto, + __wsum csum); + +#endif /* _ASM_IA64_CHECKSUM_H */ diff --git a/kernel/arch/ia64/include/asm/clocksource.h b/kernel/arch/ia64/include/asm/clocksource.h new file mode 100644 index 000000000..5c8596e4c --- /dev/null +++ b/kernel/arch/ia64/include/asm/clocksource.h @@ -0,0 +1,10 @@ +/* IA64-specific clocksource additions */ + +#ifndef _ASM_IA64_CLOCKSOURCE_H +#define _ASM_IA64_CLOCKSOURCE_H + +struct arch_clocksource_data { + void *fsys_mmio; /* used by fsyscall asm code */ +}; + +#endif /* _ASM_IA64_CLOCKSOURCE_H */ diff --git a/kernel/arch/ia64/include/asm/cpu.h b/kernel/arch/ia64/include/asm/cpu.h new file mode 100644 index 000000000..fcca30b9f --- /dev/null +++ b/kernel/arch/ia64/include/asm/cpu.h @@ -0,0 +1,22 @@ +#ifndef _ASM_IA64_CPU_H_ +#define _ASM_IA64_CPU_H_ + +#include +#include +#include +#include + +struct ia64_cpu { + struct cpu cpu; +}; + +DECLARE_PER_CPU(struct ia64_cpu, cpu_devices); + +DECLARE_PER_CPU(int, cpu_state); + +#ifdef CONFIG_HOTPLUG_CPU +extern int arch_register_cpu(int num); +extern void arch_unregister_cpu(int); +#endif + +#endif /* _ASM_IA64_CPU_H_ */ diff --git a/kernel/arch/ia64/include/asm/cputime.h b/kernel/arch/ia64/include/asm/cputime.h new file mode 100644 index 000000000..e2d3f5baf --- /dev/null +++ b/kernel/arch/ia64/include/asm/cputime.h @@ -0,0 +1,29 @@ +/* + * Definitions for measuring cputime on ia64 machines. + * + * Based on . 
+ * + * Copyright (C) 2007 FUJITSU LIMITED + * Copyright (C) 2007 Hidetoshi Seto + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec. + * Otherwise we measure cpu time in jiffies using the generic definitions. + */ + +#ifndef __IA64_CPUTIME_H +#define __IA64_CPUTIME_H + +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +# include +#else +# include +# include +extern void arch_vtime_task_switch(struct task_struct *tsk); +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +#endif /* __IA64_CPUTIME_H */ diff --git a/kernel/arch/ia64/include/asm/current.h b/kernel/arch/ia64/include/asm/current.h new file mode 100644 index 000000000..c659f90fb --- /dev/null +++ b/kernel/arch/ia64/include/asm/current.h @@ -0,0 +1,17 @@ +#ifndef _ASM_IA64_CURRENT_H +#define _ASM_IA64_CURRENT_H + +/* + * Modified 1998-2000 + * David Mosberger-Tang , Hewlett-Packard Co + */ + +#include + +/* + * In kernel mode, thread pointer (r13) is used to point to the current task + * structure. 
+ */ +#define current ((struct task_struct *) ia64_getreg(_IA64_REG_TP)) + +#endif /* _ASM_IA64_CURRENT_H */ diff --git a/kernel/arch/ia64/include/asm/cyclone.h b/kernel/arch/ia64/include/asm/cyclone.h new file mode 100644 index 000000000..88f6500e8 --- /dev/null +++ b/kernel/arch/ia64/include/asm/cyclone.h @@ -0,0 +1,15 @@ +#ifndef ASM_IA64_CYCLONE_H +#define ASM_IA64_CYCLONE_H + +#ifdef CONFIG_IA64_CYCLONE +extern int use_cyclone; +extern void __init cyclone_setup(void); +#else /* CONFIG_IA64_CYCLONE */ +#define use_cyclone 0 +static inline void cyclone_setup(void) +{ + printk(KERN_ERR "Cyclone Counter: System not configured" + " w/ CONFIG_IA64_CYCLONE.\n"); +} +#endif /* CONFIG_IA64_CYCLONE */ +#endif /* !ASM_IA64_CYCLONE_H */ diff --git a/kernel/arch/ia64/include/asm/delay.h b/kernel/arch/ia64/include/asm/delay.h new file mode 100644 index 000000000..a30a62f23 --- /dev/null +++ b/kernel/arch/ia64/include/asm/delay.h @@ -0,0 +1,88 @@ +#ifndef _ASM_IA64_DELAY_H +#define _ASM_IA64_DELAY_H + +/* + * Delay routines using a pre-computed "cycles/usec" value. 
+ * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + */ + +#include +#include +#include + +#include +#include + +static __inline__ void +ia64_set_itm (unsigned long val) +{ + ia64_setreg(_IA64_REG_CR_ITM, val); + ia64_srlz_d(); +} + +static __inline__ unsigned long +ia64_get_itm (void) +{ + unsigned long result; + + result = ia64_getreg(_IA64_REG_CR_ITM); + ia64_srlz_d(); + return result; +} + +static __inline__ void +ia64_set_itv (unsigned long val) +{ + ia64_setreg(_IA64_REG_CR_ITV, val); + ia64_srlz_d(); +} + +static __inline__ unsigned long +ia64_get_itv (void) +{ + return ia64_getreg(_IA64_REG_CR_ITV); +} + +static __inline__ void +ia64_set_itc (unsigned long val) +{ + ia64_setreg(_IA64_REG_AR_ITC, val); + ia64_srlz_d(); +} + +static __inline__ unsigned long +ia64_get_itc (void) +{ + unsigned long result; + + result = ia64_getreg(_IA64_REG_AR_ITC); + ia64_barrier(); +#ifdef CONFIG_ITANIUM + while (unlikely((__s32) result == -1)) { + result = ia64_getreg(_IA64_REG_AR_ITC); + ia64_barrier(); + } +#endif + return result; +} + +extern void ia64_delay_loop (unsigned long loops); + +static __inline__ void +__delay (unsigned long loops) +{ + if (unlikely(loops < 1)) + return; + + ia64_delay_loop (loops - 1); +} + +extern void udelay (unsigned long usecs); + +#endif /* _ASM_IA64_DELAY_H */ diff --git a/kernel/arch/ia64/include/asm/device.h b/kernel/arch/ia64/include/asm/device.h new file mode 100644 index 000000000..f69c32ffb --- /dev/null +++ b/kernel/arch/ia64/include/asm/device.h @@ -0,0 +1,18 @@ +/* + * Arch specific extensions to struct device + * + * This file is released under the GPLv2 + */ +#ifndef _ASM_IA64_DEVICE_H +#define _ASM_IA64_DEVICE_H + +struct dev_archdata { +#ifdef CONFIG_INTEL_IOMMU + void *iommu; /* hook for IOMMU specific extension */ +#endif +}; + +struct pdev_archdata { 
+}; + +#endif /* _ASM_IA64_DEVICE_H */ diff --git a/kernel/arch/ia64/include/asm/div64.h b/kernel/arch/ia64/include/asm/div64.h new file mode 100644 index 000000000..6cd978cef --- /dev/null +++ b/kernel/arch/ia64/include/asm/div64.h @@ -0,0 +1 @@ +#include diff --git a/kernel/arch/ia64/include/asm/dma-mapping.h b/kernel/arch/ia64/include/asm/dma-mapping.h new file mode 100644 index 000000000..cf3ab7e78 --- /dev/null +++ b/kernel/arch/ia64/include/asm/dma-mapping.h @@ -0,0 +1,109 @@ +#ifndef _ASM_IA64_DMA_MAPPING_H +#define _ASM_IA64_DMA_MAPPING_H + +/* + * Copyright (C) 2003-2004 Hewlett-Packard Co + * David Mosberger-Tang + */ +#include +#include +#include +#include + +#define ARCH_HAS_DMA_GET_REQUIRED_MASK + +#define DMA_ERROR_CODE 0 + +extern struct dma_map_ops *dma_ops; +extern struct ia64_machine_vector ia64_mv; +extern void set_iommu_machvec(void); + +extern void machvec_dma_sync_single(struct device *, dma_addr_t, size_t, + enum dma_data_direction); +extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int, + enum dma_data_direction); + +#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) + +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *daddr, gfp_t gfp, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = platform_dma_get_ops(dev); + void *caddr; + + caddr = ops->alloc(dev, size, daddr, gfp, attrs); + debug_dma_alloc_coherent(dev, size, *daddr, caddr); + return caddr; +} + +#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *caddr, dma_addr_t daddr, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = platform_dma_get_ops(dev); + debug_dma_free_coherent(dev, size, caddr, daddr); + ops->free(dev, size, caddr, daddr, attrs); +} + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) + +#define 
get_dma_ops(dev) platform_dma_get_ops(dev) + +#include + +static inline int dma_mapping_error(struct device *dev, dma_addr_t daddr) +{ + struct dma_map_ops *ops = platform_dma_get_ops(dev); + debug_dma_mapping_error(dev, daddr); + return ops->mapping_error(dev, daddr); +} + +static inline int dma_supported(struct device *dev, u64 mask) +{ + struct dma_map_ops *ops = platform_dma_get_ops(dev); + return ops->dma_supported(dev, mask); +} + +static inline int +dma_set_mask (struct device *dev, u64 mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + *dev->dma_mask = mask; + return 0; +} + +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size - 1 <= *dev->dma_mask; +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr; +} + +static inline void +dma_cache_sync (struct device *dev, void *vaddr, size_t size, + enum dma_data_direction dir) +{ + /* + * IA-64 is cache-coherent, so this is mostly a no-op. However, we do need to + * ensure that dma_cache_sync() enforces order, hence the mb(). 
+ */ + mb(); +} + +#endif /* _ASM_IA64_DMA_MAPPING_H */ diff --git a/kernel/arch/ia64/include/asm/dma.h b/kernel/arch/ia64/include/asm/dma.h new file mode 100644 index 000000000..4d97f60f1 --- /dev/null +++ b/kernel/arch/ia64/include/asm/dma.h @@ -0,0 +1,24 @@ +#ifndef _ASM_IA64_DMA_H +#define _ASM_IA64_DMA_H + +/* + * Copyright (C) 1998-2002 Hewlett-Packard Co + * David Mosberger-Tang + */ + + +#include /* need byte IO */ + +extern unsigned long MAX_DMA_ADDRESS; + +#ifdef CONFIG_PCI + extern int isa_dma_bridge_buggy; +#else +# define isa_dma_bridge_buggy (0) +#endif + +#define free_dma(x) + +void dma_mark_clean(void *addr, size_t size); + +#endif /* _ASM_IA64_DMA_H */ diff --git a/kernel/arch/ia64/include/asm/dmi.h b/kernel/arch/ia64/include/asm/dmi.h new file mode 100644 index 000000000..f365a61f5 --- /dev/null +++ b/kernel/arch/ia64/include/asm/dmi.h @@ -0,0 +1,14 @@ +#ifndef _ASM_DMI_H +#define _ASM_DMI_H 1 + +#include +#include + +/* Use normal IO mappings for DMI */ +#define dmi_early_remap ioremap +#define dmi_early_unmap(x, l) iounmap(x) +#define dmi_remap ioremap +#define dmi_unmap iounmap +#define dmi_alloc(l) kzalloc(l, GFP_ATOMIC) + +#endif diff --git a/kernel/arch/ia64/include/asm/elf.h b/kernel/arch/ia64/include/asm/elf.h new file mode 100644 index 000000000..5a83c5cc3 --- /dev/null +++ b/kernel/arch/ia64/include/asm/elf.h @@ -0,0 +1,234 @@ +#ifndef _ASM_IA64_ELF_H +#define _ASM_IA64_ELF_H + +/* + * ELF-specific definitions. + * + * Copyright (C) 1998-1999, 2002-2004 Hewlett-Packard Co + * David Mosberger-Tang + */ + + +#include +#include +#include + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) ((x)->e_machine == EM_IA_64) + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_IA_64 + +#define CORE_DUMP_USE_REGSET + +/* Least-significant four bits of ELF header's e_flags are OS-specific. 
The bits are + interpreted as follows by Linux: */ +#define EF_IA_64_LINUX_EXECUTABLE_STACK 0x1 /* is stack (& heap) executable by default? */ + +#define ELF_EXEC_PAGESIZE PAGE_SIZE + +/* + * This is the location that an ET_DYN program is loaded if exec'ed. + * Typical use of this is to invoke "./ld.so someprog" to test out a + * new version of the loader. We need to make sure that it is out of + * the way of the program that it will "exec", and that there is + * sufficient room for the brk. + */ +#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x800000000UL) + +#define PT_IA_64_UNWIND 0x70000001 + +/* IA-64 relocations: */ +#define R_IA64_NONE 0x00 /* none */ +#define R_IA64_IMM14 0x21 /* symbol + addend, add imm14 */ +#define R_IA64_IMM22 0x22 /* symbol + addend, add imm22 */ +#define R_IA64_IMM64 0x23 /* symbol + addend, mov imm64 */ +#define R_IA64_DIR32MSB 0x24 /* symbol + addend, data4 MSB */ +#define R_IA64_DIR32LSB 0x25 /* symbol + addend, data4 LSB */ +#define R_IA64_DIR64MSB 0x26 /* symbol + addend, data8 MSB */ +#define R_IA64_DIR64LSB 0x27 /* symbol + addend, data8 LSB */ +#define R_IA64_GPREL22 0x2a /* @gprel(sym+add), add imm22 */ +#define R_IA64_GPREL64I 0x2b /* @gprel(sym+add), mov imm64 */ +#define R_IA64_GPREL32MSB 0x2c /* @gprel(sym+add), data4 MSB */ +#define R_IA64_GPREL32LSB 0x2d /* @gprel(sym+add), data4 LSB */ +#define R_IA64_GPREL64MSB 0x2e /* @gprel(sym+add), data8 MSB */ +#define R_IA64_GPREL64LSB 0x2f /* @gprel(sym+add), data8 LSB */ +#define R_IA64_LTOFF22 0x32 /* @ltoff(sym+add), add imm22 */ +#define R_IA64_LTOFF64I 0x33 /* @ltoff(sym+add), mov imm64 */ +#define R_IA64_PLTOFF22 0x3a /* @pltoff(sym+add), add imm22 */ +#define R_IA64_PLTOFF64I 0x3b /* @pltoff(sym+add), mov imm64 */ +#define R_IA64_PLTOFF64MSB 0x3e /* @pltoff(sym+add), data8 MSB */ +#define R_IA64_PLTOFF64LSB 0x3f /* @pltoff(sym+add), data8 LSB */ +#define R_IA64_FPTR64I 0x43 /* @fptr(sym+add), mov imm64 */ +#define R_IA64_FPTR32MSB 0x44 /* @fptr(sym+add), data4 MSB */ 
+#define R_IA64_FPTR32LSB 0x45 /* @fptr(sym+add), data4 LSB */ +#define R_IA64_FPTR64MSB 0x46 /* @fptr(sym+add), data8 MSB */ +#define R_IA64_FPTR64LSB 0x47 /* @fptr(sym+add), data8 LSB */ +#define R_IA64_PCREL60B 0x48 /* @pcrel(sym+add), brl */ +#define R_IA64_PCREL21B 0x49 /* @pcrel(sym+add), ptb, call */ +#define R_IA64_PCREL21M 0x4a /* @pcrel(sym+add), chk.s */ +#define R_IA64_PCREL21F 0x4b /* @pcrel(sym+add), fchkf */ +#define R_IA64_PCREL32MSB 0x4c /* @pcrel(sym+add), data4 MSB */ +#define R_IA64_PCREL32LSB 0x4d /* @pcrel(sym+add), data4 LSB */ +#define R_IA64_PCREL64MSB 0x4e /* @pcrel(sym+add), data8 MSB */ +#define R_IA64_PCREL64LSB 0x4f /* @pcrel(sym+add), data8 LSB */ +#define R_IA64_LTOFF_FPTR22 0x52 /* @ltoff(@fptr(s+a)), imm22 */ +#define R_IA64_LTOFF_FPTR64I 0x53 /* @ltoff(@fptr(s+a)), imm64 */ +#define R_IA64_LTOFF_FPTR32MSB 0x54 /* @ltoff(@fptr(s+a)), 4 MSB */ +#define R_IA64_LTOFF_FPTR32LSB 0x55 /* @ltoff(@fptr(s+a)), 4 LSB */ +#define R_IA64_LTOFF_FPTR64MSB 0x56 /* @ltoff(@fptr(s+a)), 8 MSB */ +#define R_IA64_LTOFF_FPTR64LSB 0x57 /* @ltoff(@fptr(s+a)), 8 LSB */ +#define R_IA64_SEGREL32MSB 0x5c /* @segrel(sym+add), data4 MSB */ +#define R_IA64_SEGREL32LSB 0x5d /* @segrel(sym+add), data4 LSB */ +#define R_IA64_SEGREL64MSB 0x5e /* @segrel(sym+add), data8 MSB */ +#define R_IA64_SEGREL64LSB 0x5f /* @segrel(sym+add), data8 LSB */ +#define R_IA64_SECREL32MSB 0x64 /* @secrel(sym+add), data4 MSB */ +#define R_IA64_SECREL32LSB 0x65 /* @secrel(sym+add), data4 LSB */ +#define R_IA64_SECREL64MSB 0x66 /* @secrel(sym+add), data8 MSB */ +#define R_IA64_SECREL64LSB 0x67 /* @secrel(sym+add), data8 LSB */ +#define R_IA64_REL32MSB 0x6c /* data 4 + REL */ +#define R_IA64_REL32LSB 0x6d /* data 4 + REL */ +#define R_IA64_REL64MSB 0x6e /* data 8 + REL */ +#define R_IA64_REL64LSB 0x6f /* data 8 + REL */ +#define R_IA64_LTV32MSB 0x74 /* symbol + addend, data4 MSB */ +#define R_IA64_LTV32LSB 0x75 /* symbol + addend, data4 LSB */ +#define R_IA64_LTV64MSB 0x76 /* symbol + 
addend, data8 MSB */ +#define R_IA64_LTV64LSB 0x77 /* symbol + addend, data8 LSB */ +#define R_IA64_PCREL21BI 0x79 /* @pcrel(sym+add), ptb, call */ +#define R_IA64_PCREL22 0x7a /* @pcrel(sym+add), imm22 */ +#define R_IA64_PCREL64I 0x7b /* @pcrel(sym+add), imm64 */ +#define R_IA64_IPLTMSB 0x80 /* dynamic reloc, imported PLT, MSB */ +#define R_IA64_IPLTLSB 0x81 /* dynamic reloc, imported PLT, LSB */ +#define R_IA64_COPY 0x84 /* dynamic reloc, data copy */ +#define R_IA64_SUB 0x85 /* -symbol + addend, add imm22 */ +#define R_IA64_LTOFF22X 0x86 /* LTOFF22, relaxable. */ +#define R_IA64_LDXMOV 0x87 /* Use of LTOFF22X. */ +#define R_IA64_TPREL14 0x91 /* @tprel(sym+add), add imm14 */ +#define R_IA64_TPREL22 0x92 /* @tprel(sym+add), add imm22 */ +#define R_IA64_TPREL64I 0x93 /* @tprel(sym+add), add imm64 */ +#define R_IA64_TPREL64MSB 0x96 /* @tprel(sym+add), data8 MSB */ +#define R_IA64_TPREL64LSB 0x97 /* @tprel(sym+add), data8 LSB */ +#define R_IA64_LTOFF_TPREL22 0x9a /* @ltoff(@tprel(s+a)), add imm22 */ +#define R_IA64_DTPMOD64MSB 0xa6 /* @dtpmod(sym+add), data8 MSB */ +#define R_IA64_DTPMOD64LSB 0xa7 /* @dtpmod(sym+add), data8 LSB */ +#define R_IA64_LTOFF_DTPMOD22 0xaa /* @ltoff(@dtpmod(s+a)), imm22 */ +#define R_IA64_DTPREL14 0xb1 /* @dtprel(sym+add), imm14 */ +#define R_IA64_DTPREL22 0xb2 /* @dtprel(sym+add), imm22 */ +#define R_IA64_DTPREL64I 0xb3 /* @dtprel(sym+add), imm64 */ +#define R_IA64_DTPREL32MSB 0xb4 /* @dtprel(sym+add), data4 MSB */ +#define R_IA64_DTPREL32LSB 0xb5 /* @dtprel(sym+add), data4 LSB */ +#define R_IA64_DTPREL64MSB 0xb6 /* @dtprel(sym+add), data8 MSB */ +#define R_IA64_DTPREL64LSB 0xb7 /* @dtprel(sym+add), data8 LSB */ +#define R_IA64_LTOFF_DTPREL22 0xba /* @ltoff(@dtprel(s+a)), imm22 */ + +/* IA-64 specific section flags: */ +#define SHF_IA_64_SHORT 0x10000000 /* section near gp */ + +/* + * We use (abuse?) this macro to insert the (empty) vm_area that is + * used to map the register backing store. 
I don't see any better + * place to do this, but we should discuss this with Linus once we can + * talk to him... + */ +extern void ia64_init_addr_space (void); +#define ELF_PLAT_INIT(_r, load_addr) ia64_init_addr_space() + +/* ELF register definitions. This is needed for core dump support. */ + +/* + * elf_gregset_t contains the application-level state in the following order: + * r0-r31 + * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) + * predicate registers (p0-p63) + * b0-b7 + * ip cfm psr + * ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd + */ +#define ELF_NGREG 128 /* we really need just 72 but let's leave some headroom... */ +#define ELF_NFPREG 128 /* f0 and f1 could be omitted, but so what... */ + +/* elf_gregset_t register offsets */ +#define ELF_GR_0_OFFSET 0 +#define ELF_NAT_OFFSET (32 * sizeof(elf_greg_t)) +#define ELF_PR_OFFSET (33 * sizeof(elf_greg_t)) +#define ELF_BR_0_OFFSET (34 * sizeof(elf_greg_t)) +#define ELF_CR_IIP_OFFSET (42 * sizeof(elf_greg_t)) +#define ELF_CFM_OFFSET (43 * sizeof(elf_greg_t)) +#define ELF_CR_IPSR_OFFSET (44 * sizeof(elf_greg_t)) +#define ELF_GR_OFFSET(i) (ELF_GR_0_OFFSET + i * sizeof(elf_greg_t)) +#define ELF_BR_OFFSET(i) (ELF_BR_0_OFFSET + i * sizeof(elf_greg_t)) +#define ELF_AR_RSC_OFFSET (45 * sizeof(elf_greg_t)) +#define ELF_AR_BSP_OFFSET (46 * sizeof(elf_greg_t)) +#define ELF_AR_BSPSTORE_OFFSET (47 * sizeof(elf_greg_t)) +#define ELF_AR_RNAT_OFFSET (48 * sizeof(elf_greg_t)) +#define ELF_AR_CCV_OFFSET (49 * sizeof(elf_greg_t)) +#define ELF_AR_UNAT_OFFSET (50 * sizeof(elf_greg_t)) +#define ELF_AR_FPSR_OFFSET (51 * sizeof(elf_greg_t)) +#define ELF_AR_PFS_OFFSET (52 * sizeof(elf_greg_t)) +#define ELF_AR_LC_OFFSET (53 * sizeof(elf_greg_t)) +#define ELF_AR_EC_OFFSET (54 * sizeof(elf_greg_t)) +#define ELF_AR_CSD_OFFSET (55 * sizeof(elf_greg_t)) +#define ELF_AR_SSD_OFFSET (56 * sizeof(elf_greg_t)) +#define ELF_AR_END_OFFSET (57 * sizeof(elf_greg_t)) + +typedef unsigned long 
elf_fpxregset_t; + +typedef unsigned long elf_greg_t; +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct ia64_fpreg elf_fpreg_t; +typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; + + + +struct pt_regs; /* forward declaration... */ +extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst); +#define ELF_CORE_COPY_REGS(_dest,_regs) ia64_elf_core_copy_regs(_regs, _dest); + +/* This macro yields a bitmask that programs can use to figure out + what instruction set this CPU supports. */ +#define ELF_HWCAP 0 + +/* This macro yields a string that ld.so will use to load + implementation specific libraries for optimization. Not terribly + relevant until we have real hardware to play with... */ +#define ELF_PLATFORM NULL + +#define elf_read_implies_exec(ex, executable_stack) \ + ((executable_stack!=EXSTACK_DISABLE_X) && ((ex).e_flags & EF_IA_64_LINUX_EXECUTABLE_STACK) != 0) + +struct task_struct; + +#define GATE_EHDR ((const struct elfhdr *) GATE_ADDR) + +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ +#define ARCH_DLINFO \ +do { \ + extern char __kernel_syscall_via_epc[]; \ + NEW_AUX_ENT(AT_SYSINFO, (unsigned long) __kernel_syscall_via_epc); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long) GATE_EHDR); \ +} while (0) + +/* + * format for entries in the Global Offset Table + */ +struct got_entry { + uint64_t val; +}; + +/* + * Layout of the Function Descriptor + */ +struct fdesc { + uint64_t ip; + uint64_t gp; +}; + +#endif /* _ASM_IA64_ELF_H */ diff --git a/kernel/arch/ia64/include/asm/emergency-restart.h b/kernel/arch/ia64/include/asm/emergency-restart.h new file mode 100644 index 000000000..108d8c48e --- /dev/null +++ b/kernel/arch/ia64/include/asm/emergency-restart.h @@ -0,0 +1,6 @@ +#ifndef _ASM_EMERGENCY_RESTART_H +#define _ASM_EMERGENCY_RESTART_H + +#include + +#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/kernel/arch/ia64/include/asm/esi.h b/kernel/arch/ia64/include/asm/esi.h new file mode 100644 index 
000000000..40991c6ba --- /dev/null +++ b/kernel/arch/ia64/include/asm/esi.h @@ -0,0 +1,29 @@ +/* + * ESI service calls. + * + * Copyright (c) Copyright 2005-2006 Hewlett-Packard Development Company, L.P. + * Alex Williamson + */ +#ifndef esi_h +#define esi_h + +#include + +#define ESI_QUERY 0x00000001 +#define ESI_OPEN_HANDLE 0x02000000 +#define ESI_CLOSE_HANDLE 0x02000001 + +enum esi_proc_type { + ESI_PROC_SERIALIZED, /* calls need to be serialized */ + ESI_PROC_MP_SAFE, /* MP-safe, but not reentrant */ + ESI_PROC_REENTRANT /* MP-safe and reentrant */ +}; + +extern struct ia64_sal_retval esi_call_phys (void *, u64 *); +extern int ia64_esi_call(efi_guid_t, struct ia64_sal_retval *, + enum esi_proc_type, + u64, u64, u64, u64, u64, u64, u64, u64); +extern int ia64_esi_call_phys(efi_guid_t, struct ia64_sal_retval *, u64, u64, + u64, u64, u64, u64, u64, u64); + +#endif /* esi_h */ diff --git a/kernel/arch/ia64/include/asm/fb.h b/kernel/arch/ia64/include/asm/fb.h new file mode 100644 index 000000000..89a397cee --- /dev/null +++ b/kernel/arch/ia64/include/asm/fb.h @@ -0,0 +1,23 @@ +#ifndef _ASM_FB_H_ +#define _ASM_FB_H_ + +#include +#include +#include +#include + +static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, + unsigned long off) +{ + if (efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start)) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + else + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +} + +static inline int fb_is_primary_device(struct fb_info *info) +{ + return 0; +} + +#endif /* _ASM_FB_H_ */ diff --git a/kernel/arch/ia64/include/asm/fpswa.h b/kernel/arch/ia64/include/asm/fpswa.h new file mode 100644 index 000000000..62edfcead --- /dev/null +++ b/kernel/arch/ia64/include/asm/fpswa.h @@ -0,0 +1,73 @@ +#ifndef _ASM_IA64_FPSWA_H +#define _ASM_IA64_FPSWA_H + +/* + * Floating-point Software Assist + * + * Copyright (C) 1999 Intel Corporation. 
+ * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Goutham Rao + */ + +typedef struct { + /* 4 * 128 bits */ + unsigned long fp_lp[4*2]; +} fp_state_low_preserved_t; + +typedef struct { + /* 10 * 128 bits */ + unsigned long fp_lv[10 * 2]; +} fp_state_low_volatile_t; + +typedef struct { + /* 16 * 128 bits */ + unsigned long fp_hp[16 * 2]; +} fp_state_high_preserved_t; + +typedef struct { + /* 96 * 128 bits */ + unsigned long fp_hv[96 * 2]; +} fp_state_high_volatile_t; + +/** + * floating point state to be passed to the FP emulation library by + * the trap/fault handler + */ +typedef struct { + unsigned long bitmask_low64; + unsigned long bitmask_high64; + fp_state_low_preserved_t *fp_state_low_preserved; + fp_state_low_volatile_t *fp_state_low_volatile; + fp_state_high_preserved_t *fp_state_high_preserved; + fp_state_high_volatile_t *fp_state_high_volatile; +} fp_state_t; + +typedef struct { + unsigned long status; + unsigned long err0; + unsigned long err1; + unsigned long err2; +} fpswa_ret_t; + +/** + * function header for the Floating Point software assist + * library. This function is invoked by the Floating point software + * assist trap/fault handler. + */ +typedef fpswa_ret_t (*efi_fpswa_t) (unsigned long trap_type, void *bundle, unsigned long *ipsr, + unsigned long *fsr, unsigned long *isr, unsigned long *preds, + unsigned long *ifs, fp_state_t *fp_state); + +/** + * This is the FPSWA library interface as defined by EFI. 
We need to pass a + * pointer to the interface itself on a call to the assist library + */ +typedef struct { + unsigned int revision; + unsigned int reserved; + efi_fpswa_t fpswa; +} fpswa_interface_t; + +extern fpswa_interface_t *fpswa_interface; + +#endif /* _ASM_IA64_FPSWA_H */ diff --git a/kernel/arch/ia64/include/asm/ftrace.h b/kernel/arch/ia64/include/asm/ftrace.h new file mode 100644 index 000000000..fbd1a2470 --- /dev/null +++ b/kernel/arch/ia64/include/asm/ftrace.h @@ -0,0 +1,27 @@ +#ifndef _ASM_IA64_FTRACE_H +#define _ASM_IA64_FTRACE_H + +#ifdef CONFIG_FUNCTION_TRACER +#define MCOUNT_INSN_SIZE 32 /* sizeof mcount call */ + +#ifndef __ASSEMBLY__ +extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0); +#define mcount _mcount + +/* In IA64, MCOUNT_ADDR is set in link time, so it's not a constant at compile time */ +#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip) +#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip) + +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + /* second bundle, insn 2 */ + return addr - 0x12; +} + +struct dyn_arch_ftrace { +}; +#endif + +#endif /* CONFIG_FUNCTION_TRACER */ + +#endif /* _ASM_IA64_FTRACE_H */ diff --git a/kernel/arch/ia64/include/asm/futex.h b/kernel/arch/ia64/include/asm/futex.h new file mode 100644 index 000000000..76acbcd5c --- /dev/null +++ b/kernel/arch/ia64/include/asm/futex.h @@ -0,0 +1,126 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#include +#include +#include + +#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ +do { \ + register unsigned long r8 __asm ("r8") = 0; \ + __asm__ __volatile__( \ + " mf;; \n" \ + "[1:] " insn ";; \n" \ + " .xdata4 \"__ex_table\", 1b-., 2f-. 
\n" \ + "[2:]" \ + : "+r" (r8), "=r" (oldval) \ + : "r" (uaddr), "r" (oparg) \ + : "memory"); \ + ret = r8; \ +} while (0) + +#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ +do { \ + register unsigned long r8 __asm ("r8") = 0; \ + int val, newval; \ + do { \ + __asm__ __volatile__( \ + " mf;; \n" \ + "[1:] ld4 %3=[%4];; \n" \ + " mov %2=%3 \n" \ + insn ";; \n" \ + " mov ar.ccv=%2;; \n" \ + "[2:] cmpxchg4.acq %1=[%4],%3,ar.ccv;; \n" \ + " .xdata4 \"__ex_table\", 1b-., 3f-.\n" \ + " .xdata4 \"__ex_table\", 2b-., 3f-.\n" \ + "[3:]" \ + : "+r" (r8), "=r" (val), "=&r" (oldval), \ + "=&r" (newval) \ + : "r" (uaddr), "r" (oparg) \ + : "memory"); \ + if (unlikely (r8)) \ + break; \ + } while (unlikely (val != oldval)); \ + ret = r8; \ +} while (0) + +static inline int +futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + pagefault_disable(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op1("xchg4 %1=[%2],%3", ret, oldval, uaddr, + oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op2("add %3=%3,%5", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op2("or %3=%3,%5", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op2("and %3=%3,%5", ret, oldval, uaddr, + ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op2("xor %3=%3,%5", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + pagefault_enable(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + { + register unsigned long r8 __asm ("r8") = 0; + unsigned long prev; + __asm__ __volatile__( + " mf;; \n" + " mov ar.ccv=%4;; \n" + "[1:] cmpxchg4.acq %1=[%2],%3,ar.ccv \n" + " .xdata4 \"__ex_table\", 1b-., 2f-. 
\n" + "[2:]" + : "+r" (r8), "=&r" (prev) + : "r" (uaddr), "r" (newval), + "rO" ((long) (unsigned) oldval) + : "memory"); + *uval = prev; + return r8; + } +} + +#endif /* _ASM_FUTEX_H */ diff --git a/kernel/arch/ia64/include/asm/gcc_intrin.h b/kernel/arch/ia64/include/asm/gcc_intrin.h new file mode 100644 index 000000000..f9495b175 --- /dev/null +++ b/kernel/arch/ia64/include/asm/gcc_intrin.h @@ -0,0 +1,12 @@ +/* + * + * Copyright (C) 2002,2003 Jun Nakajima + * Copyright (C) 2002,2003 Suresh Siddha + */ +#ifndef _ASM_IA64_GCC_INTRIN_H +#define _ASM_IA64_GCC_INTRIN_H + +#include <uapi/asm/gcc_intrin.h> + +register unsigned long ia64_r13 asm ("r13") __used; +#endif /* _ASM_IA64_GCC_INTRIN_H */ diff --git a/kernel/arch/ia64/include/asm/gpio.h b/kernel/arch/ia64/include/asm/gpio.h new file mode 100644 index 000000000..b3799d88f --- /dev/null +++ b/kernel/arch/ia64/include/asm/gpio.h @@ -0,0 +1,4 @@ +#ifndef __LINUX_GPIO_H +#warning Include linux/gpio.h instead of asm/gpio.h +#include <linux/gpio.h> +#endif diff --git a/kernel/arch/ia64/include/asm/hardirq.h b/kernel/arch/ia64/include/asm/hardirq.h new file mode 100644 index 000000000..8fb7d33a6 --- /dev/null +++ b/kernel/arch/ia64/include/asm/hardirq.h @@ -0,0 +1,26 @@ +#ifndef _ASM_IA64_HARDIRQ_H +#define _ASM_IA64_HARDIRQ_H + +/* + * Modified 1998-2002, 2004 Hewlett-Packard Co + * David Mosberger-Tang + */ + +/* + * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure. 
+ */ + +#define __ARCH_IRQ_STAT 1 + +#define local_softirq_pending() (local_cpu_data->softirq_pending) + +#include <linux/threads.h> +#include <linux/irq.h> + +#include <asm/processor.h> + +extern void __iomem *ipi_base_addr; + +void ack_bad_irq(unsigned int irq); + +#endif /* _ASM_IA64_HARDIRQ_H */ diff --git a/kernel/arch/ia64/include/asm/hpsim.h b/kernel/arch/ia64/include/asm/hpsim.h new file mode 100644 index 000000000..0fe50225d --- /dev/null +++ b/kernel/arch/ia64/include/asm/hpsim.h @@ -0,0 +1,16 @@ +#ifndef _ASMIA64_HPSIM_H +#define _ASMIA64_HPSIM_H + +#ifndef CONFIG_HP_SIMSERIAL_CONSOLE +static inline int simcons_register(void) { return 1; } +#else +int simcons_register(void); +#endif + +struct tty_driver; +extern struct tty_driver *hp_simserial_driver; + +extern int hpsim_get_irq(int intr); +void ia64_ctl_trace(long on); + +#endif diff --git a/kernel/arch/ia64/include/asm/hugetlb.h b/kernel/arch/ia64/include/asm/hugetlb.h new file mode 100644 index 000000000..aa910054b --- /dev/null +++ b/kernel/arch/ia64/include/asm/hugetlb.h @@ -0,0 +1,85 @@ +#ifndef _ASM_IA64_HUGETLB_H +#define _ASM_IA64_HUGETLB_H + +#include <asm/page.h> +#include <asm-generic/hugetlb.h> + + +void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, + unsigned long end, unsigned long floor, + unsigned long ceiling); + +int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len); + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) +{ + return (REGION_NUMBER(addr) == RGN_HPAGE || + REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE); +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + 
unsigned long addr, pte_t *ptep) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +static inline void arch_clear_hugepage_flags(struct page *page) +{ +} + +#endif /* _ASM_IA64_HUGETLB_H */ diff --git a/kernel/arch/ia64/include/asm/hw_irq.h b/kernel/arch/ia64/include/asm/hw_irq.h new file mode 100644 index 000000000..668786e84 --- /dev/null +++ b/kernel/arch/ia64/include/asm/hw_irq.h @@ -0,0 +1,194 @@ +#ifndef _ASM_IA64_HW_IRQ_H +#define _ASM_IA64_HW_IRQ_H + +/* + * Copyright (C) 2001-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include <linux/interrupt.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <linux/profile.h> + +#include <asm/machvec.h> +#include <asm/ptrace.h> +#include <asm/smp.h> + +#ifndef CONFIG_PARAVIRT +typedef u8 ia64_vector; +#else +typedef u16 ia64_vector; +#endif + +/* + * 0 special + * + * 1,3-14 are reserved from firmware + * + * 16-255 (vectored external interrupts) are available + * + * 15 spurious interrupt (see IVR) + * + * 16 lowest priority, 255 highest priority + * + * 15 classes of 16 interrupts each. + */ +#define IA64_MIN_VECTORED_IRQ 16 +#define IA64_MAX_VECTORED_IRQ 255 +#define IA64_NUM_VECTORS 256 + +#define AUTO_ASSIGN -1 + +#define IA64_SPURIOUS_INT_VECTOR 0x0f + +/* + * Vectors 0x10-0x1f are used for low priority interrupts, e.g. CMCI. 
+ */ +#define IA64_CPEP_VECTOR 0x1c /* corrected platform error polling vector */ +#define IA64_CMCP_VECTOR 0x1d /* corrected machine-check polling vector */ +#define IA64_CPE_VECTOR 0x1e /* corrected platform error interrupt vector */ +#define IA64_CMC_VECTOR 0x1f /* corrected machine-check interrupt vector */ +/* + * Vectors 0x20-0x2f are reserved for legacy ISA IRQs. + * Use vectors 0x30-0xe7 as the default device vector range for ia64. + * Platforms may choose to reduce this range in platform_irq_setup, but the + * platform range must fall within + * [IA64_DEF_FIRST_DEVICE_VECTOR..IA64_DEF_LAST_DEVICE_VECTOR] + */ +extern int ia64_first_device_vector; +extern int ia64_last_device_vector; + +#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined (CONFIG_IA64_DIG)) +/* Reserve the lower priority vector than device vectors for "move IRQ" IPI */ +#define IA64_IRQ_MOVE_VECTOR 0x30 /* "move IRQ" IPI */ +#define IA64_DEF_FIRST_DEVICE_VECTOR 0x31 +#else +#define IA64_DEF_FIRST_DEVICE_VECTOR 0x30 +#endif +#define IA64_DEF_LAST_DEVICE_VECTOR 0xe7 +#define IA64_FIRST_DEVICE_VECTOR ia64_first_device_vector +#define IA64_LAST_DEVICE_VECTOR ia64_last_device_vector +#define IA64_MAX_DEVICE_VECTORS (IA64_DEF_LAST_DEVICE_VECTOR - IA64_DEF_FIRST_DEVICE_VECTOR + 1) +#define IA64_NUM_DEVICE_VECTORS (IA64_LAST_DEVICE_VECTOR - IA64_FIRST_DEVICE_VECTOR + 1) + +#define IA64_MCA_RENDEZ_VECTOR 0xe8 /* MCA rendez interrupt */ +#define IA64_PERFMON_VECTOR 0xee /* performance monitor interrupt vector */ +#define IA64_TIMER_VECTOR 0xef /* use highest-prio group 15 interrupt for timer */ +#define IA64_MCA_WAKEUP_VECTOR 0xf0 /* MCA wakeup (must be >MCA_RENDEZ_VECTOR) */ +#define IA64_IPI_LOCAL_TLB_FLUSH 0xfc /* SMP flush local TLB */ +#define IA64_IPI_RESCHEDULE 0xfd /* SMP reschedule */ +#define IA64_IPI_VECTOR 0xfe /* inter-processor interrupt vector */ + +/* Used for encoding redirected irqs */ + +#define IA64_IRQ_REDIRECTED (1 << 31) + +/* IA64 inter-cpu interrupt related 
definitions */ + +#define IA64_IPI_DEFAULT_BASE_ADDR 0xfee00000 + +/* Delivery modes for inter-cpu interrupts */ +enum { + IA64_IPI_DM_INT = 0x0, /* pend an external interrupt */ + IA64_IPI_DM_PMI = 0x2, /* pend a PMI */ + IA64_IPI_DM_NMI = 0x4, /* pend an NMI (vector 2) */ + IA64_IPI_DM_INIT = 0x5, /* pend an INIT interrupt */ + IA64_IPI_DM_EXTINT = 0x7, /* pend an 8259-compatible interrupt. */ +}; + +extern __u8 isa_irq_to_vector_map[16]; +#define isa_irq_to_vector(x) isa_irq_to_vector_map[(x)] + +struct irq_cfg { + ia64_vector vector; + cpumask_t domain; + cpumask_t old_domain; + unsigned move_cleanup_count; + u8 move_in_progress : 1; +}; +extern spinlock_t vector_lock; +extern struct irq_cfg irq_cfg[NR_IRQS]; +#define irq_to_domain(x) irq_cfg[(x)].domain +DECLARE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq); + +extern struct irq_chip irq_type_ia64_lsapic; /* CPU-internal interrupt controller */ + +#ifdef CONFIG_PARAVIRT_GUEST +#include <asm/paravirt.h> +#else +#define ia64_register_ipi ia64_native_register_ipi +#define assign_irq_vector ia64_native_assign_irq_vector +#define free_irq_vector ia64_native_free_irq_vector +#define register_percpu_irq ia64_native_register_percpu_irq +#define ia64_resend_irq ia64_native_resend_irq +#endif + +extern void ia64_native_register_ipi(void); +extern int bind_irq_vector(int irq, int vector, cpumask_t domain); +extern int ia64_native_assign_irq_vector (int irq); /* allocate a free vector */ +extern void ia64_native_free_irq_vector (int vector); +extern int reserve_irq_vector (int vector); +extern void __setup_vector_irq(int cpu); +extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect); +extern void ia64_native_register_percpu_irq (ia64_vector vec, struct irqaction *action); +extern void destroy_and_reserve_irq (unsigned int irq); + +#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG)) +extern int irq_prepare_move(int irq, int cpu); +extern void irq_complete_move(unsigned int irq); 
+#else +static inline int irq_prepare_move(int irq, int cpu) { return 0; } +static inline void irq_complete_move(unsigned int irq) {} +#endif + +static inline void ia64_native_resend_irq(unsigned int vector) +{ + platform_send_ipi(smp_processor_id(), vector, IA64_IPI_DM_INT, 0); +} + +/* + * Default implementations for the irq-descriptor API: + */ +#ifndef CONFIG_IA64_GENERIC +static inline ia64_vector __ia64_irq_to_vector(int irq) +{ + return irq_cfg[irq].vector; +} + +static inline unsigned int +__ia64_local_vector_to_irq (ia64_vector vec) +{ + return __this_cpu_read(vector_irq[vec]); +} +#endif + +/* + * Next follows the irq descriptor interface. On IA-64, each CPU supports 256 interrupt + * vectors. On smaller systems, there is a one-to-one correspondence between interrupt + * vectors and the Linux irq numbers. However, larger systems may have multiple interrupt + * domains meaning that the translation from vector number to irq number depends on the + * interrupt domain that a CPU belongs to. This API abstracts such platform-dependent + * differences and provides a uniform means to translate between vector and irq numbers + * and to obtain the irq descriptor for a given irq number. + */ + +/* Extract the IA-64 vector that corresponds to IRQ. */ +static inline ia64_vector +irq_to_vector (int irq) +{ + return platform_irq_to_vector(irq); +} + +/* + * Convert the local IA-64 vector to the corresponding irq number. This translation is + * done in the context of the interrupt domain that the currently executing CPU belongs + * to. 
+ */ +static inline unsigned int +local_vector_to_irq (ia64_vector vec) +{ + return platform_local_vector_to_irq(vec); +} + +#endif /* _ASM_IA64_HW_IRQ_H */ diff --git a/kernel/arch/ia64/include/asm/idle.h b/kernel/arch/ia64/include/asm/idle.h new file mode 100644 index 000000000..b7685015a --- /dev/null +++ b/kernel/arch/ia64/include/asm/idle.h @@ -0,0 +1,7 @@ +#ifndef _ASM_IA64_IDLE_H +#define _ASM_IA64_IDLE_H + +static inline void enter_idle(void) { } +static inline void exit_idle(void) { } + +#endif /* _ASM_IA64_IDLE_H */ diff --git a/kernel/arch/ia64/include/asm/intrinsics.h b/kernel/arch/ia64/include/asm/intrinsics.h new file mode 100644 index 000000000..20477ea11 --- /dev/null +++ b/kernel/arch/ia64/include/asm/intrinsics.h @@ -0,0 +1,25 @@ +/* + * Compiler-dependent intrinsics. + * + * Copyright (C) 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +#ifndef _ASM_IA64_INTRINSICS_H +#define _ASM_IA64_INTRINSICS_H + +#include <asm/paravirt_privop.h> +#include <uapi/asm/intrinsics.h> + +#ifndef __ASSEMBLY__ +#if defined(CONFIG_PARAVIRT) +# undef IA64_INTRINSIC_API +# undef IA64_INTRINSIC_MACRO +# ifdef ASM_SUPPORTED +# define IA64_INTRINSIC_API(name) paravirt_ ## name +# else +# define IA64_INTRINSIC_API(name) pv_cpu_ops.name +# endif +#define IA64_INTRINSIC_MACRO(name) paravirt_ ## name +#endif +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_IA64_INTRINSICS_H */ diff --git a/kernel/arch/ia64/include/asm/io.h b/kernel/arch/ia64/include/asm/io.h new file mode 100644 index 000000000..80a7e34be --- /dev/null +++ b/kernel/arch/ia64/include/asm/io.h @@ -0,0 +1,449 @@ +#ifndef _ASM_IA64_IO_H +#define _ASM_IA64_IO_H + +/* + * This file contains the definitions for the emulated IO instructions + * inb/inw/inl/outb/outw/outl and the "string versions" of the same + * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" + * versions of the single-IO instructions (inb_p/inw_p/..). 
+ * + * This file is not meant to be obfuscating: it's just complicated to + * (a) handle it all in a way that makes gcc able to optimize it as + * well as possible and (b) trying to avoid writing the same thing + * over and over again with slight variations and possibly making a + * mistake somewhere. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + */ + +#include <asm/unaligned.h> + +/* We don't use IO slowdowns on the ia64, but.. */ +#define __SLOW_DOWN_IO do { } while (0) +#define SLOW_DOWN_IO do { } while (0) + +#define __IA64_UNCACHED_OFFSET RGN_BASE(RGN_UNCACHED) + +/* + * The legacy I/O space defined by the ia64 architecture supports only 65536 ports, but + * large machines may have multiple other I/O spaces so we can't place any a priori limit + * on IO_SPACE_LIMIT. These additional spaces are described in ACPI. + */ +#define IO_SPACE_LIMIT 0xffffffffffffffffUL + +#define MAX_IO_SPACES_BITS 8 +#define MAX_IO_SPACES (1UL << MAX_IO_SPACES_BITS) +#define IO_SPACE_BITS 24 +#define IO_SPACE_SIZE (1UL << IO_SPACE_BITS) + +#define IO_SPACE_NR(port) ((port) >> IO_SPACE_BITS) +#define IO_SPACE_BASE(space) ((space) << IO_SPACE_BITS) +#define IO_SPACE_PORT(port) ((port) & (IO_SPACE_SIZE - 1)) + +#define IO_SPACE_SPARSE_ENCODING(p) ((((p) >> 2) << 12) | ((p) & 0xfff)) + +struct io_space { + unsigned long mmio_base; /* base in MMIO space */ + int sparse; +}; + +extern struct io_space io_space[]; +extern unsigned int num_io_spaces; + +# ifdef __KERNEL__ + +/* + * All MMIO iomem cookies are in region 6; anything less is a PIO cookie: + * 0xCxxxxxxxxxxxxxxx MMIO cookie (return from ioremap) + * 0x000000001SPPPPPP PIO cookie (S=space number, P..P=port) + * + * ioread/writeX() uses the leading 1 in PIO cookies (PIO_OFFSET) to catch + * code that uses bare port numbers without the prerequisite pci_iomap(). 
+ */ +#define PIO_OFFSET (1UL << (MAX_IO_SPACES_BITS + IO_SPACE_BITS)) +#define PIO_MASK (PIO_OFFSET - 1) +#define PIO_RESERVED __IA64_UNCACHED_OFFSET +#define HAVE_ARCH_PIO_SIZE + +#include <asm/intrinsics.h> +#include <asm/machvec.h> +#include <asm/page.h> +#include <asm-generic/iomap.h> + +/* + * Change virtual addresses to physical addresses and vv. + */ +static inline unsigned long +virt_to_phys (volatile void *address) +{ + return (unsigned long) address - PAGE_OFFSET; +} + +static inline void* +phys_to_virt (unsigned long address) +{ + return (void *) (address + PAGE_OFFSET); +} + +#define ARCH_HAS_VALID_PHYS_ADDR_RANGE +extern u64 kern_mem_attribute (unsigned long phys_addr, unsigned long size); +extern int valid_phys_addr_range (phys_addr_t addr, size_t count); /* efi.c */ +extern int valid_mmap_phys_addr_range (unsigned long pfn, size_t count); + +/* + * The following two macros are deprecated and scheduled for removal. + * Please use the PCI-DMA interface defined in <linux/pci.h> instead. + */ +#define bus_to_virt phys_to_virt +#define virt_to_bus virt_to_phys +#define page_to_bus page_to_phys + +# endif /* KERNEL */ + +/* + * Memory fence w/accept. This should never be used in code that is + * not IA-64 specific. + */ +#define __ia64_mf_a() ia64_mfa() + +/** + * ___ia64_mmiowb - I/O write barrier + * + * Ensure ordering of I/O space writes. This will make sure that writes + * following the barrier will arrive after all previous writes. For most + * ia64 platforms, this is a simple 'mf.a' instruction. + * + * See Documentation/DocBook/deviceiobook.tmpl for more information. 
+ */ +static inline void ___ia64_mmiowb(void) +{ + ia64_mfa(); +} + +static inline void* +__ia64_mk_io_addr (unsigned long port) +{ + struct io_space *space; + unsigned long offset; + + space = &io_space[IO_SPACE_NR(port)]; + port = IO_SPACE_PORT(port); + if (space->sparse) + offset = IO_SPACE_SPARSE_ENCODING(port); + else + offset = port; + + return (void *) (space->mmio_base | offset); +} + +#define __ia64_inb ___ia64_inb +#define __ia64_inw ___ia64_inw +#define __ia64_inl ___ia64_inl +#define __ia64_outb ___ia64_outb +#define __ia64_outw ___ia64_outw +#define __ia64_outl ___ia64_outl +#define __ia64_readb ___ia64_readb +#define __ia64_readw ___ia64_readw +#define __ia64_readl ___ia64_readl +#define __ia64_readq ___ia64_readq +#define __ia64_readb_relaxed ___ia64_readb +#define __ia64_readw_relaxed ___ia64_readw +#define __ia64_readl_relaxed ___ia64_readl +#define __ia64_readq_relaxed ___ia64_readq +#define __ia64_writeb ___ia64_writeb +#define __ia64_writew ___ia64_writew +#define __ia64_writel ___ia64_writel +#define __ia64_writeq ___ia64_writeq +#define __ia64_mmiowb ___ia64_mmiowb + +/* + * For the in/out routines, we need to do "mf.a" _after_ doing the I/O access to ensure + * that the access has completed before executing other I/O accesses. Since we're doing + * the accesses through an uncachable (UC) translation, the CPU will execute them in + * program order. However, we still need to tell the compiler not to shuffle them around + * during optimization, which is why we use "volatile" pointers. 
+ */ + +static inline unsigned int +___ia64_inb (unsigned long port) +{ + volatile unsigned char *addr = __ia64_mk_io_addr(port); + unsigned char ret; + + ret = *addr; + __ia64_mf_a(); + return ret; +} + +static inline unsigned int +___ia64_inw (unsigned long port) +{ + volatile unsigned short *addr = __ia64_mk_io_addr(port); + unsigned short ret; + + ret = *addr; + __ia64_mf_a(); + return ret; +} + +static inline unsigned int +___ia64_inl (unsigned long port) +{ + volatile unsigned int *addr = __ia64_mk_io_addr(port); + unsigned int ret; + + ret = *addr; + __ia64_mf_a(); + return ret; +} + +static inline void +___ia64_outb (unsigned char val, unsigned long port) +{ + volatile unsigned char *addr = __ia64_mk_io_addr(port); + + *addr = val; + __ia64_mf_a(); +} + +static inline void +___ia64_outw (unsigned short val, unsigned long port) +{ + volatile unsigned short *addr = __ia64_mk_io_addr(port); + + *addr = val; + __ia64_mf_a(); +} + +static inline void +___ia64_outl (unsigned int val, unsigned long port) +{ + volatile unsigned int *addr = __ia64_mk_io_addr(port); + + *addr = val; + __ia64_mf_a(); +} + +static inline void +__insb (unsigned long port, void *dst, unsigned long count) +{ + unsigned char *dp = dst; + + while (count--) + *dp++ = platform_inb(port); +} + +static inline void +__insw (unsigned long port, void *dst, unsigned long count) +{ + unsigned short *dp = dst; + + while (count--) + put_unaligned(platform_inw(port), dp++); +} + +static inline void +__insl (unsigned long port, void *dst, unsigned long count) +{ + unsigned int *dp = dst; + + while (count--) + put_unaligned(platform_inl(port), dp++); +} + +static inline void +__outsb (unsigned long port, const void *src, unsigned long count) +{ + const unsigned char *sp = src; + + while (count--) + platform_outb(*sp++, port); +} + +static inline void +__outsw (unsigned long port, const void *src, unsigned long count) +{ + const unsigned short *sp = src; + + while (count--) + 
platform_outw(get_unaligned(sp++), port); +} + +static inline void +__outsl (unsigned long port, const void *src, unsigned long count) +{ + const unsigned int *sp = src; + + while (count--) + platform_outl(get_unaligned(sp++), port); +} + +/* + * Unfortunately, some platforms are broken and do not follow the IA-64 architecture + * specification regarding legacy I/O support. Thus, we have to make these operations + * platform dependent... + */ +#define __inb platform_inb +#define __inw platform_inw +#define __inl platform_inl +#define __outb platform_outb +#define __outw platform_outw +#define __outl platform_outl +#define __mmiowb platform_mmiowb + +#define inb(p) __inb(p) +#define inw(p) __inw(p) +#define inl(p) __inl(p) +#define insb(p,d,c) __insb(p,d,c) +#define insw(p,d,c) __insw(p,d,c) +#define insl(p,d,c) __insl(p,d,c) +#define outb(v,p) __outb(v,p) +#define outw(v,p) __outw(v,p) +#define outl(v,p) __outl(v,p) +#define outsb(p,s,c) __outsb(p,s,c) +#define outsw(p,s,c) __outsw(p,s,c) +#define outsl(p,s,c) __outsl(p,s,c) +#define mmiowb() __mmiowb() + +/* + * The address passed to these functions are ioremap()ped already. + * + * We need these to be machine vectors since some platforms don't provide + * DMA coherence via PIO reads (PCI drivers and the spec imply that this is + * a good idea). Writes are ok though for all existing ia64 platforms (and + * hopefully it'll stay that way). 
+ */ +static inline unsigned char +___ia64_readb (const volatile void __iomem *addr) +{ + return *(volatile unsigned char __force *)addr; +} + +static inline unsigned short +___ia64_readw (const volatile void __iomem *addr) +{ + return *(volatile unsigned short __force *)addr; +} + +static inline unsigned int +___ia64_readl (const volatile void __iomem *addr) +{ + return *(volatile unsigned int __force *) addr; +} + +static inline unsigned long +___ia64_readq (const volatile void __iomem *addr) +{ + return *(volatile unsigned long __force *) addr; +} + +static inline void +__writeb (unsigned char val, volatile void __iomem *addr) +{ + *(volatile unsigned char __force *) addr = val; +} + +static inline void +__writew (unsigned short val, volatile void __iomem *addr) +{ + *(volatile unsigned short __force *) addr = val; +} + +static inline void +__writel (unsigned int val, volatile void __iomem *addr) +{ + *(volatile unsigned int __force *) addr = val; +} + +static inline void +__writeq (unsigned long val, volatile void __iomem *addr) +{ + *(volatile unsigned long __force *) addr = val; +} + +#define __readb platform_readb +#define __readw platform_readw +#define __readl platform_readl +#define __readq platform_readq +#define __readb_relaxed platform_readb_relaxed +#define __readw_relaxed platform_readw_relaxed +#define __readl_relaxed platform_readl_relaxed +#define __readq_relaxed platform_readq_relaxed + +#define readb(a) __readb((a)) +#define readw(a) __readw((a)) +#define readl(a) __readl((a)) +#define readq(a) __readq((a)) +#define readb_relaxed(a) __readb_relaxed((a)) +#define readw_relaxed(a) __readw_relaxed((a)) +#define readl_relaxed(a) __readl_relaxed((a)) +#define readq_relaxed(a) __readq_relaxed((a)) +#define __raw_readb readb +#define __raw_readw readw +#define __raw_readl readl +#define __raw_readq readq +#define __raw_readb_relaxed readb_relaxed +#define __raw_readw_relaxed readw_relaxed +#define __raw_readl_relaxed readl_relaxed +#define 
__raw_readq_relaxed readq_relaxed +#define writeb(v,a) __writeb((v), (a)) +#define writew(v,a) __writew((v), (a)) +#define writel(v,a) __writel((v), (a)) +#define writeq(v,a) __writeq((v), (a)) +#define writeb_relaxed(v,a) __writeb((v), (a)) +#define writew_relaxed(v,a) __writew((v), (a)) +#define writel_relaxed(v,a) __writel((v), (a)) +#define writeq_relaxed(v,a) __writeq((v), (a)) +#define __raw_writeb writeb +#define __raw_writew writew +#define __raw_writel writel +#define __raw_writeq writeq + +#ifndef inb_p +# define inb_p inb +#endif +#ifndef inw_p +# define inw_p inw +#endif +#ifndef inl_p +# define inl_p inl +#endif + +#ifndef outb_p +# define outb_p outb +#endif +#ifndef outw_p +# define outw_p outw +#endif +#ifndef outl_p +# define outl_p outl +#endif + +# ifdef __KERNEL__ + +extern void __iomem * ioremap(unsigned long offset, unsigned long size); +extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size); +extern void iounmap (volatile void __iomem *addr); +extern void __iomem * early_ioremap (unsigned long phys_addr, unsigned long size); +#define early_memremap(phys_addr, size) early_ioremap(phys_addr, size) +extern void early_iounmap (volatile void __iomem *addr, unsigned long size); +#define early_memunmap(addr, size) early_iounmap(addr, size) +static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned long size) +{ + return ioremap(phys_addr, size); +} + + +/* + * String version of IO memory access ops: + */ +extern void memcpy_fromio(void *dst, const volatile void __iomem *src, long n); +extern void memcpy_toio(volatile void __iomem *dst, const void *src, long n); +extern void memset_io(volatile void __iomem *s, int c, long n); + +# endif /* __KERNEL__ */ + +#endif /* _ASM_IA64_IO_H */ diff --git a/kernel/arch/ia64/include/asm/iommu.h b/kernel/arch/ia64/include/asm/iommu.h new file mode 100644 index 000000000..105c93b00 --- /dev/null +++ b/kernel/arch/ia64/include/asm/iommu.h @@ -0,0 +1,22 @@ +#ifndef 
_ASM_IA64_IOMMU_H +#define _ASM_IA64_IOMMU_H 1 + +#define cpu_has_x2apic 0 +/* 10 seconds */ +#define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) + +extern void pci_iommu_shutdown(void); +extern void no_iommu_init(void); +#ifdef CONFIG_INTEL_IOMMU +extern int force_iommu, no_iommu; +extern int iommu_pass_through; +extern int iommu_detected; +#else +#define iommu_pass_through (0) +#define no_iommu (1) +#define iommu_detected (0) +#endif +extern void iommu_dma_init(void); +extern void machvec_init(const char *name); + +#endif diff --git a/kernel/arch/ia64/include/asm/iommu_table.h b/kernel/arch/ia64/include/asm/iommu_table.h new file mode 100644 index 000000000..92c8d36ae --- /dev/null +++ b/kernel/arch/ia64/include/asm/iommu_table.h @@ -0,0 +1,6 @@ +#ifndef _ASM_IA64_IOMMU_TABLE_H +#define _ASM_IA64_IOMMU_TABLE_H + +#define IOMMU_INIT_POST(_detect) + +#endif /* _ASM_IA64_IOMMU_TABLE_H */ diff --git a/kernel/arch/ia64/include/asm/iosapic.h b/kernel/arch/ia64/include/asm/iosapic.h new file mode 100644 index 000000000..94c89a2d9 --- /dev/null +++ b/kernel/arch/ia64/include/asm/iosapic.h @@ -0,0 +1,121 @@ +#ifndef __ASM_IA64_IOSAPIC_H +#define __ASM_IA64_IOSAPIC_H + +#define IOSAPIC_REG_SELECT 0x0 +#define IOSAPIC_WINDOW 0x10 +#define IOSAPIC_EOI 0x40 + +#define IOSAPIC_VERSION 0x1 + +/* + * Redirection table entry + */ +#define IOSAPIC_RTE_LOW(i) (0x10+(i)*2) /* (i) is parenthesized so expression arguments such as a+b expand correctly */ +#define IOSAPIC_RTE_HIGH(i) (0x11+(i)*2) + +#define IOSAPIC_DEST_SHIFT 16 + +/* + * Delivery mode + */ +#define IOSAPIC_DELIVERY_SHIFT 8 +#define IOSAPIC_FIXED 0x0 +#define IOSAPIC_LOWEST_PRIORITY 0x1 +#define IOSAPIC_PMI 0x2 +#define IOSAPIC_NMI 0x4 +#define IOSAPIC_INIT 0x5 +#define IOSAPIC_EXTINT 0x7 + +/* + * Interrupt polarity + */ +#define IOSAPIC_POLARITY_SHIFT 13 +#define IOSAPIC_POL_HIGH 0 +#define IOSAPIC_POL_LOW 1 + +/* + * Trigger mode + */ +#define IOSAPIC_TRIGGER_SHIFT 15 +#define IOSAPIC_EDGE 0 +#define IOSAPIC_LEVEL 1 + +/* + * Mask bit + */ + +#define IOSAPIC_MASK_SHIFT 16 
+#define IOSAPIC_MASK (1<<IOSAPIC_MASK_SHIFT) + +#define IOSAPIC_VECTOR_MASK 0xffffff00 + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_IOSAPIC + +#define NR_IOSAPICS 256 + +#ifdef CONFIG_PARAVIRT_GUEST +#include <asm/paravirt.h> +#else +#define iosapic_pcat_compat_init ia64_native_iosapic_pcat_compat_init +#define __iosapic_read __ia64_native_iosapic_read +#define __iosapic_write __ia64_native_iosapic_write +#define iosapic_get_irq_chip ia64_native_iosapic_get_irq_chip +#endif + +extern void __init ia64_native_iosapic_pcat_compat_init(void); +extern struct irq_chip *ia64_native_iosapic_get_irq_chip(unsigned long trigger); + +static inline unsigned int +__ia64_native_iosapic_read(char __iomem *iosapic, unsigned int reg) +{ + writel(reg, iosapic + IOSAPIC_REG_SELECT); + return readl(iosapic + IOSAPIC_WINDOW); +} + +static inline void +__ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) +{ + writel(reg, iosapic + IOSAPIC_REG_SELECT); + writel(val, iosapic + IOSAPIC_WINDOW); +} + +static inline void iosapic_eoi(char __iomem *iosapic, u32 vector) +{ + writel(vector, iosapic + IOSAPIC_EOI); +} + +extern void __init iosapic_system_init (int pcat_compat); +extern int iosapic_init (unsigned long address, unsigned int gsi_base); +extern int iosapic_remove (unsigned int gsi_base); +extern int gsi_to_irq (unsigned int gsi); +extern int iosapic_register_intr (unsigned int gsi, unsigned long polarity, + unsigned long trigger); +extern void iosapic_unregister_intr (unsigned int irq); +extern void iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, + unsigned long polarity, + unsigned long trigger); +extern int __init iosapic_register_platform_intr (u32 int_type, + unsigned int gsi, + int pmi_vector, + u16 eid, u16 id, + unsigned long polarity, + unsigned long trigger); + +#ifdef CONFIG_NUMA +extern void map_iosapic_to_node (unsigned int, int); +#endif +#else +#define iosapic_system_init(pcat_compat) do { } while (0) +#define iosapic_init(address,gsi_base) (-EINVAL) +#define iosapic_remove(gsi_base) (-ENODEV) +#define iosapic_register_intr(gsi,polarity,trigger) (gsi) +#define iosapic_unregister_intr(irq) do { } while (0) +#define 
iosapic_override_isa_irq(isa_irq,gsi,polarity,trigger) do { } while (0) +#define iosapic_register_platform_intr(type,gsi,pmi,eid,id, \ + polarity,trigger) (gsi) +#endif + +# endif /* !__ASSEMBLY__ */ +#endif /* __ASM_IA64_IOSAPIC_H */ diff --git a/kernel/arch/ia64/include/asm/irq.h b/kernel/arch/ia64/include/asm/irq.h new file mode 100644 index 000000000..820667cbe --- /dev/null +++ b/kernel/arch/ia64/include/asm/irq.h @@ -0,0 +1,37 @@ +#ifndef _ASM_IA64_IRQ_H +#define _ASM_IA64_IRQ_H + +/* + * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * + * 11/24/98 S.Eranian updated TIMER_IRQ and irq_canonicalize + * 01/20/99 S.Eranian added keyboard interrupt + * 02/29/00 D.Mosberger moved most things into hw_irq.h + */ + +#include <linux/types.h> +#include <linux/cpumask.h> +#include <generated/nr-irqs.h> + +static __inline__ int +irq_canonicalize (int irq) +{ + /* + * We do the legacy thing here of pretending that irqs < 16 + * are 8259 irqs. This really shouldn't be necessary at all, + * but we keep it here as serial.c still uses it... + */ + return ((irq == 2) ? 
9 : irq); +} + +extern void set_irq_affinity_info (unsigned int irq, int dest, int redir); +bool is_affinity_mask_valid(const struct cpumask *cpumask); + +#define is_affinity_mask_valid is_affinity_mask_valid + +int create_irq(void); +void destroy_irq(unsigned int irq); + +#endif /* _ASM_IA64_IRQ_H */ diff --git a/kernel/arch/ia64/include/asm/irq_regs.h b/kernel/arch/ia64/include/asm/irq_regs.h new file mode 100644 index 000000000..3dd9c0b70 --- /dev/null +++ b/kernel/arch/ia64/include/asm/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/kernel/arch/ia64/include/asm/irq_remapping.h b/kernel/arch/ia64/include/asm/irq_remapping.h new file mode 100644 index 000000000..e3b3556e2 --- /dev/null +++ b/kernel/arch/ia64/include/asm/irq_remapping.h @@ -0,0 +1,6 @@ +#ifndef __IA64_INTR_REMAPPING_H +#define __IA64_INTR_REMAPPING_H +#define irq_remapping_enabled 0 +#define dmar_alloc_hwirq create_irq +#define dmar_free_hwirq destroy_irq +#endif diff --git a/kernel/arch/ia64/include/asm/irqflags.h b/kernel/arch/ia64/include/asm/irqflags.h new file mode 100644 index 000000000..cec6c06b5 --- /dev/null +++ b/kernel/arch/ia64/include/asm/irqflags.h @@ -0,0 +1,98 @@ +/* + * IRQ flags defines. 
+ * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + */ + +#ifndef _ASM_IA64_IRQFLAGS_H +#define _ASM_IA64_IRQFLAGS_H + +#include <asm/pal.h> +#include <asm/kregs.h> + +#ifdef CONFIG_IA64_DEBUG_IRQ +extern unsigned long last_cli_ip; +static inline void arch_maybe_save_ip(unsigned long flags) +{ + if (flags & IA64_PSR_I) + last_cli_ip = ia64_getreg(_IA64_REG_IP); +} +#else +#define arch_maybe_save_ip(flags) do {} while (0) +#endif + +/* + * - clearing psr.i is implicitly serialized (visible by next insn) + * - setting psr.i requires data serialization + * - we need a stop-bit before reading PSR because we sometimes + * write a floating-point register right before reading the PSR + * and that writes to PSR.mfl + */ + +static inline unsigned long arch_local_save_flags(void) +{ + ia64_stop(); +#ifdef CONFIG_PARAVIRT + return ia64_get_psr_i(); +#else + return ia64_getreg(_IA64_REG_PSR); +#endif +} + +static inline unsigned long arch_local_irq_save(void) +{ + unsigned long flags = arch_local_save_flags(); + + ia64_stop(); + ia64_rsm(IA64_PSR_I); + arch_maybe_save_ip(flags); + return flags; +} + +static inline void arch_local_irq_disable(void) +{ +#ifdef CONFIG_IA64_DEBUG_IRQ + arch_local_irq_save(); +#else + ia64_stop(); + ia64_rsm(IA64_PSR_I); +#endif +} + +static inline void arch_local_irq_enable(void) +{ + ia64_stop(); + ia64_ssm(IA64_PSR_I); + ia64_srlz_d(); +} + +static inline void arch_local_irq_restore(unsigned long flags) +{ +#ifdef CONFIG_IA64_DEBUG_IRQ + unsigned long old_psr = arch_local_save_flags(); +#endif + ia64_intrin_local_irq_restore(flags & IA64_PSR_I); + arch_maybe_save_ip(old_psr & ~flags); +} + +static inline bool arch_irqs_disabled_flags(unsigned long flags) +{ + return (flags & IA64_PSR_I) == 0; +} + +static inline bool arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + +static inline void arch_safe_halt(void) +{ + 
arch_local_irq_enable(); + ia64_pal_halt_light(); /* PAL_HALT_LIGHT */ +} + + +#endif /* _ASM_IA64_IRQFLAGS_H */ diff --git a/kernel/arch/ia64/include/asm/kdebug.h b/kernel/arch/ia64/include/asm/kdebug.h new file mode 100644 index 000000000..d11a69855 --- /dev/null +++ b/kernel/arch/ia64/include/asm/kdebug.h @@ -0,0 +1,57 @@ +#ifndef _IA64_KDEBUG_H +#define _IA64_KDEBUG_H 1 +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) Intel Corporation, 2005 + * + * 2005-Apr Rusty Lynch and Anil S Keshavamurthy + * adopted from + * include/asm-x86_64/kdebug.h + * + * 2005-Oct Keith Owens . Expand notify_die to cover more + * events. 
+ */ + +enum die_val { + DIE_BREAK = 1, + DIE_FAULT, + DIE_OOPS, + DIE_MACHINE_HALT, + DIE_MACHINE_RESTART, + DIE_MCA_MONARCH_ENTER, + DIE_MCA_MONARCH_PROCESS, + DIE_MCA_MONARCH_LEAVE, + DIE_MCA_SLAVE_ENTER, + DIE_MCA_SLAVE_PROCESS, + DIE_MCA_SLAVE_LEAVE, + DIE_MCA_RENDZVOUS_ENTER, + DIE_MCA_RENDZVOUS_PROCESS, + DIE_MCA_RENDZVOUS_LEAVE, + DIE_MCA_NEW_TIMEOUT, + DIE_INIT_ENTER, + DIE_INIT_MONARCH_ENTER, + DIE_INIT_MONARCH_PROCESS, + DIE_INIT_MONARCH_LEAVE, + DIE_INIT_SLAVE_ENTER, + DIE_INIT_SLAVE_PROCESS, + DIE_INIT_SLAVE_LEAVE, + DIE_KDEBUG_ENTER, + DIE_KDEBUG_LEAVE, + DIE_KDUMP_ENTER, + DIE_KDUMP_LEAVE, +}; + +#endif diff --git a/kernel/arch/ia64/include/asm/kexec.h b/kernel/arch/ia64/include/asm/kexec.h new file mode 100644 index 000000000..aea2b81b0 --- /dev/null +++ b/kernel/arch/ia64/include/asm/kexec.h @@ -0,0 +1,45 @@ +#ifndef _ASM_IA64_KEXEC_H +#define _ASM_IA64_KEXEC_H + +#include + +/* Maximum physical address we can use pages from */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) +/* Maximum address we can use for the control code buffer */ +#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE + +#define KEXEC_CONTROL_PAGE_SIZE (8192 + 8192 + 4096) + +/* The native architecture */ +#define KEXEC_ARCH KEXEC_ARCH_IA_64 + +#define kexec_flush_icache_page(page) do { \ + unsigned long page_addr = (unsigned long)page_address(page); \ + flush_icache_range(page_addr, page_addr + PAGE_SIZE); \ + } while(0) + +extern struct kimage *ia64_kimage; +extern const unsigned int relocate_new_kernel_size; +extern void relocate_new_kernel(unsigned long, unsigned long, + struct ia64_boot_param *, unsigned long); +static inline void +crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) +{ +} +extern struct resource efi_memmap_res; +extern struct resource boot_param_res; +extern void kdump_smp_send_stop(void); +extern void kdump_smp_send_init(void); +extern void 
kexec_disable_iosapic(void); +extern void crash_save_this_cpu(void); +struct rsvd_region; +extern unsigned long kdump_find_rsvd_region(unsigned long size, + struct rsvd_region *rsvd_regions, int n); +extern void kdump_cpu_freeze(struct unw_frame_info *info, void *arg); +extern int kdump_status[]; +extern atomic_t kdump_cpu_freezed; +extern atomic_t kdump_in_progress; + +#endif /* _ASM_IA64_KEXEC_H */ diff --git a/kernel/arch/ia64/include/asm/kmap_types.h b/kernel/arch/ia64/include/asm/kmap_types.h new file mode 100644 index 000000000..05d5f9996 --- /dev/null +++ b/kernel/arch/ia64/include/asm/kmap_types.h @@ -0,0 +1,12 @@ +#ifndef _ASM_IA64_KMAP_TYPES_H +#define _ASM_IA64_KMAP_TYPES_H + +#ifdef CONFIG_DEBUG_HIGHMEM +#define __WITH_KM_FENCE +#endif + +#include + +#undef __WITH_KM_FENCE + +#endif /* _ASM_IA64_KMAP_TYPES_H */ diff --git a/kernel/arch/ia64/include/asm/kprobes.h b/kernel/arch/ia64/include/asm/kprobes.h new file mode 100644 index 000000000..d5505d6f2 --- /dev/null +++ b/kernel/arch/ia64/include/asm/kprobes.h @@ -0,0 +1,127 @@ +#ifndef _ASM_KPROBES_H +#define _ASM_KPROBES_H +/* + * Kernel Probes (KProbes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2002, 2004 + * Copyright (C) Intel Corporation, 2005 + * + * 2005-Apr Rusty Lynch and Anil S Keshavamurthy + * adapted from i386 + */ +#include +#include +#include +#include + +#define __ARCH_WANT_KPROBES_INSN_SLOT +#define MAX_INSN_SIZE 2 /* last half is for kprobe-booster */ +#define BREAK_INST (long)(__IA64_BREAK_KPROBE << 6) +#define NOP_M_INST (long)(1<<27) +#define BRL_INST(i1, i2) ((long)((0xcL << 37) | /* brl */ \ + (0x1L << 12) | /* many */ \ + (((i1) & 1) << 36) | ((i2) << 13))) /* imm */ + +typedef union cmp_inst { + struct { + unsigned long long qp : 6; + unsigned long long p1 : 6; + unsigned long long c : 1; + unsigned long long r2 : 7; + unsigned long long r3 : 7; + unsigned long long p2 : 6; + unsigned long long ta : 1; + unsigned long long x2 : 2; + unsigned long long tb : 1; + unsigned long long opcode : 4; + unsigned long long reserved : 23; + }f; + unsigned long long l; +} cmp_inst_t; + +struct kprobe; + +typedef struct _bundle { + struct { + unsigned long long template : 5; + unsigned long long slot0 : 41; + unsigned long long slot1_p0 : 64-46; + } quad0; + struct { + unsigned long long slot1_p1 : 41 - (64-46); + unsigned long long slot2 : 41; + } quad1; +} __attribute__((__aligned__(16))) bundle_t; + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; +}; + +#define MAX_PARAM_RSE_SIZE (0x60+0x60/0x3f) +/* per-cpu kprobe control block */ +#define ARCH_PREV_KPROBE_SZ 2 +struct kprobe_ctlblk { + unsigned long kprobe_status; + struct pt_regs jprobe_saved_regs; + unsigned long jprobes_saved_stacked_regs[MAX_PARAM_RSE_SIZE]; + unsigned long *bsp; + unsigned long cfm; + atomic_t prev_kprobe_index; + struct prev_kprobe prev_kprobe[ARCH_PREV_KPROBE_SZ]; +}; + +#define kretprobe_blacklist_size 0 + +#define SLOT0_OPCODE_SHIFT (37) +#define SLOT1_p1_OPCODE_SHIFT (37 - (64-46)) +#define SLOT2_OPCODE_SHIFT (37) + +#define INDIRECT_CALL_OPCODE (1) +#define IP_RELATIVE_CALL_OPCODE (5) +#define 
IP_RELATIVE_BRANCH_OPCODE (4) +#define IP_RELATIVE_PREDICT_OPCODE (7) +#define LONG_BRANCH_OPCODE (0xC) +#define LONG_CALL_OPCODE (0xD) +#define flush_insn_slot(p) do { } while (0) + +typedef struct kprobe_opcode { + bundle_t bundle; +} kprobe_opcode_t; + +/* Architecture specific copy of original instruction*/ +struct arch_specific_insn { + /* copy of the instruction to be emulated */ + kprobe_opcode_t *insn; + #define INST_FLAG_FIX_RELATIVE_IP_ADDR 1 + #define INST_FLAG_FIX_BRANCH_REG 2 + #define INST_FLAG_BREAK_INST 4 + #define INST_FLAG_BOOSTABLE 8 + unsigned long inst_flag; + unsigned short target_br_reg; + unsigned short slot; +}; + +extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + +extern void invalidate_stacked_regs(void); +extern void flush_register_stack(void); +extern void arch_remove_kprobe(struct kprobe *p); + +#endif /* _ASM_KPROBES_H */ diff --git a/kernel/arch/ia64/include/asm/kregs.h b/kernel/arch/ia64/include/asm/kregs.h new file mode 100644 index 000000000..39e65f663 --- /dev/null +++ b/kernel/arch/ia64/include/asm/kregs.h @@ -0,0 +1,165 @@ +#ifndef _ASM_IA64_KREGS_H +#define _ASM_IA64_KREGS_H + +/* + * Copyright (C) 2001-2002 Hewlett-Packard Co + * David Mosberger-Tang + */ +/* + * This file defines the kernel register usage convention used by Linux/ia64. 
+ */ + +/* + * Kernel registers: + */ +#define IA64_KR_IO_BASE 0 /* ar.k0: legacy I/O base address */ +#define IA64_KR_TSSD 1 /* ar.k1: IVE uses this as the TSSD */ +#define IA64_KR_PER_CPU_DATA 3 /* ar.k3: physical per-CPU base */ +#define IA64_KR_CURRENT_STACK 4 /* ar.k4: what's mapped in IA64_TR_CURRENT_STACK */ +#define IA64_KR_FPU_OWNER 5 /* ar.k5: fpu-owner (UP only, at the moment) */ +#define IA64_KR_CURRENT 6 /* ar.k6: "current" task pointer */ +#define IA64_KR_PT_BASE 7 /* ar.k7: page table base address (physical) */ + +#define _IA64_KR_PASTE(x,y) x##y +#define _IA64_KR_PREFIX(n) _IA64_KR_PASTE(ar.k, n) +#define IA64_KR(n) _IA64_KR_PREFIX(IA64_KR_##n) + +/* + * Translation registers: + */ +#define IA64_TR_KERNEL 0 /* itr0, dtr0: maps kernel image (code & data) */ +#define IA64_TR_PALCODE 1 /* itr1: maps PALcode as required by EFI */ +#define IA64_TR_CURRENT_STACK 1 /* dtr1: maps kernel's memory- & register-stacks */ + +#define IA64_TR_ALLOC_BASE 2 /* itr&dtr: Base of dynamic TR resource*/ +#define IA64_TR_ALLOC_MAX 64 /* Max number for dynamic use*/ + +/* Processor status register bits: */ +#define IA64_PSR_BE_BIT 1 +#define IA64_PSR_UP_BIT 2 +#define IA64_PSR_AC_BIT 3 +#define IA64_PSR_MFL_BIT 4 +#define IA64_PSR_MFH_BIT 5 +#define IA64_PSR_IC_BIT 13 +#define IA64_PSR_I_BIT 14 +#define IA64_PSR_PK_BIT 15 +#define IA64_PSR_DT_BIT 17 +#define IA64_PSR_DFL_BIT 18 +#define IA64_PSR_DFH_BIT 19 +#define IA64_PSR_SP_BIT 20 +#define IA64_PSR_PP_BIT 21 +#define IA64_PSR_DI_BIT 22 +#define IA64_PSR_SI_BIT 23 +#define IA64_PSR_DB_BIT 24 +#define IA64_PSR_LP_BIT 25 +#define IA64_PSR_TB_BIT 26 +#define IA64_PSR_RT_BIT 27 +/* The following are not affected by save_flags()/restore_flags(): */ +#define IA64_PSR_CPL0_BIT 32 +#define IA64_PSR_CPL1_BIT 33 +#define IA64_PSR_IS_BIT 34 +#define IA64_PSR_MC_BIT 35 +#define IA64_PSR_IT_BIT 36 +#define IA64_PSR_ID_BIT 37 +#define IA64_PSR_DA_BIT 38 +#define IA64_PSR_DD_BIT 39 +#define IA64_PSR_SS_BIT 40 +#define IA64_PSR_RI_BIT 
41 +#define IA64_PSR_ED_BIT 43 +#define IA64_PSR_BN_BIT 44 +#define IA64_PSR_IA_BIT 45 + +/* A mask of PSR bits that we generally don't want to inherit across a clone2() or an + execve(). Only list flags here that need to be cleared/set for BOTH clone2() and + execve(). */ +#define IA64_PSR_BITS_TO_CLEAR (IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_DB | IA64_PSR_LP | \ + IA64_PSR_TB | IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \ + IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA) +#define IA64_PSR_BITS_TO_SET (IA64_PSR_DFH | IA64_PSR_SP) + +#define IA64_PSR_BE (__IA64_UL(1) << IA64_PSR_BE_BIT) +#define IA64_PSR_UP (__IA64_UL(1) << IA64_PSR_UP_BIT) +#define IA64_PSR_AC (__IA64_UL(1) << IA64_PSR_AC_BIT) +#define IA64_PSR_MFL (__IA64_UL(1) << IA64_PSR_MFL_BIT) +#define IA64_PSR_MFH (__IA64_UL(1) << IA64_PSR_MFH_BIT) +#define IA64_PSR_IC (__IA64_UL(1) << IA64_PSR_IC_BIT) +#define IA64_PSR_I (__IA64_UL(1) << IA64_PSR_I_BIT) +#define IA64_PSR_PK (__IA64_UL(1) << IA64_PSR_PK_BIT) +#define IA64_PSR_DT (__IA64_UL(1) << IA64_PSR_DT_BIT) +#define IA64_PSR_DFL (__IA64_UL(1) << IA64_PSR_DFL_BIT) +#define IA64_PSR_DFH (__IA64_UL(1) << IA64_PSR_DFH_BIT) +#define IA64_PSR_SP (__IA64_UL(1) << IA64_PSR_SP_BIT) +#define IA64_PSR_PP (__IA64_UL(1) << IA64_PSR_PP_BIT) +#define IA64_PSR_DI (__IA64_UL(1) << IA64_PSR_DI_BIT) +#define IA64_PSR_SI (__IA64_UL(1) << IA64_PSR_SI_BIT) +#define IA64_PSR_DB (__IA64_UL(1) << IA64_PSR_DB_BIT) +#define IA64_PSR_LP (__IA64_UL(1) << IA64_PSR_LP_BIT) +#define IA64_PSR_TB (__IA64_UL(1) << IA64_PSR_TB_BIT) +#define IA64_PSR_RT (__IA64_UL(1) << IA64_PSR_RT_BIT) +/* The following are not affected by save_flags()/restore_flags(): */ +#define IA64_PSR_CPL (__IA64_UL(3) << IA64_PSR_CPL0_BIT) +#define IA64_PSR_IS (__IA64_UL(1) << IA64_PSR_IS_BIT) +#define IA64_PSR_MC (__IA64_UL(1) << IA64_PSR_MC_BIT) +#define IA64_PSR_IT (__IA64_UL(1) << IA64_PSR_IT_BIT) +#define IA64_PSR_ID (__IA64_UL(1) << IA64_PSR_ID_BIT) +#define IA64_PSR_DA (__IA64_UL(1) << IA64_PSR_DA_BIT) +#define 
IA64_PSR_DD (__IA64_UL(1) << IA64_PSR_DD_BIT) +#define IA64_PSR_SS (__IA64_UL(1) << IA64_PSR_SS_BIT) +#define IA64_PSR_RI (__IA64_UL(3) << IA64_PSR_RI_BIT) +#define IA64_PSR_ED (__IA64_UL(1) << IA64_PSR_ED_BIT) +#define IA64_PSR_BN (__IA64_UL(1) << IA64_PSR_BN_BIT) +#define IA64_PSR_IA (__IA64_UL(1) << IA64_PSR_IA_BIT) + +/* User mask bits: */ +#define IA64_PSR_UM (IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | IA64_PSR_MFH) + +/* Default Control Register */ +#define IA64_DCR_PP_BIT 0 /* privileged performance monitor default */ +#define IA64_DCR_BE_BIT 1 /* big-endian default */ +#define IA64_DCR_LC_BIT 2 /* ia32 lock-check enable */ +#define IA64_DCR_DM_BIT 8 /* defer TLB miss faults */ +#define IA64_DCR_DP_BIT 9 /* defer page-not-present faults */ +#define IA64_DCR_DK_BIT 10 /* defer key miss faults */ +#define IA64_DCR_DX_BIT 11 /* defer key permission faults */ +#define IA64_DCR_DR_BIT 12 /* defer access right faults */ +#define IA64_DCR_DA_BIT 13 /* defer access bit faults */ +#define IA64_DCR_DD_BIT 14 /* defer debug faults */ + +#define IA64_DCR_PP (__IA64_UL(1) << IA64_DCR_PP_BIT) +#define IA64_DCR_BE (__IA64_UL(1) << IA64_DCR_BE_BIT) +#define IA64_DCR_LC (__IA64_UL(1) << IA64_DCR_LC_BIT) +#define IA64_DCR_DM (__IA64_UL(1) << IA64_DCR_DM_BIT) +#define IA64_DCR_DP (__IA64_UL(1) << IA64_DCR_DP_BIT) +#define IA64_DCR_DK (__IA64_UL(1) << IA64_DCR_DK_BIT) +#define IA64_DCR_DX (__IA64_UL(1) << IA64_DCR_DX_BIT) +#define IA64_DCR_DR (__IA64_UL(1) << IA64_DCR_DR_BIT) +#define IA64_DCR_DA (__IA64_UL(1) << IA64_DCR_DA_BIT) +#define IA64_DCR_DD (__IA64_UL(1) << IA64_DCR_DD_BIT) + +/* Interrupt Status Register */ +#define IA64_ISR_X_BIT 32 /* execute access */ +#define IA64_ISR_W_BIT 33 /* write access */ +#define IA64_ISR_R_BIT 34 /* read access */ +#define IA64_ISR_NA_BIT 35 /* non-access */ +#define IA64_ISR_SP_BIT 36 /* speculative load exception */ +#define IA64_ISR_RS_BIT 37 /* mandatory register-stack exception */ +#define IA64_ISR_IR_BIT 38 /* invalid 
register frame exception */ +#define IA64_ISR_CODE_MASK 0xf + +#define IA64_ISR_X (__IA64_UL(1) << IA64_ISR_X_BIT) +#define IA64_ISR_W (__IA64_UL(1) << IA64_ISR_W_BIT) +#define IA64_ISR_R (__IA64_UL(1) << IA64_ISR_R_BIT) +#define IA64_ISR_NA (__IA64_UL(1) << IA64_ISR_NA_BIT) +#define IA64_ISR_SP (__IA64_UL(1) << IA64_ISR_SP_BIT) +#define IA64_ISR_RS (__IA64_UL(1) << IA64_ISR_RS_BIT) +#define IA64_ISR_IR (__IA64_UL(1) << IA64_ISR_IR_BIT) + +/* ISR code field for non-access instructions */ +#define IA64_ISR_CODE_TPA 0 +#define IA64_ISR_CODE_FC 1 +#define IA64_ISR_CODE_PROBE 2 +#define IA64_ISR_CODE_TAK 3 +#define IA64_ISR_CODE_LFETCH 4 +#define IA64_ISR_CODE_PROBEF 5 + +#endif /* _ASM_IA64_KREGS_H */ diff --git a/kernel/arch/ia64/include/asm/libata-portmap.h b/kernel/arch/ia64/include/asm/libata-portmap.h new file mode 100644 index 000000000..0e00c9a9f --- /dev/null +++ b/kernel/arch/ia64/include/asm/libata-portmap.h @@ -0,0 +1,12 @@ +#ifndef __ASM_IA64_LIBATA_PORTMAP_H +#define __ASM_IA64_LIBATA_PORTMAP_H + +#define ATA_PRIMARY_CMD 0x1F0 +#define ATA_PRIMARY_CTL 0x3F6 +#define ATA_PRIMARY_IRQ(dev) isa_irq_to_vector(14) + +#define ATA_SECONDARY_CMD 0x170 +#define ATA_SECONDARY_CTL 0x376 +#define ATA_SECONDARY_IRQ(dev) isa_irq_to_vector(15) + +#endif diff --git a/kernel/arch/ia64/include/asm/linkage.h b/kernel/arch/ia64/include/asm/linkage.h new file mode 100644 index 000000000..787575701 --- /dev/null +++ b/kernel/arch/ia64/include/asm/linkage.h @@ -0,0 +1,18 @@ +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +#ifndef __ASSEMBLY__ + +#define asmlinkage CPP_ASMLINKAGE __attribute__((syscall_linkage)) + +#else + +#include + +#endif + +#define cond_syscall(x) asm(".weak\t" #x "#\n" #x "#\t=\tsys_ni_syscall#") +#define SYSCALL_ALIAS(alias, name) \ + asm ( #alias "# = " #name "#\n\t.globl " #alias "#") + +#endif diff --git a/kernel/arch/ia64/include/asm/local.h b/kernel/arch/ia64/include/asm/local.h new file mode 100644 index 000000000..c11c530f7 --- /dev/null +++
b/kernel/arch/ia64/include/asm/local.h @@ -0,0 +1 @@ +#include diff --git a/kernel/arch/ia64/include/asm/local64.h b/kernel/arch/ia64/include/asm/local64.h new file mode 100644 index 000000000..36c93b5cc --- /dev/null +++ b/kernel/arch/ia64/include/asm/local64.h @@ -0,0 +1 @@ +#include diff --git a/kernel/arch/ia64/include/asm/machvec.h b/kernel/arch/ia64/include/asm/machvec.h new file mode 100644 index 000000000..9c39bdfc2 --- /dev/null +++ b/kernel/arch/ia64/include/asm/machvec.h @@ -0,0 +1,367 @@ +/* + * Machine vector for IA-64. + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Srinivasa Thirumalachar + * Copyright (C) Vijay Chander + * Copyright (C) 1999-2001, 2003-2004 Hewlett-Packard Co. + * David Mosberger-Tang + */ +#ifndef _ASM_IA64_MACHVEC_H +#define _ASM_IA64_MACHVEC_H + +#include + +/* forward declarations: */ +struct device; +struct pt_regs; +struct scatterlist; +struct page; +struct mm_struct; +struct pci_bus; +struct task_struct; +struct pci_dev; +struct msi_desc; +struct dma_attrs; + +typedef void ia64_mv_setup_t (char **); +typedef void ia64_mv_cpu_init_t (void); +typedef void ia64_mv_irq_init_t (void); +typedef void ia64_mv_send_ipi_t (int, int, int, int); +typedef void ia64_mv_timer_interrupt_t (int, void *); +typedef void ia64_mv_global_tlb_purge_t (struct mm_struct *, unsigned long, unsigned long, unsigned long); +typedef void ia64_mv_tlb_migrate_finish_t (struct mm_struct *); +typedef u8 ia64_mv_irq_to_vector (int); +typedef unsigned int ia64_mv_local_vector_to_irq (u8); +typedef char *ia64_mv_pci_get_legacy_mem_t (struct pci_bus *); +typedef int ia64_mv_pci_legacy_read_t (struct pci_bus *, u16 port, u32 *val, + u8 size); +typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val, + u8 size); +typedef void ia64_mv_migrate_t(struct task_struct * task); +typedef void ia64_mv_pci_fixup_bus_t (struct pci_bus *); +typedef void ia64_mv_kernel_launch_event_t(void); + +/* DMA-mapping interface: */ +typedef void 
ia64_mv_dma_init (void); +typedef u64 ia64_mv_dma_get_required_mask (struct device *); +typedef struct dma_map_ops *ia64_mv_dma_get_ops(struct device *); + +/* + * WARNING: The legacy I/O space is _architected_. Platforms are + * expected to follow this architected model (see Section 10.7 in the + * IA-64 Architecture Software Developer's Manual). Unfortunately, + * some broken machines do not follow that model, which is why we have + * to make the inX/outX operations part of the machine vector. + * Platform designers should follow the architected model whenever + * possible. + */ +typedef unsigned int ia64_mv_inb_t (unsigned long); +typedef unsigned int ia64_mv_inw_t (unsigned long); +typedef unsigned int ia64_mv_inl_t (unsigned long); +typedef void ia64_mv_outb_t (unsigned char, unsigned long); +typedef void ia64_mv_outw_t (unsigned short, unsigned long); +typedef void ia64_mv_outl_t (unsigned int, unsigned long); +typedef void ia64_mv_mmiowb_t (void); +typedef unsigned char ia64_mv_readb_t (const volatile void __iomem *); +typedef unsigned short ia64_mv_readw_t (const volatile void __iomem *); +typedef unsigned int ia64_mv_readl_t (const volatile void __iomem *); +typedef unsigned long ia64_mv_readq_t (const volatile void __iomem *); +typedef unsigned char ia64_mv_readb_relaxed_t (const volatile void __iomem *); +typedef unsigned short ia64_mv_readw_relaxed_t (const volatile void __iomem *); +typedef unsigned int ia64_mv_readl_relaxed_t (const volatile void __iomem *); +typedef unsigned long ia64_mv_readq_relaxed_t (const volatile void __iomem *); + +typedef int ia64_mv_setup_msi_irq_t (struct pci_dev *pdev, struct msi_desc *); +typedef void ia64_mv_teardown_msi_irq_t (unsigned int irq); + +static inline void +machvec_noop (void) +{ +} + +static inline void +machvec_noop_mm (struct mm_struct *mm) +{ +} + +static inline void +machvec_noop_task (struct task_struct *task) +{ +} + +static inline void +machvec_noop_bus (struct pci_bus *bus) +{ +} + +extern void 
machvec_setup (char **); +extern void machvec_timer_interrupt (int, void *); +extern void machvec_tlb_migrate_finish (struct mm_struct *); + +# if defined (CONFIG_IA64_HP_SIM) +# include +# elif defined (CONFIG_IA64_DIG) +# include +# elif defined(CONFIG_IA64_DIG_VTD) +# include +# elif defined (CONFIG_IA64_HP_ZX1) +# include +# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB) +# include +# elif defined (CONFIG_IA64_SGI_SN2) +# include +# elif defined (CONFIG_IA64_SGI_UV) +# include +# elif defined (CONFIG_IA64_GENERIC) + +# ifdef MACHVEC_PLATFORM_HEADER +# include MACHVEC_PLATFORM_HEADER +# else +# define ia64_platform_name ia64_mv.name +# define platform_setup ia64_mv.setup +# define platform_cpu_init ia64_mv.cpu_init +# define platform_irq_init ia64_mv.irq_init +# define platform_send_ipi ia64_mv.send_ipi +# define platform_timer_interrupt ia64_mv.timer_interrupt +# define platform_global_tlb_purge ia64_mv.global_tlb_purge +# define platform_tlb_migrate_finish ia64_mv.tlb_migrate_finish +# define platform_dma_init ia64_mv.dma_init +# define platform_dma_get_required_mask ia64_mv.dma_get_required_mask +# define platform_dma_get_ops ia64_mv.dma_get_ops +# define platform_irq_to_vector ia64_mv.irq_to_vector +# define platform_local_vector_to_irq ia64_mv.local_vector_to_irq +# define platform_pci_get_legacy_mem ia64_mv.pci_get_legacy_mem +# define platform_pci_legacy_read ia64_mv.pci_legacy_read +# define platform_pci_legacy_write ia64_mv.pci_legacy_write +# define platform_inb ia64_mv.inb +# define platform_inw ia64_mv.inw +# define platform_inl ia64_mv.inl +# define platform_outb ia64_mv.outb +# define platform_outw ia64_mv.outw +# define platform_outl ia64_mv.outl +# define platform_mmiowb ia64_mv.mmiowb +# define platform_readb ia64_mv.readb +# define platform_readw ia64_mv.readw +# define platform_readl ia64_mv.readl +# define platform_readq ia64_mv.readq +# define platform_readb_relaxed ia64_mv.readb_relaxed +# define platform_readw_relaxed ia64_mv.readw_relaxed +# 
define platform_readl_relaxed ia64_mv.readl_relaxed +# define platform_readq_relaxed ia64_mv.readq_relaxed +# define platform_migrate ia64_mv.migrate +# define platform_setup_msi_irq ia64_mv.setup_msi_irq +# define platform_teardown_msi_irq ia64_mv.teardown_msi_irq +# define platform_pci_fixup_bus ia64_mv.pci_fixup_bus +# define platform_kernel_launch_event ia64_mv.kernel_launch_event +# endif + +/* __attribute__((__aligned__(16))) is required to make the size of the + * structure a multiple of 16 bytes. + * This will fill up the holes created because of section 3.3.1 in the + * Software Conventions guide. + */ +struct ia64_machine_vector { + const char *name; + ia64_mv_setup_t *setup; + ia64_mv_cpu_init_t *cpu_init; + ia64_mv_irq_init_t *irq_init; + ia64_mv_send_ipi_t *send_ipi; + ia64_mv_timer_interrupt_t *timer_interrupt; + ia64_mv_global_tlb_purge_t *global_tlb_purge; + ia64_mv_tlb_migrate_finish_t *tlb_migrate_finish; + ia64_mv_dma_init *dma_init; + ia64_mv_dma_get_required_mask *dma_get_required_mask; + ia64_mv_dma_get_ops *dma_get_ops; + ia64_mv_irq_to_vector *irq_to_vector; + ia64_mv_local_vector_to_irq *local_vector_to_irq; + ia64_mv_pci_get_legacy_mem_t *pci_get_legacy_mem; + ia64_mv_pci_legacy_read_t *pci_legacy_read; + ia64_mv_pci_legacy_write_t *pci_legacy_write; + ia64_mv_inb_t *inb; + ia64_mv_inw_t *inw; + ia64_mv_inl_t *inl; + ia64_mv_outb_t *outb; + ia64_mv_outw_t *outw; + ia64_mv_outl_t *outl; + ia64_mv_mmiowb_t *mmiowb; + ia64_mv_readb_t *readb; + ia64_mv_readw_t *readw; + ia64_mv_readl_t *readl; + ia64_mv_readq_t *readq; + ia64_mv_readb_relaxed_t *readb_relaxed; + ia64_mv_readw_relaxed_t *readw_relaxed; + ia64_mv_readl_relaxed_t *readl_relaxed; + ia64_mv_readq_relaxed_t *readq_relaxed; + ia64_mv_migrate_t *migrate; + ia64_mv_setup_msi_irq_t *setup_msi_irq; + ia64_mv_teardown_msi_irq_t *teardown_msi_irq; + ia64_mv_pci_fixup_bus_t *pci_fixup_bus; + ia64_mv_kernel_launch_event_t *kernel_launch_event; +} __attribute__((__aligned__(16))); /* align attrib?
see above comment */ + +#define MACHVEC_INIT(name) \ +{ \ + #name, \ + platform_setup, \ + platform_cpu_init, \ + platform_irq_init, \ + platform_send_ipi, \ + platform_timer_interrupt, \ + platform_global_tlb_purge, \ + platform_tlb_migrate_finish, \ + platform_dma_init, \ + platform_dma_get_required_mask, \ + platform_dma_get_ops, \ + platform_irq_to_vector, \ + platform_local_vector_to_irq, \ + platform_pci_get_legacy_mem, \ + platform_pci_legacy_read, \ + platform_pci_legacy_write, \ + platform_inb, \ + platform_inw, \ + platform_inl, \ + platform_outb, \ + platform_outw, \ + platform_outl, \ + platform_mmiowb, \ + platform_readb, \ + platform_readw, \ + platform_readl, \ + platform_readq, \ + platform_readb_relaxed, \ + platform_readw_relaxed, \ + platform_readl_relaxed, \ + platform_readq_relaxed, \ + platform_migrate, \ + platform_setup_msi_irq, \ + platform_teardown_msi_irq, \ + platform_pci_fixup_bus, \ + platform_kernel_launch_event \ +} + +extern struct ia64_machine_vector ia64_mv; +extern void machvec_init (const char *name); +extern void machvec_init_from_cmdline(const char *cmdline); + +# else +# error Unknown configuration. Update arch/ia64/include/asm/machvec.h. +# endif /* CONFIG_IA64_GENERIC */ + +extern void swiotlb_dma_init(void); +extern struct dma_map_ops *dma_get_ops(struct device *); + +/* + * Define default versions so we can extend machvec for new platforms without having + * to update the machvec files for all existing platforms. 
+ */ +#ifndef platform_setup +# define platform_setup machvec_setup +#endif +#ifndef platform_cpu_init +# define platform_cpu_init machvec_noop +#endif +#ifndef platform_irq_init +# define platform_irq_init machvec_noop +#endif + +#ifndef platform_send_ipi +# define platform_send_ipi ia64_send_ipi /* default to architected version */ +#endif +#ifndef platform_timer_interrupt +# define platform_timer_interrupt machvec_timer_interrupt +#endif +#ifndef platform_global_tlb_purge +# define platform_global_tlb_purge ia64_global_tlb_purge /* default to architected version */ +#endif +#ifndef platform_tlb_migrate_finish +# define platform_tlb_migrate_finish machvec_noop_mm +#endif +#ifndef platform_kernel_launch_event +# define platform_kernel_launch_event machvec_noop +#endif +#ifndef platform_dma_init +# define platform_dma_init swiotlb_dma_init +#endif +#ifndef platform_dma_get_ops +# define platform_dma_get_ops dma_get_ops +#endif +#ifndef platform_dma_get_required_mask +# define platform_dma_get_required_mask ia64_dma_get_required_mask +#endif +#ifndef platform_irq_to_vector +# define platform_irq_to_vector __ia64_irq_to_vector +#endif +#ifndef platform_local_vector_to_irq +# define platform_local_vector_to_irq __ia64_local_vector_to_irq +#endif +#ifndef platform_pci_get_legacy_mem +# define platform_pci_get_legacy_mem ia64_pci_get_legacy_mem +#endif +#ifndef platform_pci_legacy_read +# define platform_pci_legacy_read ia64_pci_legacy_read +extern int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size); +#endif +#ifndef platform_pci_legacy_write +# define platform_pci_legacy_write ia64_pci_legacy_write +extern int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size); +#endif +#ifndef platform_inb +# define platform_inb __ia64_inb +#endif +#ifndef platform_inw +# define platform_inw __ia64_inw +#endif +#ifndef platform_inl +# define platform_inl __ia64_inl +#endif +#ifndef platform_outb +# define platform_outb __ia64_outb +#endif 
+#ifndef platform_outw +# define platform_outw __ia64_outw +#endif +#ifndef platform_outl +# define platform_outl __ia64_outl +#endif +#ifndef platform_mmiowb +# define platform_mmiowb __ia64_mmiowb +#endif +#ifndef platform_readb +# define platform_readb __ia64_readb +#endif +#ifndef platform_readw +# define platform_readw __ia64_readw +#endif +#ifndef platform_readl +# define platform_readl __ia64_readl +#endif +#ifndef platform_readq +# define platform_readq __ia64_readq +#endif +#ifndef platform_readb_relaxed +# define platform_readb_relaxed __ia64_readb_relaxed +#endif +#ifndef platform_readw_relaxed +# define platform_readw_relaxed __ia64_readw_relaxed +#endif +#ifndef platform_readl_relaxed +# define platform_readl_relaxed __ia64_readl_relaxed +#endif +#ifndef platform_readq_relaxed +# define platform_readq_relaxed __ia64_readq_relaxed +#endif +#ifndef platform_migrate +# define platform_migrate machvec_noop_task +#endif +#ifndef platform_setup_msi_irq +# define platform_setup_msi_irq ((ia64_mv_setup_msi_irq_t*)NULL) +#endif +#ifndef platform_teardown_msi_irq +# define platform_teardown_msi_irq ((ia64_mv_teardown_msi_irq_t*)NULL) +#endif +#ifndef platform_pci_fixup_bus +# define platform_pci_fixup_bus machvec_noop_bus +#endif + +#endif /* _ASM_IA64_MACHVEC_H */ diff --git a/kernel/arch/ia64/include/asm/machvec_dig.h b/kernel/arch/ia64/include/asm/machvec_dig.h new file mode 100644 index 000000000..1f7403a2f --- /dev/null +++ b/kernel/arch/ia64/include/asm/machvec_dig.h @@ -0,0 +1,16 @@ +#ifndef _ASM_IA64_MACHVEC_DIG_h +#define _ASM_IA64_MACHVEC_DIG_h + +extern ia64_mv_setup_t dig_setup; + +/* + * This stuff has dual use! + * + * For a generic kernel, the macros are used to initialize the + * platform's machvec structure. When compiling a non-generic kernel, + * the macros are used directly. 
+ */ +#define ia64_platform_name "dig" +#define platform_setup dig_setup + +#endif /* _ASM_IA64_MACHVEC_DIG_h */ diff --git a/kernel/arch/ia64/include/asm/machvec_dig_vtd.h b/kernel/arch/ia64/include/asm/machvec_dig_vtd.h new file mode 100644 index 000000000..44308b4c3 --- /dev/null +++ b/kernel/arch/ia64/include/asm/machvec_dig_vtd.h @@ -0,0 +1,18 @@ +#ifndef _ASM_IA64_MACHVEC_DIG_VTD_h +#define _ASM_IA64_MACHVEC_DIG_VTD_h + +extern ia64_mv_setup_t dig_setup; +extern ia64_mv_dma_init pci_iommu_alloc; + +/* + * This stuff has dual use! + * + * For a generic kernel, the macros are used to initialize the + * platform's machvec structure. When compiling a non-generic kernel, + * the macros are used directly. + */ +#define ia64_platform_name "dig_vtd" +#define platform_setup dig_setup +#define platform_dma_init pci_iommu_alloc + +#endif /* _ASM_IA64_MACHVEC_DIG_VTD_h */ diff --git a/kernel/arch/ia64/include/asm/machvec_hpsim.h b/kernel/arch/ia64/include/asm/machvec_hpsim.h new file mode 100644 index 000000000..e75711279 --- /dev/null +++ b/kernel/arch/ia64/include/asm/machvec_hpsim.h @@ -0,0 +1,18 @@ +#ifndef _ASM_IA64_MACHVEC_HPSIM_h +#define _ASM_IA64_MACHVEC_HPSIM_h + +extern ia64_mv_setup_t hpsim_setup; +extern ia64_mv_irq_init_t hpsim_irq_init; + +/* + * This stuff has dual use! + * + * For a generic kernel, the macros are used to initialize the + * platform's machvec structure. When compiling a non-generic kernel, + * the macros are used directly. 
+ */ +#define ia64_platform_name "hpsim" +#define platform_setup hpsim_setup +#define platform_irq_init hpsim_irq_init + +#endif /* _ASM_IA64_MACHVEC_HPSIM_h */ diff --git a/kernel/arch/ia64/include/asm/machvec_hpzx1.h b/kernel/arch/ia64/include/asm/machvec_hpzx1.h new file mode 100644 index 000000000..c74d3159e --- /dev/null +++ b/kernel/arch/ia64/include/asm/machvec_hpzx1.h @@ -0,0 +1,18 @@ +#ifndef _ASM_IA64_MACHVEC_HPZX1_h +#define _ASM_IA64_MACHVEC_HPZX1_h + +extern ia64_mv_setup_t dig_setup; +extern ia64_mv_dma_init sba_dma_init; + +/* + * This stuff has dual use! + * + * For a generic kernel, the macros are used to initialize the + * platform's machvec structure. When compiling a non-generic kernel, + * the macros are used directly. + */ +#define ia64_platform_name "hpzx1" +#define platform_setup dig_setup +#define platform_dma_init sba_dma_init + +#endif /* _ASM_IA64_MACHVEC_HPZX1_h */ diff --git a/kernel/arch/ia64/include/asm/machvec_hpzx1_swiotlb.h b/kernel/arch/ia64/include/asm/machvec_hpzx1_swiotlb.h new file mode 100644 index 000000000..906ef6210 --- /dev/null +++ b/kernel/arch/ia64/include/asm/machvec_hpzx1_swiotlb.h @@ -0,0 +1,19 @@ +#ifndef _ASM_IA64_MACHVEC_HPZX1_SWIOTLB_h +#define _ASM_IA64_MACHVEC_HPZX1_SWIOTLB_h + +extern ia64_mv_setup_t dig_setup; +extern ia64_mv_dma_get_ops hwsw_dma_get_ops; + +/* + * This stuff has dual use! + * + * For a generic kernel, the macros are used to initialize the + * platform's machvec structure. When compiling a non-generic kernel, + * the macros are used directly. 
+ */ +#define ia64_platform_name "hpzx1_swiotlb" +#define platform_setup dig_setup +#define platform_dma_init machvec_noop +#define platform_dma_get_ops hwsw_dma_get_ops + +#endif /* _ASM_IA64_MACHVEC_HPZX1_SWIOTLB_h */ diff --git a/kernel/arch/ia64/include/asm/machvec_init.h b/kernel/arch/ia64/include/asm/machvec_init.h new file mode 100644 index 000000000..37a469849 --- /dev/null +++ b/kernel/arch/ia64/include/asm/machvec_init.h @@ -0,0 +1,35 @@ +#include +#include + +extern ia64_mv_send_ipi_t ia64_send_ipi; +extern ia64_mv_global_tlb_purge_t ia64_global_tlb_purge; +extern ia64_mv_dma_get_required_mask ia64_dma_get_required_mask; +extern ia64_mv_irq_to_vector __ia64_irq_to_vector; +extern ia64_mv_local_vector_to_irq __ia64_local_vector_to_irq; +extern ia64_mv_pci_get_legacy_mem_t ia64_pci_get_legacy_mem; +extern ia64_mv_pci_legacy_read_t ia64_pci_legacy_read; +extern ia64_mv_pci_legacy_write_t ia64_pci_legacy_write; + +extern ia64_mv_inb_t __ia64_inb; +extern ia64_mv_inw_t __ia64_inw; +extern ia64_mv_inl_t __ia64_inl; +extern ia64_mv_outb_t __ia64_outb; +extern ia64_mv_outw_t __ia64_outw; +extern ia64_mv_outl_t __ia64_outl; +extern ia64_mv_mmiowb_t __ia64_mmiowb; +extern ia64_mv_readb_t __ia64_readb; +extern ia64_mv_readw_t __ia64_readw; +extern ia64_mv_readl_t __ia64_readl; +extern ia64_mv_readq_t __ia64_readq; +extern ia64_mv_readb_t __ia64_readb_relaxed; +extern ia64_mv_readw_t __ia64_readw_relaxed; +extern ia64_mv_readl_t __ia64_readl_relaxed; +extern ia64_mv_readq_t __ia64_readq_relaxed; + +#define MACHVEC_HELPER(name) \ + struct ia64_machine_vector machvec_##name __attribute__ ((unused, __section__ (".machvec"))) \ + = MACHVEC_INIT(name); + +#define MACHVEC_DEFINE(name) MACHVEC_HELPER(name) + +MACHVEC_DEFINE(MACHVEC_PLATFORM_NAME) diff --git a/kernel/arch/ia64/include/asm/machvec_sn2.h b/kernel/arch/ia64/include/asm/machvec_sn2.h new file mode 100644 index 000000000..ece9fa85b --- /dev/null +++ b/kernel/arch/ia64/include/asm/machvec_sn2.h @@ -0,0 +1,118 @@ 
+/* + * Copyright (c) 2002-2003,2006 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/NoticeExplan + */ + +#ifndef _ASM_IA64_MACHVEC_SN2_H +#define _ASM_IA64_MACHVEC_SN2_H + +extern ia64_mv_setup_t sn_setup; +extern ia64_mv_cpu_init_t sn_cpu_init; +extern ia64_mv_irq_init_t sn_irq_init; +extern ia64_mv_send_ipi_t sn2_send_IPI; +extern ia64_mv_timer_interrupt_t sn_timer_interrupt; +extern ia64_mv_global_tlb_purge_t sn2_global_tlb_purge; +extern ia64_mv_tlb_migrate_finish_t sn_tlb_migrate_finish; +extern ia64_mv_irq_to_vector sn_irq_to_vector; +extern ia64_mv_local_vector_to_irq sn_local_vector_to_irq; +extern ia64_mv_pci_get_legacy_mem_t sn_pci_get_legacy_mem; +extern ia64_mv_pci_legacy_read_t sn_pci_legacy_read; +extern ia64_mv_pci_legacy_write_t sn_pci_legacy_write; +extern ia64_mv_inb_t __sn_inb; +extern ia64_mv_inw_t __sn_inw; +extern ia64_mv_inl_t __sn_inl; +extern ia64_mv_outb_t __sn_outb; +extern ia64_mv_outw_t __sn_outw; +extern ia64_mv_outl_t __sn_outl; +extern ia64_mv_mmiowb_t __sn_mmiowb; +extern ia64_mv_readb_t __sn_readb; +extern ia64_mv_readw_t __sn_readw; +extern ia64_mv_readl_t __sn_readl; +extern ia64_mv_readq_t __sn_readq; +extern ia64_mv_readb_t __sn_readb_relaxed; +extern ia64_mv_readw_t __sn_readw_relaxed; +extern ia64_mv_readl_t __sn_readl_relaxed; +extern ia64_mv_readq_t __sn_readq_relaxed; +extern ia64_mv_dma_get_required_mask sn_dma_get_required_mask; +extern ia64_mv_dma_init sn_dma_init; +extern ia64_mv_migrate_t sn_migrate; +extern ia64_mv_kernel_launch_event_t sn_kernel_launch_event; +extern ia64_mv_setup_msi_irq_t sn_setup_msi_irq; +extern ia64_mv_teardown_msi_irq_t sn_teardown_msi_irq; +extern ia64_mv_pci_fixup_bus_t sn_pci_fixup_bus; + + +/* + * This stuff has dual use! + * + * For a generic kernel, the macros are used to initialize the + * platform's machvec structure. When compiling a non-generic kernel, + * the macros are used directly. 
+ */ +#define ia64_platform_name "sn2" +#define platform_setup sn_setup +#define platform_cpu_init sn_cpu_init +#define platform_irq_init sn_irq_init +#define platform_send_ipi sn2_send_IPI +#define platform_timer_interrupt sn_timer_interrupt +#define platform_global_tlb_purge sn2_global_tlb_purge +#define platform_tlb_migrate_finish sn_tlb_migrate_finish +#define platform_pci_fixup sn_pci_fixup +#define platform_inb __sn_inb +#define platform_inw __sn_inw +#define platform_inl __sn_inl +#define platform_outb __sn_outb +#define platform_outw __sn_outw +#define platform_outl __sn_outl +#define platform_mmiowb __sn_mmiowb +#define platform_readb __sn_readb +#define platform_readw __sn_readw +#define platform_readl __sn_readl +#define platform_readq __sn_readq +#define platform_readb_relaxed __sn_readb_relaxed +#define platform_readw_relaxed __sn_readw_relaxed +#define platform_readl_relaxed __sn_readl_relaxed +#define platform_readq_relaxed __sn_readq_relaxed +#define platform_irq_to_vector sn_irq_to_vector +#define platform_local_vector_to_irq sn_local_vector_to_irq +#define platform_pci_get_legacy_mem sn_pci_get_legacy_mem +#define platform_pci_legacy_read sn_pci_legacy_read +#define platform_pci_legacy_write sn_pci_legacy_write +#define platform_dma_get_required_mask sn_dma_get_required_mask +#define platform_dma_init sn_dma_init +#define platform_migrate sn_migrate +#define platform_kernel_launch_event sn_kernel_launch_event +#ifdef CONFIG_PCI_MSI +#define platform_setup_msi_irq sn_setup_msi_irq +#define platform_teardown_msi_irq sn_teardown_msi_irq +#else +#define platform_setup_msi_irq ((ia64_mv_setup_msi_irq_t*)NULL) +#define platform_teardown_msi_irq ((ia64_mv_teardown_msi_irq_t*)NULL) +#endif +#define platform_pci_fixup_bus sn_pci_fixup_bus + +#include + +#endif /* _ASM_IA64_MACHVEC_SN2_H */ diff --git a/kernel/arch/ia64/include/asm/machvec_uv.h b/kernel/arch/ia64/include/asm/machvec_uv.h new file mode 100644 index 000000000..2c50853f3 --- /dev/null +++ 
b/kernel/arch/ia64/include/asm/machvec_uv.h @@ -0,0 +1,26 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV Core Functions + * + * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_IA64_MACHVEC_UV_H +#define _ASM_IA64_MACHVEC_UV_H + +extern ia64_mv_setup_t uv_setup; + +/* + * This stuff has dual use! + * + * For a generic kernel, the macros are used to initialize the + * platform's machvec structure. When compiling a non-generic kernel, + * the macros are used directly. + */ +#define ia64_platform_name "uv" +#define platform_setup uv_setup + +#endif /* _ASM_IA64_MACHVEC_UV_H */ diff --git a/kernel/arch/ia64/include/asm/mc146818rtc.h b/kernel/arch/ia64/include/asm/mc146818rtc.h new file mode 100644 index 000000000..407787a23 --- /dev/null +++ b/kernel/arch/ia64/include/asm/mc146818rtc.h @@ -0,0 +1,10 @@ +#ifndef _ASM_IA64_MC146818RTC_H +#define _ASM_IA64_MC146818RTC_H + +/* + * Machine dependent access functions for RTC registers. + */ + +/* empty include file to satisfy the include in genrtc.c */ + +#endif /* _ASM_IA64_MC146818RTC_H */ diff --git a/kernel/arch/ia64/include/asm/mca.h b/kernel/arch/ia64/include/asm/mca.h new file mode 100644 index 000000000..8c7096168 --- /dev/null +++ b/kernel/arch/ia64/include/asm/mca.h @@ -0,0 +1,187 @@ +/* + * File: mca.h + * Purpose: Machine check handling specific defines + * + * Copyright (C) 1999, 2004 Silicon Graphics, Inc. 
+ * Copyright (C) Vijay Chander + * Copyright (C) Srinivasa Thirumalachar + * Copyright (C) Russ Anderson + */ + +#ifndef _ASM_IA64_MCA_H +#define _ASM_IA64_MCA_H + +#if !defined(__ASSEMBLY__) + +#include +#include + +#include +#include +#include +#include + +#define IA64_MCA_RENDEZ_TIMEOUT (20 * 1000) /* value in milliseconds - 20 seconds */ + +typedef struct ia64_fptr { + unsigned long fp; + unsigned long gp; +} ia64_fptr_t; + +typedef union cmcv_reg_u { + u64 cmcv_regval; + struct { + u64 cmcr_vector : 8; + u64 cmcr_reserved1 : 4; + u64 cmcr_ignored1 : 1; + u64 cmcr_reserved2 : 3; + u64 cmcr_mask : 1; + u64 cmcr_ignored2 : 47; + } cmcv_reg_s; + +} cmcv_reg_t; + +#define cmcv_mask cmcv_reg_s.cmcr_mask +#define cmcv_vector cmcv_reg_s.cmcr_vector + +enum { + IA64_MCA_RENDEZ_CHECKIN_NOTDONE = 0x0, + IA64_MCA_RENDEZ_CHECKIN_DONE = 0x1, + IA64_MCA_RENDEZ_CHECKIN_INIT = 0x2, + IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA = 0x3, +}; + +/* Information maintained by the MC infrastructure */ +typedef struct ia64_mc_info_s { + u64 imi_mca_handler; + size_t imi_mca_handler_size; + u64 imi_monarch_init_handler; + size_t imi_monarch_init_handler_size; + u64 imi_slave_init_handler; + size_t imi_slave_init_handler_size; + u8 imi_rendez_checkin[NR_CPUS]; + +} ia64_mc_info_t; + +/* Handover state from SAL to OS and vice versa, for both MCA and INIT events. + * Besides the handover state, it also contains some saved registers from the + * time of the event. + * Note: mca_asm.S depends on the precise layout of this structure. 
+ */ + +struct ia64_sal_os_state { + + /* SAL to OS */ + unsigned long os_gp; /* GP of the os registered with the SAL, physical */ + unsigned long pal_proc; /* PAL_PROC entry point, physical */ + unsigned long sal_proc; /* SAL_PROC entry point, physical */ + unsigned long rv_rc; /* MCA - Rendezvous state, INIT - reason code */ + unsigned long proc_state_param; /* from R18 */ + unsigned long monarch; /* 1 for a monarch event, 0 for a slave */ + + /* common */ + unsigned long sal_ra; /* Return address in SAL, physical */ + unsigned long sal_gp; /* GP of the SAL - physical */ + pal_min_state_area_t *pal_min_state; /* from R17. physical in asm, virtual in C */ + /* Previous values of IA64_KR(CURRENT) and IA64_KR(CURRENT_STACK). + * Note: if the MCA/INIT recovery code wants to resume to a new context + * then it must change these values to reflect the new kernel stack. + */ + unsigned long prev_IA64_KR_CURRENT; /* previous value of IA64_KR(CURRENT) */ + unsigned long prev_IA64_KR_CURRENT_STACK; + struct task_struct *prev_task; /* previous task, NULL if it is not useful */ + /* Some interrupt registers are not saved in minstate, pt_regs or + * switch_stack. Because MCA/INIT can occur when interrupts are + * disabled, we need to save the additional interrupt registers over + * MCA/INIT and resume. 
+ */ + unsigned long isr; + unsigned long ifa; + unsigned long itir; + unsigned long iipa; + unsigned long iim; + unsigned long iha; + + /* OS to SAL */ + unsigned long os_status; /* OS status to SAL, enum below */ + unsigned long context; /* 0 if return to same context + 1 if return to new context */ + + /* I-resources */ + unsigned long iip; + unsigned long ipsr; + unsigned long ifs; +}; + +enum { + IA64_MCA_CORRECTED = 0x0, /* Error has been corrected by OS_MCA */ + IA64_MCA_WARM_BOOT = -1, /* Warm boot of the system need from SAL */ + IA64_MCA_COLD_BOOT = -2, /* Cold boot of the system need from SAL */ + IA64_MCA_HALT = -3 /* System to be halted by SAL */ +}; + +enum { + IA64_INIT_RESUME = 0x0, /* Resume after return from INIT */ + IA64_INIT_WARM_BOOT = -1, /* Warm boot of the system need from SAL */ +}; + +enum { + IA64_MCA_SAME_CONTEXT = 0x0, /* SAL to return to same context */ + IA64_MCA_NEW_CONTEXT = -1 /* SAL to return to new context */ +}; + +/* Per-CPU MCA state that is too big for normal per-CPU variables. */ + +struct ia64_mca_cpu { + u64 mca_stack[KERNEL_STACK_SIZE/8]; + u64 init_stack[KERNEL_STACK_SIZE/8]; +}; + +/* Array of physical addresses of each CPU's MCA area. 
*/ +extern unsigned long __per_cpu_mca[NR_CPUS]; + +extern int cpe_vector; +extern int ia64_cpe_irq; +extern void ia64_mca_init(void); +extern void ia64_mca_irq_init(void); +extern void ia64_mca_cpu_init(void *); +extern void ia64_os_mca_dispatch(void); +extern void ia64_os_mca_dispatch_end(void); +extern void ia64_mca_ucmc_handler(struct pt_regs *, struct ia64_sal_os_state *); +extern void ia64_init_handler(struct pt_regs *, + struct switch_stack *, + struct ia64_sal_os_state *); +extern void ia64_os_init_on_kdump(void); +extern void ia64_monarch_init_handler(void); +extern void ia64_slave_init_handler(void); +extern void ia64_mca_cmc_vector_setup(void); +extern int ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *)); +extern void ia64_unreg_MCA_extension(void); +extern unsigned long ia64_get_rnat(unsigned long *); +extern void ia64_set_psr_mc(void); +extern void ia64_mca_printk(const char * fmt, ...) + __attribute__ ((format (printf, 1, 2))); + +struct ia64_mca_notify_die { + struct ia64_sal_os_state *sos; + int *monarch_cpu; + int *data; +}; + +DECLARE_PER_CPU(u64, ia64_mca_pal_base); + +#else /* __ASSEMBLY__ */ + +#define IA64_MCA_CORRECTED 0x0 /* Error has been corrected by OS_MCA */ +#define IA64_MCA_WARM_BOOT -1 /* Warm boot of the system need from SAL */ +#define IA64_MCA_COLD_BOOT -2 /* Cold boot of the system need from SAL */ +#define IA64_MCA_HALT -3 /* System to be halted by SAL */ + +#define IA64_INIT_RESUME 0x0 /* Resume after return from INIT */ +#define IA64_INIT_WARM_BOOT -1 /* Warm boot of the system need from SAL */ + +#define IA64_MCA_SAME_CONTEXT 0x0 /* SAL to return to same context */ +#define IA64_MCA_NEW_CONTEXT -1 /* SAL to return to new context */ + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_IA64_MCA_H */ diff --git a/kernel/arch/ia64/include/asm/mca_asm.h b/kernel/arch/ia64/include/asm/mca_asm.h new file mode 100644 index 000000000..13c1d4994 --- /dev/null +++ b/kernel/arch/ia64/include/asm/mca_asm.h @@ -0,0 +1,244 @@ 
+/* + * File: mca_asm.h + * Purpose: Machine check handling specific defines + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Vijay Chander + * Copyright (C) Srinivasa Thirumalachar + * Copyright (C) 2000 Hewlett-Packard Co. + * Copyright (C) 2000 David Mosberger-Tang + * Copyright (C) 2002 Intel Corp. + * Copyright (C) 2002 Jenna Hall + * Copyright (C) 2005 Silicon Graphics, Inc + * Copyright (C) 2005 Keith Owens + */ +#ifndef _ASM_IA64_MCA_ASM_H +#define _ASM_IA64_MCA_ASM_H + +#include + +#define PSR_IC 13 +#define PSR_I 14 +#define PSR_DT 17 +#define PSR_RT 27 +#define PSR_MC 35 +#define PSR_IT 36 +#define PSR_BN 44 + +/* + * This macro converts an instruction virtual address to a physical address + * Right now for simulation purposes the virtual addresses are + * direct mapped to physical addresses. + * 1. Lop off bits 61 thru 63 in the virtual address + */ +#define INST_VA_TO_PA(addr) \ + dep addr = 0, addr, 61, 3 +/* + * This macro converts a data virtual address to a physical address + * Right now for simulation purposes the virtual addresses are + * direct mapped to physical addresses. + * 1. Lop off bits 61 thru 63 in the virtual address (NOTE(review): the macro body below issues tpa, not a bit deposit, so this step looks stale; confirm) + */ +#define DATA_VA_TO_PA(addr) \ + tpa addr = addr +/* + * This macro converts a data physical address to a virtual address + * Right now for simulation purposes the virtual addresses are + * direct mapped to physical addresses. + * 1. Put 0x7 in bits 61 thru 63. + */ +#define DATA_PA_TO_VA(addr,temp) \ + mov temp = 0x7 ;; \ + dep addr = temp, addr, 61, 3 + +#define GET_THIS_PADDR(reg, var) \ + mov reg = IA64_KR(PER_CPU_DATA);; \ + addl reg = THIS_CPU(var), reg + +/* + * This macro jumps to the instruction at the given virtual address + * and starts execution in physical mode with all the address + * translations turned off. + * 1. Save the current psr + * 2. Make sure that all the upper 32 bits are off + * + * 3.
Clear the interrupt enable and interrupt state collection bits + * in the psr before updating the ipsr and iip. + * + * 4. Turn off the instruction, data and rse translation bits of the psr + * and store the new value into ipsr + * Also make sure that the interrupts are disabled. + * Ensure that we are in little endian mode. + * [psr.{rt, it, dt, i, be} = 0] + * + * 5. Get the physical address corresponding to the virtual address + * of the next instruction bundle and put it in iip. + * (Using magic numbers 24 and 40 in the deposit instruction since + * the IA64_SDK code directly maps to lower 24bits as physical address + * from a virtual address). + * + * 6. Do an rfi to move the values from ipsr to psr and iip to ip. + */ +#define PHYSICAL_MODE_ENTER(temp1, temp2, start_addr, old_psr) \ + mov old_psr = psr; \ + ;; \ + dep old_psr = 0, old_psr, 32, 32; \ + \ + mov ar.rsc = 0 ; \ + ;; \ + srlz.d; \ + mov temp2 = ar.bspstore; \ + ;; \ + DATA_VA_TO_PA(temp2); \ + ;; \ + mov temp1 = ar.rnat; \ + ;; \ + mov ar.bspstore = temp2; \ + ;; \ + mov ar.rnat = temp1; \ + mov temp1 = psr; \ + mov temp2 = psr; \ + ;; \ + \ + dep temp2 = 0, temp2, PSR_IC, 2; \ + ;; \ + mov psr.l = temp2; \ + ;; \ + srlz.d; \ + dep temp1 = 0, temp1, 32, 32; \ + ;; \ + dep temp1 = 0, temp1, PSR_IT, 1; \ + ;; \ + dep temp1 = 0, temp1, PSR_DT, 1; \ + ;; \ + dep temp1 = 0, temp1, PSR_RT, 1; \ + ;; \ + dep temp1 = 0, temp1, PSR_I, 1; \ + ;; \ + dep temp1 = 0, temp1, PSR_IC, 1; \ + ;; \ + dep temp1 = -1, temp1, PSR_MC, 1; \ + ;; \ + mov cr.ipsr = temp1; \ + ;; \ + LOAD_PHYSICAL(p0, temp2, start_addr); \ + ;; \ + mov cr.iip = temp2; \ + mov cr.ifs = r0; \ + DATA_VA_TO_PA(sp); \ + DATA_VA_TO_PA(gp); \ + ;; \ + srlz.i; \ + ;; \ + nop 1; \ + nop 2; \ + nop 1; \ + nop 2; \ + rfi; \ + ;; + +/* + * This macro jumps to the instruction at the given virtual address + * and starts execution in virtual mode with all the address + * translations turned on. + * 1. Get the old saved psr + * + * 2.
Clear the interrupt state collection bit in the current psr. + * + * 3. Set the instruction translation bit back in the old psr + * Note we have to do this since we are right now saving only the + * lower 32-bits of old psr.(Also the old psr has the data and + * rse translation bits on) + * + * 4. Set ipsr to this old_psr with "it" bit set and "bn" = 1. + * + * 5. Reset the current thread pointer (r13). + * + * 6. Set iip to the virtual address of the next instruction bundle. + * + * 7. Do an rfi to move ipsr to psr and iip to ip. + */ + +#define VIRTUAL_MODE_ENTER(temp1, temp2, start_addr, old_psr) \ + mov temp2 = psr; \ + ;; \ + mov old_psr = temp2; \ + ;; \ + dep temp2 = 0, temp2, PSR_IC, 2; \ + ;; \ + mov psr.l = temp2; \ + mov ar.rsc = 0; \ + ;; \ + srlz.d; \ + mov r13 = ar.k6; \ + mov temp2 = ar.bspstore; \ + ;; \ + DATA_PA_TO_VA(temp2,temp1); \ + ;; \ + mov temp1 = ar.rnat; \ + ;; \ + mov ar.bspstore = temp2; \ + ;; \ + mov ar.rnat = temp1; \ + ;; \ + mov temp1 = old_psr; \ + ;; \ + mov temp2 = 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_IC, 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_IT, 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_DT, 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_RT, 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_BN, 1; \ + ;; \ + \ + mov cr.ipsr = temp1; \ + movl temp2 = start_addr; \ + ;; \ + mov cr.iip = temp2; \ + movl gp = __gp \ + ;; \ + DATA_PA_TO_VA(sp, temp1); \ + srlz.i; \ + ;; \ + nop 1; \ + nop 2; \ + nop 1; \ + rfi \ + ;; + +/* + * The MCA and INIT stacks in struct ia64_mca_cpu look like normal kernel + * stacks, except that the SAL/OS state and a switch_stack are stored near the + * top of the MCA/INIT stack. To support concurrent entry to MCA or INIT, as + * well as MCA over INIT, each event needs its own SAL/OS state. All entries + * are 16 byte aligned. 
+ * + * +---------------------------+ + * | pt_regs | + * +---------------------------+ + * | switch_stack | + * +---------------------------+ + * | SAL/OS state | + * +---------------------------+ + * | 16 byte scratch area | + * +---------------------------+ <-------- SP at start of C MCA handler + * | ..... | + * +---------------------------+ + * | RBS for MCA/INIT handler | + * +---------------------------+ + * | struct task for MCA/INIT | + * +---------------------------+ <-------- Bottom of MCA/INIT stack + */ + +#define ALIGN16(x) ((x)&~15) +#define MCA_PT_REGS_OFFSET ALIGN16(KERNEL_STACK_SIZE-IA64_PT_REGS_SIZE) +#define MCA_SWITCH_STACK_OFFSET ALIGN16(MCA_PT_REGS_OFFSET-IA64_SWITCH_STACK_SIZE) +#define MCA_SOS_OFFSET ALIGN16(MCA_SWITCH_STACK_OFFSET-IA64_SAL_OS_STATE_SIZE) +#define MCA_SP_OFFSET ALIGN16(MCA_SOS_OFFSET-16) + +#endif /* _ASM_IA64_MCA_ASM_H */ diff --git a/kernel/arch/ia64/include/asm/meminit.h b/kernel/arch/ia64/include/asm/meminit.h new file mode 100644 index 000000000..092f1c91b --- /dev/null +++ b/kernel/arch/ia64/include/asm/meminit.h @@ -0,0 +1,74 @@ +#ifndef meminit_h +#define meminit_h + +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ */ + + +/* + * Entries defined so far: + * - boot param structure itself + * - memory map + * - initrd (optional) + * - command line string + * - kernel code & data + * - crash dumping code reserved region + * - Kernel memory map built from EFI memory map + * - ELF core header + * + * More could be added if necessary + */ +#define IA64_MAX_RSVD_REGIONS 9 + +struct rsvd_region { + u64 start; /* virtual address of beginning of element */ + u64 end; /* virtual address of end of element + 1 */ +}; + +extern struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1]; +extern int num_rsvd_regions; + +extern void find_memory (void); +extern void reserve_memory (void); +extern void find_initrd (void); +extern int filter_rsvd_memory (u64 start, u64 end, void *arg); +extern int filter_memory (u64 start, u64 end, void *arg); +extern unsigned long efi_memmap_init(u64 *s, u64 *e); +extern int find_max_min_low_pfn (u64, u64, void *); + +extern unsigned long vmcore_find_descriptor_size(unsigned long address); +extern int reserve_elfcorehdr(u64 *start, u64 *end); + +/* + * For rounding an address to the next IA64_GRANULE_SIZE or order + */ +#define GRANULEROUNDDOWN(n) ((n) & ~(IA64_GRANULE_SIZE-1)) +#define GRANULEROUNDUP(n) (((n)+IA64_GRANULE_SIZE-1) & ~(IA64_GRANULE_SIZE-1)) + +#ifdef CONFIG_NUMA + extern void call_pernode_memory (unsigned long start, unsigned long len, void *func); +#else +# define call_pernode_memory(start, len, func) (*func)(start, len, 0) +#endif + +#define IGNORE_PFN0 1 /* XXX fix me: ignore pfn 0 until TLB miss handler is updated... 
*/ + +extern int register_active_ranges(u64 start, u64 len, int nid); + +#ifdef CONFIG_VIRTUAL_MEM_MAP +# define LARGE_GAP 0x40000000 /* Use virtual mem map if hole is larger than this */ + extern unsigned long VMALLOC_END; + extern struct page *vmem_map; + extern int find_largest_hole(u64 start, u64 end, void *arg); + extern int create_mem_map_page_table(u64 start, u64 end, void *arg); + extern int vmemmap_find_next_valid_pfn(int, int); +#else +static inline int vmemmap_find_next_valid_pfn(int node, int i) +{ + return i + 1; +} +#endif +#endif /* meminit_h */ diff --git a/kernel/arch/ia64/include/asm/mman.h b/kernel/arch/ia64/include/asm/mman.h new file mode 100644 index 000000000..fdd5f5229 --- /dev/null +++ b/kernel/arch/ia64/include/asm/mman.h @@ -0,0 +1,17 @@ +/* + * Based on . + * + * Modified 1998-2000, 2002 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#ifndef _ASM_IA64_MMAN_H +#define _ASM_IA64_MMAN_H + +#include + +#ifndef __ASSEMBLY__ +#define arch_mmap_check ia64_mmap_check +int ia64_mmap_check(unsigned long addr, unsigned long len, + unsigned long flags); +#endif +#endif /* _ASM_IA64_MMAN_H */ diff --git a/kernel/arch/ia64/include/asm/mmu.h b/kernel/arch/ia64/include/asm/mmu.h new file mode 100644 index 000000000..611432ba5 --- /dev/null +++ b/kernel/arch/ia64/include/asm/mmu.h @@ -0,0 +1,13 @@ +#ifndef __MMU_H +#define __MMU_H + +/* + * Type for a context number. We declare it volatile to ensure proper + * ordering when it's accessed outside of spinlock'd critical sections + * (e.g., as done in activate_mm() and init_new_context()).
+ */ +typedef volatile unsigned long mm_context_t; + +typedef unsigned long nv_mm_context_t; + +#endif diff --git a/kernel/arch/ia64/include/asm/mmu_context.h b/kernel/arch/ia64/include/asm/mmu_context.h new file mode 100644 index 000000000..7f2a45660 --- /dev/null +++ b/kernel/arch/ia64/include/asm/mmu_context.h @@ -0,0 +1,198 @@ +#ifndef _ASM_IA64_MMU_CONTEXT_H +#define _ASM_IA64_MMU_CONTEXT_H + +/* + * Copyright (C) 1998-2002 Hewlett-Packard Co + * David Mosberger-Tang + */ + +/* + * Routines to manage the allocation of task context numbers. Task context + * numbers are used to reduce or eliminate the need to perform TLB flushes + * due to context switches. Context numbers are implemented using ia-64 + * region ids. Since the IA-64 TLB does not consider the region number when + * performing a TLB lookup, we need to assign a unique region id to each + * region in a process. We use the least significant three bits in a region + * id for this purpose. + */ + +#define IA64_REGION_ID_KERNEL 0 /* the kernel's region id (tlb.c depends on this being 0) */ + +#define ia64_rid(ctx,addr) (((ctx) << 3) | (addr >> 61)) /* NOTE(review): addr is expanded without parentheses; pass a single primary expression */ + +# include +# ifndef __ASSEMBLY__ + +#include +#include +#include +#include + +#include +#include + +struct ia64_ctx { + spinlock_t lock; + unsigned int next; /* next context number to use */ + unsigned int limit; /* available free range */ + unsigned int max_ctx; /* max.
context value supported by all CPUs */ + /* call wrap_mmu_context when next >= max */ + unsigned long *bitmap; /* bitmap size is max_ctx+1 */ + unsigned long *flushmap;/* pending rid to be flushed */ +}; + +extern struct ia64_ctx ia64_ctx; +DECLARE_PER_CPU(u8, ia64_need_tlb_flush); + +extern void mmu_context_init (void); +extern void wrap_mmu_context (struct mm_struct *mm); + +static inline void +enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk) +{ +} + +/* + * When the context counter wraps around all TLBs need to be flushed because + * an old context number might have been reused. This is signalled by the + * ia64_need_tlb_flush per-CPU variable, which is checked in the routine + * below. Called by activate_mm(). + */ +static inline void +delayed_tlb_flush (void) +{ + extern void local_flush_tlb_all (void); + unsigned long flags; + + if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) { + spin_lock_irqsave(&ia64_ctx.lock, flags); + if (__ia64_per_cpu_var(ia64_need_tlb_flush)) { + local_flush_tlb_all(); + __ia64_per_cpu_var(ia64_need_tlb_flush) = 0; + } + spin_unlock_irqrestore(&ia64_ctx.lock, flags); + } +} + +static inline nv_mm_context_t +get_mmu_context (struct mm_struct *mm) +{ + unsigned long flags; + nv_mm_context_t context = mm->context; + + if (likely(context)) + goto out; + + spin_lock_irqsave(&ia64_ctx.lock, flags); + /* re-check, now that we've got the lock: */ + context = mm->context; + if (context == 0) { + cpumask_clear(mm_cpumask(mm)); + if (ia64_ctx.next >= ia64_ctx.limit) { + ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, + ia64_ctx.max_ctx, ia64_ctx.next); + ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, + ia64_ctx.max_ctx, ia64_ctx.next); + if (ia64_ctx.next >= ia64_ctx.max_ctx) + wrap_mmu_context(mm); + } + mm->context = context = ia64_ctx.next++; + __set_bit(context, ia64_ctx.bitmap); + } + spin_unlock_irqrestore(&ia64_ctx.lock, flags); +out: + /* + * Ensure we're not starting to use "context" before any old + * uses of 
it are gone from our TLB. + */ + delayed_tlb_flush(); + + return context; +} + +/* + * Initialize context number to some sane value. MM is guaranteed to be a + * brand-new address-space, so no TLB flushing is needed, ever. + */ +static inline int +init_new_context (struct task_struct *p, struct mm_struct *mm) +{ + mm->context = 0; + return 0; +} + +static inline void +destroy_context (struct mm_struct *mm) +{ + /* Nothing to do. */ +} + +static inline void +reload_context (nv_mm_context_t context) +{ + unsigned long rid; + unsigned long rid_incr = 0; + unsigned long rr0, rr1, rr2, rr3, rr4, old_rr4; + + old_rr4 = ia64_get_rr(RGN_BASE(RGN_HPAGE)); + rid = context << 3; /* make space for encoding the region number */ + rid_incr = 1 << 8; + + /* encode the region id, preferred page size, and VHPT enable bit: */ + rr0 = (rid << 8) | (PAGE_SHIFT << 2) | 1; + rr1 = rr0 + 1*rid_incr; + rr2 = rr0 + 2*rid_incr; + rr3 = rr0 + 3*rid_incr; + rr4 = rr0 + 4*rid_incr; +#ifdef CONFIG_HUGETLB_PAGE + rr4 = (rr4 & (~(0xfcUL))) | (old_rr4 & 0xfc); + +# if RGN_HPAGE != 4 +# error "reload_context assumes RGN_HPAGE is 4" +# endif +#endif + + ia64_set_rr0_to_rr4(rr0, rr1, rr2, rr3, rr4); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +/* + * Must be called with preemption off + */ +static inline void +activate_context (struct mm_struct *mm) +{ + nv_mm_context_t context; + + do { + context = get_mmu_context(mm); + if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + reload_context(context); + /* + * in the unlikely event of a TLB-flush by another thread, + * redo the load. + */ + } while (unlikely(context != mm->context)); +} + +#define deactivate_mm(tsk,mm) do { } while (0) + +/* + * Switch from address space PREV to address space NEXT. 
+ */ +static inline void +activate_mm (struct mm_struct *prev, struct mm_struct *next) +{ + /* + * We may get interrupts here, but that's OK because interrupt + * handlers cannot touch user-space. + */ + ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd)); + activate_context(next); +} + +#define switch_mm(prev_mm,next_mm,next_task) activate_mm(prev_mm, next_mm) + +# endif /* ! __ASSEMBLY__ */ +#endif /* _ASM_IA64_MMU_CONTEXT_H */ diff --git a/kernel/arch/ia64/include/asm/mmzone.h b/kernel/arch/ia64/include/asm/mmzone.h new file mode 100644 index 000000000..e0de61709 --- /dev/null +++ b/kernel/arch/ia64/include/asm/mmzone.h @@ -0,0 +1,42 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000,2003 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) 2002 NEC Corp. + * Copyright (c) 2002 Erich Focht + * Copyright (c) 2002 Kimio Suganuma + */ +#ifndef _ASM_IA64_MMZONE_H +#define _ASM_IA64_MMZONE_H + +#include +#include +#include + +#ifdef CONFIG_NUMA + +static inline int pfn_to_nid(unsigned long pfn) +{ + extern int paddr_to_nid(unsigned long); + int nid = paddr_to_nid(pfn << PAGE_SHIFT); + if (nid < 0) + return 0; + else + return nid; +} + +#ifdef CONFIG_IA64_DIG /* DIG systems are small */ +# define MAX_PHYSNODE_ID 8 +# define NR_NODE_MEMBLKS (MAX_NUMNODES * 8) +#else /* sn2 is the biggest case, so we use that if !DIG */ +# define MAX_PHYSNODE_ID 2048 +# define NR_NODE_MEMBLKS (MAX_NUMNODES * 4) +#endif + +#else /* CONFIG_NUMA */ +# define NR_NODE_MEMBLKS (MAX_NUMNODES * 4) +#endif /* CONFIG_NUMA */ + +#endif /* _ASM_IA64_MMZONE_H */ diff --git a/kernel/arch/ia64/include/asm/module.h b/kernel/arch/ia64/include/asm/module.h new file mode 100644 index 000000000..dfba22a87 --- /dev/null +++ b/kernel/arch/ia64/include/asm/module.h @@ -0,0 +1,40 @@ +#ifndef _ASM_IA64_MODULE_H +#define _ASM_IA64_MODULE_H + +#include + 
+/* + * IA-64-specific support for kernel module loader. + * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +struct elf64_shdr; /* forward declaration */ + +struct mod_arch_specific { + struct elf64_shdr *core_plt; /* core PLT section */ + struct elf64_shdr *init_plt; /* init PLT section */ + struct elf64_shdr *got; /* global offset table */ + struct elf64_shdr *opd; /* official procedure descriptors */ + struct elf64_shdr *unwind; /* unwind-table section */ +#ifdef CONFIG_PARAVIRT + struct elf64_shdr *paravirt_bundles; + /* paravirt_alt_bundle_patch table */ + struct elf64_shdr *paravirt_insts; + /* paravirt_alt_inst_patch table */ +#endif + unsigned long gp; /* global-pointer for module */ + + void *core_unw_table; /* core unwind-table cookie returned by unwinder */ + void *init_unw_table; /* init unwind-table cookie returned by unwinder */ + unsigned int next_got_entry; /* index of next available got entry */ +}; + +#define MODULE_PROC_FAMILY "ia64" +#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY \ + "gcc-" __stringify(__GNUC__) "."
__stringify(__GNUC_MINOR__) + +#define ARCH_SHF_SMALL SHF_IA_64_SHORT + +#endif /* _ASM_IA64_MODULE_H */ diff --git a/kernel/arch/ia64/include/asm/msidef.h b/kernel/arch/ia64/include/asm/msidef.h new file mode 100644 index 000000000..592c1047a --- /dev/null +++ b/kernel/arch/ia64/include/asm/msidef.h @@ -0,0 +1,42 @@ +#ifndef _IA64_MSI_DEF_H +#define _IA64_MSI_DEF_H + +/* + * Shifts for APIC-based data + */ + +#define MSI_DATA_VECTOR_SHIFT 0 +#define MSI_DATA_VECTOR(v) (((u8)(v)) << MSI_DATA_VECTOR_SHIFT) /* cast covers the whole argument */ +#define MSI_DATA_VECTOR_MASK 0xffffff00 + +#define MSI_DATA_DELIVERY_MODE_SHIFT 8 +#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT) +#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT) + +#define MSI_DATA_LEVEL_SHIFT 14 +#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT) +#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT) + +#define MSI_DATA_TRIGGER_SHIFT 15 +#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT) +#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT) + +/* + * Shift/mask fields for APIC-based bus address + */ + +#define MSI_ADDR_DEST_ID_SHIFT 4 +#define MSI_ADDR_HEADER 0xfee00000 + +#define MSI_ADDR_DEST_ID_MASK 0xfff0000f +#define MSI_ADDR_DEST_ID_CPU(cpu) ((cpu) << MSI_ADDR_DEST_ID_SHIFT) + +#define MSI_ADDR_DEST_MODE_SHIFT 2 +#define MSI_ADDR_DEST_MODE_PHYS (0 << MSI_ADDR_DEST_MODE_SHIFT) +#define MSI_ADDR_DEST_MODE_LOGIC (1 << MSI_ADDR_DEST_MODE_SHIFT) + +#define MSI_ADDR_REDIRECTION_SHIFT 3 +#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT) +#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT) + +#endif/* _IA64_MSI_DEF_H */ diff --git a/kernel/arch/ia64/include/asm/mutex.h b/kernel/arch/ia64/include/asm/mutex.h new file mode 100644 index 000000000..f41e66d65 --- /dev/null +++ b/kernel/arch/ia64/include/asm/mutex.h @@ -0,0 +1,90 @@ +/* + * ia64 implementation of the mutex fastpath.
+ * + * Copyright (C) 2006 Ken Chen + * + */ + +#ifndef _ASM_MUTEX_H +#define _ASM_MUTEX_H + +/** + * __mutex_fastpath_lock - try to take the lock by moving the count + * from 1 to a 0 value + * @count: pointer of type atomic_t + * @fail_fn: function to call if the original value was not 1 + * + * Change the count from 1 to a value lower than 1, and call @fail_fn if + * it wasn't 1 originally. This function MUST leave the value lower than + * 1 even when the "1" assertion wasn't true. + */ +static inline void +__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) +{ + if (unlikely(ia64_fetchadd4_acq(count, -1) != 1)) + fail_fn(count); +} + +/** + * __mutex_fastpath_lock_retval - try to take the lock by moving the count + * from 1 to a 0 value + * @count: pointer of type atomic_t + * + * Change the count from 1 to a value lower than 1. This function returns 0 + * if the fastpath succeeds, or -1 otherwise. + */ +static inline int +__mutex_fastpath_lock_retval(atomic_t *count) +{ + if (unlikely(ia64_fetchadd4_acq(count, -1) != 1)) + return -1; + return 0; +} + +/** + * __mutex_fastpath_unlock - try to promote the count from 0 to 1 + * @count: pointer of type atomic_t + * @fail_fn: function to call if the original value was not 0 + * + * Try to promote the count from 0 to 1. If it wasn't 0, call @fail_fn. + * In the failure case, this function is allowed to either set the value to + * 1, or to set it to a value lower than 1. + * + * If the implementation sets it to a value of lower than 1, then the + * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs + * to return 0 otherwise.
+ */ +static inline void +__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) +{ + int ret = ia64_fetchadd4_rel(count, 1); + if (unlikely(ret < 0)) + fail_fn(count); +} + +#define __mutex_slowpath_needs_to_unlock() 1 + +/** + * __mutex_fastpath_trylock - try to acquire the mutex, without waiting + * + * @count: pointer of type atomic_t + * @fail_fn: fallback function + * + * Change the count from 1 to a value lower than 1, and return 0 (failure) + * if it wasn't 1 originally, or return 1 (success) otherwise. This function + * MUST leave the value lower than 1 even when the "1" assertion wasn't true. + * Additionally, if the value was < 0 originally, this function must not leave + * it to 0 on failure. + * + * If the architecture has no effective trylock variant, it should call the + * spinlock-based trylock variant unconditionally. + */ +static inline int +__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) +{ + if (cmpxchg_acq(count, 1, 0) == 1) + return 1; + return 0; +} + +#endif diff --git a/kernel/arch/ia64/include/asm/native/inst.h b/kernel/arch/ia64/include/asm/native/inst.h new file mode 100644 index 000000000..d2d46efb3 --- /dev/null +++ b/kernel/arch/ia64/include/asm/native/inst.h @@ -0,0 +1,194 @@ +/****************************************************************************** + * arch/ia64/include/asm/native/inst.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#define DO_SAVE_MIN IA64_NATIVE_DO_SAVE_MIN + +#define __paravirt_switch_to ia64_native_switch_to +#define __paravirt_leave_syscall ia64_native_leave_syscall +#define __paravirt_work_processed_syscall ia64_native_work_processed_syscall +#define __paravirt_leave_kernel ia64_native_leave_kernel +#define __paravirt_pending_syscall_end ia64_work_pending_syscall_end +#define __paravirt_work_processed_syscall_target \ + ia64_work_processed_syscall + +#define paravirt_fsyscall_table ia64_native_fsyscall_table +#define paravirt_fsys_bubble_down ia64_native_fsys_bubble_down + +#ifdef CONFIG_PARAVIRT_GUEST_ASM_CLOBBER_CHECK +# define PARAVIRT_POISON 0xdeadbeefbaadf00d +# define CLOBBER(clob) \ + ;; \ + movl clob = PARAVIRT_POISON; \ + ;; +# define CLOBBER_PRED(pred_clob) \ + ;; \ + cmp.eq pred_clob, p0 = r0, r0 \ + ;; +#else +# define CLOBBER(clob) /* nothing */ +# define CLOBBER_PRED(pred_clob) /* nothing */ +#endif + +#define MOV_FROM_IFA(reg) \ + mov reg = cr.ifa + +#define MOV_FROM_ITIR(reg) \ + mov reg = cr.itir + +#define MOV_FROM_ISR(reg) \ + mov reg = cr.isr + +#define MOV_FROM_IHA(reg) \ + mov reg = cr.iha + +#define MOV_FROM_IPSR(pred, reg) \ +(pred) mov reg = cr.ipsr + +#define MOV_FROM_IIM(reg) \ + mov reg = cr.iim + +#define MOV_FROM_IIP(reg) \ + mov reg = cr.iip + +#define MOV_FROM_IVR(reg, clob) \ + mov reg = cr.ivr \ + CLOBBER(clob) + +#define MOV_FROM_PSR(pred, reg, clob) \ +(pred) mov reg = psr \ + CLOBBER(clob) + +#define MOV_FROM_ITC(pred, pred_clob, reg, clob) \ +(pred) mov reg = ar.itc \ + CLOBBER(clob) \ + CLOBBER_PRED(pred_clob) + +#define MOV_TO_IFA(reg, clob) \ + mov cr.ifa = reg \ + CLOBBER(clob) + +#define MOV_TO_ITIR(pred, reg, clob) \ +(pred) mov cr.itir = reg \ + CLOBBER(clob) + +#define MOV_TO_IHA(pred, reg, clob) \ 
+(pred) mov cr.iha = reg \ + CLOBBER(clob) + +#define MOV_TO_IPSR(pred, reg, clob) \ +(pred) mov cr.ipsr = reg \ + CLOBBER(clob) + +#define MOV_TO_IFS(pred, reg, clob) \ +(pred) mov cr.ifs = reg \ + CLOBBER(clob) + +#define MOV_TO_IIP(reg, clob) \ + mov cr.iip = reg \ + CLOBBER(clob) + +#define MOV_TO_KR(kr, reg, clob0, clob1) \ + mov IA64_KR(kr) = reg \ + CLOBBER(clob0) \ + CLOBBER(clob1) + +#define ITC_I(pred, reg, clob) \ +(pred) itc.i reg \ + CLOBBER(clob) + +#define ITC_D(pred, reg, clob) \ +(pred) itc.d reg \ + CLOBBER(clob) + +#define ITC_I_AND_D(pred_i, pred_d, reg, clob) \ +(pred_i) itc.i reg; \ +(pred_d) itc.d reg \ + CLOBBER(clob) + +#define THASH(pred, reg0, reg1, clob) \ +(pred) thash reg0 = reg1 \ + CLOBBER(clob) + +#define SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(clob0, clob1) \ + ssm psr.ic | PSR_DEFAULT_BITS \ + CLOBBER(clob0) \ + CLOBBER(clob1) \ + ;; \ + srlz.i /* guarantee that interruption collection is on */ \ + ;; + +#define SSM_PSR_IC_AND_SRLZ_D(clob0, clob1) \ + ssm psr.ic \ + CLOBBER(clob0) \ + CLOBBER(clob1) \ + ;; \ + srlz.d + +#define RSM_PSR_IC(clob) \ + rsm psr.ic \ + CLOBBER(clob) + +#define SSM_PSR_I(pred, pred_clob, clob) \ +(pred) ssm psr.i \ + CLOBBER(clob) \ + CLOBBER_PRED(pred_clob) + +#define RSM_PSR_I(pred, clob0, clob1) \ +(pred) rsm psr.i \ + CLOBBER(clob0) \ + CLOBBER(clob1) + +#define RSM_PSR_I_IC(clob0, clob1, clob2) \ + rsm psr.i | psr.ic \ + CLOBBER(clob0) \ + CLOBBER(clob1) \ + CLOBBER(clob2) + +#define RSM_PSR_DT \ + rsm psr.dt + +#define RSM_PSR_BE_I(clob0, clob1) \ + rsm psr.be | psr.i \ + CLOBBER(clob0) \ + CLOBBER(clob1) + +#define SSM_PSR_DT_AND_SRLZ_I \ + ssm psr.dt \ + ;; \ + srlz.i + +#define BSW_0(clob0, clob1, clob2) \ + bsw.0 \ + CLOBBER(clob0) \ + CLOBBER(clob1) \ + CLOBBER(clob2) + +#define BSW_1(clob0, clob1) \ + bsw.1 \ + CLOBBER(clob0) \ + CLOBBER(clob1) + +#define COVER \ + cover + +#define RFI \ + rfi diff --git a/kernel/arch/ia64/include/asm/native/irq.h b/kernel/arch/ia64/include/asm/native/irq.h new
file mode 100644 index 000000000..887a228e2 --- /dev/null +++ b/kernel/arch/ia64/include/asm/native/irq.h @@ -0,0 +1,33 @@ +/****************************************************************************** + * arch/ia64/include/asm/native/irq.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_IA64_NATIVE_IRQ_H +#define _ASM_IA64_NATIVE_IRQ_H + +#define NR_VECTORS 256 + +#if (NR_VECTORS + 32 * NR_CPUS) < 1024 +#define IA64_NATIVE_NR_IRQS (NR_VECTORS + 32 * NR_CPUS) +#else +#define IA64_NATIVE_NR_IRQS 1024 +#endif + +#endif /* _ASM_IA64_NATIVE_IRQ_H */ diff --git a/kernel/arch/ia64/include/asm/native/patchlist.h b/kernel/arch/ia64/include/asm/native/patchlist.h new file mode 100644 index 000000000..be16ca931 --- /dev/null +++ b/kernel/arch/ia64/include/asm/native/patchlist.h @@ -0,0 +1,38 @@ +/****************************************************************************** + * arch/ia64/include/asm/native/patchlist.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#define __paravirt_start_gate_fsyscall_patchlist \ + __ia64_native_start_gate_fsyscall_patchlist +#define __paravirt_end_gate_fsyscall_patchlist \ + __ia64_native_end_gate_fsyscall_patchlist +#define __paravirt_start_gate_brl_fsys_bubble_down_patchlist \ + __ia64_native_start_gate_brl_fsys_bubble_down_patchlist +#define __paravirt_end_gate_brl_fsys_bubble_down_patchlist \ + __ia64_native_end_gate_brl_fsys_bubble_down_patchlist +#define __paravirt_start_gate_vtop_patchlist \ + __ia64_native_start_gate_vtop_patchlist +#define __paravirt_end_gate_vtop_patchlist \ + __ia64_native_end_gate_vtop_patchlist +#define __paravirt_start_gate_mckinley_e9_patchlist \ + __ia64_native_start_gate_mckinley_e9_patchlist +#define __paravirt_end_gate_mckinley_e9_patchlist \ + __ia64_native_end_gate_mckinley_e9_patchlist diff --git a/kernel/arch/ia64/include/asm/native/pvchk_inst.h b/kernel/arch/ia64/include/asm/native/pvchk_inst.h new file mode 100644 index 000000000..8d72962ec --- /dev/null +++ b/kernel/arch/ia64/include/asm/native/pvchk_inst.h @@ -0,0 +1,271 @@ +#ifndef _ASM_NATIVE_PVCHK_INST_H +#define _ASM_NATIVE_PVCHK_INST_H + +/****************************************************************************** + * arch/ia64/include/asm/native/pvchk_inst.h + * 
Checker for paravirtualizations of privileged operations. + * + * Copyright (C) 2005 Hewlett-Packard Co + * Dan Magenheimer + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/********************************************** + * Instructions paravirtualized for correctness + **********************************************/ + +/* "fc" and "thash" are privilege-sensitive instructions, meaning they + * may have different semantics depending on whether they are executed + * at PL0 vs PL!=0. When paravirtualized, these instructions mustn't + * be allowed to execute directly, lest incorrect semantics result. + */ + +#define fc .error "fc should not be used directly." +#define thash .error "thash should not be used directly." + +/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag" + * is not currently used (though it may be in a long-format VHPT system!) + * and the semantics of cover only change if psr.ic is off which is very + * rare (and currently non-existent outside of assembly code) + */ +#define ttag .error "ttag should not be used directly." +#define cover .error "cover should not be used directly." + +/* There are also privilege-sensitive registers.
These registers are + * readable at any privilege level but only writable at PL0. + */ +#define cpuid .error "cpuid should not be used directly." +#define pmd .error "pmd should not be used directly." + +/* + * mov ar.eflag = + * mov = ar.eflag + */ + +/********************************************** + * Instructions paravirtualized for performance + **********************************************/ +/* + * Those instructions include '.' which can't be handled by cpp, + * or can't be handled by cpp easily. + * They are handled by sed instead of cpp. + */ + +/* for .S + * itc.i + * itc.d + * + * bsw.0 + * bsw.1 + * + * ssm psr.ic | PSR_DEFAULT_BITS + * ssm psr.ic + * rsm psr.ic + * ssm psr.i + * rsm psr.i + * rsm psr.i | psr.ic + * rsm psr.dt + * ssm psr.dt + * + * mov = cr.ifa + * mov = cr.itir + * mov = cr.isr + * mov = cr.iha + * mov = cr.ipsr + * mov = cr.iim + * mov = cr.iip + * mov = cr.ivr + * mov = psr + * + * mov cr.ifa = + * mov cr.itir = + * mov cr.iha = + * mov cr.ipsr = + * mov cr.ifs = + * mov cr.iip = + * mov cr.kr = + */ + +/* for intrinsics + * ssm psr.i + * rsm psr.i + * mov = psr + * mov = ivr + * mov = tpr + * mov cr.itm = + * mov eoi = + * mov rr[] = + * mov = rr[] + * mov = kr + * mov kr = + * ptc.ga + */ + +/************************************************************* + * define paravirtualized instruction macros as nop to ignore them, + * and check whether arguments are appropriate.
+ *************************************************************/ + +/* check whether reg is a regular register */ +.macro is_rreg_in reg + .ifc "\reg", "r0" + nop 0 + .exitm + .endif + ;; + mov \reg = r0 + ;; +.endm +#define IS_RREG_IN(reg) is_rreg_in reg ; + +#define IS_RREG_OUT(reg) \ + ;; \ + mov reg = r0 \ + ;; + +#define IS_RREG_CLOB(reg) IS_RREG_OUT(reg) + +/* check whether pred is a predicate register */ +#define IS_PRED_IN(pred) \ + ;; \ + (pred) nop 0 \ + ;; + +#define IS_PRED_OUT(pred) \ + ;; \ + cmp.eq pred, p0 = r0, r0 \ + ;; + +#define IS_PRED_CLOB(pred) IS_PRED_OUT(pred) + + +#define DO_SAVE_MIN(__COVER, SAVE_IFS, EXTRA, WORKAROUND) \ + nop 0 +#define MOV_FROM_IFA(reg) \ + IS_RREG_OUT(reg) +#define MOV_FROM_ITIR(reg) \ + IS_RREG_OUT(reg) +#define MOV_FROM_ISR(reg) \ + IS_RREG_OUT(reg) +#define MOV_FROM_IHA(reg) \ + IS_RREG_OUT(reg) +#define MOV_FROM_IPSR(pred, reg) \ + IS_PRED_IN(pred) \ + IS_RREG_OUT(reg) +#define MOV_FROM_IIM(reg) \ + IS_RREG_OUT(reg) +#define MOV_FROM_IIP(reg) \ + IS_RREG_OUT(reg) +#define MOV_FROM_IVR(reg, clob) \ + IS_RREG_OUT(reg) \ + IS_RREG_CLOB(clob) +#define MOV_FROM_PSR(pred, reg, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_OUT(reg) \ + IS_RREG_CLOB(clob) +#define MOV_FROM_ITC(pred, pred_clob, reg, clob) \ + IS_PRED_IN(pred) \ + IS_PRED_CLOB(pred_clob) \ + IS_RREG_OUT(reg) \ + IS_RREG_CLOB(clob) +#define MOV_TO_IFA(reg, clob) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define MOV_TO_ITIR(pred, reg, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define MOV_TO_IHA(pred, reg, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define MOV_TO_IPSR(pred, reg, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define MOV_TO_IFS(pred, reg, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define MOV_TO_IIP(reg, clob) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define MOV_TO_KR(kr, reg, clob0, clob1) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob0) \ + 
IS_RREG_CLOB(clob1) +#define ITC_I(pred, reg, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define ITC_D(pred, reg, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define ITC_I_AND_D(pred_i, pred_d, reg, clob) \ + IS_PRED_IN(pred_i) \ + IS_PRED_IN(pred_d) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define THASH(pred, reg0, reg1, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_OUT(reg0) \ + IS_RREG_IN(reg1) \ + IS_RREG_CLOB(clob) +#define SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(clob0, clob1) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) +#define SSM_PSR_IC_AND_SRLZ_D(clob0, clob1) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) +#define RSM_PSR_IC(clob) \ + IS_RREG_CLOB(clob) +#define SSM_PSR_I(pred, pred_clob, clob) \ + IS_PRED_IN(pred) \ + IS_PRED_CLOB(pred_clob) \ + IS_RREG_CLOB(clob) +#define RSM_PSR_I(pred, clob0, clob1) \ + IS_PRED_IN(pred) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) +#define RSM_PSR_I_IC(clob0, clob1, clob2) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) \ + IS_RREG_CLOB(clob2) +#define RSM_PSR_DT \ + nop 0 +#define RSM_PSR_BE_I(clob0, clob1) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) +#define SSM_PSR_DT_AND_SRLZ_I \ + nop 0 +#define BSW_0(clob0, clob1, clob2) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) \ + IS_RREG_CLOB(clob2) +#define BSW_1(clob0, clob1) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) +#define COVER \ + nop 0 +#define RFI \ + br.ret.sptk.many rp /* defining nop causes dependency error */ + +#endif /* _ASM_NATIVE_PVCHK_INST_H */ diff --git a/kernel/arch/ia64/include/asm/nodedata.h b/kernel/arch/ia64/include/asm/nodedata.h new file mode 100644 index 000000000..2fb337b0e --- /dev/null +++ b/kernel/arch/ia64/include/asm/nodedata.h @@ -0,0 +1,63 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000 Silicon Graphics, Inc. 
All rights reserved. + * Copyright (c) 2002 NEC Corp. + * Copyright (c) 2002 Erich Focht + * Copyright (c) 2002 Kimio Suganuma + */ +#ifndef _ASM_IA64_NODEDATA_H +#define _ASM_IA64_NODEDATA_H + +#include + +#include +#include + +#ifdef CONFIG_NUMA + +/* + * Node Data. One of these structures is located on each node of a NUMA system. + */ + +struct pglist_data; +struct ia64_node_data { + short active_cpu_count; + short node; + struct pglist_data *pg_data_ptrs[MAX_NUMNODES]; +}; + + +/* + * Return a pointer to the node_data structure for the executing cpu. + */ +#define local_node_data (local_cpu_data->node_data) + +/* + * Given a node id, return a pointer to the pg_data_t for the node. + * + * NODE_DATA - should be used in all code not related to system + * initialization. It uses pernode data structures to minimize + * offnode memory references. However, these structure are not + * present during boot. This macro can be used once cpu_init + * completes. + */ +#define NODE_DATA(nid) (local_node_data->pg_data_ptrs[nid]) + +/* + * LOCAL_DATA_ADDR - This is to calculate the address of other node's + * "local_node_data" at hot-plug phase. The local_node_data + * is pointed by per_cpu_page. Kernel usually use it for + * just executing cpu. However, when new node is hot-added, + * the addresses of local data for other nodes are necessary + * to update all of them. + */ +#define LOCAL_DATA_ADDR(pgdat) \ + ((struct ia64_node_data *)((u64)(pgdat) + \ + L1_CACHE_ALIGN(sizeof(struct pglist_data)))) + +#endif /* CONFIG_NUMA */ + +#endif /* _ASM_IA64_NODEDATA_H */ diff --git a/kernel/arch/ia64/include/asm/numa.h b/kernel/arch/ia64/include/asm/numa.h new file mode 100644 index 000000000..2db0a6c6d --- /dev/null +++ b/kernel/arch/ia64/include/asm/numa.h @@ -0,0 +1,79 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * This file contains NUMA specific prototypes and definitions. + * + * 2002/08/05 Erich Focht + * + */ +#ifndef _ASM_IA64_NUMA_H +#define _ASM_IA64_NUMA_H + + +#ifdef CONFIG_NUMA + +#include +#include +#include +#include +#include + +#include + +extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; +extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; +extern pg_data_t *pgdat_list[MAX_NUMNODES]; + +/* Stuff below this line could be architecture independent */ + +extern int num_node_memblks; /* total number of memory chunks */ + +/* + * List of node memory chunks. Filled when parsing SRAT table to + * obtain information about memory nodes. +*/ + +struct node_memblk_s { + unsigned long start_paddr; + unsigned long size; + int nid; /* which logical node contains this chunk? */ + int bank; /* which mem bank on this node */ +}; + +struct node_cpuid_s { + u16 phys_id; /* id << 8 | eid */ + int nid; /* logical node containing this CPU */ +}; + +extern struct node_memblk_s node_memblk[NR_NODE_MEMBLKS]; +extern struct node_cpuid_s node_cpuid[NR_CPUS]; + +/* + * ACPI 2.0 SLIT (System Locality Information Table) + * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf + * + * This is a matrix with "distances" between nodes, they should be + * proportional to the memory access latency ratios. 
+ */ + +extern u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES]; +#define node_distance(from,to) (numa_slit[(from) * MAX_NUMNODES + (to)]) + +extern int paddr_to_nid(unsigned long paddr); + +#define local_nodeid (cpu_to_node_map[smp_processor_id()]) + +extern void map_cpu_to_node(int cpu, int nid); +extern void unmap_cpu_from_node(int cpu, int nid); +extern void numa_clear_node(int cpu); + +#else /* !CONFIG_NUMA */ +#define map_cpu_to_node(cpu, nid) do{}while(0) +#define unmap_cpu_from_node(cpu, nid) do{}while(0) +#define paddr_to_nid(addr) 0 +#define numa_clear_node(cpu) do { } while (0) +#endif /* CONFIG_NUMA */ + +#endif /* _ASM_IA64_NUMA_H */ diff --git a/kernel/arch/ia64/include/asm/page.h b/kernel/arch/ia64/include/asm/page.h new file mode 100644 index 000000000..ec48bb9f9 --- /dev/null +++ b/kernel/arch/ia64/include/asm/page.h @@ -0,0 +1,236 @@ +#ifndef _ASM_IA64_PAGE_H +#define _ASM_IA64_PAGE_H +/* + * Pagetable related stuff. + * + * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include +#include + +/* + * The top three bits of an IA64 address are its Region Number. + * Different regions are assigned to different purposes. 
+ */ +#define RGN_SHIFT (61) +#define RGN_BASE(r) (__IA64_UL_CONST(r)<> PAGE_SHIFT) + +#ifdef CONFIG_VIRTUAL_MEM_MAP +extern int ia64_pfn_valid (unsigned long pfn); +#else +# define ia64_pfn_valid(pfn) 1 +#endif + +#ifdef CONFIG_VIRTUAL_MEM_MAP +extern struct page *vmem_map; +#ifdef CONFIG_DISCONTIGMEM +# define page_to_pfn(page) ((unsigned long) (page - vmem_map)) +# define pfn_to_page(pfn) (vmem_map + (pfn)) +#else +# include +#endif +#else +# include +#endif + +#ifdef CONFIG_FLATMEM +# define pfn_valid(pfn) (((pfn) < max_mapnr) && ia64_pfn_valid(pfn)) +#elif defined(CONFIG_DISCONTIGMEM) +extern unsigned long min_low_pfn; +extern unsigned long max_low_pfn; +# define pfn_valid(pfn) (((pfn) >= min_low_pfn) && ((pfn) < max_low_pfn) && ia64_pfn_valid(pfn)) +#endif + +#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) + +typedef union ia64_va { + struct { + unsigned long off : 61; /* intra-region offset */ + unsigned long reg : 3; /* region number */ + } f; + unsigned long l; + void *p; +} ia64_va; + +/* + * Note: These macros depend on the fact that PAGE_OFFSET has all + * region bits set to 1 and all other bits set to zero. They are + * expressed in this way to ensure they result in a single "dep" + * instruction. 
+ */ +#define __pa(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = 0; _v.l;}) +#define __va(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = -1; _v.p;}) + +#define REGION_NUMBER(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg;}) +#define REGION_OFFSET(x) ({ia64_va _v; _v.l = (long) (x); _v.f.off;}) + +#ifdef CONFIG_HUGETLB_PAGE +# define htlbpage_to_page(x) (((unsigned long) REGION_NUMBER(x) << 61) \ + | (REGION_OFFSET(x) >> (HPAGE_SHIFT-PAGE_SHIFT))) +# define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +extern unsigned int hpage_shift; +#endif + +static __inline__ int +get_order (unsigned long size) +{ + long double d = size - 1; + long order; + + order = ia64_getf_exp(d); + order = order - PAGE_SHIFT - 0xffff + 1; + if (order < 0) + order = 0; + return order; +} + +#endif /* !__ASSEMBLY__ */ + +#ifdef STRICT_MM_TYPECHECKS + /* + * These are used to make use of C type-checking.. + */ + typedef struct { unsigned long pte; } pte_t; + typedef struct { unsigned long pmd; } pmd_t; +#if CONFIG_PGTABLE_LEVELS == 4 + typedef struct { unsigned long pud; } pud_t; +#endif + typedef struct { unsigned long pgd; } pgd_t; + typedef struct { unsigned long pgprot; } pgprot_t; + typedef struct page *pgtable_t; + +# define pte_val(x) ((x).pte) +# define pmd_val(x) ((x).pmd) +#if CONFIG_PGTABLE_LEVELS == 4 +# define pud_val(x) ((x).pud) +#endif +# define pgd_val(x) ((x).pgd) +# define pgprot_val(x) ((x).pgprot) + +# define __pte(x) ((pte_t) { (x) } ) +# define __pmd(x) ((pmd_t) { (x) } ) +# define __pgprot(x) ((pgprot_t) { (x) } ) + +#else /* !STRICT_MM_TYPECHECKS */ + /* + * .. 
while these make it easier on the compiler + */ +# ifndef __ASSEMBLY__ + typedef unsigned long pte_t; + typedef unsigned long pmd_t; + typedef unsigned long pgd_t; + typedef unsigned long pgprot_t; + typedef struct page *pgtable_t; +# endif + +# define pte_val(x) (x) +# define pmd_val(x) (x) +# define pgd_val(x) (x) +# define pgprot_val(x) (x) + +# define __pte(x) (x) +# define __pgd(x) (x) +# define __pgprot(x) (x) +#endif /* !STRICT_MM_TYPECHECKS */ + +#define PAGE_OFFSET RGN_BASE(RGN_KERNEL) + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | \ + (((current->personality & READ_IMPLIES_EXEC) != 0) \ + ? VM_EXEC : 0)) + +#define GATE_ADDR RGN_BASE(RGN_GATE) + +/* + * 0xa000000000000000+2*PERCPU_PAGE_SIZE + * - 0xa000000000000000+3*PERCPU_PAGE_SIZE remain unmapped (guard page) + */ +#define KERNEL_START (GATE_ADDR+__IA64_UL_CONST(0x100000000)) +#define PERCPU_ADDR (-PERCPU_PAGE_SIZE) +#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE) + +#define __HAVE_ARCH_GATE_AREA 1 + +#endif /* _ASM_IA64_PAGE_H */ diff --git a/kernel/arch/ia64/include/asm/pal.h b/kernel/arch/ia64/include/asm/pal.h new file mode 100644 index 000000000..2e69284df --- /dev/null +++ b/kernel/arch/ia64/include/asm/pal.h @@ -0,0 +1,1825 @@ +#ifndef _ASM_IA64_PAL_H +#define _ASM_IA64_PAL_H + +/* + * Processor Abstraction Layer definitions. + * + * This is based on Intel IA-64 Architecture Software Developer's Manual rev 1.0 + * chapter 11 IA-64 Processor Abstraction Layer + * + * Copyright (C) 1998-2001 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999 Srinivasa Prasad Thirumalachar + * Copyright (C) 2008 Silicon Graphics, Inc. (SGI) + * + * 99/10/01 davidm Make sure we pass zero for reserved parameters. + * 00/03/07 davidm Updated pal_cache_flush() to be in sync with PAL v2.6. 
+ * 00/03/23 cfleck Modified processor min-state save area to match updated PAL & SAL info + * 00/05/24 eranian Updated to latest PAL spec, fix structures bugs, added + * 00/05/25 eranian Support for stack calls, and static physical calls + * 00/06/18 eranian Support for stacked physical calls + * 06/10/26 rja Support for Intel Itanium Architecture Software Developer's + * Manual Rev 2.2 (Jan 2006) + */ + +/* + * Note that some of these calls use a static-register only calling + * convention which has nothing to do with the regular calling + * convention. + */ +#define PAL_CACHE_FLUSH 1 /* flush i/d cache */ +#define PAL_CACHE_INFO 2 /* get detailed i/d cache info */ +#define PAL_CACHE_INIT 3 /* initialize i/d cache */ +#define PAL_CACHE_SUMMARY 4 /* get summary of cache hierarchy */ +#define PAL_MEM_ATTRIB 5 /* list supported memory attributes */ +#define PAL_PTCE_INFO 6 /* purge TLB info */ +#define PAL_VM_INFO 7 /* return supported virtual memory features */ +#define PAL_VM_SUMMARY 8 /* return summary on supported vm features */ +#define PAL_BUS_GET_FEATURES 9 /* return processor bus interface features settings */ +#define PAL_BUS_SET_FEATURES 10 /* set processor bus features */ +#define PAL_DEBUG_INFO 11 /* get number of debug registers */ +#define PAL_FIXED_ADDR 12 /* get fixed component of processors's directed address */ +#define PAL_FREQ_BASE 13 /* base frequency of the platform */ +#define PAL_FREQ_RATIOS 14 /* ratio of processor, bus and ITC frequency */ +#define PAL_PERF_MON_INFO 15 /* return performance monitor info */ +#define PAL_PLATFORM_ADDR 16 /* set processor interrupt block and IO port space addr */ +#define PAL_PROC_GET_FEATURES 17 /* get configurable processor features & settings */ +#define PAL_PROC_SET_FEATURES 18 /* enable/disable configurable processor features */ +#define PAL_RSE_INFO 19 /* return rse information */ +#define PAL_VERSION 20 /* return version of PAL code */ +#define PAL_MC_CLEAR_LOG 21 /* clear all processor log info */ 
+#define PAL_MC_DRAIN 22 /* drain operations which could result in an MCA */ +#define PAL_MC_EXPECTED 23 /* set/reset expected MCA indicator */ +#define PAL_MC_DYNAMIC_STATE 24 /* get processor dynamic state */ +#define PAL_MC_ERROR_INFO 25 /* get processor MCA info and static state */ +#define PAL_MC_RESUME 26 /* Return to interrupted process */ +#define PAL_MC_REGISTER_MEM 27 /* Register memory for PAL to use during MCAs and inits */ +#define PAL_HALT 28 /* enter the low power HALT state */ +#define PAL_HALT_LIGHT 29 /* enter the low power light halt state*/ +#define PAL_COPY_INFO 30 /* returns info needed to relocate PAL */ +#define PAL_CACHE_LINE_INIT 31 /* init tags & data of cache line */ +#define PAL_PMI_ENTRYPOINT 32 /* register PMI memory entry points with the processor */ +#define PAL_ENTER_IA_32_ENV 33 /* enter IA-32 system environment */ +#define PAL_VM_PAGE_SIZE 34 /* return vm TC and page walker page sizes */ + +#define PAL_MEM_FOR_TEST 37 /* get amount of memory needed for late processor test */ +#define PAL_CACHE_PROT_INFO 38 /* get i/d cache protection info */ +#define PAL_REGISTER_INFO 39 /* return AR and CR register information*/ +#define PAL_SHUTDOWN 40 /* enter processor shutdown state */ +#define PAL_PREFETCH_VISIBILITY 41 /* Make Processor Prefetches Visible */ +#define PAL_LOGICAL_TO_PHYSICAL 42 /* returns information on logical to physical processor mapping */ +#define PAL_CACHE_SHARED_INFO 43 /* returns information on caches shared by logical processor */ +#define PAL_GET_HW_POLICY 48 /* Get current hardware resource sharing policy */ +#define PAL_SET_HW_POLICY 49 /* Set current hardware resource sharing policy */ +#define PAL_VP_INFO 50 /* Information about virtual processor features */ +#define PAL_MC_HW_TRACKING 51 /* Hardware tracking status */ + +#define PAL_COPY_PAL 256 /* relocate PAL procedures and PAL PMI */ +#define PAL_HALT_INFO 257 /* return the low power capabilities of processor */ +#define PAL_TEST_PROC 258 /* perform late 
processor self-test */ +#define PAL_CACHE_READ 259 /* read tag & data of cacheline for diagnostic testing */ +#define PAL_CACHE_WRITE 260 /* write tag & data of cacheline for diagnostic testing */ +#define PAL_VM_TR_READ 261 /* read contents of translation register */ +#define PAL_GET_PSTATE 262 /* get the current P-state */ +#define PAL_SET_PSTATE 263 /* set the P-state */ +#define PAL_BRAND_INFO 274 /* Processor branding information */ + +#define PAL_GET_PSTATE_TYPE_LASTSET 0 +#define PAL_GET_PSTATE_TYPE_AVGANDRESET 1 +#define PAL_GET_PSTATE_TYPE_AVGNORESET 2 +#define PAL_GET_PSTATE_TYPE_INSTANT 3 + +#define PAL_MC_ERROR_INJECT 276 /* Injects processor error or returns injection capabilities */ + +#ifndef __ASSEMBLY__ + +#include +#include + +/* + * Data types needed to pass information into PAL procedures and + * interpret information returned by them. + */ + +/* Return status from the PAL procedure */ +typedef s64 pal_status_t; + +#define PAL_STATUS_SUCCESS 0 /* No error */ +#define PAL_STATUS_UNIMPLEMENTED (-1) /* Unimplemented procedure */ +#define PAL_STATUS_EINVAL (-2) /* Invalid argument */ +#define PAL_STATUS_ERROR (-3) /* Error */ +#define PAL_STATUS_CACHE_INIT_FAIL (-4) /* Could not initialize the + * specified level and type of + * cache without sideeffects + * and "restrict" was 1 + */ +#define PAL_STATUS_REQUIRES_MEMORY (-9) /* Call requires PAL memory buffer */ + +/* Processor cache level in the hierarchy */ +typedef u64 pal_cache_level_t; +#define PAL_CACHE_LEVEL_L0 0 /* L0 */ +#define PAL_CACHE_LEVEL_L1 1 /* L1 */ +#define PAL_CACHE_LEVEL_L2 2 /* L2 */ + + +/* Processor cache type at a particular level in the hierarchy */ + +typedef u64 pal_cache_type_t; +#define PAL_CACHE_TYPE_INSTRUCTION 1 /* Instruction cache */ +#define PAL_CACHE_TYPE_DATA 2 /* Data or unified cache */ +#define PAL_CACHE_TYPE_INSTRUCTION_DATA 3 /* Both Data & Instruction */ + + +#define PAL_CACHE_FLUSH_INVALIDATE 1 /* Invalidate clean lines */ +#define 
PAL_CACHE_FLUSH_CHK_INTRS 2 /* check for interrupts/mc while flushing */ + +/* Processor cache line size in bytes */ +typedef int pal_cache_line_size_t; + +/* Processor cache line state */ +typedef u64 pal_cache_line_state_t; +#define PAL_CACHE_LINE_STATE_INVALID 0 /* Invalid */ +#define PAL_CACHE_LINE_STATE_SHARED 1 /* Shared */ +#define PAL_CACHE_LINE_STATE_EXCLUSIVE 2 /* Exclusive */ +#define PAL_CACHE_LINE_STATE_MODIFIED 3 /* Modified */ + +typedef struct pal_freq_ratio { + u32 den, num; /* denominator & numerator, in that order */ +} itc_ratio, proc_ratio; + +typedef union pal_cache_config_info_1_s { + struct { + u64 u : 1, /* 0 Unified cache ? */ + at : 2, /* 2-1 Cache mem attr*/ + reserved : 5, /* 7-3 Reserved */ + associativity : 8, /* 15-8 Associativity*/ + line_size : 8, /* 23-16 Line size */ + stride : 8, /* 31-24 Stride */ + store_latency : 8, /*39-32 Store latency*/ + load_latency : 8, /* 47-40 Load latency*/ + store_hints : 8, /* 55-48 Store hints*/ + load_hints : 8; /* 63-56 Load hints */ + } pcci1_bits; + u64 pcci1_data; +} pal_cache_config_info_1_t; + +typedef union pal_cache_config_info_2_s { + struct { + u32 cache_size; /*cache size in bytes*/ + + + u32 alias_boundary : 8, /* 39-32 aliased addr + * separation for max + * performance.
+ */ + tag_ls_bit : 8, /* 47-40 LSb of addr*/ + tag_ms_bit : 8, /* 55-48 MSb of addr*/ + reserved : 8; /* 63-56 Reserved */ + } pcci2_bits; + u64 pcci2_data; +} pal_cache_config_info_2_t; + + +typedef struct pal_cache_config_info_s { + pal_status_t pcci_status; + pal_cache_config_info_1_t pcci_info_1; + pal_cache_config_info_2_t pcci_info_2; + u64 pcci_reserved; +} pal_cache_config_info_t; + +#define pcci_ld_hints pcci_info_1.pcci1_bits.load_hints +#define pcci_st_hints pcci_info_1.pcci1_bits.store_hints +#define pcci_ld_latency pcci_info_1.pcci1_bits.load_latency +#define pcci_st_latency pcci_info_1.pcci1_bits.store_latency +#define pcci_stride pcci_info_1.pcci1_bits.stride +#define pcci_line_size pcci_info_1.pcci1_bits.line_size +#define pcci_assoc pcci_info_1.pcci1_bits.associativity +#define pcci_cache_attr pcci_info_1.pcci1_bits.at +#define pcci_unified pcci_info_1.pcci1_bits.u +#define pcci_tag_msb pcci_info_2.pcci2_bits.tag_ms_bit +#define pcci_tag_lsb pcci_info_2.pcci2_bits.tag_ls_bit +#define pcci_alias_boundary pcci_info_2.pcci2_bits.alias_boundary +#define pcci_cache_size pcci_info_2.pcci2_bits.cache_size + + + +/* Possible values for cache attributes */ + +#define PAL_CACHE_ATTR_WT 0 /* Write through cache */ +#define PAL_CACHE_ATTR_WB 1 /* Write back cache */ +#define PAL_CACHE_ATTR_WT_OR_WB 2 /* Either write thru or write + * back depending on TLB + * memory attributes + */ + + +/* Possible values for cache hints */ + +#define PAL_CACHE_HINT_TEMP_1 0 /* Temporal level 1 */ +#define PAL_CACHE_HINT_NTEMP_1 1 /* Non-temporal level 1 */ +#define PAL_CACHE_HINT_NTEMP_ALL 3 /* Non-temporal all levels */ + +/* Processor cache protection information */ +typedef union pal_cache_protection_element_u { + u32 pcpi_data; + struct { + u32 data_bits : 8, /* # data bits covered by + * each unit of protection + */ + + tagprot_lsb : 6, /* Least -do- */ + tagprot_msb : 6, /* Most Sig. tag address + * bit that this + * protection covers. 
+ */ + prot_bits : 6, /* # of protection bits */ + method : 4, /* Protection method */ + t_d : 2; /* Indicates which part + * of the cache this + * protection encoding + * applies. + */ + } pcp_info; +} pal_cache_protection_element_t; + +#define pcpi_cache_prot_part pcp_info.t_d +#define pcpi_prot_method pcp_info.method +#define pcpi_prot_bits pcp_info.prot_bits +#define pcpi_tagprot_msb pcp_info.tagprot_msb +#define pcpi_tagprot_lsb pcp_info.tagprot_lsb +#define pcpi_data_bits pcp_info.data_bits + +/* Processor cache part encodings */ +#define PAL_CACHE_PROT_PART_DATA 0 /* Data protection */ +#define PAL_CACHE_PROT_PART_TAG 1 /* Tag protection */ +#define PAL_CACHE_PROT_PART_TAG_DATA 2 /* Tag+data protection (tag is + * more significant ) + */ +#define PAL_CACHE_PROT_PART_DATA_TAG 3 /* Data+tag protection (data is + * more significant ) + */ +#define PAL_CACHE_PROT_PART_MAX 6 + + +typedef struct pal_cache_protection_info_s { + pal_status_t pcpi_status; + pal_cache_protection_element_t pcp_info[PAL_CACHE_PROT_PART_MAX]; +} pal_cache_protection_info_t; + + +/* Processor cache protection method encodings */ +#define PAL_CACHE_PROT_METHOD_NONE 0 /* No protection */ +#define PAL_CACHE_PROT_METHOD_ODD_PARITY 1 /* Odd parity */ +#define PAL_CACHE_PROT_METHOD_EVEN_PARITY 2 /* Even parity */ +#define PAL_CACHE_PROT_METHOD_ECC 3 /* ECC protection */ + + +/* Processor cache line identification in the hierarchy */ +typedef union pal_cache_line_id_u { + u64 pclid_data; + struct { + u64 cache_type : 8, /* 7-0 cache type */ + level : 8, /* 15-8 level of the + * cache in the + * hierarchy. + */ + way : 8, /* 23-16 way in the set + */ + part : 8, /* 31-24 part of the + * cache + */ + reserved : 32; /* 63-32 is reserved*/ + } pclid_info_read; + struct { + u64 cache_type : 8, /* 7-0 cache type */ + level : 8, /* 15-8 level of the + * cache in the + * hierarchy. 
+ */ + way : 8, /* 23-16 way in the set + */ + part : 8, /* 31-24 part of the + * cache + */ + mesi : 8, /* 39-32 cache line + * state + */ + start : 8, /* 47-40 lsb of data to + * invert + */ + length : 8, /* 55-48 #bits to + * invert + */ + trigger : 8; /* 63-56 Trigger error + * by doing a load + * after the write + */ + + } pclid_info_write; +} pal_cache_line_id_u_t; + +#define pclid_read_part pclid_info_read.part +#define pclid_read_way pclid_info_read.way +#define pclid_read_level pclid_info_read.level +#define pclid_read_cache_type pclid_info_read.cache_type + +#define pclid_write_trigger pclid_info_write.trigger +#define pclid_write_length pclid_info_write.length +#define pclid_write_start pclid_info_write.start +#define pclid_write_mesi pclid_info_write.mesi +#define pclid_write_part pclid_info_write.part +#define pclid_write_way pclid_info_write.way +#define pclid_write_level pclid_info_write.level +#define pclid_write_cache_type pclid_info_write.cache_type + +/* Processor cache line part encodings */ +#define PAL_CACHE_LINE_ID_PART_DATA 0 /* Data */ +#define PAL_CACHE_LINE_ID_PART_TAG 1 /* Tag */ +#define PAL_CACHE_LINE_ID_PART_DATA_PROT 2 /* Data protection */ +#define PAL_CACHE_LINE_ID_PART_TAG_PROT 3 /* Tag protection */ +#define PAL_CACHE_LINE_ID_PART_DATA_TAG_PROT 4 /* Data+tag + * protection + */ +typedef struct pal_cache_line_info_s { + pal_status_t pcli_status; /* Return status of the read cache line + * info call. + */ + u64 pcli_data; /* 64-bit data, tag, protection bits .. 
*/ + u64 pcli_data_len; /* data length in bits */ + pal_cache_line_state_t pcli_cache_line_state; /* mesi state */ + +} pal_cache_line_info_t; + + +/* Machine Check related crap */ + +/* Pending event status bits */ +typedef u64 pal_mc_pending_events_t; + +#define PAL_MC_PENDING_MCA (1 << 0) +#define PAL_MC_PENDING_INIT (1 << 1) + +/* Error information type */ +typedef u64 pal_mc_info_index_t; + +#define PAL_MC_INFO_PROCESSOR 0 /* Processor */ +#define PAL_MC_INFO_CACHE_CHECK 1 /* Cache check */ +#define PAL_MC_INFO_TLB_CHECK 2 /* Tlb check */ +#define PAL_MC_INFO_BUS_CHECK 3 /* Bus check */ +#define PAL_MC_INFO_REQ_ADDR 4 /* Requestor address */ +#define PAL_MC_INFO_RESP_ADDR 5 /* Responder address */ +#define PAL_MC_INFO_TARGET_ADDR 6 /* Target address */ +#define PAL_MC_INFO_IMPL_DEP 7 /* Implementation + * dependent + */ + +#define PAL_TLB_CHECK_OP_PURGE 8 + +typedef struct pal_process_state_info_s { + u64 reserved1 : 2, + rz : 1, /* PAL_CHECK processor + * rendezvous + * successful. + */ + + ra : 1, /* PAL_CHECK attempted + * a rendezvous. + */ + me : 1, /* Distinct multiple + * errors occurred + */ + + mn : 1, /* Min. state save + * area has been + * registered with PAL + */ + + sy : 1, /* Storage integrity + * synched + */ + + + co : 1, /* Continuable */ + ci : 1, /* MC isolated */ + us : 1, /* Uncontained storage + * damage. + */ + + + hd : 1, /* Non-essential hw + * lost (no loss of + * functionality) + * causing the + * processor to run in + * degraded mode. + */ + + tl : 1, /* 1 => MC occurred + * after an instr was + * executed but before + * the trap that + * resulted from instr + * execution was + * generated. 
+ * (Trap Lost ) + */ + mi : 1, /* More information available + * call PAL_MC_ERROR_INFO + */ + pi : 1, /* Precise instruction pointer */ + pm : 1, /* Precise min-state save area */ + + dy : 1, /* Processor dynamic + * state valid + */ + + + in : 1, /* 0 = MC, 1 = INIT */ + rs : 1, /* RSE valid */ + cm : 1, /* MC corrected */ + ex : 1, /* MC is expected */ + cr : 1, /* Control regs valid*/ + pc : 1, /* Perf cntrs valid */ + dr : 1, /* Debug regs valid */ + tr : 1, /* Translation regs + * valid + */ + rr : 1, /* Region regs valid */ + ar : 1, /* App regs valid */ + br : 1, /* Branch regs valid */ + pr : 1, /* Predicate registers + * valid + */ + + fp : 1, /* fp registers valid*/ + b1 : 1, /* Preserved bank one + * general registers + * are valid + */ + b0 : 1, /* Preserved bank zero + * general registers + * are valid + */ + gr : 1, /* General registers + * are valid + * (excl. banked regs) + */ + dsize : 16, /* size of dynamic + * state returned + * by the processor + */ + + se : 1, /* Shared error. MCA in a + shared structure */ + reserved2 : 10, + cc : 1, /* Cache check */ + tc : 1, /* TLB check */ + bc : 1, /* Bus check */ + rc : 1, /* Register file check */ + uc : 1; /* Uarch check */ + +} pal_processor_state_info_t; + +typedef struct pal_cache_check_info_s { + u64 op : 4, /* Type of cache + * operation that + * caused the machine + * check. 
+ */ + level : 2, /* Cache level */ + reserved1 : 2, + dl : 1, /* Failure in data part + * of cache line + */ + tl : 1, /* Failure in tag part + * of cache line + */ + dc : 1, /* Failure in dcache */ + ic : 1, /* Failure in icache */ + mesi : 3, /* Cache line state */ + mv : 1, /* mesi valid */ + way : 5, /* Way in which the + * error occurred + */ + wiv : 1, /* Way field valid */ + reserved2 : 1, + dp : 1, /* Data poisoned on MBE */ + reserved3 : 6, + hlth : 2, /* Health indicator */ + + index : 20, /* Cache line index */ + reserved4 : 2, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + tv : 1, /* Target address + * structure is valid + */ + rq : 1, /* Requester identifier + * structure is valid + */ + rp : 1, /* Responder identifier + * structure is valid + */ + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_cache_check_info_t; + +typedef struct pal_tlb_check_info_s { + + u64 tr_slot : 8, /* Slot# of TR where + * error occurred + */ + trv : 1, /* tr_slot field is valid */ + reserved1 : 1, + level : 2, /* TLB level where failure occurred */ + reserved2 : 4, + dtr : 1, /* Fail in data TR */ + itr : 1, /* Fail in inst TR */ + dtc : 1, /* Fail in data TC */ + itc : 1, /* Fail in inst. 
TC */ + op : 4, /* Cache operation */ + reserved3 : 6, + hlth : 2, /* Health indicator */ + reserved4 : 22, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + tv : 1, /* Target address + * structure is valid + */ + rq : 1, /* Requester identifier + * structure is valid + */ + rp : 1, /* Responder identifier + * structure is valid + */ + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_tlb_check_info_t; + +typedef struct pal_bus_check_info_s { + u64 size : 5, /* Xaction size */ + ib : 1, /* Internal bus error */ + eb : 1, /* External bus error */ + cc : 1, /* Error occurred + * during cache-cache + * transfer. + */ + type : 8, /* Bus xaction type*/ + sev : 5, /* Bus error severity*/ + hier : 2, /* Bus hierarchy level */ + dp : 1, /* Data poisoned on MBE */ + bsi : 8, /* Bus error status + * info + */ + reserved2 : 22, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + tv : 1, /* Target address + * structure is valid + */ + rq : 1, /* Requester identifier + * structure is valid + */ + rp : 1, /* Responder identifier + * structure is valid + */ + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_bus_check_info_t; + +typedef struct pal_reg_file_check_info_s { + u64 id : 4, /* Register file identifier */ + op : 4, /* Type of register + * operation that + * caused the machine + * check. 
+ */ + reg_num : 7, /* Register number */ + rnv : 1, /* reg_num valid */ + reserved2 : 38, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + reserved3 : 3, + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_reg_file_check_info_t; + +typedef struct pal_uarch_check_info_s { + u64 sid : 5, /* Structure identification */ + level : 3, /* Level of failure */ + array_id : 4, /* Array identification */ + op : 4, /* Type of + * operation that + * caused the machine + * check. + */ + way : 6, /* Way of structure */ + wv : 1, /* way valid */ + xv : 1, /* index valid */ + reserved1 : 6, + hlth : 2, /* Health indicator */ + index : 8, /* Index or set of the uarch + * structure that failed. + */ + reserved2 : 24, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + tv : 1, /* Target address + * structure is valid + */ + rq : 1, /* Requester identifier + * structure is valid + */ + rp : 1, /* Responder identifier + * structure is valid + */ + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_uarch_check_info_t; + +typedef union pal_mc_error_info_u { + u64 pmei_data; + pal_processor_state_info_t pme_processor; + pal_cache_check_info_t pme_cache; + pal_tlb_check_info_t pme_tlb; + pal_bus_check_info_t pme_bus; + pal_reg_file_check_info_t pme_reg_file; + pal_uarch_check_info_t pme_uarch; +} pal_mc_error_info_t; + +#define pmci_proc_unknown_check pme_processor.uc +#define pmci_proc_bus_check pme_processor.bc +#define pmci_proc_tlb_check pme_processor.tc +#define pmci_proc_cache_check pme_processor.cc +#define pmci_proc_dynamic_state_size pme_processor.dsize +#define pmci_proc_gpr_valid pme_processor.gr +#define 
pmci_proc_preserved_bank0_gpr_valid pme_processor.b0 +#define pmci_proc_preserved_bank1_gpr_valid pme_processor.b1 +#define pmci_proc_fp_valid pme_processor.fp +#define pmci_proc_predicate_regs_valid pme_processor.pr +#define pmci_proc_branch_regs_valid pme_processor.br +#define pmci_proc_app_regs_valid pme_processor.ar +#define pmci_proc_region_regs_valid pme_processor.rr +#define pmci_proc_translation_regs_valid pme_processor.tr +#define pmci_proc_debug_regs_valid pme_processor.dr +#define pmci_proc_perf_counters_valid pme_processor.pc +#define pmci_proc_control_regs_valid pme_processor.cr +#define pmci_proc_machine_check_expected pme_processor.ex +#define pmci_proc_machine_check_corrected pme_processor.cm +#define pmci_proc_rse_valid pme_processor.rs +#define pmci_proc_machine_check_or_init pme_processor.in +#define pmci_proc_dynamic_state_valid pme_processor.dy +#define pmci_proc_operation pme_processor.op /* NOTE(review): pal_processor_state_info_t declares no 'op' bitfield, so this macro cannot compile if used */ +#define pmci_proc_trap_lost pme_processor.tl +#define pmci_proc_hardware_damage pme_processor.hd +#define pmci_proc_uncontained_storage_damage pme_processor.us +#define pmci_proc_machine_check_isolated pme_processor.ci +#define pmci_proc_continuable pme_processor.co +#define pmci_proc_storage_intergrity_synced pme_processor.sy +#define pmci_proc_min_state_save_area_regd pme_processor.mn +#define pmci_proc_distinct_multiple_errors pme_processor.me +#define pmci_proc_pal_attempted_rendezvous pme_processor.ra +#define pmci_proc_pal_rendezvous_complete pme_processor.rz + + +#define pmci_cache_level pme_cache.level +#define pmci_cache_line_state pme_cache.mesi +#define pmci_cache_line_state_valid pme_cache.mv +#define pmci_cache_line_index pme_cache.index +#define pmci_cache_instr_cache_fail pme_cache.ic +#define pmci_cache_data_cache_fail pme_cache.dc +#define pmci_cache_line_tag_fail pme_cache.tl +#define pmci_cache_line_data_fail pme_cache.dl +#define pmci_cache_operation pme_cache.op +#define pmci_cache_way_valid pme_cache.wiv /* bitfield is named 'wiv' in pal_cache_check_info_t */ +#define
pmci_cache_target_address_valid pme_cache.tv +#define pmci_cache_way pme_cache.way +#define pmci_cache_mc pme_cache.mcc /* 'mcc' = machine check corrected */ + +#define pmci_tlb_instr_translation_cache_fail pme_tlb.itc +#define pmci_tlb_data_translation_cache_fail pme_tlb.dtc +#define pmci_tlb_instr_translation_reg_fail pme_tlb.itr +#define pmci_tlb_data_translation_reg_fail pme_tlb.dtr +#define pmci_tlb_translation_reg_slot pme_tlb.tr_slot +#define pmci_tlb_mc pme_tlb.mcc /* 'mcc' = machine check corrected */ + +#define pmci_bus_status_info pme_bus.bsi +#define pmci_bus_req_address_valid pme_bus.rq +#define pmci_bus_resp_address_valid pme_bus.rp +#define pmci_bus_target_address_valid pme_bus.tv +#define pmci_bus_error_severity pme_bus.sev +#define pmci_bus_transaction_type pme_bus.type +#define pmci_bus_cache_cache_transfer pme_bus.cc +#define pmci_bus_transaction_size pme_bus.size +#define pmci_bus_internal_error pme_bus.ib +#define pmci_bus_external_error pme_bus.eb +#define pmci_bus_mc pme_bus.mcc /* 'mcc' = machine check corrected */ + +/* + * NOTE: this min_state_save area struct only includes the 1KB + * architectural state save area. The other 3 KB is scratch space + * for PAL. + */ + +typedef struct pal_min_state_area_s { + u64 pmsa_nat_bits; /* nat bits for saved GRs */ + u64 pmsa_gr[15]; /* GR1 - GR15 */ + u64 pmsa_bank0_gr[16]; /* GR16 - GR31 */ + u64 pmsa_bank1_gr[16]; /* GR16 - GR31 */ + u64 pmsa_pr; /* predicate registers */ + u64 pmsa_br0; /* branch register 0 */ + u64 pmsa_rsc; /* ar.rsc */ + u64 pmsa_iip; /* cr.iip */ + u64 pmsa_ipsr; /* cr.ipsr */ + u64 pmsa_ifs; /* cr.ifs */ + u64 pmsa_xip; /* previous iip */ + u64 pmsa_xpsr; /* previous psr */ + u64 pmsa_xfs; /* previous ifs */ + u64 pmsa_br1; /* branch register 1 */ + u64 pmsa_reserved[70]; /* pal_min_state_area should total to 1KB */ +} pal_min_state_area_t; + + +struct ia64_pal_retval { + /* + * A zero status value indicates call completed without error. + * A negative status value indicates reason of call failure.
+ * A positive status value indicates success but an + * informational value should be printed (e.g., "reboot for + * change to take effect"). + */ + s64 status; + u64 v0; + u64 v1; + u64 v2; +}; + +/* + * Note: Currently unused PAL arguments are generally labeled + * "reserved" so the value specified in the PAL documentation + * (generally 0) MUST be passed. Reserved parameters are not optional + * parameters. + */ +extern struct ia64_pal_retval ia64_pal_call_static (u64, u64, u64, u64); +extern struct ia64_pal_retval ia64_pal_call_stacked (u64, u64, u64, u64); +extern struct ia64_pal_retval ia64_pal_call_phys_static (u64, u64, u64, u64); +extern struct ia64_pal_retval ia64_pal_call_phys_stacked (u64, u64, u64, u64); +extern void ia64_save_scratch_fpregs (struct ia64_fpreg *); +extern void ia64_load_scratch_fpregs (struct ia64_fpreg *); + +#define PAL_CALL(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_static(a0, a1, a2, a3); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +#define PAL_CALL_STK(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_stacked(a0, a1, a2, a3); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +#define PAL_CALL_PHYS(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_phys_static(a0, a1, a2, a3); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +#define PAL_CALL_PHYS_STK(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_phys_stacked(a0, a1, a2, a3); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +typedef int (*ia64_pal_handler) (u64, ...); +extern ia64_pal_handler ia64_pal; +extern void ia64_pal_handler_init (void *); + +extern ia64_pal_handler ia64_pal; + +extern pal_cache_config_info_t l0d_cache_config_info; +extern pal_cache_config_info_t l0i_cache_config_info; +extern 
pal_cache_config_info_t l1_cache_config_info; +extern pal_cache_config_info_t l2_cache_config_info; + +extern pal_cache_protection_info_t l0d_cache_protection_info; +extern pal_cache_protection_info_t l0i_cache_protection_info; +extern pal_cache_protection_info_t l1_cache_protection_info; +extern pal_cache_protection_info_t l2_cache_protection_info; + +extern pal_cache_config_info_t pal_cache_config_info_get(pal_cache_level_t, + pal_cache_type_t); + +extern pal_cache_protection_info_t pal_cache_protection_info_get(pal_cache_level_t, + pal_cache_type_t); + + +extern void pal_error(int); + + +/* Useful wrappers for the current list of pal procedures */ + +typedef union pal_bus_features_u { + u64 pal_bus_features_val; + struct { + u64 pbf_reserved1 : 29; + u64 pbf_req_bus_parking : 1; + u64 pbf_bus_lock_mask : 1; + u64 pbf_enable_half_xfer_rate : 1; + u64 pbf_reserved2 : 20; + u64 pbf_enable_shared_line_replace : 1; + u64 pbf_enable_exclusive_line_replace : 1; + u64 pbf_disable_xaction_queueing : 1; + u64 pbf_disable_resp_err_check : 1; + u64 pbf_disable_berr_check : 1; + u64 pbf_disable_bus_req_internal_err_signal : 1; + u64 pbf_disable_bus_req_berr_signal : 1; + u64 pbf_disable_bus_init_event_check : 1; + u64 pbf_disable_bus_init_event_signal : 1; + u64 pbf_disable_bus_addr_err_check : 1; + u64 pbf_disable_bus_addr_err_signal : 1; + u64 pbf_disable_bus_data_err_check : 1; + } pal_bus_features_s; +} pal_bus_features_u_t; + +extern void pal_bus_features_print (u64); + +/* Provide information about configurable processor bus features */ +static inline s64 +ia64_pal_bus_get_features (pal_bus_features_u_t *features_avail, + pal_bus_features_u_t *features_status, + pal_bus_features_u_t *features_control) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS(iprv, PAL_BUS_GET_FEATURES, 0, 0, 0); + if (features_avail) + features_avail->pal_bus_features_val = iprv.v0; + if (features_status) + features_status->pal_bus_features_val = iprv.v1; + if (features_control) + 
features_control->pal_bus_features_val = iprv.v2; + return iprv.status; +} + +/* Enables/disables specific processor bus features */ +static inline s64 +ia64_pal_bus_set_features (pal_bus_features_u_t feature_select) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS(iprv, PAL_BUS_SET_FEATURES, feature_select.pal_bus_features_val, 0, 0); + return iprv.status; +} + +/* Get detailed cache information; *conf is filled in only when the PAL status is 0 */ +static inline s64 +ia64_pal_cache_config_info (u64 cache_level, u64 cache_type, pal_cache_config_info_t *conf) +{ + struct ia64_pal_retval iprv; + + PAL_CALL(iprv, PAL_CACHE_INFO, cache_level, cache_type, 0); + + if (iprv.status == 0) { + conf->pcci_status = iprv.status; + conf->pcci_info_1.pcci1_data = iprv.v0; + conf->pcci_info_2.pcci2_data = iprv.v1; + conf->pcci_reserved = iprv.v2; + } + return iprv.status; + +} + +/* Get detailed cache protection information; each 64-bit return value is split into two 32-bit elements of prot->pcp_info[], and *prot is filled in only when the PAL status is 0 */ +static inline s64 +ia64_pal_cache_prot_info (u64 cache_level, u64 cache_type, pal_cache_protection_info_t *prot) +{ + struct ia64_pal_retval iprv; + + PAL_CALL(iprv, PAL_CACHE_PROT_INFO, cache_level, cache_type, 0); + + if (iprv.status == 0) { + prot->pcpi_status = iprv.status; + prot->pcp_info[0].pcpi_data = iprv.v0 & 0xffffffff; + prot->pcp_info[1].pcpi_data = iprv.v0 >> 32; + prot->pcp_info[2].pcpi_data = iprv.v1 & 0xffffffff; + prot->pcp_info[3].pcpi_data = iprv.v1 >> 32; + prot->pcp_info[4].pcpi_data = iprv.v2 & 0xffffffff; + prot->pcp_info[5].pcpi_data = iprv.v2 >> 32; + } + return iprv.status; +} + +/* + * Flush the processor instruction or data caches. *PROGRESS must be + * initialized to zero before calling this for the first time.
+ */ +static inline s64 +ia64_pal_cache_flush (u64 cache_type, u64 invalidate, u64 *progress, u64 *vector) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_CACHE_FLUSH, cache_type, invalidate, *progress); + if (vector) + *vector = iprv.v0; + *progress = iprv.v1; + return iprv.status; +} + + +/* Initialize the processor controlled caches */ +static inline s64 +ia64_pal_cache_init (u64 level, u64 cache_type, u64 rest) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_CACHE_INIT, level, cache_type, rest); + return iprv.status; +} + +/* Initialize the tags and data of a data or unified cache line of + * processor controlled cache to known values without the availability + * of backing memory. + */ +static inline s64 +ia64_pal_cache_line_init (u64 physical_addr, u64 data_value) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_CACHE_LINE_INIT, physical_addr, data_value, 0); + return iprv.status; +} + + +/* Read the data and tag of a processor controlled cache line for diags */ +static inline s64 +ia64_pal_cache_read (pal_cache_line_id_u_t line_id, u64 physical_addr) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS_STK(iprv, PAL_CACHE_READ, line_id.pclid_data, + physical_addr, 0); + return iprv.status; +} + +/* Return summary information about the hierarchy of caches controlled by the processor */ +static inline long ia64_pal_cache_summary(unsigned long *cache_levels, + unsigned long *unique_caches) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_CACHE_SUMMARY, 0, 0, 0); + if (cache_levels) + *cache_levels = iprv.v0; + if (unique_caches) + *unique_caches = iprv.v1; + return iprv.status; +} + +/* Write the data and tag of a processor-controlled cache line for diags */ +static inline s64 +ia64_pal_cache_write (pal_cache_line_id_u_t line_id, u64 physical_addr, u64 data) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS_STK(iprv, PAL_CACHE_WRITE, line_id.pclid_data, + physical_addr, data); + return iprv.status; +} + + +/* Return the parameters needed 
to copy relocatable PAL procedures from ROM to memory */ +static inline s64 +ia64_pal_copy_info (u64 copy_type, u64 num_procs, u64 num_iopics, + u64 *buffer_size, u64 *buffer_align) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_COPY_INFO, copy_type, num_procs, num_iopics); + if (buffer_size) + *buffer_size = iprv.v0; + if (buffer_align) + *buffer_align = iprv.v1; + return iprv.status; +} + +/* Copy relocatable PAL procedures from ROM to memory */ +static inline s64 +ia64_pal_copy_pal (u64 target_addr, u64 alloc_size, u64 processor, u64 *pal_proc_offset) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_COPY_PAL, target_addr, alloc_size, processor); + if (pal_proc_offset) + *pal_proc_offset = iprv.v0; + return iprv.status; +} + +/* Return the number of instruction and data debug register pairs */ +static inline long ia64_pal_debug_info(unsigned long *inst_regs, + unsigned long *data_regs) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_DEBUG_INFO, 0, 0, 0); + if (inst_regs) + *inst_regs = iprv.v0; + if (data_regs) + *data_regs = iprv.v1; + + return iprv.status; +} + +#ifdef TBD +/* Switch from IA64-system environment to IA-32 system environment */ +static inline s64 +ia64_pal_enter_ia32_env (ia32_env1, ia32_env2, ia32_env3) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_ENTER_IA_32_ENV, ia32_env1, ia32_env2, ia32_env3); + return iprv.status; +} +#endif + +/* Get unique geographical address of this processor on its bus */ +static inline s64 +ia64_pal_fixed_addr (u64 *global_unique_addr) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_FIXED_ADDR, 0, 0, 0); + if (global_unique_addr) + *global_unique_addr = iprv.v0; + return iprv.status; +} + +/* Get base frequency of the platform if generated by the processor */ +static inline long ia64_pal_freq_base(unsigned long *platform_base_freq) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_FREQ_BASE, 0, 0, 0); + if (platform_base_freq) + *platform_base_freq = iprv.v0; + return 
iprv.status; +} + +/* + * Get the ratios of the processor frequency, bus frequency and interval timer + * frequency to the base frequency of the platform + */ +static inline s64 +ia64_pal_freq_ratios (struct pal_freq_ratio *proc_ratio, struct pal_freq_ratio *bus_ratio, + struct pal_freq_ratio *itc_ratio) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_FREQ_RATIOS, 0, 0, 0); + if (proc_ratio) + *(u64 *)proc_ratio = iprv.v0; /* raw v0 is stored over the struct through a u64 cast */ + if (bus_ratio) + *(u64 *)bus_ratio = iprv.v1; + if (itc_ratio) + *(u64 *)itc_ratio = iprv.v2; + return iprv.status; +} + +/* + * Get the current hardware resource sharing policy of the processor + */ +static inline s64 +ia64_pal_get_hw_policy (u64 proc_num, u64 *cur_policy, u64 *num_impacted, + u64 *la) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_GET_HW_POLICY, proc_num, 0, 0); + if (cur_policy) + *cur_policy = iprv.v0; + if (num_impacted) + *num_impacted = iprv.v1; + if (la) + *la = iprv.v2; + return iprv.status; +} + +/* Make the processor enter HALT or one of the implementation dependent low + * power states where prefetching and execution are suspended and cache and + * TLB coherency is not maintained. + */ +static inline s64 +ia64_pal_halt (u64 halt_state) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_HALT, halt_state, 0, 0); + return iprv.status; +} + +typedef union pal_power_mgmt_info_u { + u64 ppmi_data; + struct { + u64 exit_latency : 16, + entry_latency : 16, + power_consumption : 28, + im : 1, + co : 1, + reserved : 2; + } pal_power_mgmt_info_s; +} pal_power_mgmt_info_u_t; + +/* Return information about processor's optional power management capabilities.
*/ +static inline s64 +ia64_pal_halt_info (pal_power_mgmt_info_u_t *power_buf) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_HALT_INFO, (unsigned long) power_buf, 0, 0); + return iprv.status; +} + +/* Get the current P-state information */ +static inline s64 +ia64_pal_get_pstate (u64 *pstate_index, unsigned long type) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_GET_PSTATE, type, 0, 0); + *pstate_index = iprv.v0; /* NOTE(review): unlike the sibling wrappers, pstate_index is not NULL-checked */ + return iprv.status; +} + +/* Set the P-state */ +static inline s64 +ia64_pal_set_pstate (u64 pstate_index) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_SET_PSTATE, pstate_index, 0, 0); + return iprv.status; +} + +/* Processor branding information */ +static inline s64 +ia64_pal_get_brand_info (char *brand_info) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_BRAND_INFO, 0, (u64)brand_info, 0); + return iprv.status; +} + +/* Cause the processor to enter LIGHT HALT state, where prefetching and execution are + * suspended, but cache and TLB coherency is maintained. + */ +static inline s64 +ia64_pal_halt_light (void) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_HALT_LIGHT, 0, 0, 0); + return iprv.status; +} + +/* Clear all the processor error logging registers and reset the indicator that allows + * the error logging registers to be written. This procedure also checks the pending + * machine check bit and pending INIT bit and reports their states. + */ +static inline s64 +ia64_pal_mc_clear_log (u64 *pending_vector) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_CLEAR_LOG, 0, 0, 0); + if (pending_vector) + *pending_vector = iprv.v0; + return iprv.status; +} + +/* Ensure that all outstanding transactions in a processor are completed or that any + * MCA due to these outstanding transactions is taken.
+ */ +static inline s64 +ia64_pal_mc_drain (void) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_DRAIN, 0, 0, 0); + return iprv.status; +} + +/* Return the machine check dynamic processor state */ +static inline s64 +ia64_pal_mc_dynamic_state (u64 info_type, u64 dy_buffer, u64 *size) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_DYNAMIC_STATE, info_type, dy_buffer, 0); + if (size) + *size = iprv.v0; + return iprv.status; +} + +/* Return processor machine check information */ +static inline s64 +ia64_pal_mc_error_info (u64 info_index, u64 type_index, u64 *size, u64 *error_info) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_ERROR_INFO, info_index, type_index, 0); + if (size) + *size = iprv.v0; + if (error_info) + *error_info = iprv.v1; + return iprv.status; +} + +/* Injects the requested processor error or returns info on + * supported injection capabilities for current processor implementation + */ +static inline s64 +ia64_pal_mc_error_inject_phys (u64 err_type_info, u64 err_struct_info, + u64 err_data_buffer, u64 *capabilities, u64 *resources) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS_STK(iprv, PAL_MC_ERROR_INJECT, err_type_info, + err_struct_info, err_data_buffer); + if (capabilities) + *capabilities= iprv.v0; + if (resources) + *resources= iprv.v1; + return iprv.status; +} + +static inline s64 +ia64_pal_mc_error_inject_virt (u64 err_type_info, u64 err_struct_info, + u64 err_data_buffer, u64 *capabilities, u64 *resources) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_MC_ERROR_INJECT, err_type_info, + err_struct_info, err_data_buffer); + if (capabilities) + *capabilities= iprv.v0; + if (resources) + *resources= iprv.v1; + return iprv.status; +} + +/* Inform PALE_CHECK whether a machine check is expected so that PALE_CHECK will not + * attempt to correct any expected machine checks.
+ */ +static inline s64 +ia64_pal_mc_expected (u64 expected, u64 *previous) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_EXPECTED, expected, 0, 0); + if (previous) + *previous = iprv.v0; + return iprv.status; +} + +typedef union pal_hw_tracking_u { + u64 pht_data; + struct { + u64 itc :4, /* Instruction cache tracking */ + dct :4, /* Data cache tracking */ + itt :4, /* Instruction TLB tracking */ + ddt :4, /* Data TLB tracking */ + reserved:48; + } pal_hw_tracking_s; +} pal_hw_tracking_u_t; + +/* + * Hardware tracking status. + */ +static inline s64 +ia64_pal_mc_hw_tracking (u64 *status) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_HW_TRACKING, 0, 0, 0); + if (status) + *status = iprv.v0; + return iprv.status; +} + +/* Register a platform dependent location with PAL to which it can save + * minimal processor state in the event of a machine check or initialization + * event. + */ +static inline s64 +ia64_pal_mc_register_mem (u64 physical_addr, u64 size, u64 *req_size) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_REGISTER_MEM, physical_addr, size, 0); + if (req_size) + *req_size = iprv.v0; + return iprv.status; +} + +/* Restore minimal architectural processor state, set CMC interrupt if necessary + * and resume execution + */ +static inline s64 +ia64_pal_mc_resume (u64 set_cmci, u64 save_ptr) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_RESUME, set_cmci, save_ptr, 0); + return iprv.status; +} + +/* Return the memory attributes implemented by the processor */ +static inline s64 +ia64_pal_mem_attrib (u64 *mem_attrib) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MEM_ATTRIB, 0, 0, 0); + if (mem_attrib) + *mem_attrib = iprv.v0 & 0xff; /* only the low 8 bits of v0 are kept */ + return iprv.status; +} + +/* Return the amount of memory needed for second phase of processor + * self-test and the required alignment of memory.
+ */ +static inline s64 +ia64_pal_mem_for_test (u64 *bytes_needed, u64 *alignment) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MEM_FOR_TEST, 0, 0, 0); + if (bytes_needed) + *bytes_needed = iprv.v0; + if (alignment) + *alignment = iprv.v1; + return iprv.status; +} + +typedef union pal_perf_mon_info_u { + u64 ppmi_data; + struct { + u64 generic : 8, + width : 8, + cycles : 8, + retired : 8, + reserved : 32; + } pal_perf_mon_info_s; +} pal_perf_mon_info_u_t; + +/* Return the performance monitor information about what can be counted + * and how to configure the monitors to count the desired events. + */ +static inline s64 +ia64_pal_perf_mon_info (u64 *pm_buffer, pal_perf_mon_info_u_t *pm_info) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_PERF_MON_INFO, (unsigned long) pm_buffer, 0, 0); + if (pm_info) + pm_info->ppmi_data = iprv.v0; + return iprv.status; +} + +/* Specifies the physical address of the processor interrupt block + * and I/O port space. + */ +static inline s64 +ia64_pal_platform_addr (u64 type, u64 physical_addr) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_PLATFORM_ADDR, type, physical_addr, 0); + return iprv.status; +} + +/* Set the SAL PMI entrypoint in memory */ +static inline s64 +ia64_pal_pmi_entrypoint (u64 sal_pmi_entry_addr) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_PMI_ENTRYPOINT, sal_pmi_entry_addr, 0, 0); + return iprv.status; +} + +struct pal_features_s; +/* Provide information about configurable processor features */ +static inline s64 +ia64_pal_proc_get_features (u64 *features_avail, + u64 *features_status, + u64 *features_control, + u64 features_set) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, features_set, 0); + if (iprv.status == 0) { + *features_avail = iprv.v0; + *features_status = iprv.v1; + *features_control = iprv.v2; + } + return iprv.status; +} + +/* Enable/disable processor dependent features */ +static inline s64 +ia64_pal_proc_set_features (u64 
feature_select) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, feature_select, 0, 0); + return iprv.status; +} + +/* + * Put everything in a struct so we avoid the global offset table whenever + * possible. + */ +typedef struct ia64_ptce_info_s { + unsigned long base; + u32 count[2]; + u32 stride[2]; +} ia64_ptce_info_t; + +/* Return the information required for the architected loop used to purge + * (initialize) the entire TC + */ +static inline s64 +ia64_get_ptce (ia64_ptce_info_t *ptce) +{ + struct ia64_pal_retval iprv; + + if (!ptce) + return -1; + + PAL_CALL(iprv, PAL_PTCE_INFO, 0, 0, 0); + if (iprv.status == 0) { + ptce->base = iprv.v0; + ptce->count[0] = iprv.v1 >> 32; + ptce->count[1] = iprv.v1 & 0xffffffff; + ptce->stride[0] = iprv.v2 >> 32; + ptce->stride[1] = iprv.v2 & 0xffffffff; + } + return iprv.status; +} + +/* Return info about implemented application and control registers. */ +static inline s64 +ia64_pal_register_info (u64 info_request, u64 *reg_info_1, u64 *reg_info_2) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_REGISTER_INFO, info_request, 0, 0); + if (reg_info_1) + *reg_info_1 = iprv.v0; + if (reg_info_2) + *reg_info_2 = iprv.v1; + return iprv.status; +} + +typedef union pal_hints_u { + unsigned long ph_data; + struct { + unsigned long si : 1, + li : 1, + reserved : 62; + } pal_hints_s; +} pal_hints_u_t; + +/* Return information about the register stack and RSE for this processor + * implementation. 
+ */ +static inline long ia64_pal_rse_info(unsigned long *num_phys_stacked, + pal_hints_u_t *hints) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_RSE_INFO, 0, 0, 0); + if (num_phys_stacked) + *num_phys_stacked = iprv.v0; + if (hints) + hints->ph_data = iprv.v1; + return iprv.status; +} + +/* + * Set the current hardware resource sharing policy of the processor + */ +static inline s64 +ia64_pal_set_hw_policy (u64 policy) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_SET_HW_POLICY, policy, 0, 0); + return iprv.status; +} + +/* Cause the processor to enter SHUTDOWN state, where prefetching and execution are + * suspended, but cause cache and TLB coherency to be maintained. + * This is usually called in IA-32 mode. + */ +static inline s64 +ia64_pal_shutdown (void) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_SHUTDOWN, 0, 0, 0); + return iprv.status; +} + +/* Perform the second phase of processor self-test. */ +static inline s64 +ia64_pal_test_proc (u64 test_addr, u64 test_size, u64 attributes, u64 *self_test_state) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_TEST_PROC, test_addr, test_size, attributes); + if (self_test_state) + *self_test_state = iprv.v0; + return iprv.status; +} + +typedef union pal_version_u { + u64 pal_version_val; + struct { + u64 pv_pal_b_rev : 8; + u64 pv_pal_b_model : 8; + u64 pv_reserved1 : 8; + u64 pv_pal_vendor : 8; + u64 pv_pal_a_rev : 8; + u64 pv_pal_a_model : 8; + u64 pv_reserved2 : 16; + } pal_version_s; +} pal_version_u_t; + + +/* + * Return PAL version information. While the documentation states that + * PAL_VERSION can be called in either physical or virtual mode, some + * implementations only allow physical calls. We don't call it very often, + * so the overhead isn't worth eliminating. 
+ */ +static inline s64 +ia64_pal_version (pal_version_u_t *pal_min_version, pal_version_u_t *pal_cur_version) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS(iprv, PAL_VERSION, 0, 0, 0); + if (pal_min_version) + pal_min_version->pal_version_val = iprv.v0; + + if (pal_cur_version) + pal_cur_version->pal_version_val = iprv.v1; + + return iprv.status; +} + +typedef union pal_tc_info_u { + u64 pti_val; + struct { + u64 num_sets : 8, + associativity : 8, + num_entries : 16, + pf : 1, + unified : 1, + reduce_tr : 1, + reserved : 29; + } pal_tc_info_s; +} pal_tc_info_u_t; + +#define tc_reduce_tr pal_tc_info_s.reduce_tr +#define tc_unified pal_tc_info_s.unified +#define tc_pf pal_tc_info_s.pf +#define tc_num_entries pal_tc_info_s.num_entries +#define tc_associativity pal_tc_info_s.associativity +#define tc_num_sets pal_tc_info_s.num_sets + + +/* Return information about the virtual memory characteristics of the processor + * implementation. + */ +static inline s64 +ia64_pal_vm_info (u64 tc_level, u64 tc_type, pal_tc_info_u_t *tc_info, u64 *tc_pages) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_VM_INFO, tc_level, tc_type, 0); + if (tc_info) + tc_info->pti_val = iprv.v0; + if (tc_pages) + *tc_pages = iprv.v1; + return iprv.status; +} + +/* Get page size information about the virtual memory characteristics of the processor + * implementation. 
+ */ +static inline s64 ia64_pal_vm_page_size(u64 *tr_pages, u64 *vw_pages) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_VM_PAGE_SIZE, 0, 0, 0); + if (tr_pages) + *tr_pages = iprv.v0; + if (vw_pages) + *vw_pages = iprv.v1; + return iprv.status; +} + +typedef union pal_vm_info_1_u { + u64 pvi1_val; + struct { + u64 vw : 1, + phys_add_size : 7, + key_size : 8, + max_pkr : 8, + hash_tag_id : 8, + max_dtr_entry : 8, + max_itr_entry : 8, + max_unique_tcs : 8, + num_tc_levels : 8; + } pal_vm_info_1_s; +} pal_vm_info_1_u_t; + +#define PAL_MAX_PURGES 0xFFFF /* all ones means unlimited */ + +typedef union pal_vm_info_2_u { + u64 pvi2_val; + struct { + u64 impl_va_msb : 8, + rid_size : 8, + max_purges : 16, + reserved : 32; + } pal_vm_info_2_s; +} pal_vm_info_2_u_t; + +/* Get summary information about the virtual memory characteristics of the processor + * implementation. + */ +static inline s64 +ia64_pal_vm_summary (pal_vm_info_1_u_t *vm_info_1, pal_vm_info_2_u_t *vm_info_2) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_VM_SUMMARY, 0, 0, 0); + if (vm_info_1) + vm_info_1->pvi1_val = iprv.v0; + if (vm_info_2) + vm_info_2->pvi2_val = iprv.v1; + return iprv.status; +} + +typedef union pal_vp_info_u { + u64 pvi_val; + struct { + u64 index: 48, /* virtual feature set info */ + vmm_id: 16; /* feature set id */ + } pal_vp_info_s; +} pal_vp_info_u_t; + +/* + * Returns information about virtual processor features + */ +static inline s64 +ia64_pal_vp_info (u64 feature_set, u64 vp_buffer, u64 *vp_info, u64 *vmm_id) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_VP_INFO, feature_set, vp_buffer, 0); + if (vp_info) + *vp_info = iprv.v0; + if (vmm_id) + *vmm_id = iprv.v1; + return iprv.status; +} + +typedef union pal_itr_valid_u { + u64 piv_val; + struct { + u64 access_rights_valid : 1, + priv_level_valid : 1, + dirty_bit_valid : 1, + mem_attr_valid : 1, + reserved : 60; + } pal_tr_valid_s; +} pal_tr_valid_u_t; + +/* Read a translation register */ +static
inline s64 +ia64_pal_tr_read (u64 reg_num, u64 tr_type, u64 *tr_buffer, pal_tr_valid_u_t *tr_valid) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS_STK(iprv, PAL_VM_TR_READ, reg_num, tr_type,(u64)ia64_tpa(tr_buffer)); + if (tr_valid) + tr_valid->piv_val = iprv.v0; + return iprv.status; +} + +/* + * PAL_PREFETCH_VISIBILITY transaction types + */ +#define PAL_VISIBILITY_VIRTUAL 0 +#define PAL_VISIBILITY_PHYSICAL 1 + +/* + * PAL_PREFETCH_VISIBILITY return codes + */ +#define PAL_VISIBILITY_OK 1 +#define PAL_VISIBILITY_OK_REMOTE_NEEDED 0 +#define PAL_VISIBILITY_INVAL_ARG -2 +#define PAL_VISIBILITY_ERROR -3 + +static inline s64 +ia64_pal_prefetch_visibility (s64 trans_type) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_PREFETCH_VISIBILITY, trans_type, 0, 0); + return iprv.status; +} + +/* data structure for getting information on logical to physical mappings */ +typedef union pal_log_overview_u { + struct { + u64 num_log :16, /* Total number of logical + * processors on this die + */ + tpc :8, /* Threads per core */ + reserved3 :8, /* Reserved */ + cpp :8, /* Cores per processor */ + reserved2 :8, /* Reserved */ + ppid :8, /* Physical processor ID */ + reserved1 :8; /* Reserved */ + } overview_bits; + u64 overview_data; +} pal_log_overview_t; + +typedef union pal_proc_n_log_info1_u{ + struct { + u64 tid :16, /* Thread id */ + reserved2 :16, /* Reserved */ + cid :16, /* Core id */ + reserved1 :16; /* Reserved */ + } ppli1_bits; + u64 ppli1_data; +} pal_proc_n_log_info1_t; + +typedef union pal_proc_n_log_info2_u { + struct { + u64 la :16, /* Logical address */ + reserved :48; /* Reserved */ + } ppli2_bits; + u64 ppli2_data; +} pal_proc_n_log_info2_t; + +typedef struct pal_logical_to_physical_s +{ + pal_log_overview_t overview; + pal_proc_n_log_info1_t ppli1; + pal_proc_n_log_info2_t ppli2; +} pal_logical_to_physical_t; + +#define overview_num_log overview.overview_bits.num_log +#define overview_tpc overview.overview_bits.tpc +#define overview_cpp 
overview.overview_bits.cpp +#define overview_ppid overview.overview_bits.ppid +#define log1_tid ppli1.ppli1_bits.tid +#define log1_cid ppli1.ppli1_bits.cid +#define log2_la ppli2.ppli2_bits.la + +/* Get information on logical to physical processor mappings. */ +static inline s64 +ia64_pal_logical_to_phys(u64 proc_number, pal_logical_to_physical_t *mapping) +{ + struct ia64_pal_retval iprv; + + PAL_CALL(iprv, PAL_LOGICAL_TO_PHYSICAL, proc_number, 0, 0); + + if (iprv.status == PAL_STATUS_SUCCESS) + { + mapping->overview.overview_data = iprv.v0; + mapping->ppli1.ppli1_data = iprv.v1; + mapping->ppli2.ppli2_data = iprv.v2; + } + + return iprv.status; +} + +typedef struct pal_cache_shared_info_s +{ + u64 num_shared; + pal_proc_n_log_info1_t ppli1; + pal_proc_n_log_info2_t ppli2; +} pal_cache_shared_info_t; + +/* Get sharing information for the cache selected by level and type; *info is written only on success. */ +static inline s64 +ia64_pal_cache_shared_info(u64 level, + u64 type, + u64 proc_number, + pal_cache_shared_info_t *info) +{ + struct ia64_pal_retval iprv; + + PAL_CALL(iprv, PAL_CACHE_SHARED_INFO, level, type, proc_number); + + if (iprv.status == PAL_STATUS_SUCCESS) { + info->num_shared = iprv.v0; + info->ppli1.ppli1_data = iprv.v1; + info->ppli2.ppli2_data = iprv.v2; + } + + return iprv.status; +} +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_IA64_PAL_H */ diff --git a/kernel/arch/ia64/include/asm/param.h b/kernel/arch/ia64/include/asm/param.h new file mode 100644 index 000000000..1295913d6 --- /dev/null +++ b/kernel/arch/ia64/include/asm/param.h @@ -0,0 +1,17 @@ +/* + * Fundamental kernel parameters. + * + * Based on .
+ * + * Modified 1998, 1999, 2002-2003 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#ifndef _ASM_IA64_PARAM_H +#define _ASM_IA64_PARAM_H + +#include + +# define HZ CONFIG_HZ +# define USER_HZ HZ +# define CLOCKS_PER_SEC HZ /* frequency at which times() counts */ +#endif /* _ASM_IA64_PARAM_H */ diff --git a/kernel/arch/ia64/include/asm/paravirt.h b/kernel/arch/ia64/include/asm/paravirt.h new file mode 100644 index 000000000..b53518a98 --- /dev/null +++ b/kernel/arch/ia64/include/asm/paravirt.h @@ -0,0 +1,321 @@ +/****************************************************************************** + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + + +#ifndef __ASM_PARAVIRT_H +#define __ASM_PARAVIRT_H + +#ifndef __ASSEMBLY__ +/****************************************************************************** + * fsys related addresses + */ +struct pv_fsys_data { + unsigned long *fsyscall_table; + void *fsys_bubble_down; +}; + +extern struct pv_fsys_data pv_fsys_data; + +unsigned long *paravirt_get_fsyscall_table(void); +char *paravirt_get_fsys_bubble_down(void); + +/****************************************************************************** + * patchlist addresses for gate page + */ +enum pv_gate_patchlist { + PV_GATE_START_FSYSCALL, + PV_GATE_END_FSYSCALL, + + PV_GATE_START_BRL_FSYS_BUBBLE_DOWN, + PV_GATE_END_BRL_FSYS_BUBBLE_DOWN, + + PV_GATE_START_VTOP, + PV_GATE_END_VTOP, + + PV_GATE_START_MCKINLEY_E9, + PV_GATE_END_MCKINLEY_E9, +}; + +struct pv_patchdata { + unsigned long start_fsyscall_patchlist; + unsigned long end_fsyscall_patchlist; + unsigned long start_brl_fsys_bubble_down_patchlist; + unsigned long end_brl_fsys_bubble_down_patchlist; + unsigned long start_vtop_patchlist; + unsigned long end_vtop_patchlist; + unsigned long start_mckinley_e9_patchlist; + unsigned long end_mckinley_e9_patchlist; + + void *gate_section; +}; + +extern struct pv_patchdata pv_patchdata; + +unsigned long paravirt_get_gate_patchlist(enum pv_gate_patchlist type); +void *paravirt_get_gate_section(void); +#endif + +#ifdef CONFIG_PARAVIRT_GUEST + +#define PARAVIRT_HYPERVISOR_TYPE_DEFAULT 0 + +#ifndef __ASSEMBLY__ + +#include +#include + +/****************************************************************************** + * general info + */ +struct pv_info { + unsigned int kernel_rpl; + int paravirt_enabled; + const char *name; +}; + +extern struct pv_info pv_info; + +static inline int paravirt_enabled(void) +{ 
+ return pv_info.paravirt_enabled; +} + +static inline unsigned int get_kernel_rpl(void) +{ + return pv_info.kernel_rpl; +} + +/****************************************************************************** + * initialization hooks. + */ +struct rsvd_region; + +struct pv_init_ops { + void (*banner)(void); + + int (*reserve_memory)(struct rsvd_region *region); + + void (*arch_setup_early)(void); + void (*arch_setup_console)(char **cmdline_p); + int (*arch_setup_nomca)(void); + + void (*post_smp_prepare_boot_cpu)(void); + +#ifdef ASM_SUPPORTED + unsigned long (*patch_bundle)(void *sbundle, void *ebundle, + unsigned long type); + unsigned long (*patch_inst)(unsigned long stag, unsigned long etag, + unsigned long type); +#endif + void (*patch_branch)(unsigned long tag, unsigned long type); +}; + +extern struct pv_init_ops pv_init_ops; + +static inline void paravirt_banner(void) +{ + if (pv_init_ops.banner) + pv_init_ops.banner(); +} + +static inline int paravirt_reserve_memory(struct rsvd_region *region) +{ + if (pv_init_ops.reserve_memory) + return pv_init_ops.reserve_memory(region); + return 0; +} + +static inline void paravirt_arch_setup_early(void) +{ + if (pv_init_ops.arch_setup_early) + pv_init_ops.arch_setup_early(); +} + +static inline void paravirt_arch_setup_console(char **cmdline_p) +{ + if (pv_init_ops.arch_setup_console) + pv_init_ops.arch_setup_console(cmdline_p); +} + +static inline int paravirt_arch_setup_nomca(void) +{ + if (pv_init_ops.arch_setup_nomca) + return pv_init_ops.arch_setup_nomca(); + return 0; +} + +static inline void paravirt_post_smp_prepare_boot_cpu(void) +{ + if (pv_init_ops.post_smp_prepare_boot_cpu) + pv_init_ops.post_smp_prepare_boot_cpu(); +} + +/****************************************************************************** + * replacement of iosapic operations. 
+ */ + +struct pv_iosapic_ops { + void (*pcat_compat_init)(void); /* optional: the wrapper skips a NULL hook */ + + struct irq_chip *(*__get_irq_chip)(unsigned long trigger); + + unsigned int (*__read)(char __iomem *iosapic, unsigned int reg); + void (*__write)(char __iomem *iosapic, unsigned int reg, u32 val); +}; + +extern struct pv_iosapic_ops pv_iosapic_ops; + +static inline void +iosapic_pcat_compat_init(void) +{ + if (pv_iosapic_ops.pcat_compat_init) + pv_iosapic_ops.pcat_compat_init(); +} + +static inline struct irq_chip* +iosapic_get_irq_chip(unsigned long trigger) +{ + return pv_iosapic_ops.__get_irq_chip(trigger); /* called without a NULL check */ +} + +static inline unsigned int +__iosapic_read(char __iomem *iosapic, unsigned int reg) +{ + return pv_iosapic_ops.__read(iosapic, reg); +} + +static inline void +__iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) +{ + pv_iosapic_ops.__write(iosapic, reg, val); /* void hook: nothing to return */ +} + +/****************************************************************************** + * replacement of irq operations. + */ + +struct pv_irq_ops { + void (*register_ipi)(void); + + int (*assign_irq_vector)(int irq); + void (*free_irq_vector)(int vector); + + void (*register_percpu_irq)(ia64_vector vec, + struct irqaction *action); + + void (*resend_irq)(unsigned int vector); +}; + +extern struct pv_irq_ops pv_irq_ops; + +static inline void +ia64_register_ipi(void) +{ + pv_irq_ops.register_ipi(); +} + +static inline int +assign_irq_vector(int irq) +{ + return pv_irq_ops.assign_irq_vector(irq); +} + +static inline void +free_irq_vector(int vector) +{ + pv_irq_ops.free_irq_vector(vector); /* void hook: nothing to return */ +} + +static inline void +register_percpu_irq(ia64_vector vec, struct irqaction *action) +{ + pv_irq_ops.register_percpu_irq(vec, action); +} + +static inline void +ia64_resend_irq(unsigned int vector) +{ + pv_irq_ops.resend_irq(vector); +} + +/****************************************************************************** + * replacement of time operations.
+ */ + +extern struct itc_jitter_data_t itc_jitter_data; +extern volatile int time_keeper_id; + +struct pv_time_ops { + void (*init_missing_ticks_accounting)(int cpu); /* optional: the wrapper skips a NULL hook */ + int (*do_steal_accounting)(unsigned long *new_itm); + + void (*clocksource_resume)(void); + + unsigned long long (*sched_clock)(void); +}; + +extern struct pv_time_ops pv_time_ops; + +static inline void +paravirt_init_missing_ticks_accounting(int cpu) +{ + if (pv_time_ops.init_missing_ticks_accounting) + pv_time_ops.init_missing_ticks_accounting(cpu); +} + +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +static inline int +paravirt_do_steal_accounting(unsigned long *new_itm) +{ + return pv_time_ops.do_steal_accounting(new_itm); /* called without a NULL check */ +} + +static inline unsigned long long paravirt_sched_clock(void) +{ + return pv_time_ops.sched_clock(); /* called without a NULL check */ +} + +#endif /* !__ASSEMBLY__ */ + +#else /* !CONFIG_PARAVIRT_GUEST */ +/* fallback for native case */ + +#ifndef __ASSEMBLY__ + +#define paravirt_banner() do { } while (0) +#define paravirt_reserve_memory(region) 0 + +#define paravirt_arch_setup_early() do { } while (0) +#define paravirt_arch_setup_console(cmdline_p) do { } while (0) +#define paravirt_arch_setup_nomca() 0 +#define paravirt_post_smp_prepare_boot_cpu() do { } while (0) + +#define paravirt_init_missing_ticks_accounting(cpu) do { } while (0) +#define paravirt_do_steal_accounting(new_itm) 0 + +#endif /* !__ASSEMBLY__ */ + + +#endif /* CONFIG_PARAVIRT_GUEST */ + +#endif /* __ASM_PARAVIRT_H */ diff --git a/kernel/arch/ia64/include/asm/paravirt_patch.h b/kernel/arch/ia64/include/asm/paravirt_patch.h new file mode 100644 index 000000000..128ff5db6 --- /dev/null +++ b/kernel/arch/ia64/include/asm/paravirt_patch.h @@ -0,0 +1,143 @@ +/****************************************************************************** + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __ASM_PARAVIRT_PATCH_H +#define __ASM_PARAVIRT_PATCH_H + +#ifdef __ASSEMBLY__ + + .section .paravirt_branches, "a" + .previous +#define PARAVIRT_PATCH_SITE_BR(type) \ + { \ + [1:] ; \ + br.cond.sptk.many 2f ; \ + nop.b 0 ; \ + nop.b 0;; ; \ + } ; \ + 2: \ + .xdata8 ".paravirt_branches", 1b, type + +#else + +#include +#include + +/* for binary patch */ +struct paravirt_patch_site_bundle { + void *sbundle; + void *ebundle; + unsigned long type; +}; + +/* label means the beginning of new bundle */ +#define paravirt_alt_bundle(instr, privop) \ + "\t998:\n" \ + "\t" instr "\n" \ + "\t999:\n" \ + "\t.pushsection .paravirt_bundles, \"a\"\n" \ + "\t.popsection\n" \ + "\t.xdata8 \".paravirt_bundles\", 998b, 999b, " \ + __stringify(privop) "\n" + + +struct paravirt_patch_bundle_elem { + const void *sbundle; + const void *ebundle; + unsigned long type; +}; + + +struct paravirt_patch_site_inst { + unsigned long stag; + unsigned long etag; + unsigned long type; +}; + +#define paravirt_alt_inst(instr, privop) \ + "\t[998:]\n" \ + "\t" instr "\n" \ + "\t[999:]\n" \ + "\t.pushsection .paravirt_insts, \"a\"\n" \ + "\t.popsection\n" \ + "\t.xdata8 \".paravirt_insts\", 998b, 999b, " \ + __stringify(privop) "\n" + +struct paravirt_patch_site_branch { + 
unsigned long tag; + unsigned long type; +}; + +struct paravirt_patch_branch_target { + const void *entry; + unsigned long type; +}; + +void +__paravirt_patch_apply_branch( + unsigned long tag, unsigned long type, + const struct paravirt_patch_branch_target *entries, + unsigned int nr_entries); + +void +paravirt_patch_reloc_br(unsigned long tag, const void *target); + +void +paravirt_patch_reloc_brl(unsigned long tag, const void *target); + + +#if defined(ASM_SUPPORTED) && defined(CONFIG_PARAVIRT) +unsigned long +ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type); + +unsigned long +__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type, + const struct paravirt_patch_bundle_elem *elems, + unsigned long nelems, + const struct paravirt_patch_bundle_elem **found); + +void +paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start, + const struct paravirt_patch_site_bundle *end); + +void +paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start, + const struct paravirt_patch_site_inst *end); + +void paravirt_patch_apply(void); +#else +#define paravirt_patch_apply_bundle(start, end) do { } while (0) +#define paravirt_patch_apply_inst(start, end) do { } while (0) +#define paravirt_patch_apply() do { } while (0) +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_PARAVIRT_PATCH_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "linux" + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + */ diff --git a/kernel/arch/ia64/include/asm/paravirt_privop.h b/kernel/arch/ia64/include/asm/paravirt_privop.h new file mode 100644 index 000000000..8f6cb11c9 --- /dev/null +++ b/kernel/arch/ia64/include/asm/paravirt_privop.h @@ -0,0 +1,479 @@ +/****************************************************************************** + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _ASM_IA64_PARAVIRT_PRIVOP_H +#define _ASM_IA64_PARAVIRT_PRIVOP_H + +#ifdef CONFIG_PARAVIRT + +#ifndef __ASSEMBLY__ + +#include +#include /* for IA64_PSR_I */ + +/****************************************************************************** + * replacement of intrinsics operations. 
+ */ + +struct pv_cpu_ops { + void (*fc)(void *addr); + unsigned long (*thash)(unsigned long addr); + unsigned long (*get_cpuid)(int index); + unsigned long (*get_pmd)(int index); + unsigned long (*getreg)(int reg); + void (*setreg)(int reg, unsigned long val); + void (*ptcga)(unsigned long addr, unsigned long size); + unsigned long (*get_rr)(unsigned long index); + void (*set_rr)(unsigned long index, unsigned long val); + void (*set_rr0_to_rr4)(unsigned long val0, unsigned long val1, + unsigned long val2, unsigned long val3, + unsigned long val4); + void (*ssm_i)(void); + void (*rsm_i)(void); + unsigned long (*get_psr_i)(void); + void (*intrin_local_irq_restore)(unsigned long flags); +}; + +extern struct pv_cpu_ops pv_cpu_ops; + +extern void ia64_native_setreg_func(int regnum, unsigned long val); +extern unsigned long ia64_native_getreg_func(int regnum); + +/************************************************/ +/* Instructions paravirtualized for performance */ +/************************************************/ + +#ifndef ASM_SUPPORTED +#define paravirt_ssm_i() pv_cpu_ops.ssm_i() +#define paravirt_rsm_i() pv_cpu_ops.rsm_i() +#define __paravirt_getreg(reg) pv_cpu_ops.getreg(reg) /* takes reg: paravirt_getreg() invokes it with one argument */ +#endif + +/* mask for ia64_native_ssm/rsm() must be constant.("i" constraint). + * static inline function doesn't satisfy it.
*/ +#define paravirt_ssm(mask) \ + do { \ + if ((mask) == IA64_PSR_I) \ + paravirt_ssm_i(); \ + else \ + ia64_native_ssm(mask); \ + } while (0) + +#define paravirt_rsm(mask) \ + do { \ + if ((mask) == IA64_PSR_I) \ + paravirt_rsm_i(); \ + else \ + ia64_native_rsm(mask); \ + } while (0) + +/* returned ip value should be the one in the caller, + * not in __paravirt_getreg() */ +#define paravirt_getreg(reg) \ + ({ \ + unsigned long res; \ + if ((reg) == _IA64_REG_IP) \ + res = ia64_native_getreg(_IA64_REG_IP); \ + else \ + res = __paravirt_getreg(reg); \ + res; \ + }) + +/****************************************************************************** + * replacement of hand written assembly codes. + */ +struct pv_cpu_asm_switch { + unsigned long switch_to; + unsigned long leave_syscall; + unsigned long work_processed_syscall; + unsigned long leave_kernel; +}; +void paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch); + +#endif /* __ASSEMBLY__ */ + +#define IA64_PARAVIRT_ASM_FUNC(name) paravirt_ ## name + +#else + +/* fallback for native case */ +#define IA64_PARAVIRT_ASM_FUNC(name) ia64_native_ ## name + +#endif /* CONFIG_PARAVIRT */ + +#if defined(CONFIG_PARAVIRT) && defined(ASM_SUPPORTED) +#define paravirt_dv_serialize_data() ia64_dv_serialize_data() +#else +#define paravirt_dv_serialize_data() /* nothing */ +#endif + +/* these routines utilize privilege-sensitive or performance-sensitive + * privileged instructions so the code must be replaced with + * paravirtualized versions */ +#define ia64_switch_to IA64_PARAVIRT_ASM_FUNC(switch_to) +#define ia64_leave_syscall IA64_PARAVIRT_ASM_FUNC(leave_syscall) +#define ia64_work_processed_syscall \ + IA64_PARAVIRT_ASM_FUNC(work_processed_syscall) +#define ia64_leave_kernel IA64_PARAVIRT_ASM_FUNC(leave_kernel) + + +#if defined(CONFIG_PARAVIRT) +/****************************************************************************** + * binary patching infrastructure + */ +#define PARAVIRT_PATCH_TYPE_FC 1 +#define 
PARAVIRT_PATCH_TYPE_THASH 2 +#define PARAVIRT_PATCH_TYPE_GET_CPUID 3 +#define PARAVIRT_PATCH_TYPE_GET_PMD 4 +#define PARAVIRT_PATCH_TYPE_PTCGA 5 +#define PARAVIRT_PATCH_TYPE_GET_RR 6 +#define PARAVIRT_PATCH_TYPE_SET_RR 7 +#define PARAVIRT_PATCH_TYPE_SET_RR0_TO_RR4 8 +#define PARAVIRT_PATCH_TYPE_SSM_I 9 +#define PARAVIRT_PATCH_TYPE_RSM_I 10 +#define PARAVIRT_PATCH_TYPE_GET_PSR_I 11 +#define PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE 12 + +/* PARAVIRT_PATCH_TYPE_[GS]ETREG + _IA64_REG_xxx */ +#define PARAVIRT_PATCH_TYPE_GETREG 0x10000000 +#define PARAVIRT_PATCH_TYPE_SETREG 0x20000000 + +/* + * struct task_struct* (*ia64_switch_to)(void* next_task); + * void *ia64_leave_syscall; + * void *ia64_work_processed_syscall + * void *ia64_leave_kernel; + */ + +#define PARAVIRT_PATCH_TYPE_BR_START 0x30000000 +#define PARAVIRT_PATCH_TYPE_BR_SWITCH_TO \ + (PARAVIRT_PATCH_TYPE_BR_START + 0) +#define PARAVIRT_PATCH_TYPE_BR_LEAVE_SYSCALL \ + (PARAVIRT_PATCH_TYPE_BR_START + 1) +#define PARAVIRT_PATCH_TYPE_BR_WORK_PROCESSED_SYSCALL \ + (PARAVIRT_PATCH_TYPE_BR_START + 2) +#define PARAVIRT_PATCH_TYPE_BR_LEAVE_KERNEL \ + (PARAVIRT_PATCH_TYPE_BR_START + 3) + +#ifdef ASM_SUPPORTED +#include + +/* + * pv_cpu_ops calling stub. + * normal function call convention can't be written by gcc + * inline assembly. + * + * from the caller's point of view, + * the following registers will be clobbered. + * r2, r3 + * r8-r15 + * r16, r17 + * b6, b7 + * p6-p15 + * ar.ccv + * + * from the callee's point of view, + * the following registers can be used. + * r2, r3: scratch + * r8: scratch, input argument0 and return value + * r0-r15: scratch, input argument1-5 + * b6: return pointer + * b7: scratch + * p6-p15: scratch + * ar.ccv: scratch + * + * other registers must not be changed. especially + * b0: rp: preserved. gcc ignores b0 in clobbered register.
+ * r16: saved gp + */ +/* 5 bundles */ +#define __PARAVIRT_BR \ + ";;\n" \ + "{ .mlx\n" \ + "nop 0\n" \ + "movl r2 = %[op_addr]\n"/* get function pointer address */ \ + ";;\n" \ + "}\n" \ + "1:\n" \ + "{ .mii\n" \ + "ld8 r2 = [r2]\n" /* load function descriptor address */ \ + "mov r17 = ip\n" /* get ip to calc return address */ \ + "mov r16 = gp\n" /* save gp */ \ + ";;\n" \ + "}\n" \ + "{ .mii\n" \ + "ld8 r3 = [r2], 8\n" /* load entry address */ \ + "adds r17 = 1f - 1b, r17\n" /* calculate return address */ \ + ";;\n" \ + "mov b7 = r3\n" /* set entry address */ \ + "}\n" \ + "{ .mib\n" \ + "ld8 gp = [r2]\n" /* load gp value */ \ + "mov b6 = r17\n" /* set return address */ \ + "br.cond.sptk.few b7\n" /* intrinsics are very short isns */ \ + "}\n" \ + "1:\n" \ + "{ .mii\n" \ + "mov gp = r16\n" /* restore gp value */ \ + "nop 0\n" \ + "nop 0\n" \ + ";;\n" \ + "}\n" + +#define PARAVIRT_OP(op) \ + [op_addr] "i"(&pv_cpu_ops.op) + +#define PARAVIRT_TYPE(type) \ + PARAVIRT_PATCH_TYPE_ ## type + +#define PARAVIRT_REG_CLOBBERS0 \ + "r2", "r3", /*"r8",*/ "r9", "r10", "r11", "r14", \ + "r15", "r16", "r17" + +#define PARAVIRT_REG_CLOBBERS1 \ + "r2","r3", /*"r8",*/ "r9", "r10", "r11", "r14", \ + "r15", "r16", "r17" + +#define PARAVIRT_REG_CLOBBERS2 \ + "r2", "r3", /*"r8", "r9",*/ "r10", "r11", "r14", \ + "r15", "r16", "r17" + +#define PARAVIRT_REG_CLOBBERS5 \ + "r2", "r3", /*"r8", "r9", "r10", "r11", "r14",*/ \ + "r15", "r16", "r17" + +#define PARAVIRT_BR_CLOBBERS \ + "b6", "b7" + +#define PARAVIRT_PR_CLOBBERS \ + "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15" + +#define PARAVIRT_AR_CLOBBERS \ + "ar.ccv" + +#define PARAVIRT_CLOBBERS0 \ + PARAVIRT_REG_CLOBBERS0, \ + PARAVIRT_BR_CLOBBERS, \ + PARAVIRT_PR_CLOBBERS, \ + PARAVIRT_AR_CLOBBERS, \ + "memory" + +#define PARAVIRT_CLOBBERS1 \ + PARAVIRT_REG_CLOBBERS1, \ + PARAVIRT_BR_CLOBBERS, \ + PARAVIRT_PR_CLOBBERS, \ + PARAVIRT_AR_CLOBBERS, \ + "memory" + +#define PARAVIRT_CLOBBERS2 \ + PARAVIRT_REG_CLOBBERS2, \ + 
PARAVIRT_BR_CLOBBERS, \ + PARAVIRT_PR_CLOBBERS, \ + PARAVIRT_AR_CLOBBERS, \ + "memory" + +#define PARAVIRT_CLOBBERS5 \ + PARAVIRT_REG_CLOBBERS5, \ + PARAVIRT_BR_CLOBBERS, \ + PARAVIRT_PR_CLOBBERS, \ + PARAVIRT_AR_CLOBBERS, \ + "memory" + +#define PARAVIRT_BR0(op, type) \ + register unsigned long ia64_clobber asm ("r8"); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_clobber) \ + : PARAVIRT_OP(op) \ + : PARAVIRT_CLOBBERS0) + +#define PARAVIRT_BR0_RET(op, type) \ + register unsigned long ia64_intri_res asm ("r8"); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_intri_res) \ + : PARAVIRT_OP(op) \ + : PARAVIRT_CLOBBERS0) + +#define PARAVIRT_BR1(op, type, arg1) \ + register unsigned long __##arg1 asm ("r8") = arg1; \ + register unsigned long ia64_clobber asm ("r8"); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_clobber) \ + : PARAVIRT_OP(op), "0"(__##arg1) \ + : PARAVIRT_CLOBBERS1) + +#define PARAVIRT_BR1_RET(op, type, arg1) \ + register unsigned long ia64_intri_res asm ("r8"); \ + register unsigned long __##arg1 asm ("r8") = arg1; \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_intri_res) \ + : PARAVIRT_OP(op), "0"(__##arg1) \ + : PARAVIRT_CLOBBERS1) + +#define PARAVIRT_BR1_VOID(op, type, arg1) \ + register void *__##arg1 asm ("r8") = arg1; \ + register unsigned long ia64_clobber asm ("r8"); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(type)) \ + : "=r"(ia64_clobber) \ + : PARAVIRT_OP(op), "0"(__##arg1) \ + : PARAVIRT_CLOBBERS1) + +#define PARAVIRT_BR2(op, type, arg1, arg2) \ + register unsigned long __##arg1 asm ("r8") = arg1; \ + register unsigned long __##arg2 asm ("r9") = arg2; \ + register unsigned long ia64_clobber1 asm ("r8"); \ + register unsigned long ia64_clobber2 asm ("r9"); \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + 
PARAVIRT_TYPE(type)) \ + : "=r"(ia64_clobber1), "=r"(ia64_clobber2) \ + : PARAVIRT_OP(op), "0"(__##arg1), "1"(__##arg2) \ + : PARAVIRT_CLOBBERS2) + + +#define PARAVIRT_DEFINE_CPU_OP0(op, type) \ + static inline void \ + paravirt_ ## op (void) \ + { \ + PARAVIRT_BR0(op, type); \ + } + +#define PARAVIRT_DEFINE_CPU_OP0_RET(op, type) \ + static inline unsigned long \ + paravirt_ ## op (void) \ + { \ + PARAVIRT_BR0_RET(op, type); \ + return ia64_intri_res; \ + } + +#define PARAVIRT_DEFINE_CPU_OP1_VOID(op, type) \ + static inline void \ + paravirt_ ## op (void *arg1) \ + { \ + PARAVIRT_BR1_VOID(op, type, arg1); \ + } + +#define PARAVIRT_DEFINE_CPU_OP1(op, type) \ + static inline void \ + paravirt_ ## op (unsigned long arg1) \ + { \ + PARAVIRT_BR1(op, type, arg1); \ + } + +#define PARAVIRT_DEFINE_CPU_OP1_RET(op, type) \ + static inline unsigned long \ + paravirt_ ## op (unsigned long arg1) \ + { \ + PARAVIRT_BR1_RET(op, type, arg1); \ + return ia64_intri_res; \ + } + +#define PARAVIRT_DEFINE_CPU_OP2(op, type) \ + static inline void \ + paravirt_ ## op (unsigned long arg1, \ + unsigned long arg2) \ + { \ + PARAVIRT_BR2(op, type, arg1, arg2); \ + } + + +PARAVIRT_DEFINE_CPU_OP1_VOID(fc, FC); +PARAVIRT_DEFINE_CPU_OP1_RET(thash, THASH) +PARAVIRT_DEFINE_CPU_OP1_RET(get_cpuid, GET_CPUID) +PARAVIRT_DEFINE_CPU_OP1_RET(get_pmd, GET_PMD) +PARAVIRT_DEFINE_CPU_OP2(ptcga, PTCGA) +PARAVIRT_DEFINE_CPU_OP1_RET(get_rr, GET_RR) +PARAVIRT_DEFINE_CPU_OP2(set_rr, SET_RR) +PARAVIRT_DEFINE_CPU_OP0(ssm_i, SSM_I) +PARAVIRT_DEFINE_CPU_OP0(rsm_i, RSM_I) +PARAVIRT_DEFINE_CPU_OP0_RET(get_psr_i, GET_PSR_I) +PARAVIRT_DEFINE_CPU_OP1(intrin_local_irq_restore, INTRIN_LOCAL_IRQ_RESTORE) + +static inline void +paravirt_set_rr0_to_rr4(unsigned long val0, unsigned long val1, + unsigned long val2, unsigned long val3, + unsigned long val4) +{ + register unsigned long __val0 asm ("r8") = val0; + register unsigned long __val1 asm ("r9") = val1; + register unsigned long __val2 asm ("r10") = val2; + register 
unsigned long __val3 asm ("r11") = val3; + register unsigned long __val4 asm ("r14") = val4; + + register unsigned long ia64_clobber0 asm ("r8"); + register unsigned long ia64_clobber1 asm ("r9"); + register unsigned long ia64_clobber2 asm ("r10"); + register unsigned long ia64_clobber3 asm ("r11"); + register unsigned long ia64_clobber4 asm ("r14"); + + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, + PARAVIRT_TYPE(SET_RR0_TO_RR4)) + : "=r"(ia64_clobber0), + "=r"(ia64_clobber1), + "=r"(ia64_clobber2), + "=r"(ia64_clobber3), + "=r"(ia64_clobber4) + : PARAVIRT_OP(set_rr0_to_rr4), + "0"(__val0), "1"(__val1), "2"(__val2), + "3"(__val3), "4"(__val4) + : PARAVIRT_CLOBBERS5); +} + +/* unsigned long paravirt_getreg(int reg) */ +#define __paravirt_getreg(reg) \ + ({ \ + register unsigned long ia64_intri_res asm ("r8"); \ + register unsigned long __reg asm ("r8") = (reg); \ + \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(GETREG) \ + + (reg)) \ + : "=r"(ia64_intri_res) \ + : PARAVIRT_OP(getreg), "0"(__reg) \ + : PARAVIRT_CLOBBERS1); \ + \ + ia64_intri_res; \ + }) + +/* void paravirt_setreg(int reg, unsigned long val) */ +#define paravirt_setreg(reg, val) \ + do { \ + register unsigned long __val asm ("r8") = val; \ + register unsigned long __reg asm ("r9") = reg; \ + register unsigned long ia64_clobber1 asm ("r8"); \ + register unsigned long ia64_clobber2 asm ("r9"); \ + \ + asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \ + PARAVIRT_TYPE(SETREG) \ + + (reg)) \ + : "=r"(ia64_clobber1), \ + "=r"(ia64_clobber2) \ + : PARAVIRT_OP(setreg), \ + "1"(__reg), "0"(__val) \ + : PARAVIRT_CLOBBERS2); \ + } while (0) + +#endif /* ASM_SUPPORTED */ +#endif /* CONFIG_PARAVIRT */ + +#endif /* _ASM_IA64_PARAVIRT_PRIVOP_H */ diff --git a/kernel/arch/ia64/include/asm/parport.h b/kernel/arch/ia64/include/asm/parport.h new file mode 100644 index 000000000..638b4d271 --- /dev/null +++ b/kernel/arch/ia64/include/asm/parport.h @@ -0,0 +1,19 @@ +/* + *
parport.h: platform-specific PC-style parport initialisation + * + * Copyright (C) 1999, 2000 Tim Waugh + * + * This file should only be included by drivers/parport/parport_pc.c. + */ + +#ifndef _ASM_IA64_PARPORT_H +#define _ASM_IA64_PARPORT_H 1 + +static int parport_pc_find_isa_ports(int autoirq, int autodma); + +static int parport_pc_find_nonpci_ports(int autoirq, int autodma) +{ + return parport_pc_find_isa_ports(autoirq, autodma); +} + +#endif /* _ASM_IA64_PARPORT_H */ diff --git a/kernel/arch/ia64/include/asm/patch.h b/kernel/arch/ia64/include/asm/patch.h new file mode 100644 index 000000000..295fe6ab4 --- /dev/null +++ b/kernel/arch/ia64/include/asm/patch.h @@ -0,0 +1,27 @@ +#ifndef _ASM_IA64_PATCH_H +#define _ASM_IA64_PATCH_H + +/* + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * There are a number of reasons for patching instructions. Rather than duplicating code + * all over the place, we put the common stuff here. Reasons for patching: in-kernel + * module-loader, virtual-to-physical patch-list, McKinley Errata 9 workaround, and gate + * shared library. Undoubtedly, some of these reasons will disappear and others will + * be added over time. + */ +#include +#include + +extern void ia64_patch (u64 insn_addr, u64 mask, u64 val); /* patch any insn slot */ +extern void ia64_patch_imm64 (u64 insn_addr, u64 val); /* patch "movl" w/abs. 
value*/ +extern void ia64_patch_imm60 (u64 insn_addr, u64 val); /* patch "brl" w/ip-rel value */ + +extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end); +extern void ia64_patch_vtop (unsigned long start, unsigned long end); +extern void ia64_patch_phys_stack_reg(unsigned long val); +extern void ia64_patch_rse (unsigned long start, unsigned long end); +extern void ia64_patch_gate (void); + +#endif /* _ASM_IA64_PATCH_H */ diff --git a/kernel/arch/ia64/include/asm/pci.h b/kernel/arch/ia64/include/asm/pci.h new file mode 100644 index 000000000..52af5ed9f --- /dev/null +++ b/kernel/arch/ia64/include/asm/pci.h @@ -0,0 +1,133 @@ +#ifndef _ASM_IA64_PCI_H +#define _ASM_IA64_PCI_H + +#include +#include +#include +#include +#include + +#include +#include +#include + +struct pci_vector_struct { + __u16 segment; /* PCI Segment number */ + __u16 bus; /* PCI Bus number */ + __u32 pci_id; /* ACPI split 16 bits device, 16 bits function (see section 6.1.1) */ + __u8 pin; /* PCI PIN (0 = A, 1 = B, 2 = C, 3 = D) */ + __u32 irq; /* IRQ assigned */ +}; + +/* + * Can be used to override the logic in pci_scan_bus for skipping already-configured bus + * numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the + * loader. + */ +#define pcibios_assign_all_busses() 0 + +#define PCIBIOS_MIN_IO 0x1000 +#define PCIBIOS_MIN_MEM 0x10000000 + +void pcibios_config_init(void); + +struct pci_dev; + +/* + * PCI_DMA_BUS_IS_PHYS should be set to 1 if there is _necessarily_ a direct + * correspondence between device bus addresses and CPU physical addresses. + * Platforms with a hardware I/O MMU _must_ turn this off to suppress the + * bounce buffer handling code in the block and network device layers. + * Platforms with separate bus address spaces _must_ turn this off and provide + * a device DMA mapping implementation that takes care of the necessary + * address translation. 
+ * + * For now, the ia64 platforms which may have separate/multiple bus address + * spaces all have I/O MMUs which support the merging of physically + * discontiguous buffers, so we can use that as the sole factor to determine + * the setting of PCI_DMA_BUS_IS_PHYS. + */ +extern unsigned long ia64_max_iommu_merge_mask; +#define PCI_DMA_BUS_IS_PHYS (ia64_max_iommu_merge_mask == ~0UL) + +#include + +#ifdef CONFIG_PCI +static inline void pci_dma_burst_advice(struct pci_dev *pdev, + enum pci_dma_burst_strategy *strat, + unsigned long *strategy_parameter) +{ + unsigned long cacheline_size; + u8 byte; + + pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte); + if (byte == 0) + cacheline_size = 1024; + else + cacheline_size = (int) byte * 4; + + *strat = PCI_DMA_BURST_MULTIPLE; + *strategy_parameter = cacheline_size; +} +#endif + +#define HAVE_PCI_MMAP +extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine); +#define HAVE_PCI_LEGACY +extern int pci_mmap_legacy_page_range(struct pci_bus *bus, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state); + +#define pci_get_legacy_mem platform_pci_get_legacy_mem +#define pci_legacy_read platform_pci_legacy_read +#define pci_legacy_write platform_pci_legacy_write + +struct iospace_resource { + struct list_head list; + struct resource res; +}; + +struct pci_controller { + struct acpi_device *companion; + void *iommu; + int segment; + int node; /* nearest node with memory or NUMA_NO_NODE for global allocation */ + + void *platform_data; +}; + + +#define PCI_CONTROLLER(busdev) ((struct pci_controller *) busdev->sysdata) +#define pci_domain_nr(busdev) (PCI_CONTROLLER(busdev)->segment) + +extern struct pci_ops pci_root_ops; + +static inline int pci_proc_domain(struct pci_bus *bus) +{ + return (pci_domain_nr(bus) != 0); +} + +static inline struct resource * +pcibios_select_root(struct pci_dev *pdev, struct resource *res) +{ + struct resource *root 
= NULL; + + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + if (res->flags & IORESOURCE_MEM) + root = &iomem_resource; + + return root; +} + +#define HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ +static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) +{ + return channel ? isa_irq_to_vector(15) : isa_irq_to_vector(14); +} + +#ifdef CONFIG_INTEL_IOMMU +extern void pci_iommu_alloc(void); +#endif +#endif /* _ASM_IA64_PCI_H */ diff --git a/kernel/arch/ia64/include/asm/percpu.h b/kernel/arch/ia64/include/asm/percpu.h new file mode 100644 index 000000000..0ec484d2d --- /dev/null +++ b/kernel/arch/ia64/include/asm/percpu.h @@ -0,0 +1,54 @@ +#ifndef _ASM_IA64_PERCPU_H +#define _ASM_IA64_PERCPU_H + +/* + * Copyright (C) 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE + +#ifdef __ASSEMBLY__ +# define THIS_CPU(var) (var) /* use this to mark accesses to per-CPU variables... */ +#else /* !__ASSEMBLY__ */ + + +#include + +#ifdef CONFIG_SMP + +#ifdef HAVE_MODEL_SMALL_ATTRIBUTE +# define PER_CPU_ATTRIBUTES __attribute__((__model__ (__small__))) +#endif + +#define __my_cpu_offset __ia64_per_cpu_var(local_per_cpu_offset) + +extern void *per_cpu_init(void); + +#else /* ! SMP */ + +#define per_cpu_init() (__phys_per_cpu_start) + +#endif /* SMP */ + +#define PER_CPU_BASE_SECTION ".data..percpu" + +/* + * Be extremely careful when taking the address of this variable! Due to virtual + * remapping, it is different from the canonical address returned by this_cpu_ptr(&var)! + * On the positive side, using __ia64_per_cpu_var() instead of this_cpu_ptr() is slightly + * more efficient. 
+ */ +#define __ia64_per_cpu_var(var) (*({ \ + __verify_pcpu_ptr(&(var)); \ + ((typeof(var) __kernel __force *)&(var)); \ +})) + +#include + +/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */ +DECLARE_PER_CPU(unsigned long, local_per_cpu_offset); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_IA64_PERCPU_H */ diff --git a/kernel/arch/ia64/include/asm/perfmon.h b/kernel/arch/ia64/include/asm/perfmon.h new file mode 100644 index 000000000..15476dd3a --- /dev/null +++ b/kernel/arch/ia64/include/asm/perfmon.h @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2001-2003 Hewlett-Packard Co + * Stephane Eranian + */ +#ifndef _ASM_IA64_PERFMON_H +#define _ASM_IA64_PERFMON_H + +#include + + +extern long perfmonctl(int fd, int cmd, void *arg, int narg); + +typedef struct { + void (*handler)(int irq, void *arg, struct pt_regs *regs); +} pfm_intr_handler_desc_t; + +extern void pfm_save_regs (struct task_struct *); +extern void pfm_load_regs (struct task_struct *); + +extern void pfm_exit_thread(struct task_struct *); +extern int pfm_use_debug_registers(struct task_struct *); +extern int pfm_release_debug_registers(struct task_struct *); +extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin); +extern void pfm_inherit(struct task_struct *task, struct pt_regs *regs); +extern void pfm_init_percpu(void); +extern void pfm_handle_work(void); +extern int pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *h); +extern int pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *h); + + + +/* + * Reset PMD register flags + */ +#define PFM_PMD_SHORT_RESET 0 +#define PFM_PMD_LONG_RESET 1 + +typedef union { + unsigned int val; + struct { + unsigned int notify_user:1; /* notify user program of overflow */ + unsigned int reset_ovfl_pmds:1; /* reset overflowed PMDs */ + unsigned int block_task:1; /* block monitored task on kernel exit */ + unsigned int mask_monitoring:1; /* mask monitors via PMCx.plm */ + unsigned int reserved:28; /* 
for future use */ + } bits; +} pfm_ovfl_ctrl_t; + +typedef struct { + unsigned char ovfl_pmd; /* index of overflowed PMD */ + unsigned char ovfl_notify; /* =1 if monitor requested overflow notification */ + unsigned short active_set; /* event set active at the time of the overflow */ + pfm_ovfl_ctrl_t ovfl_ctrl; /* return: perfmon controls to set by handler */ + + unsigned long pmd_last_reset; /* last reset value of the PMD */ + unsigned long smpl_pmds[4]; /* bitmask of other PMD of interest on overflow */ + unsigned long smpl_pmds_values[PMU_MAX_PMDS]; /* values for the other PMDs of interest */ + unsigned long pmd_value; /* current 64-bit value of the PMD */ + unsigned long pmd_eventid; /* eventid associated with PMD */ +} pfm_ovfl_arg_t; + + +typedef struct { + char *fmt_name; + pfm_uuid_t fmt_uuid; + size_t fmt_arg_size; + unsigned long fmt_flags; + + int (*fmt_validate)(struct task_struct *task, unsigned int flags, int cpu, void *arg); + int (*fmt_getsize)(struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size); + int (*fmt_init)(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *arg); + int (*fmt_handler)(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp); + int (*fmt_restart)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs); + int (*fmt_restart_active)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs); + int (*fmt_exit)(struct task_struct *task, void *buf, struct pt_regs *regs); + + struct list_head fmt_list; +} pfm_buffer_fmt_t; + +extern int pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt); +extern int pfm_unregister_buffer_fmt(pfm_uuid_t uuid); + +/* + * perfmon interface exported to modules + */ +extern int pfm_mod_read_pmds(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs); +extern int pfm_mod_write_pmcs(struct task_struct *, void *req, unsigned int nreq, struct
pt_regs *regs); +extern int pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs); +extern int pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs); + +/* + * describe the content of the local_cpu_date->pfm_syst_info field + */ +#define PFM_CPUINFO_SYST_WIDE 0x1 /* if set a system wide session exists */ +#define PFM_CPUINFO_DCR_PP 0x2 /* if set the system wide session has started */ +#define PFM_CPUINFO_EXCL_IDLE 0x4 /* the system wide session excludes the idle task */ + +/* + * sysctl control structure. visible to sampling formats + */ +typedef struct { + int debug; /* turn on/off debugging via syslog */ + int debug_ovfl; /* turn on/off debug printk in overflow handler */ + int fastctxsw; /* turn on/off fast (unsecure) ctxsw */ + int expert_mode; /* turn on/off value checking */ +} pfm_sysctl_t; +extern pfm_sysctl_t pfm_sysctl; + + +#endif /* _ASM_IA64_PERFMON_H */ diff --git a/kernel/arch/ia64/include/asm/pgalloc.h b/kernel/arch/ia64/include/asm/pgalloc.h new file mode 100644 index 000000000..f5e70e961 --- /dev/null +++ b/kernel/arch/ia64/include/asm/pgalloc.h @@ -0,0 +1,125 @@ +#ifndef _ASM_IA64_PGALLOC_H +#define _ASM_IA64_PGALLOC_H + +/* + * This file contains the functions and defines necessary to allocate + * page tables. + * + * This hopefully works with any (fixed) ia-64 page-size, as defined + * in (currently 8192). 
+ * + * Copyright (C) 1998-2001 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 2000, Goutham Rao + */ + + +#include +#include +#include +#include +#include + +#include + +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + return quicklist_alloc(0, GFP_KERNEL, NULL); +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + quicklist_free(0, NULL, pgd); +} + +#if CONFIG_PGTABLE_LEVELS == 4 +static inline void +pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud) +{ + pgd_val(*pgd_entry) = __pa(pud); +} + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return quicklist_alloc(0, GFP_KERNEL, NULL); +} + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + quicklist_free(0, NULL, pud); +} +#define __pud_free_tlb(tlb, pud, address) pud_free((tlb)->mm, pud) +#endif /* CONFIG_PGTABLE_LEVELS == 4 */ + +static inline void +pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd) +{ + pud_val(*pud_entry) = __pa(pmd); +} + +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return quicklist_alloc(0, GFP_KERNEL, NULL); +} + +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) +{ + quicklist_free(0, NULL, pmd); +} + +#define __pmd_free_tlb(tlb, pmd, address) pmd_free((tlb)->mm, pmd) + +static inline void +pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, pgtable_t pte) +{ + pmd_val(*pmd_entry) = page_to_phys(pte); +} +#define pmd_pgtable(pmd) pmd_page(pmd) + +static inline void +pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte) +{ + pmd_val(*pmd_entry) = __pa(pte); +} + +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + struct page *page; + void *pg; + + pg = quicklist_alloc(0, GFP_KERNEL, NULL); + if (!pg) + return NULL; + page = virt_to_page(pg); + if (!pgtable_page_ctor(page)) { + quicklist_free(0, NULL, pg); + return NULL; + } + return page; +} + +static inline 
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long addr) +{ + return quicklist_alloc(0, GFP_KERNEL, NULL); +} + +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) +{ + pgtable_page_dtor(pte); + quicklist_free_page(0, NULL, pte); +} + +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + quicklist_free(0, NULL, pte); +} + +static inline void check_pgt_cache(void) +{ + quicklist_trim(0, NULL, 25, 16); +} + +#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) + +#endif /* _ASM_IA64_PGALLOC_H */ diff --git a/kernel/arch/ia64/include/asm/pgtable.h b/kernel/arch/ia64/include/asm/pgtable.h new file mode 100644 index 000000000..9f3ed9ee8 --- /dev/null +++ b/kernel/arch/ia64/include/asm/pgtable.h @@ -0,0 +1,594 @@ +#ifndef _ASM_IA64_PGTABLE_H +#define _ASM_IA64_PGTABLE_H + +/* + * This file contains the functions and defines necessary to modify and use + * the IA-64 page table tree. + * + * This hopefully works with any (fixed) IA-64 page-size, as defined + * in . + * + * Copyright (C) 1998-2005 Hewlett-Packard Co + * David Mosberger-Tang + */ + + +#include +#include +#include +#include + +#define IA64_MAX_PHYS_BITS 50 /* max. number of physical address bits (architected) */ + +/* + * First, define the various bits in a PTE. Note that the PTE format + * matches the VHPT short format, the first doubleword of the VHPT long + * format, and the first doubleword of the TLB insertion format.
+ */ +#define _PAGE_P_BIT 0 +#define _PAGE_A_BIT 5 +#define _PAGE_D_BIT 6 + +#define _PAGE_P (1 << _PAGE_P_BIT) /* page present bit */ +#define _PAGE_MA_WB (0x0 << 2) /* write back memory attribute */ +#define _PAGE_MA_UC (0x4 << 2) /* uncacheable memory attribute */ +#define _PAGE_MA_UCE (0x5 << 2) /* UC exported attribute */ +#define _PAGE_MA_WC (0x6 << 2) /* write coalescing memory attribute */ +#define _PAGE_MA_NAT (0x7 << 2) /* not-a-thing attribute */ +#define _PAGE_MA_MASK (0x7 << 2) +#define _PAGE_PL_0 (0 << 7) /* privilege level 0 (kernel) */ +#define _PAGE_PL_1 (1 << 7) /* privilege level 1 (unused) */ +#define _PAGE_PL_2 (2 << 7) /* privilege level 2 (unused) */ +#define _PAGE_PL_3 (3 << 7) /* privilege level 3 (user) */ +#define _PAGE_PL_MASK (3 << 7) +#define _PAGE_AR_R (0 << 9) /* read only */ +#define _PAGE_AR_RX (1 << 9) /* read & execute */ +#define _PAGE_AR_RW (2 << 9) /* read & write */ +#define _PAGE_AR_RWX (3 << 9) /* read, write & execute */ +#define _PAGE_AR_R_RW (4 << 9) /* read / read & write */ +#define _PAGE_AR_RX_RWX (5 << 9) /* read & exec / read, write & exec */ +#define _PAGE_AR_RWX_RW (6 << 9) /* read, write & exec / read & write */ +#define _PAGE_AR_X_RX (7 << 9) /* exec & promote / read & exec */ +#define _PAGE_AR_MASK (7 << 9) +#define _PAGE_AR_SHIFT 9 +#define _PAGE_A (1 << _PAGE_A_BIT) /* page accessed bit */ +#define _PAGE_D (1 << _PAGE_D_BIT) /* page dirty bit */ +#define _PAGE_PPN_MASK (((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & ~0xfffUL) +#define _PAGE_ED (__IA64_UL(1) << 52) /* exception deferral */ +#define _PAGE_PROTNONE (__IA64_UL(1) << 63) + +#define _PFN_MASK _PAGE_PPN_MASK +/* Mask of bits which may be changed by pte_modify(); the odd bits are there for _PAGE_PROTNONE */ +#define _PAGE_CHG_MASK (_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | _PAGE_AR_MASK | _PAGE_ED) + +#define _PAGE_SIZE_4K 12 +#define _PAGE_SIZE_8K 13 +#define _PAGE_SIZE_16K 14 +#define _PAGE_SIZE_64K 16 +#define _PAGE_SIZE_256K 18 +#define 
_PAGE_SIZE_1M 20 +#define _PAGE_SIZE_4M 22 +#define _PAGE_SIZE_16M 24 +#define _PAGE_SIZE_64M 26 +#define _PAGE_SIZE_256M 28 +#define _PAGE_SIZE_1G 30 +#define _PAGE_SIZE_4G 32 + +#define __ACCESS_BITS _PAGE_ED | _PAGE_A | _PAGE_P | _PAGE_MA_WB +#define __DIRTY_BITS_NO_ED _PAGE_A | _PAGE_P | _PAGE_D | _PAGE_MA_WB +#define __DIRTY_BITS _PAGE_ED | __DIRTY_BITS_NO_ED + +/* + * How many pointers will a page table level hold expressed in shift + */ +#define PTRS_PER_PTD_SHIFT (PAGE_SHIFT-3) + +/* + * Definitions for fourth level: + */ +#define PTRS_PER_PTE (__IA64_UL(1) << (PTRS_PER_PTD_SHIFT)) + +/* + * Definitions for third level: + * + * PMD_SHIFT determines the size of the area a third-level page table + * can map. + */ +#define PMD_SHIFT (PAGE_SHIFT + (PTRS_PER_PTD_SHIFT)) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PTRS_PER_PMD (1UL << (PTRS_PER_PTD_SHIFT)) + +#if CONFIG_PGTABLE_LEVELS == 4 +/* + * Definitions for second level: + * + * PUD_SHIFT determines the size of the area a second-level page table + * can map. + */ +#define PUD_SHIFT (PMD_SHIFT + (PTRS_PER_PTD_SHIFT)) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) +#define PTRS_PER_PUD (1UL << (PTRS_PER_PTD_SHIFT)) +#endif + +/* + * Definitions for first level: + * + * PGDIR_SHIFT determines what a first-level page table entry can map. + */ +#if CONFIG_PGTABLE_LEVELS == 4 +#define PGDIR_SHIFT (PUD_SHIFT + (PTRS_PER_PTD_SHIFT)) +#else +#define PGDIR_SHIFT (PMD_SHIFT + (PTRS_PER_PTD_SHIFT)) +#endif +#define PGDIR_SIZE (__IA64_UL(1) << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) +#define PTRS_PER_PGD_SHIFT PTRS_PER_PTD_SHIFT +#define PTRS_PER_PGD (1UL << PTRS_PER_PGD_SHIFT) +#define USER_PTRS_PER_PGD (5*PTRS_PER_PGD/8) /* regions 0-4 are user regions */ +#define FIRST_USER_ADDRESS 0UL + +/* + * All the normal masks have the "page accessed" bits on, as any time + * they are used, the page is accessed. 
They are cleared only by the + * page-out routines. + */ +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_A) +#define PAGE_SHARED __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW) +#define PAGE_READONLY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) +#define PAGE_COPY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) +#define PAGE_COPY_EXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) +#define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX) +#define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX) +#define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX) +#define PAGE_KERNEL_UC __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX | \ + _PAGE_MA_UC) + +# ifndef __ASSEMBLY__ + +#include /* for mm_struct */ +#include +#include +#include + +/* + * Next come the mappings that determine how mmap() protection bits + * (PROT_EXEC, PROT_READ, PROT_WRITE, PROT_NONE) get implemented. The + * _P version gets used for a private shared memory segment, the _S + * version gets used for a shared memory segment with MAP_SHARED on. + * In a private shared memory segment, we do a copy-on-write if a task + * attempts to write to the page. 
+ */ + /* xwr */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_READONLY /* write to priv pg -> copy & make writable */ +#define __P011 PAGE_READONLY /* ditto */ +#define __P100 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX) +#define __P101 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) +#define __P110 PAGE_COPY_EXEC +#define __P111 PAGE_COPY_EXEC + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED /* we don't have (and don't need) write-only */ +#define __S011 PAGE_SHARED +#define __S100 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX) +#define __S101 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) +#define __S110 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX) +#define __S111 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX) + +#define pgd_ERROR(e) printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) +#if CONFIG_PGTABLE_LEVELS == 4 +#define pud_ERROR(e) printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) +#endif +#define pmd_ERROR(e) printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pte_ERROR(e) printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) + + +/* + * Some definitions to translate between mem_map, PTEs, and page addresses: + */ + + +/* Quick test to see if ADDR is a (potentially) valid physical address. */ +static inline long +ia64_phys_addr_valid (unsigned long addr) +{ + return (addr & (local_cpu_data->unimpl_pa_mask)) == 0; +} + +/* + * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel + * memory. For the return value to be meaningful, ADDR must be >= + * PAGE_OFFSET. This operation can be relatively expensive (e.g., + * require a hash-, or multi-level tree-lookup or something of that + * sort) but it guarantees to return TRUE only if accessing the page + * at that address does not cause an error. 
Note that there may be + * addresses for which kern_addr_valid() returns FALSE even though an + * access would not cause an error (e.g., this is typically true for + * memory mapped I/O regions). + * + * XXX Need to implement this for IA-64. + */ +#define kern_addr_valid(addr) (1) + + +/* + * Now come the defines and routines to manage and access the three- or four-level + * page table. + */ + + +#define VMALLOC_START (RGN_BASE(RGN_GATE) + 0x200000000UL) +#ifdef CONFIG_VIRTUAL_MEM_MAP +# define VMALLOC_END_INIT (RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9))) +extern unsigned long VMALLOC_END; +#else +#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_SPARSEMEM_VMEMMAP) +/* SPARSEMEM_VMEMMAP uses half of vmalloc... */ +# define VMALLOC_END (RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 10))) +# define vmemmap ((struct page *)VMALLOC_END) +#else +# define VMALLOC_END (RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9))) +#endif +#endif + +/* fs/proc/kcore.c */ +#define kc_vaddr_to_offset(v) ((v) - RGN_BASE(RGN_GATE)) +#define kc_offset_to_vaddr(o) ((o) + RGN_BASE(RGN_GATE)) + +#define RGN_MAP_SHIFT (PGDIR_SHIFT + PTRS_PER_PGD_SHIFT - 3) +#define RGN_MAP_LIMIT ((1UL << RGN_MAP_SHIFT) - PAGE_SIZE) /* per region addr limit */ + +/* + * Conversion functions: convert page frame number (pfn) and a protection value to a page + * table entry (pte). + */ +#define pfn_pte(pfn, pgprot) \ +({ pte_t __pte; pte_val(__pte) = ((pfn) << PAGE_SHIFT) | pgprot_val(pgprot); __pte; }) + +/* Extract pfn from pte. 
*/ +#define pte_pfn(_pte) ((pte_val(_pte) & _PFN_MASK) >> PAGE_SHIFT) + +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +/* This takes a physical page address that is used by the remapping functions */ +#define mk_pte_phys(physpage, pgprot) \ +({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; }) + +#define pte_modify(_pte, newprot) \ + (__pte((pte_val(_pte) & ~_PAGE_CHG_MASK) | (pgprot_val(newprot) & _PAGE_CHG_MASK))) + +#define pte_none(pte) (!pte_val(pte)) +#define pte_present(pte) (pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE)) +#define pte_clear(mm,addr,pte) (pte_val(*(pte)) = 0UL) +/* pte_page() returns the "struct page *" corresponding to the PTE: */ +#define pte_page(pte) virt_to_page(((pte_val(pte) & _PFN_MASK) + PAGE_OFFSET)) + +#define pmd_none(pmd) (!pmd_val(pmd)) +#define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd))) +#define pmd_present(pmd) (pmd_val(pmd) != 0UL) +#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK)) +#define pmd_page(pmd) virt_to_page((pmd_val(pmd) + PAGE_OFFSET)) + +#define pud_none(pud) (!pud_val(pud)) +#define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud))) +#define pud_present(pud) (pud_val(pud) != 0UL) +#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK)) +#define pud_page(pud) virt_to_page((pud_val(pud) + PAGE_OFFSET)) + +#if CONFIG_PGTABLE_LEVELS == 4 +#define pgd_none(pgd) (!pgd_val(pgd)) +#define pgd_bad(pgd) (!ia64_phys_addr_valid(pgd_val(pgd))) +#define pgd_present(pgd) (pgd_val(pgd) != 0UL) +#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL) +#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & _PFN_MASK)) +#define pgd_page(pgd) virt_to_page((pgd_val(pgd) + PAGE_OFFSET)) +#endif + +/* + * The following have defined behavior only work if pte_present() is true. 
+ */ +#define pte_write(pte) ((unsigned) (((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) - 2) <= 4) +#define pte_exec(pte) ((pte_val(pte) & _PAGE_AR_RX) != 0) +#define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0) +#define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0) +#define pte_special(pte) 0 + +/* + * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the + * access rights: + */ +#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~_PAGE_AR_RW)) +#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_AR_RW)) +#define pte_mkold(pte) (__pte(pte_val(pte) & ~_PAGE_A)) +#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A)) +#define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) +#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) +#define pte_mkhuge(pte) (__pte(pte_val(pte))) +#define pte_mkspecial(pte) (pte) + +/* + * Because ia64's Icache and Dcache are not coherent (on a cpu), we need to + * sync icache and dcache when we insert a *new* executable page. + * __ia64_sync_icache_dcache() checks the PG_arch_1 bit and flushes the icache + * if necessary. + * + * set_pte() is also called by the kernel, but we can expect that the kernel + * flushes icache explicitly if necessary. + */ +#define pte_present_exec_user(pte)\ + ((pte_val(pte) & (_PAGE_P | _PAGE_PL_MASK | _PAGE_AR_RX)) == \ + (_PAGE_P | _PAGE_PL_3 | _PAGE_AR_RX)) + +extern void __ia64_sync_icache_dcache(pte_t pteval); +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + /* page is present && page is user && page is executable + * && (page swapin or new page or page migration + * || copy_on_write with page copying.) 
+ */ + if (pte_present_exec_user(pteval) && + (!pte_present(*ptep) || + pte_pfn(*ptep) != pte_pfn(pteval))) + /* load_module() calls flush_icache_range() explicitly */ + __ia64_sync_icache_dcache(pteval); + *ptep = pteval; +} + +#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) + +/* + * Make page protection values cacheable, uncacheable, or write- + * combining. Note that "protection" is really a misnomer here as the + * protection value contains the memory attribute bits, dirty bits, and + * various other bits as well. + */ +#define pgprot_cacheable(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WB) +#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC) +#define pgprot_writecombine(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WC) + +struct file; +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); +#define __HAVE_PHYS_MEM_ACCESS_PROT + +static inline unsigned long +pgd_index (unsigned long address) +{ + unsigned long region = address >> 61; + unsigned long l1index = (address >> PGDIR_SHIFT) & ((PTRS_PER_PGD >> 3) - 1); + + return (region << (PAGE_SHIFT - 6)) | l1index; +} + +/* The offset in the 1-level directory is given by the 3 region bits + (61..63) and the level-1 bits. */ +static inline pgd_t* +pgd_offset (const struct mm_struct *mm, unsigned long address) +{ + return mm->pgd + pgd_index(address); +} + +/* In the kernel's mapped region we completely ignore the region number + (since we know it's in region number 5). */ +#define pgd_offset_k(addr) \ + (init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))) + +/* Look up a pgd entry in the gate area. On IA-64, the gate-area + resides in the kernel-mapped segment, hence we use pgd_offset_k() + here. */ +#define pgd_offset_gate(mm, addr) pgd_offset_k(addr) + +#if CONFIG_PGTABLE_LEVELS == 4 +/* Find an entry in the second-level page table.. 
*/ +#define pud_offset(dir,addr) \ + ((pud_t *) pgd_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) +#endif + +/* Find an entry in the third-level page table. */ +#define pmd_offset(dir,addr) \ + ((pmd_t *) pud_page_vaddr(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + +/* + * Find an entry in the fourth-level (PTE) page table. This looks more complicated than it + * should be because some platforms place page tables in high memory. + */ +#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset_kernel(dir,addr) ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr)) +#define pte_offset_map(dir,addr) pte_offset_kernel(dir, addr) +#define pte_unmap(pte) do { } while (0) + +/* atomic versions of some PTE manipulations: */ + +static inline int +ptep_test_and_clear_young (struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ +#ifdef CONFIG_SMP + if (!pte_young(*ptep)) + return 0; + return test_and_clear_bit(_PAGE_A_BIT, ptep); +#else + pte_t pte = *ptep; + if (!pte_young(pte)) + return 0; + set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte)); + return 1; +#endif +} + +static inline pte_t +ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ +#ifdef CONFIG_SMP + return __pte(xchg((long *) ptep, 0)); +#else + pte_t pte = *ptep; + pte_clear(mm, addr, ptep); + return pte; +#endif +} + +static inline void +ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ +#ifdef CONFIG_SMP + unsigned long new, old; + + do { + old = pte_val(*ptep); + new = pte_val(pte_wrprotect(__pte (old))); + } while (cmpxchg((unsigned long *) ptep, old, new) != old); +#else + pte_t old_pte = *ptep; + set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); +#endif +} + +static inline int +pte_same (pte_t a, pte_t b) +{ + return pte_val(a) == pte_val(b); +} + +#define update_mmu_cache(vma, address, ptep) do { } while (0) + +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern void paging_init 
(void); + +/* + * Note: The macros below rely on the fact that MAX_SWAPFILES_SHIFT <= number of + * bits in the swap-type field of the swap pte. It would be nice to + * enforce that, but we can't easily include <linux/swap.h> here. + * (Of course, better still would be to define MAX_SWAPFILES_SHIFT here...). + * + * Format of swap pte: + * bit 0 : present bit (must be zero) + * bits 1- 7: swap-type + * bits 8-62: swap offset + * bit 63 : _PAGE_PROTNONE bit + */ +#define __swp_type(entry) (((entry).val >> 1) & 0x7f) +#define __swp_offset(entry) (((entry).val << 1) >> 9) +#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 1) | ((long) (offset) << 8) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc. + */ +extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; +extern struct page *zero_page_memmap_ptr; +#define ZERO_PAGE(vaddr) (zero_page_memmap_ptr) + +/* We provide our own get_unmapped_area to cope with VA holes for userland */ +#define HAVE_ARCH_UNMAPPED_AREA + +#ifdef CONFIG_HUGETLB_PAGE +#define HUGETLB_PGDIR_SHIFT (HPAGE_SHIFT + 2*(PAGE_SHIFT-3)) +#define HUGETLB_PGDIR_SIZE (__IA64_UL(1) << HUGETLB_PGDIR_SHIFT) +#define HUGETLB_PGDIR_MASK (~(HUGETLB_PGDIR_SIZE-1)) +#endif + + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +/* + * Update PTEP with ENTRY, which is guaranteed to be a less + * restrictive PTE. That is, ENTRY may have the ACCESSED, DIRTY, and + * WRITABLE bits turned on, when the value at PTEP did not. The + * WRITABLE bit may only be turned on if SAFELY_WRITABLE is TRUE. + * + * SAFELY_WRITABLE is TRUE if we can update the value at PTEP without + * having to worry about races. 
On SMP machines, there are only two + * cases where this is true: + * + * (1) *PTEP has the PRESENT bit turned OFF + * (2) ENTRY has the DIRTY bit turned ON + * + * On ia64, we could implement this routine with a cmpxchg()-loop + * which ORs in the _PAGE_A/_PAGE_D bit if they're set in ENTRY. + * However, like on x86, we can get a more streamlined version by + * observing that it is OK to drop ACCESSED bit updates when + * SAFELY_WRITABLE is FALSE. Besides being rare, all that would do is + * result in an extra Access-bit fault, which would then turn on the + * ACCESSED bit in the low-level fault handler (iaccess_bit or + * daccess_bit in ivt.S). + */ +#ifdef CONFIG_SMP +# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \ +({ \ + int __changed = !pte_same(*(__ptep), __entry); \ + if (__changed && __safely_writable) { \ + set_pte(__ptep, __entry); \ + flush_tlb_page(__vma, __addr); \ + } \ + __changed; \ +}) +#else +# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \ +({ \ + int __changed = !pte_same(*(__ptep), __entry); \ + if (__changed) { \ + set_pte_at((__vma)->vm_mm, (__addr), __ptep, __entry); \ + flush_tlb_page(__vma, __addr); \ + } \ + __changed; \ +}) +#endif + +# ifdef CONFIG_VIRTUAL_MEM_MAP + /* arch mem_map init routine is needed due to holes in a virtual mem_map */ +# define __HAVE_ARCH_MEMMAP_INIT + extern void memmap_init (unsigned long size, int nid, unsigned long zone, + unsigned long start_pfn); +# endif /* CONFIG_VIRTUAL_MEM_MAP */ +# endif /* !__ASSEMBLY__ */ + +/* + * Identity-mapped regions use a large page size. We'll call such large pages + * "granules". If you can think of a better name that's unambiguous, let me + * know... 
+ */ +#if defined(CONFIG_IA64_GRANULE_64MB) +# define IA64_GRANULE_SHIFT _PAGE_SIZE_64M +#elif defined(CONFIG_IA64_GRANULE_16MB) +# define IA64_GRANULE_SHIFT _PAGE_SIZE_16M +#endif +#define IA64_GRANULE_SIZE (1 << IA64_GRANULE_SHIFT) +/* + * log2() of the page size we use to map the kernel image (IA64_TR_KERNEL): + */ +#define KERNEL_TR_PAGE_SHIFT _PAGE_SIZE_64M +#define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT) + +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + +/* These tell get_user_pages() that the first gate page is accessible from user-level. */ +#define FIXADDR_USER_START GATE_ADDR +#ifdef HAVE_BUGGY_SEGREL +# define FIXADDR_USER_END (GATE_ADDR + 2*PAGE_SIZE) +#else +# define FIXADDR_USER_END (GATE_ADDR + 2*PERCPU_PAGE_SIZE) +#endif + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +#define __HAVE_ARCH_PTE_SAME +#define __HAVE_ARCH_PGD_OFFSET_GATE + + +#if CONFIG_PGTABLE_LEVELS == 3 +#include +#endif +#include + +#endif /* _ASM_IA64_PGTABLE_H */ diff --git a/kernel/arch/ia64/include/asm/processor.h b/kernel/arch/ia64/include/asm/processor.h new file mode 100644 index 000000000..ce53c50d0 --- /dev/null +++ b/kernel/arch/ia64/include/asm/processor.h @@ -0,0 +1,711 @@ +#ifndef _ASM_IA64_PROCESSOR_H +#define _ASM_IA64_PROCESSOR_H + +/* + * Copyright (C) 1998-2004 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + * + * 11/24/98 S.Eranian added ia64_set_iva() + * 12/03/99 D. Mosberger implement thread_saved_pc() via kernel unwind API + * 06/16/00 A. 
Mallick added csd/ssd/tssd for ia32 support + */ + + +#include +#include +#include +#include + +#define ARCH_HAS_PREFETCH_SWITCH_STACK + +#define IA64_NUM_PHYS_STACK_REG 96 +#define IA64_NUM_DBG_REGS 8 + +#define DEFAULT_MAP_BASE __IA64_UL_CONST(0x2000000000000000) +#define DEFAULT_TASK_SIZE __IA64_UL_CONST(0xa000000000000000) + +/* + * TASK_SIZE really is misnamed. It really is the maximum user + * space address (plus one). On IA-64, there are five regions of 2TB + * each (assuming 8KB page size), for a total of 8TB of user virtual + * address space. + */ +#define TASK_SIZE DEFAULT_TASK_SIZE + +/* + * This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE (current->thread.map_base) + +#define IA64_THREAD_FPH_VALID (__IA64_UL(1) << 0) /* floating-point high state valid? */ +#define IA64_THREAD_DBG_VALID (__IA64_UL(1) << 1) /* debug registers valid? */ +#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */ +#define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */ +#define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. 
*/ +#define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5) /* require migration + sync at ctx sw */ +#define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6) /* don't log any fpswa faults */ +#define IA64_THREAD_FPEMU_SIGFPE (__IA64_UL(1) << 7) /* send a SIGFPE for fpswa faults */ + +#define IA64_THREAD_UAC_SHIFT 3 +#define IA64_THREAD_UAC_MASK (IA64_THREAD_UAC_NOPRINT | IA64_THREAD_UAC_SIGBUS) +#define IA64_THREAD_FPEMU_SHIFT 6 +#define IA64_THREAD_FPEMU_MASK (IA64_THREAD_FPEMU_NOPRINT | IA64_THREAD_FPEMU_SIGFPE) + + +/* + * This shift should be large enough to be able to represent 1000000000/itc_freq with good + * accuracy while being small enough to fit 10*1000000000< +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_NUMA +#include +#endif + +/* like above but expressed as bitfields for more efficient access: */ +struct ia64_psr { + __u64 reserved0 : 1; + __u64 be : 1; + __u64 up : 1; + __u64 ac : 1; + __u64 mfl : 1; + __u64 mfh : 1; + __u64 reserved1 : 7; + __u64 ic : 1; + __u64 i : 1; + __u64 pk : 1; + __u64 reserved2 : 1; + __u64 dt : 1; + __u64 dfl : 1; + __u64 dfh : 1; + __u64 sp : 1; + __u64 pp : 1; + __u64 di : 1; + __u64 si : 1; + __u64 db : 1; + __u64 lp : 1; + __u64 tb : 1; + __u64 rt : 1; + __u64 reserved3 : 4; + __u64 cpl : 2; + __u64 is : 1; + __u64 mc : 1; + __u64 it : 1; + __u64 id : 1; + __u64 da : 1; + __u64 dd : 1; + __u64 ss : 1; + __u64 ri : 2; + __u64 ed : 1; + __u64 bn : 1; + __u64 reserved4 : 19; +}; + +union ia64_isr { + __u64 val; + struct { + __u64 code : 16; + __u64 vector : 8; + __u64 reserved1 : 8; + __u64 x : 1; + __u64 w : 1; + __u64 r : 1; + __u64 na : 1; + __u64 sp : 1; + __u64 rs : 1; + __u64 ir : 1; + __u64 ni : 1; + __u64 so : 1; + __u64 ei : 2; + __u64 ed : 1; + __u64 reserved2 : 20; + }; +}; + +union ia64_lid { + __u64 val; + struct { + __u64 rv : 16; + __u64 eid : 8; + __u64 id : 8; + __u64 ig : 32; + }; +}; + +union ia64_tpr { + __u64 val; + struct { + __u64 ig0 : 
4; + __u64 mic : 4; + __u64 rsv : 8; + __u64 mmi : 1; + __u64 ig1 : 47; + }; +}; + +union ia64_itir { + __u64 val; + struct { + __u64 rv3 : 2; /* 0-1 */ + __u64 ps : 6; /* 2-7 */ + __u64 key : 24; /* 8-31 */ + __u64 rv4 : 32; /* 32-63 */ + }; +}; + +union ia64_rr { + __u64 val; + struct { + __u64 ve : 1; /* enable hw walker */ + __u64 reserved0: 1; /* reserved */ + __u64 ps : 6; /* log page size */ + __u64 rid : 24; /* region id */ + __u64 reserved1: 32; /* reserved */ + }; +}; + +/* + * CPU type, hardware bug flags, and per-CPU state. Frequently used + * state comes earlier: + */ +struct cpuinfo_ia64 { + unsigned int softirq_pending; + unsigned long itm_delta; /* # of clock cycles between clock ticks */ + unsigned long itm_next; /* interval timer mask value to use for next clock tick */ + unsigned long nsec_per_cyc; /* (1000000000<thread.flags = (((task)->thread.flags & ~IA64_THREAD_UAC_MASK) \ + | (((value) << IA64_THREAD_UAC_SHIFT) & IA64_THREAD_UAC_MASK)); \ + 0; \ +}) +#define GET_UNALIGN_CTL(task,addr) \ +({ \ + put_user(((task)->thread.flags & IA64_THREAD_UAC_MASK) >> IA64_THREAD_UAC_SHIFT, \ + (int __user *) (addr)); \ +}) + +#define SET_FPEMU_CTL(task,value) \ +({ \ + (task)->thread.flags = (((task)->thread.flags & ~IA64_THREAD_FPEMU_MASK) \ + | (((value) << IA64_THREAD_FPEMU_SHIFT) & IA64_THREAD_FPEMU_MASK)); \ + 0; \ +}) +#define GET_FPEMU_CTL(task,addr) \ +({ \ + put_user(((task)->thread.flags & IA64_THREAD_FPEMU_MASK) >> IA64_THREAD_FPEMU_SHIFT, \ + (int __user *) (addr)); \ +}) + +struct thread_struct { + __u32 flags; /* various thread flags (see IA64_THREAD_*) */ + /* writing on_ustack is performance-critical, so it's worth spending 8 bits on it... */ + __u8 on_ustack; /* executing on user-stacks? 
*/ + __u8 pad[3]; + __u64 ksp; /* kernel stack pointer */ + __u64 map_base; /* base address for get_unmapped_area() */ + __u64 rbs_bot; /* the base address for the RBS */ + int last_fph_cpu; /* CPU that may hold the contents of f32-f127 */ + +#ifdef CONFIG_PERFMON + void *pfm_context; /* pointer to detailed PMU context */ + unsigned long pfm_needs_checking; /* when >0, pending perfmon work on kernel exit */ +# define INIT_THREAD_PM .pfm_context = NULL, \ + .pfm_needs_checking = 0UL, +#else +# define INIT_THREAD_PM +#endif + unsigned long dbr[IA64_NUM_DBG_REGS]; + unsigned long ibr[IA64_NUM_DBG_REGS]; + struct ia64_fpreg fph[96]; /* saved/loaded on demand */ +}; + +#define INIT_THREAD { \ + .flags = 0, \ + .on_ustack = 0, \ + .ksp = 0, \ + .map_base = DEFAULT_MAP_BASE, \ + .rbs_bot = STACK_TOP - DEFAULT_USER_STACK_SIZE, \ + .last_fph_cpu = -1, \ + INIT_THREAD_PM \ + .dbr = {0, }, \ + .ibr = {0, }, \ + .fph = {{{{0}}}, } \ +} + +#define start_thread(regs,new_ip,new_sp) do { \ + regs->cr_ipsr = ((regs->cr_ipsr | (IA64_PSR_BITS_TO_SET | IA64_PSR_CPL)) \ + & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS)); \ + regs->cr_iip = new_ip; \ + regs->ar_rsc = 0xf; /* eager mode, privilege level 3 */ \ + regs->ar_rnat = 0; \ + regs->ar_bspstore = current->thread.rbs_bot; \ + regs->ar_fpsr = FPSR_DEFAULT; \ + regs->loadrs = 0; \ + regs->r8 = get_dumpable(current->mm); /* set "don't zap registers" flag */ \ + regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \ + if (unlikely(get_dumpable(current->mm) != SUID_DUMP_USER)) { \ + /* \ + * Zap scratch regs to avoid leaking bits between processes with different \ + * uid/privileges. \ + */ \ + regs->ar_pfs = 0; regs->b0 = 0; regs->pr = 0; \ + regs->r1 = 0; regs->r9 = 0; regs->r11 = 0; regs->r13 = 0; regs->r15 = 0; \ + } \ +} while (0) + +/* Forward declarations, a strange C thing... */ +struct mm_struct; +struct task_struct; + +/* + * Free all resources held by a thread. 
This is called after the + * parent of DEAD_TASK has collected the exit status of the task via + * wait(). + */ +#define release_thread(dead_task) + +/* Get wait channel for task P. */ +extern unsigned long get_wchan (struct task_struct *p); + +/* Return instruction pointer of blocked task TSK. */ +#define KSTK_EIP(tsk) \ + ({ \ + struct pt_regs *_regs = task_pt_regs(tsk); \ + _regs->cr_iip + ia64_psr(_regs)->ri; \ + }) + +/* Return stack pointer of blocked task TSK. */ +#define KSTK_ESP(tsk) ((tsk)->thread.ksp) + +extern void ia64_getreg_unknown_kr (void); +extern void ia64_setreg_unknown_kr (void); + +#define ia64_get_kr(regnum) \ +({ \ + unsigned long r = 0; \ + \ + switch (regnum) { \ + case 0: r = ia64_getreg(_IA64_REG_AR_KR0); break; \ + case 1: r = ia64_getreg(_IA64_REG_AR_KR1); break; \ + case 2: r = ia64_getreg(_IA64_REG_AR_KR2); break; \ + case 3: r = ia64_getreg(_IA64_REG_AR_KR3); break; \ + case 4: r = ia64_getreg(_IA64_REG_AR_KR4); break; \ + case 5: r = ia64_getreg(_IA64_REG_AR_KR5); break; \ + case 6: r = ia64_getreg(_IA64_REG_AR_KR6); break; \ + case 7: r = ia64_getreg(_IA64_REG_AR_KR7); break; \ + default: ia64_getreg_unknown_kr(); break; \ + } \ + r; \ +}) + +#define ia64_set_kr(regnum, r) \ +({ \ + switch (regnum) { \ + case 0: ia64_setreg(_IA64_REG_AR_KR0, r); break; \ + case 1: ia64_setreg(_IA64_REG_AR_KR1, r); break; \ + case 2: ia64_setreg(_IA64_REG_AR_KR2, r); break; \ + case 3: ia64_setreg(_IA64_REG_AR_KR3, r); break; \ + case 4: ia64_setreg(_IA64_REG_AR_KR4, r); break; \ + case 5: ia64_setreg(_IA64_REG_AR_KR5, r); break; \ + case 6: ia64_setreg(_IA64_REG_AR_KR6, r); break; \ + case 7: ia64_setreg(_IA64_REG_AR_KR7, r); break; \ + default: ia64_setreg_unknown_kr(); break; \ + } \ +}) + +/* + * The following three macros can't be inline functions because we don't have struct + * task_struct at this point. + */ + +/* + * Return TRUE if task T owns the fph partition of the CPU we're running on. 
+ * Must be called from code that has preemption disabled. + */ +#define ia64_is_local_fpu_owner(t) \ +({ \ + struct task_struct *__ia64_islfo_task = (t); \ + (__ia64_islfo_task->thread.last_fph_cpu == smp_processor_id() \ + && __ia64_islfo_task == (struct task_struct *) ia64_get_kr(IA64_KR_FPU_OWNER)); \ +}) + +/* + * Mark task T as owning the fph partition of the CPU we're running on. + * Must be called from code that has preemption disabled. + */ +#define ia64_set_local_fpu_owner(t) do { \ + struct task_struct *__ia64_slfo_task = (t); \ + __ia64_slfo_task->thread.last_fph_cpu = smp_processor_id(); \ + ia64_set_kr(IA64_KR_FPU_OWNER, (unsigned long) __ia64_slfo_task); \ +} while (0) + +/* Mark the fph partition of task T as being invalid on all CPUs. */ +#define ia64_drop_fpu(t) ((t)->thread.last_fph_cpu = -1) + +extern void __ia64_init_fpu (void); +extern void __ia64_save_fpu (struct ia64_fpreg *fph); +extern void __ia64_load_fpu (struct ia64_fpreg *fph); +extern void ia64_save_debug_regs (unsigned long *save_area); +extern void ia64_load_debug_regs (unsigned long *save_area); + +#define ia64_fph_enable() do { ia64_rsm(IA64_PSR_DFH); ia64_srlz_d(); } while (0) +#define ia64_fph_disable() do { ia64_ssm(IA64_PSR_DFH); ia64_srlz_d(); } while (0) + +/* load fp 0.0 into fph */ +static inline void +ia64_init_fpu (void) { + ia64_fph_enable(); + __ia64_init_fpu(); + ia64_fph_disable(); +} + +/* save f32-f127 at FPH */ +static inline void +ia64_save_fpu (struct ia64_fpreg *fph) { + ia64_fph_enable(); + __ia64_save_fpu(fph); + ia64_fph_disable(); +} + +/* load f32-f127 from FPH */ +static inline void +ia64_load_fpu (struct ia64_fpreg *fph) { + ia64_fph_enable(); + __ia64_load_fpu(fph); + ia64_fph_disable(); +} + +static inline __u64 +ia64_clear_ic (void) +{ + __u64 psr; + psr = ia64_getreg(_IA64_REG_PSR); + ia64_stop(); + ia64_rsm(IA64_PSR_I | IA64_PSR_IC); + ia64_srlz_i(); + return psr; +} + +/* + * Restore the psr. 
+ */ +static inline void +ia64_set_psr (__u64 psr) +{ + ia64_stop(); + ia64_setreg(_IA64_REG_PSR_L, psr); + ia64_srlz_i(); +} + +/* + * Insert a translation into an instruction and/or data translation + * register. + */ +static inline void +ia64_itr (__u64 target_mask, __u64 tr_num, + __u64 vmaddr, __u64 pte, + __u64 log_page_size) +{ + ia64_setreg(_IA64_REG_CR_ITIR, (log_page_size << 2)); + ia64_setreg(_IA64_REG_CR_IFA, vmaddr); + ia64_stop(); + if (target_mask & 0x1) + ia64_itri(tr_num, pte); + if (target_mask & 0x2) + ia64_itrd(tr_num, pte); +} + +/* + * Insert a translation into the instruction and/or data translation + * cache. + */ +static inline void +ia64_itc (__u64 target_mask, __u64 vmaddr, __u64 pte, + __u64 log_page_size) +{ + ia64_setreg(_IA64_REG_CR_ITIR, (log_page_size << 2)); + ia64_setreg(_IA64_REG_CR_IFA, vmaddr); + ia64_stop(); + /* as per EAS2.6, itc must be the last instruction in an instruction group */ + if (target_mask & 0x1) + ia64_itci(pte); + if (target_mask & 0x2) + ia64_itcd(pte); +} + +/* + * Purge a range of addresses from instruction and/or data translation + * register(s). + */ +static inline void +ia64_ptr (__u64 target_mask, __u64 vmaddr, __u64 log_size) +{ + if (target_mask & 0x1) + ia64_ptri(vmaddr, (log_size << 2)); + if (target_mask & 0x2) + ia64_ptrd(vmaddr, (log_size << 2)); +} + +/* Set the interrupt vector address. The address must be suitably aligned (32KB). */ +static inline void +ia64_set_iva (void *ivt_addr) +{ + ia64_setreg(_IA64_REG_CR_IVA, (__u64) ivt_addr); + ia64_srlz_i(); +} + +/* Set the page table address and control bits. 
*/ +static inline void +ia64_set_pta (__u64 pta) +{ + /* Note: srlz.i implies srlz.d */ + ia64_setreg(_IA64_REG_CR_PTA, pta); + ia64_srlz_i(); +} + +static inline void +ia64_eoi (void) +{ + ia64_setreg(_IA64_REG_CR_EOI, 0); + ia64_srlz_d(); +} + +#define cpu_relax() ia64_hint(ia64_hint_pause) +#define cpu_relax_lowlatency() cpu_relax() + +static inline int +ia64_get_irr(unsigned int vector) +{ + unsigned int reg = vector / 64; + unsigned int bit = vector % 64; + u64 irr = 0; /* stays 0 when vector >= 256 selects none of IRR0-IRR3, so the bit reads as clear */ + + switch (reg) { + case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break; + case 1: irr = ia64_getreg(_IA64_REG_CR_IRR1); break; + case 2: irr = ia64_getreg(_IA64_REG_CR_IRR2); break; + case 3: irr = ia64_getreg(_IA64_REG_CR_IRR3); break; + } + + return test_bit(bit, &irr); +} + +static inline void +ia64_set_lrr0 (unsigned long val) +{ + ia64_setreg(_IA64_REG_CR_LRR0, val); + ia64_srlz_d(); +} + +static inline void +ia64_set_lrr1 (unsigned long val) +{ + ia64_setreg(_IA64_REG_CR_LRR1, val); + ia64_srlz_d(); +} + + +/* + * Given the address to which a spill occurred, return the unat bit + * number that corresponds to this address. + */ +static inline __u64 +ia64_unat_pos (void *spill_addr) +{ + return ((__u64) spill_addr >> 3) & 0x3f; +} + +/* + * Set the NaT bit of an integer register which was spilled at address + * SPILL_ADDR. UNAT is the mask to be updated. + */ +static inline void +ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat) +{ + __u64 bit = ia64_unat_pos(spill_addr); + __u64 mask = 1UL << bit; + + *unat = (*unat & ~mask) | (nat << bit); +} + +/* + * Return saved PC of a blocked thread. + * Note that the only way T can block is through a call to schedule() -> switch_to(). 
+ */ +static inline unsigned long +thread_saved_pc (struct task_struct *t) +{ + struct unw_frame_info info; + unsigned long ip; + + unw_init_from_blocked_task(&info, t); + if (unw_unwind(&info) < 0) + return 0; + unw_get_ip(&info, &ip); + return ip; +} + +/* + * Get the current instruction/program counter value. + */ +#define current_text_addr() \ + ({ void *_pc; _pc = (void *)ia64_getreg(_IA64_REG_IP); _pc; }) + +static inline __u64 +ia64_get_ivr (void) +{ + __u64 r; + ia64_srlz_d(); + r = ia64_getreg(_IA64_REG_CR_IVR); + ia64_srlz_d(); + return r; +} + +static inline void +ia64_set_dbr (__u64 regnum, __u64 value) +{ + __ia64_set_dbr(regnum, value); +#ifdef CONFIG_ITANIUM + ia64_srlz_d(); +#endif +} + +static inline __u64 +ia64_get_dbr (__u64 regnum) +{ + __u64 retval; + + retval = __ia64_get_dbr(regnum); +#ifdef CONFIG_ITANIUM + ia64_srlz_d(); +#endif + return retval; +} + +static inline __u64 +ia64_rotr (__u64 w, __u64 n) +{ + return (w >> n) | (w << (64 - n)); +} + +#define ia64_rotl(w,n) ia64_rotr((w), (64) - (n)) + +/* + * Take a mapped kernel address and return the equivalent address + * in the region 7 identity mapped virtual area. 
+ */ +static inline void * +ia64_imva (void *addr) +{ + void *result; + result = (void *) ia64_tpa(addr); + return __va(result); +} + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH +#define PREFETCH_STRIDE L1_CACHE_BYTES + +static inline void +prefetch (const void *x) +{ + ia64_lfetch(ia64_lfhint_none, x); +} + +static inline void +prefetchw (const void *x) +{ + ia64_lfetch_excl(ia64_lfhint_none, x); +} + +#define spin_lock_prefetch(x) prefetchw(x) + +extern unsigned long boot_option_idle_override; + +enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_FORCE_MWAIT, + IDLE_NOMWAIT, IDLE_POLL}; + +void default_idle(void); + +#define ia64_platform_is(x) (strcmp(x, ia64_platform_name) == 0) + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_IA64_PROCESSOR_H */ diff --git a/kernel/arch/ia64/include/asm/ptrace.h b/kernel/arch/ia64/include/asm/ptrace.h new file mode 100644 index 000000000..845143990 --- /dev/null +++ b/kernel/arch/ia64/include/asm/ptrace.h @@ -0,0 +1,151 @@ +/* + * Copyright (C) 1998-2004 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 2003 Intel Co + * Suresh Siddha + * Fenghua Yu + * Arun Sharma + * + * 12/07/98 S. Eranian added pt_regs & switch_stack + * 12/21/98 D. Mosberger updated to match latest code + * 6/17/99 D. 
Mosberger added second unat member to "struct switch_stack" + * + */ +#ifndef _ASM_IA64_PTRACE_H +#define _ASM_IA64_PTRACE_H + +#ifndef ASM_OFFSETS_C +#include <asm/asm-offsets.h> +#endif +#include <uapi/asm/ptrace.h> + +/* + * Base-2 logarithm of number of pages to allocate per task structure + * (including register backing store and memory stack): + */ +#if defined(CONFIG_IA64_PAGE_SIZE_4KB) +# define KERNEL_STACK_SIZE_ORDER 3 +#elif defined(CONFIG_IA64_PAGE_SIZE_8KB) +# define KERNEL_STACK_SIZE_ORDER 2 +#elif defined(CONFIG_IA64_PAGE_SIZE_16KB) +# define KERNEL_STACK_SIZE_ORDER 1 +#else +# define KERNEL_STACK_SIZE_ORDER 0 +#endif + +#define IA64_RBS_OFFSET ((IA64_TASK_SIZE + IA64_THREAD_INFO_SIZE + 31) & ~31) +#define IA64_STK_OFFSET ((1 << KERNEL_STACK_SIZE_ORDER)*PAGE_SIZE) + +#define KERNEL_STACK_SIZE IA64_STK_OFFSET + +#ifndef __ASSEMBLY__ + +#include <asm/current.h> +#include <asm/page.h> + +/* + * We use the ia64_psr(regs)->ri to determine which of the three + * instructions in bundle (16 bytes) took the sample. Generate + * the canonical representation by adding to instruction pointer. + */ +# define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri) + +static inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + /* FIXME: should this be bspstore + nr_dirty regs? */ + return regs->ar_bspstore; +} + +static inline int is_syscall_success(struct pt_regs *regs) +{ + return regs->r10 != -1; +} + +static inline long regs_return_value(struct pt_regs *regs) +{ + if (is_syscall_success(regs)) + return regs->r8; + else + return -regs->r8; +} + +/* Conserve space in histogram by encoding slot bits in address + * bits 2 and 3 rather than bits 0 and 1. + */ +#define profile_pc(regs) \ +({ \ + unsigned long __ip = instruction_pointer(regs); \ + (__ip & ~3UL) + ((__ip & 3UL) << 2); \ +}) +/* + * Why not default? Because user_stack_pointer() on ia64 gives register + * stack backing store instead...
+ */ +#define current_user_stack_pointer() (current_pt_regs()->r12) + + /* given a pointer to a task_struct, return the user's pt_regs */ +# define task_pt_regs(t) (((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1) +# define ia64_psr(regs) ((struct ia64_psr *) &(regs)->cr_ipsr) +# define user_mode(regs) (((struct ia64_psr *) &(regs)->cr_ipsr)->cpl != 0) +# define user_stack(task,regs) ((long) regs - (long) task == IA64_STK_OFFSET - sizeof(*regs)) +# define fsys_mode(task,regs) \ + ({ \ + struct task_struct *_task = (task); \ + struct pt_regs *_regs = (regs); \ + !user_mode(_regs) && user_stack(_task, _regs); \ + }) + + /* + * System call handlers that, upon successful completion, need to return a negative value + * should call force_successful_syscall_return() right before returning. On architectures + * where the syscall convention provides for a separate error flag (e.g., alpha, ia64, + * ppc{,64}, sparc{,64}, possibly others), this macro can be used to ensure that the error + * flag will not get set. On architectures which do not support a separate error flag, + * the macro is a no-op and the spurious error condition needs to be filtered out by some + * other means (e.g., in user-level, by passing an extra argument to the syscall handler, + * or something along those lines). + * + * On ia64, we can clear the user's pt_regs->r8 to force a successful syscall. 
+ */ +# define force_successful_syscall_return() (task_pt_regs(current)->r8 = 0) + + struct task_struct; /* forward decl */ + struct unw_frame_info; /* forward decl */ + + extern void ia64_do_show_stack (struct unw_frame_info *, void *); + extern unsigned long ia64_get_user_rbs_end (struct task_struct *, struct pt_regs *, + unsigned long *); + extern long ia64_peek (struct task_struct *, struct switch_stack *, unsigned long, + unsigned long, long *); + extern long ia64_poke (struct task_struct *, struct switch_stack *, unsigned long, + unsigned long, long); + extern void ia64_flush_fph (struct task_struct *); + extern void ia64_sync_fph (struct task_struct *); + extern void ia64_sync_krbs(void); + extern long ia64_sync_user_rbs (struct task_struct *, struct switch_stack *, + unsigned long, unsigned long); + + /* get nat bits for scratch registers such that bit N==1 iff scratch register rN is a NaT */ + extern unsigned long ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat); + /* put nat bits for scratch registers such that scratch register rN is a NaT iff bit N==1 */ + extern unsigned long ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat); + + extern void ia64_increment_ip (struct pt_regs *pt); + extern void ia64_decrement_ip (struct pt_regs *pt); + + extern void ia64_ptrace_stop(void); + #define arch_ptrace_stop(code, info) \ + ia64_ptrace_stop() + #define arch_ptrace_stop_needed(code, info) \ + (!test_thread_flag(TIF_RESTORE_RSE)) + + extern void ptrace_attach_sync_user_rbs (struct task_struct *); + #define arch_ptrace_attach(child) \ + ptrace_attach_sync_user_rbs(child) + + #define arch_has_single_step() (1) + #define arch_has_block_step() (1) + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_IA64_PTRACE_H */ diff --git a/kernel/arch/ia64/include/asm/rwsem.h b/kernel/arch/ia64/include/asm/rwsem.h new file mode 100644 index 000000000..3027e7516 --- /dev/null +++ b/kernel/arch/ia64/include/asm/rwsem.h @@ -0,0 +1,145 @@ +/* + * 
R/W semaphores for ia64 + * + * Copyright (C) 2003 Ken Chen + * Copyright (C) 2003 Asit Mallick + * Copyright (C) 2005 Christoph Lameter + * + * Based on asm-i386/rwsem.h and other architecture implementation. + * + * The MSW of the count is the negated number of active writers and + * waiting lockers, and the LSW is the total number of active locks. + * + * The lock count is initialized to 0 (no active and no waiting lockers). + * + * When a writer subtracts WRITE_BIAS, it'll get 0xffffffff00000001 for + * the case of an uncontended lock. Readers increment by 1 and see a positive + * value when uncontended, negative if there are writers (and maybe) readers + * waiting (in which case it goes to sleep). + */ + +#ifndef _ASM_IA64_RWSEM_H +#define _ASM_IA64_RWSEM_H + +#ifndef _LINUX_RWSEM_H +#error "Please don't include directly, use instead." +#endif + +#include + +#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000) +#define RWSEM_ACTIVE_BIAS (1L) +#define RWSEM_ACTIVE_MASK (0xffffffffL) +#define RWSEM_WAITING_BIAS (-0x100000000L) +#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + +/* + * lock for reading + */ +static inline void +__down_read (struct rw_semaphore *sem) +{ + long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1); + + if (result < 0) + rwsem_down_read_failed(sem); +} + +/* + * lock for writing + */ +static inline void +__down_write (struct rw_semaphore *sem) +{ + long old, new; + + do { + old = sem->count; + new = old + RWSEM_ACTIVE_WRITE_BIAS; + } while (cmpxchg_acq(&sem->count, old, new) != old); + + if (old != 0) + rwsem_down_write_failed(sem); +} + +/* + * unlock after reading + */ +static inline void +__up_read (struct rw_semaphore *sem) +{ + long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1); + + if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0) + rwsem_wake(sem); +} + +/* + * unlock after writing + */ +static inline void 
+__up_write (struct rw_semaphore *sem) +{ + long old, new; + + do { + old = sem->count; + new = old - RWSEM_ACTIVE_WRITE_BIAS; + } while (cmpxchg_rel(&sem->count, old, new) != old); + + if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0) + rwsem_wake(sem); +} + +/* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +static inline int +__down_read_trylock (struct rw_semaphore *sem) +{ + long tmp; + while ((tmp = sem->count) >= 0) { + if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) { + return 1; + } + } + return 0; +} + +/* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +static inline int +__down_write_trylock (struct rw_semaphore *sem) +{ + long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE, + RWSEM_ACTIVE_WRITE_BIAS); + return tmp == RWSEM_UNLOCKED_VALUE; +} + +/* + * downgrade write lock to read lock + */ +static inline void +__downgrade_write (struct rw_semaphore *sem) +{ + long old, new; + + do { + old = sem->count; + new = old - RWSEM_WAITING_BIAS; + } while (cmpxchg_rel(&sem->count, old, new) != old); + + if (old < 0) + rwsem_downgrade_wake(sem); +} + +/* + * Implement atomic add functionality. These used to be "inline" functions, but GCC v3.1 + * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd. + */ +#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) +#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) + +#endif /* _ASM_IA64_RWSEM_H */ diff --git a/kernel/arch/ia64/include/asm/sal.h b/kernel/arch/ia64/include/asm/sal.h new file mode 100644 index 000000000..e504f3821 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sal.h @@ -0,0 +1,917 @@ +#ifndef _ASM_IA64_SAL_H +#define _ASM_IA64_SAL_H + +/* + * System Abstraction Layer definitions. + * + * This is based on version 2.5 of the manual "IA-64 System + * Abstraction Layer". 
+ * + * Copyright (C) 2001 Intel + * Copyright (C) 2002 Jenna Hall + * Copyright (C) 2001 Fred Lewis + * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Srinivasa Prasad Thirumalachar + * + * 02/01/04 J. Hall Updated Error Record Structures to conform to July 2001 + * revision of the SAL spec. + * 01/01/03 fvlewis Updated Error Record Structures to conform with Nov. 2000 + * revision of the SAL spec. + * 99/09/29 davidm Updated for SAL 2.6. + * 00/03/29 cfleck Updated SAL Error Logging info for processor (SAL 2.6) + * (plus examples of platform error info structures from smariset @ Intel) + */ + +#define IA64_SAL_PLATFORM_FEATURE_BUS_LOCK_BIT 0 +#define IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT_BIT 1 +#define IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT_BIT 2 +#define IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT 3 + +#define IA64_SAL_PLATFORM_FEATURE_BUS_LOCK (1< +#include +#include + +#include +#include + +extern spinlock_t sal_lock; + +/* SAL spec _requires_ eight args for each call. */ +#define __IA64_FW_CALL(entry,result,a0,a1,a2,a3,a4,a5,a6,a7) \ + result = (*entry)(a0,a1,a2,a3,a4,a5,a6,a7) + +# define IA64_FW_CALL(entry,result,args...) do { \ + unsigned long __ia64_sc_flags; \ + struct ia64_fpreg __ia64_sc_fr[6]; \ + ia64_save_scratch_fpregs(__ia64_sc_fr); \ + spin_lock_irqsave(&sal_lock, __ia64_sc_flags); \ + __IA64_FW_CALL(entry, result, args); \ + spin_unlock_irqrestore(&sal_lock, __ia64_sc_flags); \ + ia64_load_scratch_fpregs(__ia64_sc_fr); \ +} while (0) + +# define SAL_CALL(result,args...) \ + IA64_FW_CALL(ia64_sal, result, args); + +# define SAL_CALL_NOLOCK(result,args...) 
do { \ + unsigned long __ia64_scn_flags; \ + struct ia64_fpreg __ia64_scn_fr[6]; \ + ia64_save_scratch_fpregs(__ia64_scn_fr); \ + local_irq_save(__ia64_scn_flags); \ + __IA64_FW_CALL(ia64_sal, result, args); \ + local_irq_restore(__ia64_scn_flags); \ + ia64_load_scratch_fpregs(__ia64_scn_fr); \ +} while (0) + +# define SAL_CALL_REENTRANT(result,args...) do { \ + struct ia64_fpreg __ia64_scs_fr[6]; \ + ia64_save_scratch_fpregs(__ia64_scs_fr); \ + preempt_disable(); \ + __IA64_FW_CALL(ia64_sal, result, args); \ + preempt_enable(); \ + ia64_load_scratch_fpregs(__ia64_scs_fr); \ +} while (0) + +#define SAL_SET_VECTORS 0x01000000 +#define SAL_GET_STATE_INFO 0x01000001 +#define SAL_GET_STATE_INFO_SIZE 0x01000002 +#define SAL_CLEAR_STATE_INFO 0x01000003 +#define SAL_MC_RENDEZ 0x01000004 +#define SAL_MC_SET_PARAMS 0x01000005 +#define SAL_REGISTER_PHYSICAL_ADDR 0x01000006 + +#define SAL_CACHE_FLUSH 0x01000008 +#define SAL_CACHE_INIT 0x01000009 +#define SAL_PCI_CONFIG_READ 0x01000010 +#define SAL_PCI_CONFIG_WRITE 0x01000011 +#define SAL_FREQ_BASE 0x01000012 +#define SAL_PHYSICAL_ID_INFO 0x01000013 + +#define SAL_UPDATE_PAL 0x01000020 + +struct ia64_sal_retval { + /* + * A zero status value indicates call completed without error. + * A negative status value indicates reason of call failure. + * A positive status value indicates success but an + * informational value should be printed (e.g., "reboot for + * change to take effect"). + */ + long status; + unsigned long v0; + unsigned long v1; + unsigned long v2; +}; + +typedef struct ia64_sal_retval (*ia64_sal_handler) (u64, ...); + +enum { + SAL_FREQ_BASE_PLATFORM = 0, + SAL_FREQ_BASE_INTERVAL_TIMER = 1, + SAL_FREQ_BASE_REALTIME_CLOCK = 2 +}; + +/* + * The SAL system table is followed by a variable number of variable + * length descriptors. The structure of these descriptors follows + * below. 
+ * The definition follows SAL specs from July 2000 + */ +struct ia64_sal_systab { + u8 signature[4]; /* should be "SST_" */ + u32 size; /* size of this table in bytes */ + u8 sal_rev_minor; + u8 sal_rev_major; + u16 entry_count; /* # of entries in variable portion */ + u8 checksum; + u8 reserved1[7]; + u8 sal_a_rev_minor; + u8 sal_a_rev_major; + u8 sal_b_rev_minor; + u8 sal_b_rev_major; + /* oem_id & product_id: terminating NUL is missing if string is exactly 32 bytes long. */ + u8 oem_id[32]; + u8 product_id[32]; /* ASCII product id */ + u8 reserved2[8]; +}; + +enum sal_systab_entry_type { + SAL_DESC_ENTRY_POINT = 0, + SAL_DESC_MEMORY = 1, + SAL_DESC_PLATFORM_FEATURE = 2, + SAL_DESC_TR = 3, + SAL_DESC_PTC = 4, + SAL_DESC_AP_WAKEUP = 5 +}; + +/* + * Entry type: Size: + * 0 48 + * 1 32 + * 2 16 + * 3 32 + * 4 16 + * 5 16 + * + * The argument is parenthesized so that the cast applies to the whole + * expression passed in, not just its first operand. + */ +#define SAL_DESC_SIZE(type) "\060\040\020\040\020\020"[(unsigned) (type)] + +typedef struct ia64_sal_desc_entry_point { + u8 type; + u8 reserved1[7]; + u64 pal_proc; + u64 sal_proc; + u64 gp; + u8 reserved2[16]; +}ia64_sal_desc_entry_point_t; + +typedef struct ia64_sal_desc_memory { + u8 type; + u8 used_by_sal; /* needs to be mapped for SAL?
*/ + u8 mem_attr; /* current memory attribute setting */ + u8 access_rights; /* access rights set up by SAL */ + u8 mem_attr_mask; /* mask of supported memory attributes */ + u8 reserved1; + u8 mem_type; /* memory type */ + u8 mem_usage; /* memory usage */ + u64 addr; /* physical address of memory */ + u32 length; /* length (multiple of 4KB pages) */ + u32 reserved2; + u8 oem_reserved[8]; +} ia64_sal_desc_memory_t; + +typedef struct ia64_sal_desc_platform_feature { + u8 type; + u8 feature_mask; + u8 reserved1[14]; +} ia64_sal_desc_platform_feature_t; + +typedef struct ia64_sal_desc_tr { + u8 type; + u8 tr_type; /* 0 == instruction, 1 == data */ + u8 regnum; /* translation register number */ + u8 reserved1[5]; + u64 addr; /* virtual address of area covered */ + u64 page_size; /* encoded page size */ + u8 reserved2[8]; +} ia64_sal_desc_tr_t; + +typedef struct ia64_sal_desc_ptc { + u8 type; + u8 reserved1[3]; + u32 num_domains; /* # of coherence domains */ + u64 domain_info; /* physical address of domain info table */ +} ia64_sal_desc_ptc_t; + +typedef struct ia64_sal_ptc_domain_info { + u64 proc_count; /* number of processors in domain */ + u64 proc_list; /* physical address of LID array */ +} ia64_sal_ptc_domain_info_t; + +typedef struct ia64_sal_ptc_domain_proc_entry { + u64 id : 8; /* id of processor */ + u64 eid : 8; /* eid of processor */ +} ia64_sal_ptc_domain_proc_entry_t; + + +#define IA64_SAL_AP_EXTERNAL_INT 0 + +typedef struct ia64_sal_desc_ap_wakeup { + u8 type; + u8 mechanism; /* 0 == external interrupt */ + u8 reserved1[6]; + u64 vector; /* interrupt vector in range 0x10-0xff */ +} ia64_sal_desc_ap_wakeup_t ; + +extern ia64_sal_handler ia64_sal; +extern struct ia64_sal_desc_ptc *ia64_ptc_domain_info; + +extern unsigned short sal_revision; /* supported SAL spec revision */ +extern unsigned short sal_version; /* SAL version; OEM dependent */ +#define SAL_VERSION_CODE(major, minor) ((bin2bcd(major) << 8) | bin2bcd(minor)) + +extern const char 
*ia64_sal_strerror (long status); +extern void ia64_sal_init (struct ia64_sal_systab *sal_systab); + +/* SAL information type encodings */ +enum { + SAL_INFO_TYPE_MCA = 0, /* Machine check abort information */ + SAL_INFO_TYPE_INIT = 1, /* Init information */ + SAL_INFO_TYPE_CMC = 2, /* Corrected machine check information */ + SAL_INFO_TYPE_CPE = 3 /* Corrected platform error information */ +}; + +/* Encodings for machine check parameter types */ +enum { + SAL_MC_PARAM_RENDEZ_INT = 1, /* Rendezvous interrupt */ + SAL_MC_PARAM_RENDEZ_WAKEUP = 2, /* Wakeup */ + SAL_MC_PARAM_CPE_INT = 3 /* Corrected Platform Error Int */ +}; + +/* Encodings for rendezvous mechanisms */ +enum { + SAL_MC_PARAM_MECHANISM_INT = 1, /* Use interrupt */ + SAL_MC_PARAM_MECHANISM_MEM = 2 /* Use memory synchronization variable*/ +}; + +/* Encodings for vectors which can be registered by the OS with SAL */ +enum { + SAL_VECTOR_OS_MCA = 0, + SAL_VECTOR_OS_INIT = 1, + SAL_VECTOR_OS_BOOT_RENDEZ = 2 +}; + +/* Encodings for mca_opt parameter sent to SAL_MC_SET_PARAMS */ +#define SAL_MC_PARAM_RZ_ALWAYS 0x1 +#define SAL_MC_PARAM_BINIT_ESCALATE 0x10 + +/* + * Definition of the SAL Error Log from the SAL spec + */ + +/* SAL Error Record Section GUID Definitions */ +#define SAL_PROC_DEV_ERR_SECT_GUID \ + EFI_GUID(0xe429faf1, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_MEM_DEV_ERR_SECT_GUID \ + EFI_GUID(0xe429faf2, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_SEL_DEV_ERR_SECT_GUID \ + EFI_GUID(0xe429faf3, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_PCI_BUS_ERR_SECT_GUID \ + EFI_GUID(0xe429faf4, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID \ + EFI_GUID(0xe429faf5, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_PCI_COMP_ERR_SECT_GUID \ + EFI_GUID(0xe429faf6, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 
0x88, 0x81) +#define SAL_PLAT_SPECIFIC_ERR_SECT_GUID \ + EFI_GUID(0xe429faf7, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_HOST_CTLR_ERR_SECT_GUID \ + EFI_GUID(0xe429faf8, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_BUS_ERR_SECT_GUID \ + EFI_GUID(0xe429faf9, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID \ + EFI_GUID(0x6cb0a200, 0x893a, 0x11da, 0x96, 0xd2, 0x0, 0x10, 0x83, 0xff, \ + 0xca, 0x4d) + +#define MAX_CACHE_ERRORS 6 +#define MAX_TLB_ERRORS 6 +#define MAX_BUS_ERRORS 1 + +/* Definition of version according to SAL spec for logging purposes */ +typedef struct sal_log_revision { + u8 minor; /* BCD (0..99) */ + u8 major; /* BCD (0..99) */ +} sal_log_revision_t; + +/* Definition of timestamp according to SAL spec for logging purposes */ +typedef struct sal_log_timestamp { + u8 slh_second; /* Second (0..59) */ + u8 slh_minute; /* Minute (0..59) */ + u8 slh_hour; /* Hour (0..23) */ + u8 slh_reserved; + u8 slh_day; /* Day (1..31) */ + u8 slh_month; /* Month (1..12) */ + u8 slh_year; /* Year (00..99) */ + u8 slh_century; /* Century (19, 20, 21, ...) */ +} sal_log_timestamp_t; + +/* Definition of log record header structures */ +typedef struct sal_log_record_header { + u64 id; /* Unique monotonically increasing ID */ + sal_log_revision_t revision; /* Major and Minor revision of header */ + u8 severity; /* Error Severity */ + u8 validation_bits; /* 0: platform_guid, 1: !timestamp */ + u32 len; /* Length of this error log in bytes */ + sal_log_timestamp_t timestamp; /* Timestamp */ + efi_guid_t platform_guid; /* Unique OEM Platform ID */ +} sal_log_record_header_t; + +#define sal_log_severity_recoverable 0 +#define sal_log_severity_fatal 1 +#define sal_log_severity_corrected 2 + +/* + * Error Recovery Info (ERI) bit decode. From SAL Spec section B.2.2 Table B-3 + * Error Section Error_Recovery_Info Field Definition. 
+ */ +#define ERI_NOT_VALID 0x0 /* Error Recovery Field is not valid */ +#define ERI_NOT_ACCESSIBLE 0x30 /* Resource not accessible */ +#define ERI_CONTAINMENT_WARN 0x22 /* Corrupt data propagated */ +#define ERI_UNCORRECTED_ERROR 0x20 /* Uncorrected error */ +#define ERI_COMPONENT_RESET 0x24 /* Component must be reset */ +#define ERI_CORR_ERROR_LOG 0x21 /* Corrected error, needs logging */ +#define ERI_CORR_ERROR_THRESH 0x29 /* Corrected error threshold exceeded */ + +/* Definition of log section header structures */ +typedef struct sal_log_sec_header { + efi_guid_t guid; /* Unique Section ID */ + sal_log_revision_t revision; /* Major and Minor revision of Section */ + u8 error_recovery_info; /* Platform error recovery status */ + u8 reserved; + u32 len; /* Section length */ +} sal_log_section_hdr_t; + +typedef struct sal_log_mod_error_info { + struct { + u64 check_info : 1, + requestor_identifier : 1, + responder_identifier : 1, + target_identifier : 1, + precise_ip : 1, + reserved : 59; + } valid; + u64 check_info; + u64 requestor_identifier; + u64 responder_identifier; + u64 target_identifier; + u64 precise_ip; +} sal_log_mod_error_info_t; + +typedef struct sal_processor_static_info { + struct { + u64 minstate : 1, + br : 1, + cr : 1, + ar : 1, + rr : 1, + fr : 1, + reserved : 58; + } valid; + pal_min_state_area_t min_state_area; + u64 br[8]; + u64 cr[128]; + u64 ar[128]; + u64 rr[8]; + struct ia64_fpreg __attribute__ ((packed)) fr[128]; +} sal_processor_static_info_t; + +struct sal_cpuid_info { + u64 regs[5]; + u64 reserved; +}; + +typedef struct sal_log_processor_info { + sal_log_section_hdr_t header; + struct { + u64 proc_error_map : 1, + proc_state_param : 1, + proc_cr_lid : 1, + psi_static_struct : 1, + num_cache_check : 4, + num_tlb_check : 4, + num_bus_check : 4, + num_reg_file_check : 4, + num_ms_check : 4, + cpuid_info : 1, + reserved1 : 39; + } valid; + u64 proc_error_map; + u64 proc_state_parameter; + u64 proc_cr_lid; + /* + * The rest of this 
structure consists of variable-length arrays, which can't be + * expressed in C. + */ + sal_log_mod_error_info_t info[0]; + /* + * This is what the rest looked like if C supported variable-length arrays: + * + * sal_log_mod_error_info_t cache_check_info[.valid.num_cache_check]; + * sal_log_mod_error_info_t tlb_check_info[.valid.num_tlb_check]; + * sal_log_mod_error_info_t bus_check_info[.valid.num_bus_check]; + * sal_log_mod_error_info_t reg_file_check_info[.valid.num_reg_file_check]; + * sal_log_mod_error_info_t ms_check_info[.valid.num_ms_check]; + * struct sal_cpuid_info cpuid_info; + * sal_processor_static_info_t processor_static_info; + */ +} sal_log_processor_info_t; + +/* Given a sal_log_processor_info_t pointer, return a pointer to the processor_static_info: */ +#define SAL_LPI_PSI_INFO(l) \ +({ sal_log_processor_info_t *_l = (l); \ + ((sal_processor_static_info_t *) \ + ((char *) _l->info + ((_l->valid.num_cache_check + _l->valid.num_tlb_check \ + + _l->valid.num_bus_check + _l->valid.num_reg_file_check \ + + _l->valid.num_ms_check) * sizeof(sal_log_mod_error_info_t) \ + + sizeof(struct sal_cpuid_info)))); \ +}) + +/* platform error log structures */ + +typedef struct sal_log_mem_dev_err_info { + sal_log_section_hdr_t header; + struct { + u64 error_status : 1, + physical_addr : 1, + addr_mask : 1, + node : 1, + card : 1, + module : 1, + bank : 1, + device : 1, + row : 1, + column : 1, + bit_position : 1, + requestor_id : 1, + responder_id : 1, + target_id : 1, + bus_spec_data : 1, + oem_id : 1, + oem_data : 1, + reserved : 47; + } valid; + u64 error_status; + u64 physical_addr; + u64 addr_mask; + u16 node; + u16 card; + u16 module; + u16 bank; + u16 device; + u16 row; + u16 column; + u16 bit_position; + u64 requestor_id; + u64 responder_id; + u64 target_id; + u64 bus_spec_data; + u8 oem_id[16]; + u8 oem_data[1]; /* Variable length data */ +} sal_log_mem_dev_err_info_t; + +typedef struct sal_log_sel_dev_err_info { + sal_log_section_hdr_t header; + struct { 
+ u64 record_id : 1, + record_type : 1, + generator_id : 1, + evm_rev : 1, + sensor_type : 1, + sensor_num : 1, + event_dir : 1, + event_data1 : 1, + event_data2 : 1, + event_data3 : 1, + reserved : 54; + } valid; + u16 record_id; + u8 record_type; + u8 timestamp[4]; + u16 generator_id; + u8 evm_rev; + u8 sensor_type; + u8 sensor_num; + u8 event_dir; + u8 event_data1; + u8 event_data2; + u8 event_data3; +} sal_log_sel_dev_err_info_t; + +typedef struct sal_log_pci_bus_err_info { + sal_log_section_hdr_t header; + struct { + u64 err_status : 1, + err_type : 1, + bus_id : 1, + bus_address : 1, + bus_data : 1, + bus_cmd : 1, + requestor_id : 1, + responder_id : 1, + target_id : 1, + oem_data : 1, + reserved : 54; + } valid; + u64 err_status; + u16 err_type; + u16 bus_id; + u32 reserved; + u64 bus_address; + u64 bus_data; + u64 bus_cmd; + u64 requestor_id; + u64 responder_id; + u64 target_id; + u8 oem_data[1]; /* Variable length data */ +} sal_log_pci_bus_err_info_t; + +typedef struct sal_log_smbios_dev_err_info { + sal_log_section_hdr_t header; + struct { + u64 event_type : 1, + length : 1, + time_stamp : 1, + data : 1, + reserved1 : 60; + } valid; + u8 event_type; + u8 length; + u8 time_stamp[6]; + u8 data[1]; /* data of variable length, length == slsmb_length */ +} sal_log_smbios_dev_err_info_t; + +typedef struct sal_log_pci_comp_err_info { + sal_log_section_hdr_t header; + struct { + u64 err_status : 1, + comp_info : 1, + num_mem_regs : 1, + num_io_regs : 1, + reg_data_pairs : 1, + oem_data : 1, + reserved : 58; + } valid; + u64 err_status; + struct { + u16 vendor_id; + u16 device_id; + u8 class_code[3]; + u8 func_num; + u8 dev_num; + u8 bus_num; + u8 seg_num; + u8 reserved[5]; + } comp_info; + u32 num_mem_regs; + u32 num_io_regs; + u64 reg_data_pairs[1]; + /* + * array of address/data register pairs is num_mem_regs + num_io_regs elements + * long. Each array element consists of a u64 address followed by a u64 data + * value. 
The oem_data array immediately follows the reg_data_pairs array + */ + u8 oem_data[1]; /* Variable length data */ +} sal_log_pci_comp_err_info_t; + +typedef struct sal_log_plat_specific_err_info { + sal_log_section_hdr_t header; + struct { + u64 err_status : 1, + guid : 1, + oem_data : 1, + reserved : 61; + } valid; + u64 err_status; + efi_guid_t guid; + u8 oem_data[1]; /* platform specific variable length data */ +} sal_log_plat_specific_err_info_t; + +typedef struct sal_log_host_ctlr_err_info { + sal_log_section_hdr_t header; + struct { + u64 err_status : 1, + requestor_id : 1, + responder_id : 1, + target_id : 1, + bus_spec_data : 1, + oem_data : 1, + reserved : 58; + } valid; + u64 err_status; + u64 requestor_id; + u64 responder_id; + u64 target_id; + u64 bus_spec_data; + u8 oem_data[1]; /* Variable length OEM data */ +} sal_log_host_ctlr_err_info_t; + +typedef struct sal_log_plat_bus_err_info { + sal_log_section_hdr_t header; + struct { + u64 err_status : 1, + requestor_id : 1, + responder_id : 1, + target_id : 1, + bus_spec_data : 1, + oem_data : 1, + reserved : 58; + } valid; + u64 err_status; + u64 requestor_id; + u64 responder_id; + u64 target_id; + u64 bus_spec_data; + u8 oem_data[1]; /* Variable length OEM data */ +} sal_log_plat_bus_err_info_t; + +/* Overall platform error section structure */ +typedef union sal_log_platform_err_info { + sal_log_mem_dev_err_info_t mem_dev_err; + sal_log_sel_dev_err_info_t sel_dev_err; + sal_log_pci_bus_err_info_t pci_bus_err; + sal_log_smbios_dev_err_info_t smbios_dev_err; + sal_log_pci_comp_err_info_t pci_comp_err; + sal_log_plat_specific_err_info_t plat_specific_err; + sal_log_host_ctlr_err_info_t host_ctlr_err; + sal_log_plat_bus_err_info_t plat_bus_err; +} sal_log_platform_err_info_t; + +/* SAL log over-all, multi-section error record structure (processor+platform) */ +typedef struct err_rec { + sal_log_record_header_t sal_elog_header; + sal_log_processor_info_t proc_err; + sal_log_platform_err_info_t plat_err; + u8 
oem_data_pad[1024]; +} ia64_err_rec_t; + +/* + * Now define a couple of inline functions for improved type checking + * and convenience. + */ + +extern s64 ia64_sal_cache_flush (u64 cache_type); +extern void __init check_sal_cache_flush (void); + +/* Initialize all the processor and platform level instruction and data caches */ +static inline s64 +ia64_sal_cache_init (void) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SAL_CACHE_INIT, 0, 0, 0, 0, 0, 0, 0); + return isrv.status; +} + +/* + * Clear the processor and platform information logged by SAL with respect to the machine + * state at the time of MCA's, INITs, CMCs, or CPEs. + */ +static inline s64 +ia64_sal_clear_state_info (u64 sal_info_type) +{ + struct ia64_sal_retval isrv; + SAL_CALL_REENTRANT(isrv, SAL_CLEAR_STATE_INFO, sal_info_type, 0, + 0, 0, 0, 0, 0); + return isrv.status; +} + + +/* Get the processor and platform information logged by SAL with respect to the machine + * state at the time of the MCAs, INITs, CMCs, or CPEs. + */ +static inline u64 +ia64_sal_get_state_info (u64 sal_info_type, u64 *sal_info) +{ + struct ia64_sal_retval isrv; + SAL_CALL_REENTRANT(isrv, SAL_GET_STATE_INFO, sal_info_type, 0, + sal_info, 0, 0, 0, 0); + if (isrv.status) + return 0; + + return isrv.v0; +} + +/* + * Get the maximum size of the information logged by SAL with respect to the machine state + * at the time of MCAs, INITs, CMCs, or CPEs. + */ +static inline u64 +ia64_sal_get_state_info_size (u64 sal_info_type) +{ + struct ia64_sal_retval isrv; + SAL_CALL_REENTRANT(isrv, SAL_GET_STATE_INFO_SIZE, sal_info_type, 0, + 0, 0, 0, 0, 0); + if (isrv.status) + return 0; + return isrv.v0; +} + +/* + * Causes the processor to go into a spin loop within SAL where SAL awaits a wakeup from + * the monarch processor. Must not lock, because it will not return on any cpu until the + * monarch processor sends a wake up. 
+ */ +static inline s64 +ia64_sal_mc_rendez (void) +{ + struct ia64_sal_retval isrv; + SAL_CALL_NOLOCK(isrv, SAL_MC_RENDEZ, 0, 0, 0, 0, 0, 0, 0); + return isrv.status; +} + +/* + * Allow the OS to specify the interrupt number to be used by SAL to interrupt OS during + * the machine check rendezvous sequence as well as the mechanism to wake up the + * non-monarch processor at the end of machine check processing. + * Returns the complete ia64_sal_retval because some calls return more than just a status + * value. + */ +static inline struct ia64_sal_retval +ia64_sal_mc_set_params (u64 param_type, u64 i_or_m, u64 i_or_m_val, u64 timeout, u64 rz_always) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SAL_MC_SET_PARAMS, param_type, i_or_m, i_or_m_val, + timeout, rz_always, 0, 0); + return isrv; +} + +/* Read from PCI configuration space */ +static inline s64 +ia64_sal_pci_config_read (u64 pci_config_addr, int type, u64 size, u64 *value) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SAL_PCI_CONFIG_READ, pci_config_addr, size, type, 0, 0, 0, 0); + if (value) + *value = isrv.v0; + return isrv.status; +} + +/* Write to PCI configuration space */ +static inline s64 +ia64_sal_pci_config_write (u64 pci_config_addr, int type, u64 size, u64 value) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SAL_PCI_CONFIG_WRITE, pci_config_addr, size, value, + type, 0, 0, 0); + return isrv.status; +} + +/* + * Register physical addresses of locations needed by SAL when SAL procedures are invoked + * in virtual mode. + */ +static inline s64 +ia64_sal_register_physical_addr (u64 phys_entry, u64 phys_addr) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SAL_REGISTER_PHYSICAL_ADDR, phys_entry, phys_addr, + 0, 0, 0, 0, 0); + return isrv.status; +} + +/* + * Register software dependent code locations within SAL. These locations are handlers or + * entry points where SAL will pass control for the specified event. 
These event handlers + * are for the boot rendezvous, MCAs and INIT scenarios. + */ +static inline s64 +ia64_sal_set_vectors (u64 vector_type, + u64 handler_addr1, u64 gp1, u64 handler_len1, + u64 handler_addr2, u64 gp2, u64 handler_len2) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SAL_SET_VECTORS, vector_type, + handler_addr1, gp1, handler_len1, + handler_addr2, gp2, handler_len2); + + return isrv.status; +} + +/* Update the contents of PAL block in the non-volatile storage device */ +static inline s64 +ia64_sal_update_pal (u64 param_buf, u64 scratch_buf, u64 scratch_buf_size, + u64 *error_code, u64 *scratch_buf_size_needed) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SAL_UPDATE_PAL, param_buf, scratch_buf, scratch_buf_size, + 0, 0, 0, 0); + if (error_code) + *error_code = isrv.v0; + if (scratch_buf_size_needed) + *scratch_buf_size_needed = isrv.v1; + return isrv.status; +} + +/* Get physical processor die mapping in the platform. */ +static inline s64 +ia64_sal_physical_id_info(u16 *splid) +{ + struct ia64_sal_retval isrv; + + if (sal_revision < SAL_VERSION_CODE(3,2)) + return -1; + + SAL_CALL(isrv, SAL_PHYSICAL_ID_INFO, 0, 0, 0, 0, 0, 0, 0); + if (splid) + *splid = isrv.v0; + return isrv.status; +} + +extern unsigned long sal_platform_features; + +extern int (*salinfo_platform_oemdata)(const u8 *, u8 **, u64 *); + +struct sal_ret_values { + long r8; long r9; long r10; long r11; +}; + +#define IA64_SAL_OEMFUNC_MIN 0x02000000 +#define IA64_SAL_OEMFUNC_MAX 0x03ffffff + +extern int ia64_sal_oemcall(struct ia64_sal_retval *, u64, u64, u64, u64, u64, + u64, u64, u64); +extern int ia64_sal_oemcall_nolock(struct ia64_sal_retval *, u64, u64, u64, + u64, u64, u64, u64, u64); +extern int ia64_sal_oemcall_reentrant(struct ia64_sal_retval *, u64, u64, u64, + u64, u64, u64, u64, u64); +extern long +ia64_sal_freq_base (unsigned long which, unsigned long *ticks_per_second, + unsigned long *drift_info); +#ifdef CONFIG_HOTPLUG_CPU +/* + * System Abstraction Layer
Specification + * Section 3.2.5.1: OS_BOOT_RENDEZ to SAL return State. + * Note: region regs are stored first in head.S _start. Hence they must + * stay up front. + */ +struct sal_to_os_boot { + u64 rr[8]; /* Region Registers */ + u64 br[6]; /* br0: + * return addr into SAL boot rendez routine */ + u64 gr1; /* SAL:GP */ + u64 gr12; /* SAL:SP */ + u64 gr13; /* SAL: Task Pointer */ + u64 fpsr; + u64 pfs; + u64 rnat; + u64 unat; + u64 bspstore; + u64 dcr; /* Default Control Register */ + u64 iva; + u64 pta; + u64 itv; + u64 pmv; + u64 cmcv; + u64 lrr[2]; + u64 gr[4]; + u64 pr; /* Predicate registers */ + u64 lc; /* Loop Count */ + struct ia64_fpreg fp[20]; +}; + +/* + * Global array allocated for NR_CPUS at boot time + */ +extern struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS]; + +extern void ia64_jump_to_sal(struct sal_to_os_boot *); +#endif + +extern void ia64_sal_handler_init(void *entry_point, void *gpval); + +#define PALO_MAX_TLB_PURGES 0xFFFF +#define PALO_SIG "PALO" + +struct palo_table { + u8 signature[4]; /* Should be "PALO" */ + u32 length; + u8 minor_revision; + u8 major_revision; + u8 checksum; + u8 reserved1[5]; + u16 max_tlb_purges; + u8 reserved2[6]; +}; + +#define NPTCG_FROM_PAL 0 +#define NPTCG_FROM_PALO 1 +#define NPTCG_FROM_KERNEL_PARAMETER 2 + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_IA64_SAL_H */ diff --git a/kernel/arch/ia64/include/asm/sections.h b/kernel/arch/ia64/include/asm/sections.h new file mode 100644 index 000000000..2ab200369 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sections.h @@ -0,0 +1,42 @@ +#ifndef _ASM_IA64_SECTIONS_H +#define _ASM_IA64_SECTIONS_H + +/* + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include +#include +#include + +extern char __phys_per_cpu_start[]; +#ifdef CONFIG_SMP +extern char __cpu0_per_cpu[]; +#endif +extern char __start___vtop_patchlist[], __end___vtop_patchlist[]; +extern char __start___rse_patchlist[], __end___rse_patchlist[]; +extern char 
__start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[]; +extern char __start___phys_stack_reg_patchlist[], __end___phys_stack_reg_patchlist[]; +extern char __start_gate_section[]; +extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[]; +extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[]; +extern char __start_gate_fsyscall_patchlist[], __end_gate_fsyscall_patchlist[]; +extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_bubble_down_patchlist[]; +extern char __start_unwind[], __end_unwind[]; +extern char __start_ivt_text[], __end_ivt_text[]; + +#undef dereference_function_descriptor +static inline void *dereference_function_descriptor(void *ptr) +{ + struct fdesc *desc = ptr; + void *p; + + if (!probe_kernel_address(&desc->ip, p)) + ptr = p; + return ptr; +} + + +#endif /* _ASM_IA64_SECTIONS_H */ + diff --git a/kernel/arch/ia64/include/asm/segment.h b/kernel/arch/ia64/include/asm/segment.h new file mode 100644 index 000000000..b89e2b3d6 --- /dev/null +++ b/kernel/arch/ia64/include/asm/segment.h @@ -0,0 +1,6 @@ +#ifndef _ASM_IA64_SEGMENT_H +#define _ASM_IA64_SEGMENT_H + +/* Only here because we have some old header files that expect it.. */ + +#endif /* _ASM_IA64_SEGMENT_H */ diff --git a/kernel/arch/ia64/include/asm/serial.h b/kernel/arch/ia64/include/asm/serial.h new file mode 100644 index 000000000..068be1158 --- /dev/null +++ b/kernel/arch/ia64/include/asm/serial.h @@ -0,0 +1,17 @@ +/* + * Derived from the i386 version. + */ + +/* + * This assumes you have a 1.8432 MHz clock for your UART. + * + * It'd be nice if someone built a serial card with a 24.576 MHz + * clock, since the 16550A is capable of handling a top speed of 1.5 + * megabits/second; but this requires the faster clock. + */ +#define BASE_BAUD ( 1843200 / 16 ) + +/* + * All legacy serial ports should be enumerated via ACPI namespace, so + * we need not list them here. 
+ */ diff --git a/kernel/arch/ia64/include/asm/shmparam.h b/kernel/arch/ia64/include/asm/shmparam.h new file mode 100644 index 000000000..d07508dc5 --- /dev/null +++ b/kernel/arch/ia64/include/asm/shmparam.h @@ -0,0 +1,12 @@ +#ifndef _ASM_IA64_SHMPARAM_H +#define _ASM_IA64_SHMPARAM_H + +/* + * SHMLBA controls minimum alignment at which shared memory segments + * get attached. The IA-64 architecture says that there may be a + * performance degradation when there are virtual aliases within 1MB. + * To reduce the chance of this, we set SHMLBA to 1MB. --davidm 00/12/20 + */ +#define SHMLBA (1024*1024) + +#endif /* _ASM_IA64_SHMPARAM_H */ diff --git a/kernel/arch/ia64/include/asm/siginfo.h b/kernel/arch/ia64/include/asm/siginfo.h new file mode 100644 index 000000000..6f2e2dd0f --- /dev/null +++ b/kernel/arch/ia64/include/asm/siginfo.h @@ -0,0 +1,23 @@ +/* + * Based on . + * + * Modified 1998-2002 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#ifndef _ASM_IA64_SIGINFO_H +#define _ASM_IA64_SIGINFO_H + +#include +#include + +static inline void +copy_siginfo (siginfo_t *to, siginfo_t *from) +{ + if (from->si_code < 0) + memcpy(to, from, sizeof(siginfo_t)); + else + /* _sigchld is currently the largest known union member */ + memcpy(to, from, 4*sizeof(int) + sizeof(from->_sifields._sigchld)); +} + +#endif /* _ASM_IA64_SIGINFO_H */ diff --git a/kernel/arch/ia64/include/asm/signal.h b/kernel/arch/ia64/include/asm/signal.h new file mode 100644 index 000000000..c62afa4a0 --- /dev/null +++ b/kernel/arch/ia64/include/asm/signal.h @@ -0,0 +1,32 @@ +/* + * Modified 1998-2001, 2003 + * David Mosberger-Tang , Hewlett-Packard Co + * + * Unfortunately, this file is being included by bits/signal.h in + * glibc-2.x. Hence the #ifdef __KERNEL__ ugliness.
+ */ +#ifndef _ASM_IA64_SIGNAL_H +#define _ASM_IA64_SIGNAL_H + +#include + + +#define _NSIG 64 +#define _NSIG_BPW 64 +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +# ifndef __ASSEMBLY__ + +/* Most things should be clean enough to redefine this at will, if care + is taken to make libc match. */ + +typedef unsigned long old_sigset_t; + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +# include + +# endif /* !__ASSEMBLY__ */ +#endif /* _ASM_IA64_SIGNAL_H */ diff --git a/kernel/arch/ia64/include/asm/smp.h b/kernel/arch/ia64/include/asm/smp.h new file mode 100644 index 000000000..fea21e986 --- /dev/null +++ b/kernel/arch/ia64/include/asm/smp.h @@ -0,0 +1,137 @@ +/* + * SMP Support + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * (c) Copyright 2001-2003, 2005 Hewlett-Packard Development Company, L.P. + * David Mosberger-Tang + * Bjorn Helgaas + */ +#ifndef _ASM_IA64_SMP_H +#define _ASM_IA64_SMP_H + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static inline unsigned int +ia64_get_lid (void) +{ + union { + struct { + unsigned long reserved : 16; + unsigned long eid : 8; + unsigned long id : 8; + unsigned long ignored : 32; + } f; + unsigned long bits; + } lid; + + lid.bits = ia64_getreg(_IA64_REG_CR_LID); + return lid.f.id << 8 | lid.f.eid; +} + +#define hard_smp_processor_id() ia64_get_lid() + +#ifdef CONFIG_SMP + +#define XTP_OFFSET 0x1e0008 + +#define SMP_IRQ_REDIRECTION (1 << 0) +#define SMP_IPI_REDIRECTION (1 << 1) + +#define raw_smp_processor_id() (current_thread_info()->cpu) + +extern struct smp_boot_data { + int cpu_count; + int cpu_phys_id[NR_CPUS]; +} smp_boot_data __initdata; + +extern char no_int_routing; + +extern cpumask_t cpu_core_map[NR_CPUS]; +DECLARE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map); +extern int smp_num_siblings; +extern void __iomem *ipi_base_addr; +extern unsigned char smp_int_redirect; + +extern volatile int 
ia64_cpu_to_sapicid[]; +#define cpu_physical_id(i) ia64_cpu_to_sapicid[i] + +extern unsigned long ap_wakeup_vector; + +/* + * Function to map hard smp processor id to logical id. Slow, so don't use this in + * performance-critical code. + */ +static inline int +cpu_logical_id (int cpuid) +{ + int i; + + for (i = 0; i < NR_CPUS; ++i) + if (cpu_physical_id(i) == cpuid) + break; + return i; +} + +/* + * XTP control functions: + * min_xtp : route all interrupts to this CPU + * normal_xtp: nominal XTP value + * max_xtp : never deliver interrupts to this CPU. + */ + +static inline void +min_xtp (void) +{ + if (smp_int_redirect & SMP_IRQ_REDIRECTION) + writeb(0x00, ipi_base_addr + XTP_OFFSET); /* XTP to min */ +} + +static inline void +normal_xtp (void) +{ + if (smp_int_redirect & SMP_IRQ_REDIRECTION) + writeb(0x08, ipi_base_addr + XTP_OFFSET); /* XTP normal */ +} + +static inline void +max_xtp (void) +{ + if (smp_int_redirect & SMP_IRQ_REDIRECTION) + writeb(0x0f, ipi_base_addr + XTP_OFFSET); /* Set XTP to max */ +} + +/* Upping and downing of CPUs */ +extern int __cpu_disable (void); +extern void __cpu_die (unsigned int cpu); +extern void cpu_die (void) __attribute__ ((noreturn)); +extern void __init smp_build_cpu_map(void); + +extern void __init init_smp_config (void); +extern void smp_do_timer (struct pt_regs *regs); + +extern irqreturn_t handle_IPI(int irq, void *dev_id); +extern void smp_send_reschedule (int cpu); +extern void identify_siblings (struct cpuinfo_ia64 *); +extern int is_multithreading_enabled(void); + +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); + +#else /* CONFIG_SMP */ + +#define cpu_logical_id(i) 0 +#define cpu_physical_id(i) ia64_get_lid() + +#endif /* CONFIG_SMP */ +#endif /* _ASM_IA64_SMP_H */ diff --git a/kernel/arch/ia64/include/asm/sn/acpi.h b/kernel/arch/ia64/include/asm/sn/acpi.h new file mode 100644 index 000000000..fd480db25 --- /dev/null +++ 
b/kernel/arch/ia64/include/asm/sn/acpi.h @@ -0,0 +1,15 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_IA64_SN_ACPI_H +#define _ASM_IA64_SN_ACPI_H + +extern int sn_acpi_rev; +#define SN_ACPI_BASE_SUPPORT() (sn_acpi_rev >= 0x20101) + +#endif /* _ASM_IA64_SN_ACPI_H */ diff --git a/kernel/arch/ia64/include/asm/sn/addrs.h b/kernel/arch/ia64/include/asm/sn/addrs.h new file mode 100644 index 000000000..e715c794b --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/addrs.h @@ -0,0 +1,299 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 1992-1999,2001-2005 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_IA64_SN_ADDRS_H +#define _ASM_IA64_SN_ADDRS_H + +#include +#include +#include +#include + +/* + * Memory/SHUB Address Format: + * +-+---------+--+--------------+ + * |0| NASID |AS| NodeOffset | + * +-+---------+--+--------------+ + * + * NASID: (low NASID bit is 0) Memory and SHUB MMRs + * AS: 2-bit Address Space Identifier. Used only if low NASID bit is 0 + * 00: Local Resources and MMR space + * Top bit of NodeOffset + * 0: Local resources space + * node id: + * 0: IA64/NT compatibility space + * 2: Local MMR Space + * 4: Local memory, regardless of local node id + * 1: Global MMR space + * 01: GET space. + * 10: AMO space. + * 11: Cacheable memory space. + * + * NodeOffset: byte offset + * + * + * TIO address format: + * +-+----------+--+--------------+ + * |0| NASID |AS| Nodeoffset | + * +-+----------+--+--------------+ + * + * NASID: (low NASID bit is 1) TIO + * AS: 2-bit Chiplet Identifier + * 00: TIO LB (Indicates TIO MMR access.) 
+ * 01: TIO ICE (indicates coretalk space access.) + * + * NodeOffset: top bit must be set. + * + * + * Note that in both of the above address formats, the low + * NASID bit indicates if the reference is to the SHUB or TIO MMRs. + */ + + +/* + * Define basic shift & mask constants for manipulating NASIDs and AS values. + */ +#define NASID_BITMASK (sn_hub_info->nasid_bitmask) +#define NASID_SHIFT (sn_hub_info->nasid_shift) +#define AS_SHIFT (sn_hub_info->as_shift) +#define AS_BITMASK 0x3UL + +#define NASID_MASK ((u64)NASID_BITMASK << NASID_SHIFT) +#define AS_MASK ((u64)AS_BITMASK << AS_SHIFT) + + +/* + * AS values. These are the same on both SHUB1 & SHUB2. + */ +#define AS_GET_VAL 1UL +#define AS_AMO_VAL 2UL +#define AS_CAC_VAL 3UL +#define AS_GET_SPACE (AS_GET_VAL << AS_SHIFT) +#define AS_AMO_SPACE (AS_AMO_VAL << AS_SHIFT) +#define AS_CAC_SPACE (AS_CAC_VAL << AS_SHIFT) + + +/* + * Virtual Mode Local & Global MMR space. + */ +#define SH1_LOCAL_MMR_OFFSET 0x8000000000UL +#define SH2_LOCAL_MMR_OFFSET 0x0200000000UL +#define LOCAL_MMR_OFFSET (is_shub2() ? SH2_LOCAL_MMR_OFFSET : SH1_LOCAL_MMR_OFFSET) +#define LOCAL_MMR_SPACE (__IA64_UNCACHED_OFFSET | LOCAL_MMR_OFFSET) +#define LOCAL_PHYS_MMR_SPACE (RGN_BASE(RGN_HPAGE) | LOCAL_MMR_OFFSET) + +#define SH1_GLOBAL_MMR_OFFSET 0x0800000000UL +#define SH2_GLOBAL_MMR_OFFSET 0x0300000000UL +#define GLOBAL_MMR_OFFSET (is_shub2() ? SH2_GLOBAL_MMR_OFFSET : SH1_GLOBAL_MMR_OFFSET) +#define GLOBAL_MMR_SPACE (__IA64_UNCACHED_OFFSET | GLOBAL_MMR_OFFSET) + +/* + * Physical mode addresses + */ +#define GLOBAL_PHYS_MMR_SPACE (RGN_BASE(RGN_HPAGE) | GLOBAL_MMR_OFFSET) + + +/* + * Clear region & AS bits. + */ +#define TO_PHYS_MASK (~(RGN_BITS | AS_MASK)) + + +/* + * Misc NASID manipulation. 
+ */ +#define NASID_SPACE(n) ((u64)(n) << NASID_SHIFT) +#define REMOTE_ADDR(n,a) (NASID_SPACE(n) | (a)) +#define NODE_OFFSET(x) ((x) & (NODE_ADDRSPACE_SIZE - 1)) +#define NODE_ADDRSPACE_SIZE (1UL << AS_SHIFT) +#define NASID_GET(x) (int) (((u64) (x) >> NASID_SHIFT) & NASID_BITMASK) +#define LOCAL_MMR_ADDR(a) (LOCAL_MMR_SPACE | (a)) +#define GLOBAL_MMR_ADDR(n,a) (GLOBAL_MMR_SPACE | REMOTE_ADDR(n,a)) +#define GLOBAL_MMR_PHYS_ADDR(n,a) (GLOBAL_PHYS_MMR_SPACE | REMOTE_ADDR(n,a)) +#define GLOBAL_CAC_ADDR(n,a) (CAC_BASE | REMOTE_ADDR(n,a)) +#define CHANGE_NASID(n,x) ((void *)(((u64)(x) & ~NASID_MASK) | NASID_SPACE(n))) +#define IS_TIO_NASID(n) ((n) & 1) + + +/* non-II mmr's start at top of big window space (4G) */ +#define BWIN_TOP 0x0000000100000000UL + +/* + * general address defines + */ +#define CAC_BASE (PAGE_OFFSET | AS_CAC_SPACE) +#define AMO_BASE (__IA64_UNCACHED_OFFSET | AS_AMO_SPACE) +#define AMO_PHYS_BASE (RGN_BASE(RGN_HPAGE) | AS_AMO_SPACE) +#define GET_BASE (PAGE_OFFSET | AS_GET_SPACE) + +/* + * Convert Memory addresses between various addressing modes. + */ +#define TO_PHYS(x) (TO_PHYS_MASK & (x)) +#define TO_CAC(x) (CAC_BASE | TO_PHYS(x)) +#ifdef CONFIG_SGI_SN +#define TO_AMO(x) (AMO_BASE | TO_PHYS(x)) +#define TO_GET(x) (GET_BASE | TO_PHYS(x)) +#else +#define TO_AMO(x) ({ BUG(); x; }) +#define TO_GET(x) ({ BUG(); x; }) +#endif + +/* + * Convert from processor physical address to II/TIO physical address: + * II - squeeze out the AS bits + * TIO- requires a chiplet id in bits 38-39. For DMA to memory, + * the chiplet id is zero. If we implement TIO-TIO dma, we might need + * to insert a chiplet id into this macro. However, it is our belief + * right now that this chiplet id will be ICE, which is also zero.
+ */ +#define SH1_TIO_PHYS_TO_DMA(x) \ + ((((u64)(NASID_GET(x))) << 40) | NODE_OFFSET(x)) + +#define SH2_NETWORK_BANK_OFFSET(x) \ + ((u64)(x) & ((1UL << (sn_hub_info->nasid_shift - 4)) -1)) + +#define SH2_NETWORK_BANK_SELECT(x) \ + ((((u64)(x) & (0x3UL << (sn_hub_info->nasid_shift - 4))) \ + >> (sn_hub_info->nasid_shift - 4)) << 36) + +#define SH2_NETWORK_ADDRESS(x) \ + (SH2_NETWORK_BANK_OFFSET(x) | SH2_NETWORK_BANK_SELECT(x)) + +#define SH2_TIO_PHYS_TO_DMA(x) \ + (((u64)(NASID_GET(x)) << 40) | SH2_NETWORK_ADDRESS(x)) + +#define PHYS_TO_TIODMA(x) \ + (is_shub1() ? SH1_TIO_PHYS_TO_DMA(x) : SH2_TIO_PHYS_TO_DMA(x)) + +#define PHYS_TO_DMA(x) \ + ((((u64)(x) & NASID_MASK) >> 2) | NODE_OFFSET(x)) + + +/* + * Macros to test for address type. + */ +#define IS_AMO_ADDRESS(x) (((u64)(x) & (RGN_BITS | AS_MASK)) == AMO_BASE) +#define IS_AMO_PHYS_ADDRESS(x) (((u64)(x) & (RGN_BITS | AS_MASK)) == AMO_PHYS_BASE) + + +/* + * The following definitions pertain to the IO special address + * space. They define the location of the big and little windows + * of any given node. + */ +#define BWIN_SIZE_BITS 29 /* big window size: 512M */ +#define TIO_BWIN_SIZE_BITS 30 /* big window size: 1G */ +#define NODE_SWIN_BASE(n, w) ((w == 0) ? 
NODE_BWIN_BASE((n), SWIN0_BIGWIN) \ + : RAW_NODE_SWIN_BASE(n, w)) +#define TIO_SWIN_BASE(n, w) (TIO_IO_BASE(n) + \ + ((u64) (w) << TIO_SWIN_SIZE_BITS)) +#define NODE_IO_BASE(n) (GLOBAL_MMR_SPACE | NASID_SPACE(n)) +#define TIO_IO_BASE(n) (__IA64_UNCACHED_OFFSET | NASID_SPACE(n)) +#define BWIN_SIZE (1UL << BWIN_SIZE_BITS) +#define NODE_BWIN_BASE0(n) (NODE_IO_BASE(n) + BWIN_SIZE) +#define NODE_BWIN_BASE(n, w) (NODE_BWIN_BASE0(n) + ((u64) (w) << BWIN_SIZE_BITS)) +#define RAW_NODE_SWIN_BASE(n, w) (NODE_IO_BASE(n) + ((u64) (w) << SWIN_SIZE_BITS)) +#define BWIN_WIDGET_MASK 0x7 +#define BWIN_WINDOWNUM(x) (((x) >> BWIN_SIZE_BITS) & BWIN_WIDGET_MASK) +#define SH1_IS_BIG_WINDOW_ADDR(x) ((x) & BWIN_TOP) + +#define TIO_BWIN_WINDOW_SELECT_MASK 0x7 +#define TIO_BWIN_WINDOWNUM(x) (((x) >> TIO_BWIN_SIZE_BITS) & TIO_BWIN_WINDOW_SELECT_MASK) + +#define TIO_HWIN_SHIFT_BITS 33 +#define TIO_HWIN(x) (NODE_OFFSET(x) >> TIO_HWIN_SHIFT_BITS) + +/* + * The following definitions pertain to the IO special address + * space. They define the location of the big and little windows + * of any given node. + */ + +#define SWIN_SIZE_BITS 24 +#define SWIN_WIDGET_MASK 0xF + +#define TIO_SWIN_SIZE_BITS 28 +#define TIO_SWIN_SIZE (1UL << TIO_SWIN_SIZE_BITS) +#define TIO_SWIN_WIDGET_MASK 0x3 + +/* + * Convert smallwindow address to xtalk address. + * + * 'addr' can be physical or virtual address, but will be converted + * to Xtalk address in the range 0 -> SWINZ_SIZEMASK + */ +#define SWIN_WIDGETNUM(x) (((x) >> SWIN_SIZE_BITS) & SWIN_WIDGET_MASK) +#define TIO_SWIN_WIDGETNUM(x) (((x) >> TIO_SWIN_SIZE_BITS) & TIO_SWIN_WIDGET_MASK) + + +/* + * The following macros produce the correct base virtual address for + * the hub registers. The REMOTE_HUB_* macro produce + * the address for the specified hub's registers. The intent is + * that the appropriate PI, MD, NI, or II register would be substituted + * for x. 
+ * + * WARNING: + * When certain Hub chip workarounds are defined, it's not sufficient + * to dereference the *_HUB_ADDR() macros. You should instead use + * HUB_L() and HUB_S() if you must deal with pointers to hub registers. + * Otherwise, the recommended approach is to use *_HUB_L() and *_HUB_S(). + * They're always safe. + */ +/* Shub1 TIO & MMR addressing macros */ +#define SH1_TIO_IOSPACE_ADDR(n,x) \ + GLOBAL_MMR_ADDR(n,x) + +#define SH1_REMOTE_BWIN_MMR(n,x) \ + GLOBAL_MMR_ADDR(n,x) + +#define SH1_REMOTE_SWIN_MMR(n,x) \ + (NODE_SWIN_BASE(n,1) + 0x800000UL + (x)) + +#define SH1_REMOTE_MMR(n,x) \ + (SH1_IS_BIG_WINDOW_ADDR(x) ? SH1_REMOTE_BWIN_MMR(n,x) : \ + SH1_REMOTE_SWIN_MMR(n,x)) + +/* Shub2 TIO & MMR addressing macros */ +#define SH2_TIO_IOSPACE_ADDR(n,x) \ + ((__IA64_UNCACHED_OFFSET | REMOTE_ADDR(n,x) | 1UL << (NASID_SHIFT - 2))) + +#define SH2_REMOTE_MMR(n,x) \ + GLOBAL_MMR_ADDR(n,x) + + +/* TIO & MMR addressing macros that work on both shub1 & shub2 */ +#define TIO_IOSPACE_ADDR(n,x) \ + ((u64 *)(is_shub1() ? SH1_TIO_IOSPACE_ADDR(n,x) : \ + SH2_TIO_IOSPACE_ADDR(n,x))) + +#define SH_REMOTE_MMR(n,x) \ + (is_shub1() ? SH1_REMOTE_MMR(n,x) : SH2_REMOTE_MMR(n,x)) + +#define REMOTE_HUB_ADDR(n,x) \ + (IS_TIO_NASID(n) ?
((volatile u64*)TIO_IOSPACE_ADDR(n,x)) : \ + ((volatile u64*)SH_REMOTE_MMR(n,x))) + + +#define HUB_L(x) (*((volatile typeof(*x) *)x)) +#define HUB_S(x,d) (*((volatile typeof(*x) *)x) = (d)) + +#define REMOTE_HUB_L(n, a) HUB_L(REMOTE_HUB_ADDR((n), (a))) +#define REMOTE_HUB_S(n, a, d) HUB_S(REMOTE_HUB_ADDR((n), (a)), (d)) + +/* + * Coretalk address breakdown + */ +#define CTALK_NASID_SHFT 40 +#define CTALK_NASID_MASK (0x3FFFULL << CTALK_NASID_SHFT) +#define CTALK_CID_SHFT 38 +#define CTALK_CID_MASK (0x3ULL << CTALK_CID_SHFT) +#define CTALK_NODE_OFFSET 0x3FFFFFFFFF + +#endif /* _ASM_IA64_SN_ADDRS_H */ diff --git a/kernel/arch/ia64/include/asm/sn/arch.h b/kernel/arch/ia64/include/asm/sn/arch.h new file mode 100644 index 000000000..31eb78486 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/arch.h @@ -0,0 +1,86 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI specific setup. + * + * Copyright (C) 1995-1997,1999,2001-2005 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1999 Ralf Baechle (ralf@gnu.org) + */ +#ifndef _ASM_IA64_SN_ARCH_H +#define _ASM_IA64_SN_ARCH_H + +#include +#include +#include +#include +#include + +/* + * This is the maximum number of NUMALINK nodes that can be part of a single + * SSI kernel. This number includes C-brick, M-bricks, and TIOs. Nodes in + * remote partitions are NOT included in this number. + * The number of compact nodes cannot exceed size of a coherency domain. + * The purpose of this define is to specify a node count that includes + * all C/M/TIO nodes in an SSI system. + * + * SGI system can currently support up to 256 C/M nodes plus additional TIO nodes. + * + * Note: ACPI20 has an architectural limit of 256 nodes. When we upgrade + * to ACPI3.0, this limit will be removed. The notion of "compact nodes" + * should be deleted and TIOs should be included in MAX_NUMNODES. 
+ */ +#define MAX_TIO_NODES MAX_NUMNODES +#define MAX_COMPACT_NODES (MAX_NUMNODES + MAX_TIO_NODES) + +/* + * Maximum number of nodes in all partitions and in all coherency domains. + * This is the total number of nodes accessible in the numalink fabric. It + * includes all C & M bricks, plus all TIOs. + * + * This value is also the value of the maximum number of NASIDs in the numalink + * fabric. + */ +#define MAX_NUMALINK_NODES 16384 + +/* + * The following defines attributes of the HUB chip. These attributes are + * frequently referenced. They are kept in the per-cpu data areas of each cpu. + * They are kept together in a struct to minimize cache misses. + */ +struct sn_hub_info_s { + u8 shub2; + u8 nasid_shift; + u8 as_shift; + u8 shub_1_1_found; + u16 nasid_bitmask; +}; +DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); +#define sn_hub_info this_cpu_ptr(&__sn_hub_info) +#define is_shub2() (sn_hub_info->shub2) +#define is_shub1() (sn_hub_info->shub2 == 0) + +/* + * Use this macro to test if shub 1.1 wars should be enabled + */ +#define enable_shub_wars_1_1() (sn_hub_info->shub_1_1_found) + + +/* + * Compact node ID to nasid mappings kept in the per-cpu data areas of each + * cpu. + */ +DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); +#define sn_cnodeid_to_nasid this_cpu_ptr(&__sn_cnodeid_to_nasid[0]) + + +extern u8 sn_partition_id; +extern u8 sn_system_size; +extern u8 sn_sharing_domain_size; +extern u8 sn_region_size; + +extern void sn_flush_all_caches(long addr, long bytes); +extern bool sn_cpu_disable_allowed(int cpu); + +#endif /* _ASM_IA64_SN_ARCH_H */ diff --git a/kernel/arch/ia64/include/asm/sn/bte.h b/kernel/arch/ia64/include/asm/sn/bte.h new file mode 100644 index 000000000..cc6c4dbf5 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/bte.h @@ -0,0 +1,234 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. 
See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2007 Silicon Graphics, Inc. All Rights Reserved. + */ + + +#ifndef _ASM_IA64_SN_BTE_H +#define _ASM_IA64_SN_BTE_H + +#include +#include +#include +#include +#include +#include + +#define IBCT_NOTIFY (0x1UL << 4) +#define IBCT_ZFIL_MODE (0x1UL << 0) + +/* #define BTE_DEBUG */ +/* #define BTE_DEBUG_VERBOSE */ + +#ifdef BTE_DEBUG +# define BTE_PRINTK(x) printk x /* Terse */ +# ifdef BTE_DEBUG_VERBOSE +# define BTE_PRINTKV(x) printk x /* Verbose */ +# else +# define BTE_PRINTKV(x) +# endif /* BTE_DEBUG_VERBOSE */ +#else +# define BTE_PRINTK(x) +# define BTE_PRINTKV(x) +#endif /* BTE_DEBUG */ + + +/* BTE status register only supports 16 bits for length field */ +#define BTE_LEN_BITS (16) +#define BTE_LEN_MASK ((1 << BTE_LEN_BITS) - 1) +#define BTE_MAX_XFER (BTE_LEN_MASK << L1_CACHE_SHIFT) + + +/* Define hardware */ +#define BTES_PER_NODE (is_shub2() ? 4 : 2) +#define MAX_BTES_PER_NODE 4 + +#define BTE2OFF_CTRL 0 +#define BTE2OFF_SRC (SH2_BT_ENG_SRC_ADDR_0 - SH2_BT_ENG_CSR_0) +#define BTE2OFF_DEST (SH2_BT_ENG_DEST_ADDR_0 - SH2_BT_ENG_CSR_0) +#define BTE2OFF_NOTIFY (SH2_BT_ENG_NOTIF_ADDR_0 - SH2_BT_ENG_CSR_0) + +#define BTE_BASE_ADDR(interface) \ + (is_shub2() ? (interface == 0) ? SH2_BT_ENG_CSR_0 : \ + (interface == 1) ? SH2_BT_ENG_CSR_1 : \ + (interface == 2) ? SH2_BT_ENG_CSR_2 : \ + SH2_BT_ENG_CSR_3 \ + : (interface == 0) ? IIO_IBLS0 : IIO_IBLS1) + +#define BTE_SOURCE_ADDR(base) \ + (is_shub2() ? base + (BTE2OFF_SRC/8) \ + : base + (BTEOFF_SRC/8)) + +#define BTE_DEST_ADDR(base) \ + (is_shub2() ? base + (BTE2OFF_DEST/8) \ + : base + (BTEOFF_DEST/8)) + +#define BTE_CTRL_ADDR(base) \ + (is_shub2() ? base + (BTE2OFF_CTRL/8) \ + : base + (BTEOFF_CTRL/8)) + +#define BTE_NOTIF_ADDR(base) \ + (is_shub2() ? 
base + (BTE2OFF_NOTIFY/8) \ + : base + (BTEOFF_NOTIFY/8)) + +/* Define hardware modes */ +#define BTE_NOTIFY IBCT_NOTIFY +#define BTE_NORMAL BTE_NOTIFY +#define BTE_ZERO_FILL (BTE_NOTIFY | IBCT_ZFIL_MODE) +/* Use a reserved bit to let the caller specify a wait for any BTE */ +#define BTE_WACQUIRE 0x4000 +/* Use the BTE on the node with the destination memory */ +#define BTE_USE_DEST (BTE_WACQUIRE << 1) +/* Use any available BTE interface on any node for the transfer */ +#define BTE_USE_ANY (BTE_USE_DEST << 1) +/* macro to force the IBCT0 value valid */ +#define BTE_VALID_MODE(x) ((x) & (IBCT_NOTIFY | IBCT_ZFIL_MODE)) + +#define BTE_ACTIVE (IBLS_BUSY | IBLS_ERROR) +#define BTE_WORD_AVAILABLE (IBLS_BUSY << 1) +#define BTE_WORD_BUSY (~BTE_WORD_AVAILABLE) + +/* + * Some macros to simplify reading. + * Start with macros to locate the BTE control registers. + */ +#define BTE_LNSTAT_LOAD(_bte) \ + HUB_L(_bte->bte_base_addr) +#define BTE_LNSTAT_STORE(_bte, _x) \ + HUB_S(_bte->bte_base_addr, (_x)) +#define BTE_SRC_STORE(_bte, _x) \ +({ \ + u64 __addr = ((_x) & ~AS_MASK); \ + if (is_shub2()) \ + __addr = SH2_TIO_PHYS_TO_DMA(__addr); \ + HUB_S(_bte->bte_source_addr, __addr); \ +}) +#define BTE_DEST_STORE(_bte, _x) \ +({ \ + u64 __addr = ((_x) & ~AS_MASK); \ + if (is_shub2()) \ + __addr = SH2_TIO_PHYS_TO_DMA(__addr); \ + HUB_S(_bte->bte_destination_addr, __addr); \ +}) +#define BTE_CTRL_STORE(_bte, _x) \ + HUB_S(_bte->bte_control_addr, (_x)) +#define BTE_NOTIF_STORE(_bte, _x) \ +({ \ + u64 __addr = ia64_tpa((_x) & ~AS_MASK); \ + if (is_shub2()) \ + __addr = SH2_TIO_PHYS_TO_DMA(__addr); \ + HUB_S(_bte->bte_notify_addr, __addr); \ +}) + +#define BTE_START_TRANSFER(_bte, _len, _mode) \ + is_shub2() ? BTE_CTRL_STORE(_bte, IBLS_BUSY | (_mode << 24) | _len) \ + : BTE_LNSTAT_STORE(_bte, _len); \ + BTE_CTRL_STORE(_bte, _mode) + +/* Possible results from bte_copy and bte_unaligned_copy */ +/* The following error codes map into the BTE hardware codes + * IIO_ICRB_ECODE_* (in shubio.h). 
The hardware uses + * an error code of 0 (IIO_ICRB_ECODE_DERR), but we want zero + * to mean BTE_SUCCESS, so add one (BTEFAIL_OFFSET) to the error + * codes to give the following error codes. + */ +#define BTEFAIL_OFFSET 1 + +typedef enum { + BTE_SUCCESS, /* 0 is success */ + BTEFAIL_DIR, /* Directory error due to IIO access*/ + BTEFAIL_POISON, /* poison error on IO access (write to poison page) */ + BTEFAIL_WERR, /* Write error (ie WINV to a Read only line) */ + BTEFAIL_ACCESS, /* access error (protection violation) */ + BTEFAIL_PWERR, /* Partial Write Error */ + BTEFAIL_PRERR, /* Partial Read Error */ + BTEFAIL_TOUT, /* CRB Time out */ + BTEFAIL_XTERR, /* Incoming xtalk pkt had error bit */ + BTEFAIL_NOTAVAIL, /* BTE not available */ +} bte_result_t; + +#define BTEFAIL_SH2_RESP_SHORT 0x1 /* bit 000001 */ +#define BTEFAIL_SH2_RESP_LONG 0x2 /* bit 000010 */ +#define BTEFAIL_SH2_RESP_DSP 0x4 /* bit 000100 */ +#define BTEFAIL_SH2_RESP_ACCESS 0x8 /* bit 001000 */ +#define BTEFAIL_SH2_CRB_TO 0x10 /* bit 010000 */ +#define BTEFAIL_SH2_NACK_LIMIT 0x20 /* bit 100000 */ +#define BTEFAIL_SH2_ALL 0x3F /* bit 111111 */ + +#define BTE_ERR_BITS 0x3FUL +#define BTE_ERR_SHIFT 36 +#define BTE_ERR_MASK (BTE_ERR_BITS << BTE_ERR_SHIFT) + +#define BTE_ERROR_RETRY(value) \ + (is_shub2() ? (value != BTEFAIL_SH2_CRB_TO) \ + : (value != BTEFAIL_TOUT)) + +/* + * On shub1 BTE_ERR_MASK will always be false, so no need for is_shub2() + */ +#define BTE_SHUB2_ERROR(_status) \ + ((_status & BTE_ERR_MASK) \ + ? (((_status >> BTE_ERR_SHIFT) & BTE_ERR_BITS) | IBLS_ERROR) \ + : _status) + +#define BTE_GET_ERROR_STATUS(_status) \ + (BTE_SHUB2_ERROR(_status) & ~IBLS_ERROR) + +#define BTE_VALID_SH2_ERROR(value) \ + ((value >= BTEFAIL_SH2_RESP_SHORT) && (value <= BTEFAIL_SH2_ALL)) + +/* + * Structure defining a bte. 
An instance of this + * structure is created in the nodepda for each + * bte on that node (as defined by BTES_PER_NODE) + * This structure contains everything necessary + * to work with a BTE. + */ +struct bteinfo_s { + volatile u64 notify ____cacheline_aligned; + u64 *bte_base_addr ____cacheline_aligned; + u64 *bte_source_addr; + u64 *bte_destination_addr; + u64 *bte_control_addr; + u64 *bte_notify_addr; + spinlock_t spinlock; + cnodeid_t bte_cnode; /* cnode */ + int bte_error_count; /* Number of errors encountered */ + int bte_num; /* 0 --> BTE0, 1 --> BTE1 */ + int cleanup_active; /* Interface is locked for cleanup */ + volatile bte_result_t bh_error; /* error while processing */ + volatile u64 *most_rcnt_na; + struct bteinfo_s *btes_to_try[MAX_BTES_PER_NODE]; +}; + + +/* + * Function prototypes (functions defined in bte.c, used elsewhere) + */ +extern bte_result_t bte_copy(u64, u64, u64, u64, void *); +extern bte_result_t bte_unaligned_copy(u64, u64, u64, u64); +extern void bte_error_handler(unsigned long); + +#define bte_zero(dest, len, mode, notification) \ + bte_copy(0, dest, len, ((mode) | BTE_ZERO_FILL), notification) + +/* + * The following is the preferred way of calling bte_unaligned_copy + * If the copy is fully cache line aligned, then bte_copy is + * used instead. Since bte_copy is inlined, this saves a call + * stack. NOTE: bte_copy is called synchronously and does block + * until the transfer is complete. In order to get the asynch + * version of bte_copy, you must perform this check yourself. + */ +#define BTE_UNALIGNED_COPY(src, dest, len, mode) \ + (((len & (L1_CACHE_BYTES - 1)) || \ + (src & (L1_CACHE_BYTES - 1)) || \ + (dest & (L1_CACHE_BYTES - 1))) ? 
\ + bte_unaligned_copy(src, dest, len, mode) : \ + bte_copy(src, dest, len, mode, NULL)) + + +#endif /* _ASM_IA64_SN_BTE_H */ diff --git a/kernel/arch/ia64/include/asm/sn/clksupport.h b/kernel/arch/ia64/include/asm/sn/clksupport.h new file mode 100644 index 000000000..d340c365a --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/clksupport.h @@ -0,0 +1,28 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ + +/* + * This file contains definitions for accessing a platform supported high resolution + * clock. The clock is monotonically increasing and can be accessed from any node + * in the system. The clock is synchronized across nodes - all nodes see the + * same value. + * + * RTC_COUNTER_ADDR - contains the address of the counter + * + */ + +#ifndef _ASM_IA64_SN_CLKSUPPORT_H +#define _ASM_IA64_SN_CLKSUPPORT_H + +extern unsigned long sn_rtc_cycles_per_second; + +#define RTC_COUNTER_ADDR ((long *)LOCAL_MMR_ADDR(SH_RTC)) + +#define rtc_time() (*RTC_COUNTER_ADDR) + +#endif /* _ASM_IA64_SN_CLKSUPPORT_H */ diff --git a/kernel/arch/ia64/include/asm/sn/geo.h b/kernel/arch/ia64/include/asm/sn/geo.h new file mode 100644 index 000000000..f083c9434 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/geo.h @@ -0,0 +1,132 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_IA64_SN_GEO_H +#define _ASM_IA64_SN_GEO_H + +/* The geoid_t implementation below is based loosely on the pcfg_t + implementation in sys/SN/promcfg.h. */ + +/* Type declarations */ + +/* Size of a geoid_t structure (must be before decl.
of geoid_u) */ +#define GEOID_SIZE 8 /* Would 16 be better? The size can + be different on different platforms. */ + +#define MAX_SLOTS 0xf /* slots per module */ +#define MAX_SLABS 0xf /* slabs per slot */ + +typedef unsigned char geo_type_t; + +/* Fields common to all substructures */ +typedef struct geo_common_s { + moduleid_t module; /* The module (box) this h/w lives in */ + geo_type_t type; /* What type of h/w is named by this geoid_t */ + slabid_t slab:4; /* slab (ASIC), 0 .. 15 within slot */ + slotid_t slot:4; /* slot (Blade), 0 .. 15 within module */ +} geo_common_t; + +/* Additional fields for particular types of hardware */ +typedef struct geo_node_s { + geo_common_t common; /* No additional fields needed */ +} geo_node_t; + +typedef struct geo_rtr_s { + geo_common_t common; /* No additional fields needed */ +} geo_rtr_t; + +typedef struct geo_iocntl_s { + geo_common_t common; /* No additional fields needed */ +} geo_iocntl_t; + +typedef struct geo_pcicard_s { + geo_iocntl_t common; + char bus; /* Bus/widget number */ + char slot; /* PCI slot number */ +} geo_pcicard_t; + +/* Subcomponents of a node */ +typedef struct geo_cpu_s { + geo_node_t node; + char slice; /* Which CPU on the node */ +} geo_cpu_t; + +typedef struct geo_mem_s { + geo_node_t node; + char membus; /* The memory bus on the node */ + char memslot; /* The memory slot on the bus */ +} geo_mem_t; + + +typedef union geoid_u { + geo_common_t common; + geo_node_t node; + geo_iocntl_t iocntl; + geo_pcicard_t pcicard; + geo_rtr_t rtr; + geo_cpu_t cpu; + geo_mem_t mem; + char padsize[GEOID_SIZE]; +} geoid_t; + + +/* Preprocessor macros */ + +#define GEO_MAX_LEN 48 /* max. 
formatted length, plus some pad: + module/001c07/slab/5/node/memory/2/slot/4 */ + +/* Values for geo_type_t */ +#define GEO_TYPE_INVALID 0 +#define GEO_TYPE_MODULE 1 +#define GEO_TYPE_NODE 2 +#define GEO_TYPE_RTR 3 +#define GEO_TYPE_IOCNTL 4 +#define GEO_TYPE_IOCARD 5 +#define GEO_TYPE_CPU 6 +#define GEO_TYPE_MEM 7 +#define GEO_TYPE_MAX (GEO_TYPE_MEM+1) + +/* Parameter for hwcfg_format_geoid_compt() */ +#define GEO_COMPT_MODULE 1 +#define GEO_COMPT_SLAB 2 +#define GEO_COMPT_IOBUS 3 +#define GEO_COMPT_IOSLOT 4 +#define GEO_COMPT_CPU 5 +#define GEO_COMPT_MEMBUS 6 +#define GEO_COMPT_MEMSLOT 7 + +#define GEO_INVALID_STR "" + +#define INVALID_NASID ((nasid_t)-1) +#define INVALID_CNODEID ((cnodeid_t)-1) +#define INVALID_PNODEID ((pnodeid_t)-1) +#define INVALID_SLAB ((slabid_t)-1) /* parenthesized like the other INVALID_* values */ +#define INVALID_SLOT ((slotid_t)-1) /* parenthesized like the other INVALID_* values */ +#define INVALID_MODULE ((moduleid_t)-1) + +static inline slabid_t geo_slab(geoid_t g) /* slab of g, or INVALID_SLAB when g's type is GEO_TYPE_INVALID */ +{ + return (g.common.type == GEO_TYPE_INVALID) ? + INVALID_SLAB : g.common.slab; +} + +static inline slotid_t geo_slot(geoid_t g) /* slot of g, or INVALID_SLOT when g's type is GEO_TYPE_INVALID */ +{ + return (g.common.type == GEO_TYPE_INVALID) ? + INVALID_SLOT : g.common.slot; +} + +static inline moduleid_t geo_module(geoid_t g) /* module of g, or INVALID_MODULE when g's type is GEO_TYPE_INVALID */ +{ + return (g.common.type == GEO_TYPE_INVALID) ? + INVALID_MODULE : g.common.module; +} + +extern geoid_t cnodeid_get_geoid(cnodeid_t cnode); + +#endif /* _ASM_IA64_SN_GEO_H */ diff --git a/kernel/arch/ia64/include/asm/sn/intr.h b/kernel/arch/ia64/include/asm/sn/intr.h new file mode 100644 index 000000000..e0487aa97 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/intr.h @@ -0,0 +1,68 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved.
+ */ + +#ifndef _ASM_IA64_SN_INTR_H +#define _ASM_IA64_SN_INTR_H + +#include +#include + +#define SGI_UART_VECTOR 0xe9 + +/* Reserved IRQs : Note, not to exceed IA64_SN2_FIRST_DEVICE_VECTOR */ +#define SGI_XPC_ACTIVATE 0x30 +#define SGI_II_ERROR 0x31 +#define SGI_XBOW_ERROR 0x32 +#define SGI_PCIASIC_ERROR 0x33 +#define SGI_ACPI_SCI_INT 0x34 +#define SGI_TIOCA_ERROR 0x35 +#define SGI_TIO_ERROR 0x36 +#define SGI_TIOCX_ERROR 0x37 +#define SGI_MMTIMER_VECTOR 0x38 +#define SGI_XPC_NOTIFY 0xe7 + +#define IA64_SN2_FIRST_DEVICE_VECTOR 0x3c +#define IA64_SN2_LAST_DEVICE_VECTOR 0xe6 + +#define SN2_IRQ_RESERVED 0x1 +#define SN2_IRQ_CONNECTED 0x2 +#define SN2_IRQ_SHARED 0x4 + +// The SN PROM irq struct +struct sn_irq_info { + struct sn_irq_info *irq_next; /* deprecated DO NOT USE */ + short irq_nasid; /* Nasid IRQ is assigned to */ + int irq_slice; /* slice IRQ is assigned to */ + int irq_cpuid; /* kernel logical cpuid */ + int irq_irq; /* the IRQ number */ + int irq_int_bit; /* Bridge interrupt pin */ + /* <0 means MSI */ + u64 irq_xtalkaddr; /* xtalkaddr IRQ is sent to */ + int irq_bridge_type;/* pciio asic type (pciio.h) */ + void *irq_bridge; /* bridge generating irq */ + void *irq_pciioinfo; /* associated pciio_info_t */ + int irq_last_intr; /* For Shub lb lost intr WAR */ + int irq_cookie; /* unique cookie */ + int irq_flags; /* flags */ + int irq_share_cnt; /* num devices sharing IRQ */ + struct list_head list; /* list of sn_irq_info structs */ + struct rcu_head rcu; /* rcu callback list */ +}; + +extern void sn_send_IPI_phys(int, long, int, int); +extern u64 sn_intr_alloc(nasid_t, int, + struct sn_irq_info *, + int, nasid_t, int); +extern void sn_intr_free(nasid_t, int, struct sn_irq_info *); +extern struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *, nasid_t, int); +extern void sn_set_err_irq_affinity(unsigned int); +extern struct list_head **sn_irq_lh; + +#define CPU_VECTOR_TO_IRQ(cpuid,vector) (vector) + +#endif /* _ASM_IA64_SN_INTR_H */ diff --git 
a/kernel/arch/ia64/include/asm/sn/io.h b/kernel/arch/ia64/include/asm/sn/io.h new file mode 100644 index 000000000..41c73a735 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/io.h @@ -0,0 +1,274 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_SN_IO_H +#define _ASM_SN_IO_H +#include +#include + +extern void * sn_io_addr(unsigned long port) __attribute_const__; /* Forward definition */ +extern void __sn_mmiowb(void); /* Forward definition */ + +extern int num_cnodes; + +#define __sn_mf_a() ia64_mfa() + +extern void sn_dma_flush(unsigned long); + +#define __sn_inb ___sn_inb +#define __sn_inw ___sn_inw +#define __sn_inl ___sn_inl +#define __sn_outb ___sn_outb +#define __sn_outw ___sn_outw +#define __sn_outl ___sn_outl +#define __sn_readb ___sn_readb +#define __sn_readw ___sn_readw +#define __sn_readl ___sn_readl +#define __sn_readq ___sn_readq +#define __sn_readb_relaxed ___sn_readb_relaxed +#define __sn_readw_relaxed ___sn_readw_relaxed +#define __sn_readl_relaxed ___sn_readl_relaxed +#define __sn_readq_relaxed ___sn_readq_relaxed + +/* + * Convenience macros for setting/clearing bits using the above accessors + */ + +#define __sn_setq_relaxed(addr, val) \ + writeq((__sn_readq_relaxed(addr) | (val)), (addr)) +#define __sn_clrq_relaxed(addr, val) \ + writeq((__sn_readq_relaxed(addr) & ~(val)), (addr)) + +/* + * The following routines are SN Platform specific, called when + * a reference is made to inX/outX set macros. SN Platform + * inX set of macros ensures that Posted DMA writes on the + * Bridge are flushed. + * + * The routines should be self-explanatory.
+ */ + +static inline unsigned int +___sn_inb (unsigned long port) +{ + volatile unsigned char *addr; + unsigned char ret = -1; /* stays all-ones when sn_io_addr() yields no mapping */ + + if ((addr = sn_io_addr(port))) { + ret = *addr; + __sn_mf_a(); + sn_dma_flush((unsigned long)addr); + } + return ret; +} + +static inline unsigned int +___sn_inw (unsigned long port) +{ + volatile unsigned short *addr; + unsigned short ret = -1; /* stays all-ones when sn_io_addr() yields no mapping */ + + if ((addr = sn_io_addr(port))) { + ret = *addr; + __sn_mf_a(); + sn_dma_flush((unsigned long)addr); + } + return ret; +} + +static inline unsigned int +___sn_inl (unsigned long port) +{ + volatile unsigned int *addr; + unsigned int ret = -1; /* stays all-ones when sn_io_addr() yields no mapping */ + + if ((addr = sn_io_addr(port))) { + ret = *addr; + __sn_mf_a(); + sn_dma_flush((unsigned long)addr); + } + return ret; +} + +static inline void +___sn_outb (unsigned char val, unsigned long port) +{ + volatile unsigned char *addr; + + if ((addr = sn_io_addr(port))) { + *addr = val; + __sn_mmiowb(); + } +} + +static inline void +___sn_outw (unsigned short val, unsigned long port) +{ + volatile unsigned short *addr; + + if ((addr = sn_io_addr(port))) { + *addr = val; + __sn_mmiowb(); + } +} + +static inline void +___sn_outl (unsigned int val, unsigned long port) +{ + volatile unsigned int *addr; + + if ((addr = sn_io_addr(port))) { + *addr = val; + __sn_mmiowb(); + } +} + +/* + * The following routines are SN Platform specific, called when + * a reference is made to readX/writeX set macros. SN Platform + * readX set of macros ensures that Posted DMA writes on the + * Bridge are flushed. + * + * The routines should be self-explanatory.
+ */ + +static inline unsigned char +___sn_readb (const volatile void __iomem *addr) +{ + unsigned char val; + + val = *(volatile unsigned char __force *)addr; + __sn_mf_a(); + sn_dma_flush((unsigned long)addr); + return val; +} + +static inline unsigned short +___sn_readw (const volatile void __iomem *addr) +{ + unsigned short val; + + val = *(volatile unsigned short __force *)addr; + __sn_mf_a(); + sn_dma_flush((unsigned long)addr); + return val; +} + +static inline unsigned int +___sn_readl (const volatile void __iomem *addr) +{ + unsigned int val; + + val = *(volatile unsigned int __force *)addr; + __sn_mf_a(); + sn_dma_flush((unsigned long)addr); + return val; +} + +static inline unsigned long +___sn_readq (const volatile void __iomem *addr) +{ + unsigned long val; + + val = *(volatile unsigned long __force *)addr; + __sn_mf_a(); + sn_dma_flush((unsigned long)addr); + return val; +} + +/* + * For generic and SN2 kernels, we have a set of fast access + * PIO macros. These macros are provided on SN Platform + * because the normal inX and readX macros perform an + * additional task of flushing posted DMA requests on the Bridge. + * + * These routines should be self-explanatory.
+ */ + +static inline unsigned int +sn_inb_fast (unsigned long port) +{ + volatile unsigned char *addr = (unsigned char *)port; + unsigned char ret; + + ret = *addr; + __sn_mf_a(); + return ret; +} + +static inline unsigned int +sn_inw_fast (unsigned long port) +{ + volatile unsigned short *addr = (unsigned short *)port; + unsigned short ret; + + ret = *addr; + __sn_mf_a(); + return ret; +} + +static inline unsigned int +sn_inl_fast (unsigned long port) +{ + volatile unsigned int *addr = (unsigned int *)port; + unsigned int ret; + + ret = *addr; + __sn_mf_a(); + return ret; +} + +static inline unsigned char +___sn_readb_relaxed (const volatile void __iomem *addr) +{ + return *(volatile unsigned char __force *)addr; +} + +static inline unsigned short +___sn_readw_relaxed (const volatile void __iomem *addr) +{ + return *(volatile unsigned short __force *)addr; +} + +static inline unsigned int +___sn_readl_relaxed (const volatile void __iomem *addr) +{ + return *(volatile unsigned int __force *) addr; +} + +static inline unsigned long +___sn_readq_relaxed (const volatile void __iomem *addr) +{ + return *(volatile unsigned long __force *) addr; +} + +struct pci_dev; + +static inline int +sn_pci_set_vchan(struct pci_dev *pci_dev, unsigned long *addr, int vchan) +{ + + if (vchan > 1) { /* NOTE(review): a negative vchan is not rejected and takes the clear-bit path below */ + return -1; + } + + if (!(*addr >> 32)) /* Using a mask here would be cleaner */ + return 0; /* but this generates better code */ + + if (vchan == 1) { + /* Set Bit 57 */ + *addr |= (1UL << 57); + } else { + /* Clear Bit 57 */ + *addr &= ~(1UL << 57); + } + + return 0; +} + +#endif /* _ASM_SN_IO_H */ diff --git a/kernel/arch/ia64/include/asm/sn/ioc3.h b/kernel/arch/ia64/include/asm/sn/ioc3.h new file mode 100644 index 000000000..95ed6cc83 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/ioc3.h @@ -0,0 +1,241 @@ +/* + * Copyright (C) 2005 Silicon Graphics, Inc.
+ */ +#ifndef IA64_SN_IOC3_H +#define IA64_SN_IOC3_H + +/* serial port register map */ +struct ioc3_serialregs { + uint32_t sscr; + uint32_t stpir; + uint32_t stcir; + uint32_t srpir; + uint32_t srcir; + uint32_t srtr; + uint32_t shadow; +}; + +/* SUPERIO uart register map */ +struct ioc3_uartregs { + char iu_lcr; + union { + char iir; /* read only */ + char fcr; /* write only */ + } u3; + union { + char ier; /* DLAB == 0 */ + char dlm; /* DLAB == 1 */ + } u2; + union { + char rbr; /* read only, DLAB == 0 */ + char thr; /* write only, DLAB == 0 */ + char dll; /* DLAB == 1 */ + } u1; + char iu_scr; + char iu_msr; + char iu_lsr; + char iu_mcr; +}; + +#define iu_rbr u1.rbr +#define iu_thr u1.thr +#define iu_dll u1.dll +#define iu_ier u2.ier +#define iu_dlm u2.dlm +#define iu_iir u3.iir +#define iu_fcr u3.fcr + +struct ioc3_sioregs { + char fill[0x170]; + struct ioc3_uartregs uartb; + struct ioc3_uartregs uarta; +}; + +/* PCI IO/mem space register map */ +struct ioc3 { + uint32_t pci_id; + uint32_t pci_scr; + uint32_t pci_rev; + uint32_t pci_lat; + uint32_t pci_addr; + uint32_t pci_err_addr_l; + uint32_t pci_err_addr_h; + + uint32_t sio_ir; + /* these registers are read-only for general kernel code. To + * modify them use the functions in ioc3.c + */ + uint32_t sio_ies; + uint32_t sio_iec; + uint32_t sio_cr; + uint32_t int_out; + uint32_t mcr; + uint32_t gpcr_s; + uint32_t gpcr_c; + uint32_t gpdr; + uint32_t gppr[9]; + char fill[0x4c]; + + /* serial port registers */ + uint32_t sbbr_h; + uint32_t sbbr_l; + + struct ioc3_serialregs port_a; + struct ioc3_serialregs port_b; + char fill1[0x1ff10]; + /* superio registers */ + struct ioc3_sioregs sregs; +}; + +/* These don't exist on the ioc3 serial card... 
*/ +#define eier fill1[8] +#define eisr fill1[4] + +#define PCI_LAT 0xc /* Latency Timer */ +#define PCI_SCR_DROP_MODE_EN 0x00008000 /* drop pios on parity err */ +#define UARTA_BASE 0x178 +#define UARTB_BASE 0x170 + + +/* bitmasks for serial RX status byte */ +#define RXSB_OVERRUN 0x01 /* char(s) lost */ +#define RXSB_PAR_ERR 0x02 /* parity error */ +#define RXSB_FRAME_ERR 0x04 /* framing error */ +#define RXSB_BREAK 0x08 /* break character */ +#define RXSB_CTS 0x10 /* state of CTS */ +#define RXSB_DCD 0x20 /* state of DCD */ +#define RXSB_MODEM_VALID 0x40 /* DCD, CTS and OVERRUN are valid */ +#define RXSB_DATA_VALID 0x80 /* FRAME_ERR PAR_ERR & BREAK valid */ + +/* bitmasks for serial TX control byte */ +#define TXCB_INT_WHEN_DONE 0x20 /* interrupt after this byte is sent */ +#define TXCB_INVALID 0x00 /* byte is invalid */ +#define TXCB_VALID 0x40 /* byte is valid */ +#define TXCB_MCR 0x80 /* data<7:0> to modem cntrl register */ +#define TXCB_DELAY 0xc0 /* delay data<7:0> mSec */ + +/* bitmasks for SBBR_L */ +#define SBBR_L_SIZE 0x00000001 /* 0 1KB rings, 1 4KB rings */ + +/* bitmasks for SSCR_ */ +#define SSCR_RX_THRESHOLD 0x000001ff /* hiwater mark */ +#define SSCR_TX_TIMER_BUSY 0x00010000 /* TX timer in progress */ +#define SSCR_HFC_EN 0x00020000 /* h/w flow cntrl enabled */ +#define SSCR_RX_RING_DCD 0x00040000 /* postRX record on delta-DCD */ +#define SSCR_RX_RING_CTS 0x00080000 /* postRX record on delta-CTS */ +#define SSCR_HIGH_SPD 0x00100000 /* 4X speed */ +#define SSCR_DIAG 0x00200000 /* bypass clock divider */ +#define SSCR_RX_DRAIN 0x08000000 /* drain RX buffer to memory */ +#define SSCR_DMA_EN 0x10000000 /* enable ring buffer DMA */ +#define SSCR_DMA_PAUSE 0x20000000 /* pause DMA */ +#define SSCR_PAUSE_STATE 0x40000000 /* set when PAUSE takes effect*/ +#define SSCR_RESET 0x80000000 /* reset DMA channels */ + +/* all producer/consumer pointers are the same bitfield */ +#define PROD_CONS_PTR_4K 0x00000ff8 /* for 4K buffers */ +#define PROD_CONS_PTR_1K
0x000003f8 /* for 1K buffers */ +#define PROD_CONS_PTR_OFF 3 + +/* bitmasks for SRCIR_ */ +#define SRCIR_ARM 0x80000000 /* arm RX timer */ + +/* bitmasks for SHADOW_ */ +#define SHADOW_DR 0x00000001 /* data ready */ +#define SHADOW_OE 0x00000002 /* overrun error */ +#define SHADOW_PE 0x00000004 /* parity error */ +#define SHADOW_FE 0x00000008 /* framing error */ +#define SHADOW_BI 0x00000010 /* break interrupt */ +#define SHADOW_THRE 0x00000020 /* transmit holding reg empty */ +#define SHADOW_TEMT 0x00000040 /* transmit shift reg empty */ +#define SHADOW_RFCE 0x00000080 /* char in RX fifo has error */ +#define SHADOW_DCTS 0x00010000 /* delta clear to send */ +#define SHADOW_DDCD 0x00080000 /* delta data carrier detect */ +#define SHADOW_CTS 0x00100000 /* clear to send */ +#define SHADOW_DCD 0x00800000 /* data carrier detect */ +#define SHADOW_DTR 0x01000000 /* data terminal ready */ +#define SHADOW_RTS 0x02000000 /* request to send */ +#define SHADOW_OUT1 0x04000000 /* 16550 OUT1 bit */ +#define SHADOW_OUT2 0x08000000 /* 16550 OUT2 bit */ +#define SHADOW_LOOP 0x10000000 /* loopback enabled */ + +/* bitmasks for SRTR_ */ +#define SRTR_CNT 0x00000fff /* reload value for RX timer */ +#define SRTR_CNT_VAL 0x0fff0000 /* current value of RX timer */ +#define SRTR_CNT_VAL_SHIFT 16 +#define SRTR_HZ 16000 /* SRTR clock frequency */ + +/* bitmasks for SIO_IR, SIO_IEC and SIO_IES */ +#define SIO_IR_SA_TX_MT 0x00000001 /* Serial port A TX empty */ +#define SIO_IR_SA_RX_FULL 0x00000002 /* port A RX buf full */ +#define SIO_IR_SA_RX_HIGH 0x00000004 /* port A RX hiwat */ +#define SIO_IR_SA_RX_TIMER 0x00000008 /* port A RX timeout */ +#define SIO_IR_SA_DELTA_DCD 0x00000010 /* port A delta DCD */ +#define SIO_IR_SA_DELTA_CTS 0x00000020 /* port A delta CTS */ +#define SIO_IR_SA_INT 0x00000040 /* port A pass-thru intr */ +#define SIO_IR_SA_TX_EXPLICIT 0x00000080 /* port A explicit TX thru */ +#define SIO_IR_SA_MEMERR 0x00000100 /* port A PCI error */ +#define SIO_IR_SB_TX_MT 
0x00000200 +#define SIO_IR_SB_RX_FULL 0x00000400 +#define SIO_IR_SB_RX_HIGH 0x00000800 +#define SIO_IR_SB_RX_TIMER 0x00001000 +#define SIO_IR_SB_DELTA_DCD 0x00002000 +#define SIO_IR_SB_DELTA_CTS 0x00004000 +#define SIO_IR_SB_INT 0x00008000 +#define SIO_IR_SB_TX_EXPLICIT 0x00010000 +#define SIO_IR_SB_MEMERR 0x00020000 +#define SIO_IR_PP_INT 0x00040000 /* P port pass-thru intr */ +#define SIO_IR_PP_INTA 0x00080000 /* PP context A thru */ +#define SIO_IR_PP_INTB 0x00100000 /* PP context B thru */ +#define SIO_IR_PP_MEMERR 0x00200000 /* PP PCI error */ +#define SIO_IR_KBD_INT 0x00400000 /* kbd/mouse intr */ +#define SIO_IR_RT_INT 0x08000000 /* RT output pulse */ +#define SIO_IR_GEN_INT1 0x10000000 /* RT input pulse */ +#define SIO_IR_GEN_INT_SHIFT 28 + +/* per device interrupt masks */ +#define SIO_IR_SA (SIO_IR_SA_TX_MT | \ + SIO_IR_SA_RX_FULL | \ + SIO_IR_SA_RX_HIGH | \ + SIO_IR_SA_RX_TIMER | \ + SIO_IR_SA_DELTA_DCD | \ + SIO_IR_SA_DELTA_CTS | \ + SIO_IR_SA_INT | \ + SIO_IR_SA_TX_EXPLICIT | \ + SIO_IR_SA_MEMERR) + +#define SIO_IR_SB (SIO_IR_SB_TX_MT | \ + SIO_IR_SB_RX_FULL | \ + SIO_IR_SB_RX_HIGH | \ + SIO_IR_SB_RX_TIMER | \ + SIO_IR_SB_DELTA_DCD | \ + SIO_IR_SB_DELTA_CTS | \ + SIO_IR_SB_INT | \ + SIO_IR_SB_TX_EXPLICIT | \ + SIO_IR_SB_MEMERR) + +#define SIO_IR_PP (SIO_IR_PP_INT | SIO_IR_PP_INTA | \ + SIO_IR_PP_INTB | SIO_IR_PP_MEMERR) +#define SIO_IR_RT (SIO_IR_RT_INT | SIO_IR_GEN_INT1) + +/* bitmasks for SIO_CR */ +#define SIO_CR_CMD_PULSE_SHIFT 15 +#define SIO_CR_SER_A_BASE_SHIFT 1 +#define SIO_CR_SER_B_BASE_SHIFT 8 +#define SIO_CR_ARB_DIAG 0x00380000 /* cur !enet PCI request (ro) */ +#define SIO_CR_ARB_DIAG_TXA 0x00000000 +#define SIO_CR_ARB_DIAG_RXA 0x00080000 +#define SIO_CR_ARB_DIAG_TXB 0x00100000 +#define SIO_CR_ARB_DIAG_RXB 0x00180000 +#define SIO_CR_ARB_DIAG_PP 0x00200000 +#define SIO_CR_ARB_DIAG_IDLE 0x00400000 /* 0 -> active request (ro) */ + +/* defs for some of the generic I/O pins */ +#define GPCR_PHY_RESET 0x20 /* pin is output to PHY reset */ +#define
GPCR_UARTB_MODESEL 0x40 /* pin is output to port B mode sel */ +#define GPCR_UARTA_MODESEL 0x80 /* pin is output to port A mode sel */ + +#define GPPR_PHY_RESET_PIN 5 /* GIO pin controlling phy reset */ +#define GPPR_UARTB_MODESEL_PIN 6 /* GIO pin cntrling uartb modeselect */ +#define GPPR_UARTA_MODESEL_PIN 7 /* GIO pin cntrling uarta modeselect */ + +#endif /* IA64_SN_IOC3_H */ diff --git a/kernel/arch/ia64/include/asm/sn/klconfig.h b/kernel/arch/ia64/include/asm/sn/klconfig.h new file mode 100644 index 000000000..bcbf209d6 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/klconfig.h @@ -0,0 +1,246 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Derived from IRIX . + * + * Copyright (C) 1992-1997,1999,2001-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (C) 1999 by Ralf Baechle + */ +#ifndef _ASM_IA64_SN_KLCONFIG_H +#define _ASM_IA64_SN_KLCONFIG_H + +/* + * The KLCONFIG structures store info about the various BOARDs found + * during Hardware Discovery. In addition, it stores info about the + * components found on the BOARDs. + */ + +typedef s32 klconf_off_t; + + +/* Functions/macros needed to use this structure */ + +typedef struct kl_config_hdr { + char pad[20]; + klconf_off_t ch_board_info; /* the link list of boards */ + char pad0[88]; +} kl_config_hdr_t; + + +#define NODE_OFFSET_TO_LBOARD(nasid,off) ((lboard_t*)(GLOBAL_CAC_ADDR((nasid), (off)))) /* outer parens keep the cast intact before a postfix operator such as -> */ + +/* + * The KLCONFIG area is organized as a LINKED LIST of BOARDs. A BOARD + * can be either 'LOCAL' or 'REMOTE'. LOCAL means it is attached to + * the LOCAL/current NODE. REMOTE means it is attached to a different + * node.(TBD - Need a way to treat ROUTER boards.) + * + * There are 2 different structures to represent these boards - + * lboard - Local board, rboard - remote board. These 2 structures + * can be arbitrarily mixed in the LINKED LIST of BOARDs.
(Refer + * Figure below). The first byte of the rboard or lboard structure + * is used to find out its type - no unions are used. + * If it is a lboard, then the config info of this board will be found + * on the local node. (LOCAL NODE BASE + offset value gives pointer to + * the structure. + * If it is a rboard, the local structure contains the node number + * and the offset of the beginning of the LINKED LIST on the remote node. + * The details of the hardware on a remote node can be built locally, + * if required, by reading the LINKED LIST on the remote node and + * ignoring all the rboards on that node. + * + * The local node uses the REMOTE NODE NUMBER + OFFSET to point to the + * First board info on the remote node. The remote node list is + * traversed as the local list, using the REMOTE BASE ADDRESS and not + * the local base address and ignoring all rboard values. + * + * + KLCONFIG + + +------------+ +------------+ +------------+ +------------+ + | lboard | +-->| lboard | +-->| rboard | +-->| lboard | + +------------+ | +------------+ | +------------+ | +------------+ + | board info | | | board info | | |errinfo,bptr| | | board info | + +------------+ | +------------+ | +------------+ | +------------+ + | offset |--+ | offset |--+ | offset |--+ |offset=NULL | + +------------+ +------------+ +------------+ +------------+ + + + +------------+ + | board info | + +------------+ +--------------------------------+ + | compt 1 |------>| type, rev, diaginfo, size ... | (CPU) + +------------+ +--------------------------------+ + | compt 2 |--+ + +------------+ | +--------------------------------+ + | ... | +--->| type, rev, diaginfo, size ... 
| (MEM_BANK) + +------------+ +--------------------------------+ + | errinfo |--+ + +------------+ | +--------------------------------+ + +--->|r/l brd errinfo,compt err flags | + +--------------------------------+ + + * + * Each BOARD consists of COMPONENTs and the BOARD structure has + * pointers (offsets) to its COMPONENT structure. + * The COMPONENT structure has version info, size and speed info, revision, + * error info and the NIC info. This structure can accommodate any + * BOARD with arbitrary COMPONENT composition. + * + * The ERRORINFO part of each BOARD has error information + * that describes errors about the BOARD itself. It also has flags to + * indicate the COMPONENT(s) on the board that have errors. The error + * information specific to the COMPONENT is present in the respective + * COMPONENT structure. + * + * The ERRORINFO structure is also treated like a COMPONENT, ie. the + * BOARD has pointers(offset) to the ERRORINFO structure. The rboard + * structure also has a pointer to the ERRORINFO structure. This is + * the place to store ERRORINFO about a REMOTE NODE, if the HUB on + * that NODE is not working or if the REMOTE MEMORY is BAD. In cases where + * only the CPU of the REMOTE NODE is disabled, the ERRORINFO pointer can + * be a NODE NUMBER, REMOTE OFFSET combination, pointing to error info + * which is present on the REMOTE NODE.(TBD) + * REMOTE ERRINFO can be stored on any of the nearest nodes + * or on all the nearest nodes.(TBD) + * Like BOARD structures, REMOTE ERRINFO structures can be built locally + * using the rboard errinfo pointer. + * + * In order to get useful information from this Data organization, a set of + * interface routines are provided (TBD). The important thing to remember while + * manipulating the structures, is that, the NODE number information should + * be used. If the NODE is non-zero (remote) then each offset should + * be added to the REMOTE BASE ADDR else it should be added to the LOCAL BASE ADDR. 
+ * This includes offsets for BOARDS, COMPONENTS and ERRORINFO. + * + * Note that these structures do not provide much info about connectivity. + * That info will be part of HWGRAPH, which is an extension of the cfg_t + * data structure. (ref IP27prom/cfg.h) It has to be extended to include + * the IO part of the Network(TBD). + * + * The data structures below define the above concepts. + */ + + +/* + * BOARD classes + */ + +#define KLCLASS_MASK 0xf0 +#define KLCLASS_NONE 0x00 +#define KLCLASS_NODE 0x10 /* CPU, Memory and HUB board */ +#define KLCLASS_CPU KLCLASS_NODE +#define KLCLASS_IO 0x20 /* BaseIO, 4 ch SCSI, ethernet, FDDI + and the non-graphics widget boards */ +#define KLCLASS_ROUTER 0x30 /* Router board */ +#define KLCLASS_MIDPLANE 0x40 /* We need to treat this as a board + so that we can record error info */ +#define KLCLASS_IOBRICK 0x70 /* IP35 iobrick */ +#define KLCLASS_MAX 8 /* Bump this if a new CLASS is added */ + +#define KLCLASS(_x) ((_x) & KLCLASS_MASK) + + +/* + * board types + */ + +#define KLTYPE_MASK 0x0f +#define KLTYPE(_x) ((_x) & KLTYPE_MASK) + +#define KLTYPE_SNIA (KLCLASS_CPU | 0x1) +#define KLTYPE_TIO (KLCLASS_CPU | 0x2) + +#define KLTYPE_ROUTER (KLCLASS_ROUTER | 0x1) +#define KLTYPE_META_ROUTER (KLCLASS_ROUTER | 0x3) +#define KLTYPE_REPEATER_ROUTER (KLCLASS_ROUTER | 0x4) + +#define KLTYPE_IOBRICK_XBOW (KLCLASS_MIDPLANE | 0x2) + +#define KLTYPE_IOBRICK (KLCLASS_IOBRICK | 0x0) +#define KLTYPE_NBRICK (KLCLASS_IOBRICK | 0x4) +#define KLTYPE_PXBRICK (KLCLASS_IOBRICK | 0x6) +#define KLTYPE_IXBRICK (KLCLASS_IOBRICK | 0x7) +#define KLTYPE_CGBRICK (KLCLASS_IOBRICK | 0x8) +#define KLTYPE_OPUSBRICK (KLCLASS_IOBRICK | 0x9) +#define KLTYPE_SABRICK (KLCLASS_IOBRICK | 0xa) +#define KLTYPE_IABRICK (KLCLASS_IOBRICK | 0xb) +#define KLTYPE_PABRICK (KLCLASS_IOBRICK | 0xc) +#define KLTYPE_GABRICK (KLCLASS_IOBRICK | 0xd) + + +/* + * board structures + */ + +#define MAX_COMPTS_PER_BRD 24 + +typedef struct lboard_s { + klconf_off_t brd_next_any; /* Next BOARD 
*/ + unsigned char struct_type; /* type of structure, local or remote */ + unsigned char brd_type; /* type+class */ + unsigned char brd_sversion; /* version of this structure */ + unsigned char brd_brevision; /* board revision */ + unsigned char brd_promver; /* board prom version, if any */ + unsigned char brd_flags; /* Enabled, Disabled etc */ + unsigned char brd_slot; /* slot number */ + unsigned short brd_debugsw; /* Debug switches */ + geoid_t brd_geoid; /* geo id */ + partid_t brd_partition; /* Partition number */ + unsigned short brd_diagval; /* diagnostic value */ + unsigned short brd_diagparm; /* diagnostic parameter */ + unsigned char brd_inventory; /* inventory history */ + unsigned char brd_numcompts; /* Number of components */ + nic_t brd_nic; /* Number in CAN */ + nasid_t brd_nasid; /* passed parameter */ + klconf_off_t brd_compts[MAX_COMPTS_PER_BRD]; /* pointers to COMPONENTS */ + klconf_off_t brd_errinfo; /* Board's error information */ + struct lboard_s *brd_parent; /* Logical parent for this brd */ + char pad0[4]; + unsigned char brd_confidence; /* confidence that the board is bad */ + nasid_t brd_owner; /* who owns this board */ + unsigned char brd_nic_flags; /* To handle 8 more NICs */ + char pad1[24]; /* future expansion */ + char brd_name[32]; + nasid_t brd_next_same_host; /* host of next brd w/same nasid */ + klconf_off_t brd_next_same; /* Next BOARD with same nasid */ +} lboard_t; + +/* + * Generic info structure. This stores common info about a + * component. 
+ */ + +typedef struct klinfo_s { /* Generic info */ + unsigned char struct_type; /* type of this structure */ + unsigned char struct_version; /* version of this structure */ + unsigned char flags; /* Enabled, disabled etc */ + unsigned char revision; /* component revision */ + unsigned short diagval; /* result of diagnostics */ + unsigned short diagparm; /* diagnostic parameter */ + unsigned char inventory; /* previous inventory status */ + unsigned short partid; /* widget part number */ + nic_t nic; /* MUst be aligned properly */ + unsigned char physid; /* physical id of component */ + unsigned int virtid; /* virtual id as seen by system */ + unsigned char widid; /* Widget id - if applicable */ + nasid_t nasid; /* node number - from parent */ + char pad1; /* pad out structure. */ + char pad2; /* pad out structure. */ + void *data; + klconf_off_t errinfo; /* component specific errors */ + unsigned short pad3; /* pci fields have moved over to */ + unsigned short pad4; /* klbri_t */ +} klinfo_t ; + + +static inline lboard_t *find_lboard_next(lboard_t * brd) +{ + if (brd && brd->brd_next_any) + return NODE_OFFSET_TO_LBOARD(NASID_GET(brd), brd->brd_next_any); + return NULL; +} + +#endif /* _ASM_IA64_SN_KLCONFIG_H */ diff --git a/kernel/arch/ia64/include/asm/sn/l1.h b/kernel/arch/ia64/include/asm/sn/l1.h new file mode 100644 index 000000000..344bf44bb --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/l1.h @@ -0,0 +1,51 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc. All Rights Reserved. 
+ */ + +#ifndef _ASM_IA64_SN_L1_H +#define _ASM_IA64_SN_L1_H + +/* brick type response codes */ +#define L1_BRICKTYPE_PX 0x23 /* # */ +#define L1_BRICKTYPE_PE 0x25 /* % */ +#define L1_BRICKTYPE_N_p0 0x26 /* & */ +#define L1_BRICKTYPE_IP45 0x34 /* 4 */ +#define L1_BRICKTYPE_IP41 0x35 /* 5 */ +#define L1_BRICKTYPE_TWISTER 0x36 /* 6 */ /* IP53 & ROUTER */ +#define L1_BRICKTYPE_IX 0x3d /* = */ +#define L1_BRICKTYPE_IP34 0x61 /* a */ +#define L1_BRICKTYPE_GA 0x62 /* b */ +#define L1_BRICKTYPE_C 0x63 /* c */ +#define L1_BRICKTYPE_OPUS_TIO 0x66 /* f */ +#define L1_BRICKTYPE_I 0x69 /* i */ +#define L1_BRICKTYPE_N 0x6e /* n */ +#define L1_BRICKTYPE_OPUS 0x6f /* o */ +#define L1_BRICKTYPE_P 0x70 /* p */ +#define L1_BRICKTYPE_R 0x72 /* r */ +#define L1_BRICKTYPE_CHI_CG 0x76 /* v */ +#define L1_BRICKTYPE_X 0x78 /* x */ +#define L1_BRICKTYPE_X2 0x79 /* y */ +#define L1_BRICKTYPE_SA 0x5e /* ^ */ +#define L1_BRICKTYPE_PA 0x6a /* j */ +#define L1_BRICKTYPE_IA 0x6b /* k */ +#define L1_BRICKTYPE_ATHENA 0x2b /* + */ +#define L1_BRICKTYPE_DAYTONA 0x7a /* z */ +#define L1_BRICKTYPE_1932 0x2c /* , */ +#define L1_BRICKTYPE_191010 0x2e /* . */ + +/* board type response codes */ +#define L1_BOARDTYPE_IP69 0x0100 /* CA */ +#define L1_BOARDTYPE_IP63 0x0200 /* CB */ +#define L1_BOARDTYPE_BASEIO 0x0300 /* IB */ +#define L1_BOARDTYPE_PCIE2SLOT 0x0400 /* IC */ +#define L1_BOARDTYPE_PCIX3SLOT 0x0500 /* ID */ +#define L1_BOARDTYPE_PCIXPCIE4SLOT 0x0600 /* IE */ +#define L1_BOARDTYPE_ABACUS 0x0700 /* AB */ +#define L1_BOARDTYPE_DAYTONA 0x0800 /* AD */ +#define L1_BOARDTYPE_INVAL (-1) /* invalid board type */ + +#endif /* _ASM_IA64_SN_L1_H */ diff --git a/kernel/arch/ia64/include/asm/sn/leds.h b/kernel/arch/ia64/include/asm/sn/leds.h new file mode 100644 index 000000000..66cf8c4d9 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/leds.h @@ -0,0 +1,33 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License.
See the file "COPYING" in the main directory of this archive + * for more details. + * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_LEDS_H +#define _ASM_IA64_SN_LEDS_H + +#include <asm/sn/addrs.h> +#include <asm/sn/pda.h> +#include <asm/sn/shub_mmr.h> + +#define LED0 (LOCAL_MMR_ADDR(SH_REAL_JUNK_BUS_LED0)) +#define LED_CPU_SHIFT 16 + +#define LED_CPU_HEARTBEAT 0x01 +#define LED_CPU_ACTIVITY 0x02 +#define LED_ALWAYS_SET 0x00 + +/* + * Basic macros for flashing the LEDS on an SGI SN. + */ + +static __inline__ void +set_led_bits(u8 value, u8 mask) +{ + pda->led_state = (pda->led_state & ~mask) | (value & mask); + *pda->led_address = (short) pda->led_state; +} + +#endif /* _ASM_IA64_SN_LEDS_H */ + diff --git a/kernel/arch/ia64/include/asm/sn/module.h b/kernel/arch/ia64/include/asm/sn/module.h new file mode 100644 index 000000000..734e980ec --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/module.h @@ -0,0 +1,127 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_MODULE_H +#define _ASM_IA64_SN_MODULE_H + +/* parameter for format_module_id() */ +#define MODULE_FORMAT_BRIEF 1 +#define MODULE_FORMAT_LONG 2 +#define MODULE_FORMAT_LCD 3 + +/* + * Module id format + * + * 31-16 Rack ID (encoded class, group, number - 16-bit unsigned int) + * 15-8 Brick type (8-bit ascii character) + * 7-0 Bay (brick position in rack (0-63) - 8-bit unsigned int) + * + */ + +/* + * Macros for getting the brick type + */ +#define MODULE_BTYPE_MASK 0xff00 +#define MODULE_BTYPE_SHFT 8 +#define MODULE_GET_BTYPE(_m) (((_m) & MODULE_BTYPE_MASK) >> MODULE_BTYPE_SHFT) +#define MODULE_BT_TO_CHAR(_b) ((char)(_b)) +#define MODULE_GET_BTCHAR(_m) (MODULE_BT_TO_CHAR(MODULE_GET_BTYPE(_m))) + +/* + * Macros for getting the rack ID.
+ */ +#define MODULE_RACK_MASK 0xffff0000 +#define MODULE_RACK_SHFT 16 +#define MODULE_GET_RACK(_m) (((_m) & MODULE_RACK_MASK) >> MODULE_RACK_SHFT) + +/* + * Macros for getting the brick position + */ +#define MODULE_BPOS_MASK 0x00ff +#define MODULE_BPOS_SHFT 0 +#define MODULE_GET_BPOS(_m) (((_m) & MODULE_BPOS_MASK) >> MODULE_BPOS_SHFT) + +/* + * Macros for encoding and decoding rack IDs + * A rack number consists of three parts: + * class (0==CPU/mixed, 1==I/O), group, number + * + * Rack number is stored just as it is displayed on the screen: + * a 3-decimal-digit number. + */ +#define RACK_CLASS_DVDR 100 +#define RACK_GROUP_DVDR 10 +#define RACK_NUM_DVDR 1 + +#define RACK_CREATE_RACKID(_c, _g, _n) ((_c) * RACK_CLASS_DVDR + \ + (_g) * RACK_GROUP_DVDR + (_n) * RACK_NUM_DVDR) + +#define RACK_GET_CLASS(_r) ((_r) / RACK_CLASS_DVDR) +#define RACK_GET_GROUP(_r) (((_r) - RACK_GET_CLASS(_r) * \ + RACK_CLASS_DVDR) / RACK_GROUP_DVDR) +#define RACK_GET_NUM(_r) (((_r) - RACK_GET_CLASS(_r) * \ + RACK_CLASS_DVDR - RACK_GET_GROUP(_r) * \ + RACK_GROUP_DVDR) / RACK_NUM_DVDR) + +/* + * Macros for encoding and decoding rack IDs + * A rack number consists of three parts: + * class 1 bit, 0==CPU/mixed, 1==I/O + * group 2 bits for CPU/mixed, 3 bits for I/O + * number 3 bits for CPU/mixed, 2 bits for I/O (1 based) + */ +#define RACK_GROUP_BITS(_r) (RACK_GET_CLASS(_r) ? 3 : 2) +#define RACK_NUM_BITS(_r) (RACK_GET_CLASS(_r) ? 2 : 3) + +#define RACK_CLASS_MASK(_r) 0x20 +#define RACK_CLASS_SHFT(_r) 5 +#define RACK_ADD_CLASS(_r, _c) \ + ((_r) |= (_c) << RACK_CLASS_SHFT(_r) & RACK_CLASS_MASK(_r)) + +#define RACK_GROUP_SHFT(_r) RACK_NUM_BITS(_r) +#define RACK_GROUP_MASK(_r) \ + ( (((unsigned)1< +#include +#include +#include + +/* + * NUMA Node-Specific Data structures are defined in this file. + * In particular, this is the location of the node PDA. + * A pointer to the right node PDA is saved in each CPU PDA. + */ + +/* + * Node-specific data structure. 
+ * + * One of these structures is allocated on each node of a NUMA system. + * + * This structure provides a convenient way of keeping together + * all per-node data structures. + */ +struct phys_cpuid { + short nasid; + char subnode; + char slice; +}; + +struct nodepda_s { + void *pdinfo; /* Platform-dependent per-node info */ + + /* + * The BTEs on this node are shared by the local cpus + */ + struct bteinfo_s bte_if[MAX_BTES_PER_NODE]; /* Virtual Interface */ + struct timer_list bte_recovery_timer; + spinlock_t bte_recovery_lock; + + /* + * Array of pointers to the nodepdas for each node. + */ + struct nodepda_s *pernode_pdaindr[MAX_COMPACT_NODES]; + + /* + * Array of physical cpu identifiers. Indexed by cpuid. + */ + struct phys_cpuid phys_cpuid[NR_CPUS]; + spinlock_t ptc_lock ____cacheline_aligned_in_smp; +}; + +typedef struct nodepda_s nodepda_t; + +/* + * Access Functions for node PDA. + * Since there is one nodepda for each node, we need a convenient mechanism + * to access these nodepdas without cluttering code with #ifdefs. + * The next set of definitions provides this. + * Routines are expected to use + * + * sn_nodepda - to access node PDA for the node on which code is running + * NODEPDA(cnodeid) - to access node PDA for cnodeid + */ + +DECLARE_PER_CPU(struct nodepda_s *, __sn_nodepda); +#define sn_nodepda __this_cpu_read(__sn_nodepda) +#define NODEPDA(cnodeid) (sn_nodepda->pernode_pdaindr[cnodeid]) + +/* + * Check if given a compact node id the corresponding node has all the + * cpus disabled. + */ +#define is_headless_node(cnodeid) (nr_cpus_node(cnodeid) == 0) + +#endif /* _ASM_IA64_SN_NODEPDA_H */ diff --git a/kernel/arch/ia64/include/asm/sn/pcibr_provider.h b/kernel/arch/ia64/include/asm/sn/pcibr_provider.h new file mode 100644 index 000000000..da205b7cd --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/pcibr_provider.h @@ -0,0 +1,150 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. 
See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992-1997,2000-2006 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H +#define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H + +#include <asm/sn/intr.h> +#include <asm/sn/pcibus_provider_defs.h> + +/* Workarounds */ +#define PV907516 (1 << 1) /* TIOCP: Don't write the write buffer flush reg */ + +#define BUSTYPE_MASK 0x1 + +/* Macros given a pcibus structure */ +#define IS_PCIX(ps) ((ps)->pbi_bridge_mode & BUSTYPE_MASK) +#define IS_PCI_BRIDGE_ASIC(asic) (asic == PCIIO_ASIC_TYPE_PIC || \ + asic == PCIIO_ASIC_TYPE_TIOCP) +#define IS_PIC_SOFT(ps) (ps->pbi_bridge_type == PCIBR_BRIDGETYPE_PIC) +#define IS_TIOCP_SOFT(ps) (ps->pbi_bridge_type == PCIBR_BRIDGETYPE_TIOCP) + + +/* + * The different PCI Bridge types supported on the SGI Altix platforms + */ +#define PCIBR_BRIDGETYPE_UNKNOWN -1 +#define PCIBR_BRIDGETYPE_PIC 2 +#define PCIBR_BRIDGETYPE_TIOCP 3 + +/* + * Bridge 64bit Direct Map Attributes + */ +#define PCI64_ATTR_PREF (1ull << 59) +#define PCI64_ATTR_PREC (1ull << 58) +#define PCI64_ATTR_VIRTUAL (1ull << 57) +#define PCI64_ATTR_BAR (1ull << 56) +#define PCI64_ATTR_SWAP (1ull << 55) +#define PCI64_ATTR_VIRTUAL1 (1ull << 54) + +#define PCI32_LOCAL_BASE 0 +#define PCI32_MAPPED_BASE 0x40000000 +#define PCI32_DIRECT_BASE 0x80000000 + +#define IS_PCI32_MAPPED(x) ((u64)(x) < PCI32_DIRECT_BASE && \ + (u64)(x) >= PCI32_MAPPED_BASE) +#define IS_PCI32_DIRECT(x) ((u64)(x) >= PCI32_MAPPED_BASE) + + +/* + * Bridge PMU Address Translation Entry Attributes + */ +#define PCI32_ATE_V (0x1 << 0) +#define PCI32_ATE_CO (0x1 << 1) /* PIC ASIC ONLY */ +#define PCI32_ATE_PIO (0x1 << 1) /* TIOCP ASIC ONLY */ +#define PCI32_ATE_MSI (0x1 << 2) +#define PCI32_ATE_PREF (0x1 << 3) +#define PCI32_ATE_BAR (0x1 << 4) +#define PCI32_ATE_ADDR_SHFT 12 + +#define MINIMAL_ATES_REQUIRED(addr, size) \ + (IOPG(IOPGOFF(addr) + (size) - 1) == IOPG((size) - 1)) + +#define MINIMAL_ATE_FLAG(addr, size) \ +
(MINIMAL_ATES_REQUIRED((u64)addr, size) ? 1 : 0) + +/* bit 29 of the pci address is the SWAP bit */ +#define ATE_SWAPSHIFT 29 +#define ATE_SWAP_ON(x) ((x) |= (1 << ATE_SWAPSHIFT)) +#define ATE_SWAP_OFF(x) ((x) &= ~(1 << ATE_SWAPSHIFT)) + +/* + * I/O page size + */ +#if PAGE_SIZE < 16384 +#define IOPFNSHIFT 12 /* 4K per mapped page */ +#else +#define IOPFNSHIFT 14 /* 16K per mapped page */ +#endif + +#define IOPGSIZE (1 << IOPFNSHIFT) +#define IOPG(x) ((x) >> IOPFNSHIFT) +#define IOPGOFF(x) ((x) & (IOPGSIZE-1)) + +#define PCIBR_DEV_SWAP_DIR (1ull << 19) +#define PCIBR_CTRL_PAGE_SIZE (0x1 << 21) + +/* + * PMU resources. + */ +struct ate_resource{ + u64 *ate; + u64 num_ate; + u64 lowest_free_index; +}; + +struct pcibus_info { + struct pcibus_bussoft pbi_buscommon; /* common header */ + u32 pbi_moduleid; + short pbi_bridge_type; + short pbi_bridge_mode; + + struct ate_resource pbi_int_ate_resource; + u64 pbi_int_ate_size; + + u64 pbi_dir_xbase; + char pbi_hub_xid; + + u64 pbi_devreg[8]; + + u32 pbi_valid_devices; + u32 pbi_enabled_devices; + + spinlock_t pbi_lock; +}; + +extern int pcibr_init_provider(void); +extern void *pcibr_bus_fixup(struct pcibus_bussoft *, struct pci_controller *); +extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t, int type); +extern dma_addr_t pcibr_dma_map_consistent(struct pci_dev *, unsigned long, size_t, int type); +extern void pcibr_dma_unmap(struct pci_dev *, dma_addr_t, int); + +/* + * prototypes for the bridge asic register access routines in pcibr_reg.c + */ +extern void pcireg_control_bit_clr(struct pcibus_info *, u64); +extern void pcireg_control_bit_set(struct pcibus_info *, u64); +extern u64 pcireg_tflush_get(struct pcibus_info *); +extern u64 pcireg_intr_status_get(struct pcibus_info *); +extern void pcireg_intr_enable_bit_clr(struct pcibus_info *, u64); +extern void pcireg_intr_enable_bit_set(struct pcibus_info *, u64); +extern void pcireg_intr_addr_addr_set(struct pcibus_info *, int, u64); +extern void 
pcireg_force_intr_set(struct pcibus_info *, int); +extern u64 pcireg_wrb_flush_get(struct pcibus_info *, int); +extern void pcireg_int_ate_set(struct pcibus_info *, int, u64); +extern u64 __iomem * pcireg_int_ate_addr(struct pcibus_info *, int); +extern void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info); +extern void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info); +extern int pcibr_ate_alloc(struct pcibus_info *, int); +extern void pcibr_ate_free(struct pcibus_info *, int); +extern void ate_write(struct pcibus_info *, int, int, u64); +extern int sal_pcibr_slot_enable(struct pcibus_info *soft, int device, + void *resp, char **ssdt); +extern int sal_pcibr_slot_disable(struct pcibus_info *soft, int device, + int action, void *resp); +extern u16 sn_ioboard_to_pci_bus(struct pci_bus *pci_bus); +#endif diff --git a/kernel/arch/ia64/include/asm/sn/pcibus_provider_defs.h b/kernel/arch/ia64/include/asm/sn/pcibus_provider_defs.h new file mode 100644 index 000000000..8f7c83d0f --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/pcibus_provider_defs.h @@ -0,0 +1,68 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H +#define _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H + +/* + * SN pci asic types. Do not ever renumber these or reuse values. The + * values must agree with what prom thinks they are. + */ + +#define PCIIO_ASIC_TYPE_UNKNOWN 0 +#define PCIIO_ASIC_TYPE_PPB 1 +#define PCIIO_ASIC_TYPE_PIC 2 +#define PCIIO_ASIC_TYPE_TIOCP 3 +#define PCIIO_ASIC_TYPE_TIOCA 4 +#define PCIIO_ASIC_TYPE_TIOCE 5 + +#define PCIIO_ASIC_MAX_TYPES 6 + +/* + * Common pciio bus provider data. There should be one of these as the + * first field in any pciio based provider soft structure (e.g. pcibr_soft + * tioca_soft, etc). 
+ */ + +struct pcibus_bussoft { + u32 bs_asic_type; /* chipset type */ + u32 bs_xid; /* xwidget id */ + u32 bs_persist_busnum; /* Persistent Bus Number */ + u32 bs_persist_segment; /* Segment Number */ + u64 bs_legacy_io; /* legacy io pio addr */ + u64 bs_legacy_mem; /* legacy mem pio addr */ + u64 bs_base; /* widget base */ + struct xwidget_info *bs_xwidget_info; +}; + +struct pci_controller; +/* + * SN pci bus indirection + */ + +struct sn_pcibus_provider { + dma_addr_t (*dma_map)(struct pci_dev *, unsigned long, size_t, int flags); + dma_addr_t (*dma_map_consistent)(struct pci_dev *, unsigned long, size_t, int flags); + void (*dma_unmap)(struct pci_dev *, dma_addr_t, int); + void * (*bus_fixup)(struct pcibus_bussoft *, struct pci_controller *); + void (*force_interrupt)(struct sn_irq_info *); + void (*target_interrupt)(struct sn_irq_info *); +}; + +/* + * Flags used by the map interfaces + * bits 3:0 specifies format of passed in address + * bit 4 specifies that address is to be used for MSI + */ + +#define SN_DMA_ADDRTYPE(x) ((x) & 0xf) +#define SN_DMA_ADDR_PHYS 1 /* address is phys memory */ +#define SN_DMA_ADDR_XIO 2 /* address is an xio address. */ +#define SN_DMA_MSI 0x10 /* Bus address is to be used for MSI */ + +extern struct sn_pcibus_provider *sn_pci_provider[]; +#endif /* _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H */ diff --git a/kernel/arch/ia64/include/asm/sn/pcidev.h b/kernel/arch/ia64/include/asm/sn/pcidev.h new file mode 100644 index 000000000..1c2382cea --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/pcidev.h @@ -0,0 +1,85 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_PCI_PCIDEV_H +#define _ASM_IA64_SN_PCI_PCIDEV_H + +#include <linux/pci.h> + +/* + * In ia64, pci_dev->sysdata must be a *pci_controller.
To provide access to + * the pcidev_info structs for all devices under a controller, we keep a + * list of pcidev_info under pci_controller->platform_data. + */ +struct sn_platform_data { + void *provider_soft; + struct list_head pcidev_info; +}; + +#define SN_PLATFORM_DATA(busdev) \ + ((struct sn_platform_data *)(PCI_CONTROLLER(busdev)->platform_data)) + +#define SN_PCIDEV_INFO(dev) sn_pcidev_info_get(dev) + +/* + * Given a pci_bus, return the sn pcibus_bussoft struct. Note that + * this only works for root busses, not for busses represented by PPB's. + */ + +#define SN_PCIBUS_BUSSOFT(pci_bus) \ + ((struct pcibus_bussoft *)(SN_PLATFORM_DATA(pci_bus)->provider_soft)) + +#define SN_PCIBUS_BUSSOFT_INFO(pci_bus) \ + ((struct pcibus_info *)(SN_PLATFORM_DATA(pci_bus)->provider_soft)) +/* + * Given a struct pci_dev, return the sn pcibus_bussoft struct. Note + * that this is not equivalent to SN_PCIBUS_BUSSOFT(pci_dev->bus) + * due to possible PPB's in the path. + */ + +#define SN_PCIDEV_BUSSOFT(pci_dev) \ + (SN_PCIDEV_INFO(pci_dev)->pdi_host_pcidev_info->pdi_pcibus_info) + +#define SN_PCIDEV_BUSPROVIDER(pci_dev) \ + (SN_PCIDEV_INFO(pci_dev)->pdi_provider) + +#define PCIIO_BUS_NONE 255 /* bus 255 reserved */ +#define PCIIO_SLOT_NONE 255 +#define PCIIO_FUNC_NONE 255 +#define PCIIO_VENDOR_ID_NONE (-1) + +struct pcidev_info { + u64 pdi_pio_mapped_addr[7]; /* 6 BARs PLUS 1 ROM */ + u64 pdi_slot_host_handle; /* Bus and devfn Host pci_dev */ + + struct pcibus_bussoft *pdi_pcibus_info; /* Kernel common bus soft */ + struct pcidev_info *pdi_host_pcidev_info; /* Kernel Host pci_dev */ + struct pci_dev *pdi_linux_pcidev; /* Kernel pci_dev */ + + struct sn_irq_info *pdi_sn_irq_info; + struct sn_pcibus_provider *pdi_provider; /* sn pci ops */ + struct pci_dev *host_pci_dev; /* host bus link */ + struct list_head pdi_list; /* List of pcidev_info */ +}; + +extern void sn_irq_fixup(struct pci_dev *pci_dev, + struct sn_irq_info *sn_irq_info); +extern void sn_irq_unfixup(struct
pci_dev *pci_dev); +extern struct pcidev_info * sn_pcidev_info_get(struct pci_dev *); +extern void sn_bus_fixup(struct pci_bus *); +extern void sn_acpi_bus_fixup(struct pci_bus *); +extern void sn_common_bus_fixup(struct pci_bus *, struct pcibus_bussoft *); +extern void sn_bus_store_sysdata(struct pci_dev *dev); +extern void sn_bus_free_sysdata(void); +extern void sn_generate_path(struct pci_bus *pci_bus, char *address); +extern void sn_io_slot_fixup(struct pci_dev *); +extern void sn_acpi_slot_fixup(struct pci_dev *); +extern void sn_pci_fixup_slot(struct pci_dev *dev, struct pcidev_info *, + struct sn_irq_info *); +extern void sn_pci_unfixup_slot(struct pci_dev *dev); +extern void sn_irq_lh_init(void); +#endif /* _ASM_IA64_SN_PCI_PCIDEV_H */ diff --git a/kernel/arch/ia64/include/asm/sn/pda.h b/kernel/arch/ia64/include/asm/sn/pda.h new file mode 100644 index 000000000..22ae358c8 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/pda.h @@ -0,0 +1,68 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved. + */ +#ifndef _ASM_IA64_SN_PDA_H +#define _ASM_IA64_SN_PDA_H + +#include <linux/cache.h> +#include <asm/percpu.h> + + +/* + * CPU-specific data structure. + * + * One of these structures is allocated for each cpu of a NUMA system. + * + * This structure provides a convenient way of keeping together + * all SN per-cpu data structures.
+ */ + +typedef struct pda_s { + + /* + * Support for SN LEDs + */ + volatile short *led_address; + u8 led_state; + u8 hb_state; /* supports blinking heartbeat leds */ + unsigned int hb_count; + + unsigned int idle_flag; + + volatile unsigned long *bedrock_rev_id; + volatile unsigned long *pio_write_status_addr; + unsigned long pio_write_status_val; + volatile unsigned long *pio_shub_war_cam_addr; + + unsigned long sn_in_service_ivecs[4]; + int sn_lb_int_war_ticks; + int sn_last_irq; + int sn_first_irq; +} pda_t; + + +#define CACHE_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) + +/* + * PDA + * Per-cpu private data area for each cpu. The PDA is located immediately after + * the IA64 cpu_data area. A full page is allocated for the cpu_data area for each + * cpu but only a small amount of the page is actually used. We put the SNIA PDA + * in the same page as the cpu_data area. Note that there is a check in the setup + * code to verify that we don't overflow the page. + * + * Seems like we should cache-line align the pda so that any changes in the + * size of the cpu_data area don't change cache layout. Should we align to 32, 64, 128 + * or 512 boundary. Each has merits. For now, pick 128 but should be revisited later. + */ +DECLARE_PER_CPU(struct pda_s, pda_percpu); + +#define pda (&__ia64_per_cpu_var(pda_percpu)) + +#define pdacpu(cpu) (&per_cpu(pda_percpu, cpu)) + +#endif /* _ASM_IA64_SN_PDA_H */ diff --git a/kernel/arch/ia64/include/asm/sn/pic.h b/kernel/arch/ia64/include/asm/sn/pic.h new file mode 100644 index 000000000..5f9da5fd6 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/pic.h @@ -0,0 +1,261 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2003 Silicon Graphics, Inc. All rights reserved.
+ */ +#ifndef _ASM_IA64_SN_PCI_PIC_H +#define _ASM_IA64_SN_PCI_PIC_H + +/* + * PIC AS DEVICE ZERO + * ------------------ + * + * PIC handles PCI/X busses. PCI/X requires that the 'bridge' (i.e. PIC) + * be designated as 'device 0'. That is a departure from earlier SGI + * PCI bridges. Because of that we use config space 1 to access the + * config space of the first actual PCI device on the bus. + * Here's what the PIC manual says: + * + * The current PCI-X bus specification now defines that the parent + * hosts bus bridge (PIC for example) must be device 0 on bus 0. PIC + * reduced the total number of devices from 8 to 4 and removed the + * device registers and windows, now only supporting devices 0,1,2, and + * 3. PIC did leave all 8 configuration space windows. The reason was + * there was nothing to gain by removing them. Herein lies the problem. + * The device numbering we do using 0 through 3 is unrelated to the device + * numbering which PCI-X requires in configuration space. In the past we + * correlated Config space and our device space 0 <-> 0, 1 <-> 1, etc. + * PCI-X requires we start at 1, not 0 and currently the PX brick + * does associate our: + * + * device 0 with configuration space window 1, + * device 1 with configuration space window 2, + * device 2 with configuration space window 3, + * device 3 with configuration space window 4. + * + * The net effect is that all config space accesses are off-by-one with + * relation to other per-slot accesses on the PIC.
+ * Here is a table that shows some of that: + * + * Internal Slot# + * | + * | 0 1 2 3 + * ----------|--------------------------------------- + * config | 0x21000 0x22000 0x23000 0x24000 + * | + * even rrb | 0[0] n/a 1[0] n/a [] == implied even/odd + * | + * odd rrb | n/a 0[1] n/a 1[1] + * | + * int dev | 00 01 10 11 + * | + * ext slot# | 1 2 3 4 + * ----------|--------------------------------------- + */ + +#define PIC_ATE_TARGETID_SHFT 8 +#define PIC_HOST_INTR_ADDR 0x0000FFFFFFFFFFFFUL +#define PIC_PCI64_ATTR_TARG_SHFT 60 + + +/***************************************************************************** + *********************** PIC MMR structure mapping *************************** + *****************************************************************************/ + +/* NOTE: PIC WAR. PV#854697. PIC does not allow writes just to [31:0] + * of a 64-bit register. When writing PIC registers, always write the + * entire 64 bits. + */ + +struct pic { + + /* 0x000000-0x00FFFF -- Local Registers */ + + /* 0x000000-0x000057 -- Standard Widget Configuration */ + u64 p_wid_id; /* 0x000000 */ + u64 p_wid_stat; /* 0x000008 */ + u64 p_wid_err_upper; /* 0x000010 */ + u64 p_wid_err_lower; /* 0x000018 */ + #define p_wid_err p_wid_err_lower + u64 p_wid_control; /* 0x000020 */ + u64 p_wid_req_timeout; /* 0x000028 */ + u64 p_wid_int_upper; /* 0x000030 */ + u64 p_wid_int_lower; /* 0x000038 */ + #define p_wid_int p_wid_int_lower + u64 p_wid_err_cmdword; /* 0x000040 */ + u64 p_wid_llp; /* 0x000048 */ + u64 p_wid_tflush; /* 0x000050 */ + + /* 0x000058-0x00007F -- Bridge-specific Widget Configuration */ + u64 p_wid_aux_err; /* 0x000058 */ + u64 p_wid_resp_upper; /* 0x000060 */ + u64 p_wid_resp_lower; /* 0x000068 */ + #define p_wid_resp p_wid_resp_lower + u64 p_wid_tst_pin_ctrl; /* 0x000070 */ + u64 p_wid_addr_lkerr; /* 0x000078 */ + + /* 0x000080-0x00008F -- PMU & MAP */ + u64 p_dir_map; /* 0x000080 */ + u64 _pad_000088; /* 0x000088 */ + + /* 0x000090-0x00009F -- SSRAM */ + u64 
p_map_fault; /* 0x000090 */ + u64 _pad_000098; /* 0x000098 */ + + /* 0x0000A0-0x0000AF -- Arbitration */ + u64 p_arb; /* 0x0000A0 */ + u64 _pad_0000A8; /* 0x0000A8 */ + + /* 0x0000B0-0x0000BF -- Number In A Can or ATE Parity Error */ + u64 p_ate_parity_err; /* 0x0000B0 */ + u64 _pad_0000B8; /* 0x0000B8 */ + + /* 0x0000C0-0x0000FF -- PCI/GIO */ + u64 p_bus_timeout; /* 0x0000C0 */ + u64 p_pci_cfg; /* 0x0000C8 */ + u64 p_pci_err_upper; /* 0x0000D0 */ + u64 p_pci_err_lower; /* 0x0000D8 */ + #define p_pci_err p_pci_err_lower + u64 _pad_0000E0[4]; /* 0x0000{E0..F8} */ + + /* 0x000100-0x0001FF -- Interrupt */ + u64 p_int_status; /* 0x000100 */ + u64 p_int_enable; /* 0x000108 */ + u64 p_int_rst_stat; /* 0x000110 */ + u64 p_int_mode; /* 0x000118 */ + u64 p_int_device; /* 0x000120 */ + u64 p_int_host_err; /* 0x000128 */ + u64 p_int_addr[8]; /* 0x0001{30,,,68} */ + u64 p_err_int_view; /* 0x000170 */ + u64 p_mult_int; /* 0x000178 */ + u64 p_force_always[8]; /* 0x0001{80,,,B8} */ + u64 p_force_pin[8]; /* 0x0001{C0,,,F8} */ + + /* 0x000200-0x000298 -- Device */ + u64 p_device[4]; /* 0x0002{00,,,18} */ + u64 _pad_000220[4]; /* 0x0002{20,,,38} */ + u64 p_wr_req_buf[4]; /* 0x0002{40,,,58} */ + u64 _pad_000260[4]; /* 0x0002{60,,,78} */ + u64 p_rrb_map[2]; /* 0x0002{80,,,88} */ + #define p_even_resp p_rrb_map[0] /* 0x000280 */ + #define p_odd_resp p_rrb_map[1] /* 0x000288 */ + u64 p_resp_status; /* 0x000290 */ + u64 p_resp_clear; /* 0x000298 */ + + u64 _pad_0002A0[12]; /* 0x0002{A0..F8} */ + + /* 0x000300-0x0003F8 -- Buffer Address Match Registers */ + struct { + u64 upper; /* 0x0003{00,,,F0} */ + u64 lower; /* 0x0003{08,,,F8} */ + } p_buf_addr_match[16]; + + /* 0x000400-0x0005FF -- Performance Monitor Registers (even only) */ + struct { + u64 flush_w_touch; /* 0x000{400,,,5C0} */ + u64 flush_wo_touch; /* 0x000{408,,,5C8} */ + u64 inflight; /* 0x000{410,,,5D0} */ + u64 prefetch; /* 0x000{418,,,5D8} */ + u64 total_pci_retry; /* 0x000{420,,,5E0} */ + u64 max_pci_retry; /* 
0x000{428,,,5E8} */ + u64 max_latency; /* 0x000{430,,,5F0} */ + u64 clear_all; /* 0x000{438,,,5F8} */ + } p_buf_count[8]; + + + /* 0x000600-0x0009FF -- PCI/X registers */ + u64 p_pcix_bus_err_addr; /* 0x000600 */ + u64 p_pcix_bus_err_attr; /* 0x000608 */ + u64 p_pcix_bus_err_data; /* 0x000610 */ + u64 p_pcix_pio_split_addr; /* 0x000618 */ + u64 p_pcix_pio_split_attr; /* 0x000620 */ + u64 p_pcix_dma_req_err_attr; /* 0x000628 */ + u64 p_pcix_dma_req_err_addr; /* 0x000630 */ + u64 p_pcix_timeout; /* 0x000638 */ + + u64 _pad_000640[120]; /* 0x000{640,,,9F8} */ + + /* 0x000A00-0x000BFF -- PCI/X Read&Write Buffer */ + struct { + u64 p_buf_addr; /* 0x000{A00,,,AF0} */ + u64 p_buf_attr; /* 0X000{A08,,,AF8} */ + } p_pcix_read_buf_64[16]; + + struct { + u64 p_buf_addr; /* 0x000{B00,,,BE0} */ + u64 p_buf_attr; /* 0x000{B08,,,BE8} */ + u64 p_buf_valid; /* 0x000{B10,,,BF0} */ + u64 __pad1; /* 0x000{B18,,,BF8} */ + } p_pcix_write_buf_64[8]; + + /* End of Local Registers -- Start of Address Map space */ + + char _pad_000c00[0x010000 - 0x000c00]; + + /* 0x010000-0x011fff -- Internal ATE RAM (Auto Parity Generation) */ + u64 p_int_ate_ram[1024]; /* 0x010000-0x011fff */ + + /* 0x012000-0x013fff -- Internal ATE RAM (Manual Parity Generation) */ + u64 p_int_ate_ram_mp[1024]; /* 0x012000-0x013fff */ + + char _pad_014000[0x18000 - 0x014000]; + + /* 0x18000-0x197F8 -- PIC Write Request Ram */ + u64 p_wr_req_lower[256]; /* 0x18000 - 0x187F8 */ + u64 p_wr_req_upper[256]; /* 0x18800 - 0x18FF8 */ + u64 p_wr_req_parity[256]; /* 0x19000 - 0x197F8 */ + + char _pad_019800[0x20000 - 0x019800]; + + /* 0x020000-0x027FFF -- PCI Device Configuration Spaces */ + union { + u8 c[0x1000 / 1]; /* 0x02{0000,,,7FFF} */ + u16 s[0x1000 / 2]; /* 0x02{0000,,,7FFF} */ + u32 l[0x1000 / 4]; /* 0x02{0000,,,7FFF} */ + u64 d[0x1000 / 8]; /* 0x02{0000,,,7FFF} */ + union { + u8 c[0x100 / 1]; + u16 s[0x100 / 2]; + u32 l[0x100 / 4]; + u64 d[0x100 / 8]; + } f[8]; + } p_type0_cfg_dev[8]; /* 0x02{0000,,,7FFF} */ + + /* 
0x028000-0x028FFF -- PCI Type 1 Configuration Space */ + union { + u8 c[0x1000 / 1]; /* 0x028000-0x029000 */ + u16 s[0x1000 / 2]; /* 0x028000-0x029000 */ + u32 l[0x1000 / 4]; /* 0x028000-0x029000 */ + u64 d[0x1000 / 8]; /* 0x028000-0x029000 */ + union { + u8 c[0x100 / 1]; + u16 s[0x100 / 2]; + u32 l[0x100 / 4]; + u64 d[0x100 / 8]; + } f[8]; + } p_type1_cfg; /* 0x028000-0x029000 */ + + char _pad_029000[0x030000-0x029000]; + + /* 0x030000-0x030007 -- PCI Interrupt Acknowledge Cycle */ + union { + u8 c[8 / 1]; + u16 s[8 / 2]; + u32 l[8 / 4]; + u64 d[8 / 8]; + } p_pci_iack; /* 0x030000-0x030007 */ + + char _pad_030007[0x040000-0x030008]; + + /* 0x040000-0x040007 -- PCIX Special Cycle */ + union { + u8 c[8 / 1]; + u16 s[8 / 2]; + u32 l[8 / 4]; + u64 d[8 / 8]; + } p_pcix_cycle; /* 0x040000-0x040007 */ +}; + +#endif /* _ASM_IA64_SN_PCI_PIC_H */ diff --git a/kernel/arch/ia64/include/asm/sn/rw_mmr.h b/kernel/arch/ia64/include/asm/sn/rw_mmr.h new file mode 100644 index 000000000..2d78f4c5a --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/rw_mmr.h @@ -0,0 +1,28 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2002-2006 Silicon Graphics, Inc. All Rights Reserved. + */ +#ifndef _ASM_IA64_SN_RW_MMR_H +#define _ASM_IA64_SN_RW_MMR_H + + +/* + * This file declares routines that access MMRs via uncached physical addresses. + * pio_phys_read_mmr - read an MMR + * pio_phys_write_mmr - write an MMR + * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0 + * Second MMR will be skipped if address is NULL + * + * Addresses passed to these routines should be uncached physical addresses + * ie., 0x80000....
+ */ + + +extern long pio_phys_read_mmr(volatile long *mmr); +extern void pio_phys_write_mmr(volatile long *mmr, long val); +extern void pio_atomic_phys_write_mmrs(volatile long *mmr1, long val1, volatile long *mmr2, long val2); + +#endif /* _ASM_IA64_SN_RW_MMR_H */ diff --git a/kernel/arch/ia64/include/asm/sn/shub_mmr.h b/kernel/arch/ia64/include/asm/sn/shub_mmr.h new file mode 100644 index 000000000..a84d870f4 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/shub_mmr.h @@ -0,0 +1,502 @@ +/* + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2001-2005 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_IA64_SN_SHUB_MMR_H +#define _ASM_IA64_SN_SHUB_MMR_H + +/* ==================================================================== */ +/* Register "SH_IPI_INT" */ +/* SHub Inter-Processor Interrupt Registers */ +/* ==================================================================== */ +#define SH1_IPI_INT __IA64_UL_CONST(0x0000000110000380) +#define SH2_IPI_INT __IA64_UL_CONST(0x0000000010000380) + +/* SH_IPI_INT_TYPE */ +/* Description: Type of Interrupt: 0=INT, 2=PMI, 4=NMI, 5=INIT */ +#define SH_IPI_INT_TYPE_SHFT 0 +#define SH_IPI_INT_TYPE_MASK __IA64_UL_CONST(0x0000000000000007) + +/* SH_IPI_INT_AGT */ +/* Description: Agent, must be 0 for SHub */ +#define SH_IPI_INT_AGT_SHFT 3 +#define SH_IPI_INT_AGT_MASK __IA64_UL_CONST(0x0000000000000008) + +/* SH_IPI_INT_PID */ +/* Description: Processor ID, same setting as on targeted McKinley */ +#define SH_IPI_INT_PID_SHFT 4 +#define SH_IPI_INT_PID_MASK __IA64_UL_CONST(0x00000000000ffff0) + +/* SH_IPI_INT_BASE */ +/* Description: Optional interrupt vector area, 2MB aligned */ +#define SH_IPI_INT_BASE_SHFT 21 +#define SH_IPI_INT_BASE_MASK __IA64_UL_CONST(0x0003ffffffe00000) + +/* SH_IPI_INT_IDX */ +/* Description: Targeted McKinley interrupt vector */ +#define 
SH_IPI_INT_IDX_SHFT 52 +#define SH_IPI_INT_IDX_MASK __IA64_UL_CONST(0x0ff0000000000000) + +/* SH_IPI_INT_SEND */ +/* Description: Send Interrupt Message to PI; this generates a pulse */ +#define SH_IPI_INT_SEND_SHFT 63 +#define SH_IPI_INT_SEND_MASK __IA64_UL_CONST(0x8000000000000000) + +/* ==================================================================== */ +/* Register "SH_EVENT_OCCURRED" */ +/* SHub Interrupt Event Occurred */ +/* ==================================================================== */ +#define SH1_EVENT_OCCURRED __IA64_UL_CONST(0x0000000110010000) +#define SH1_EVENT_OCCURRED_ALIAS __IA64_UL_CONST(0x0000000110010008) +#define SH2_EVENT_OCCURRED __IA64_UL_CONST(0x0000000010010000) +#define SH2_EVENT_OCCURRED_ALIAS __IA64_UL_CONST(0x0000000010010008) + +/* ==================================================================== */ +/* Register "SH_PI_CAM_CONTROL" */ +/* CRB CAM MMR Access Control */ +/* ==================================================================== */ +#define SH1_PI_CAM_CONTROL __IA64_UL_CONST(0x0000000120050300) + +/* ==================================================================== */ +/* Register "SH_SHUB_ID" */ +/* SHub ID Number */ +/* ==================================================================== */ +#define SH1_SHUB_ID __IA64_UL_CONST(0x0000000110060580) +#define SH1_SHUB_ID_REVISION_SHFT 28 +#define SH1_SHUB_ID_REVISION_MASK __IA64_UL_CONST(0x00000000f0000000) + +/* ==================================================================== */ +/* Register "SH_RTC" */ +/* Real-time Clock */ +/* ==================================================================== */ +#define SH1_RTC __IA64_UL_CONST(0x00000001101c0000) +#define SH2_RTC __IA64_UL_CONST(0x00000002101c0000) +#define SH_RTC_MASK __IA64_UL_CONST(0x007fffffffffffff) + +/* ==================================================================== */ +/* Register "SH_PIO_WRITE_STATUS_0|1" */ +/* PIO Write Status for CPU 0 & 1 */ +/* 
==================================================================== */ +#define SH1_PIO_WRITE_STATUS_0 __IA64_UL_CONST(0x0000000120070200) +#define SH1_PIO_WRITE_STATUS_1 __IA64_UL_CONST(0x0000000120070280) +#define SH2_PIO_WRITE_STATUS_0 __IA64_UL_CONST(0x0000000020070200) +#define SH2_PIO_WRITE_STATUS_1 __IA64_UL_CONST(0x0000000020070280) +#define SH2_PIO_WRITE_STATUS_2 __IA64_UL_CONST(0x0000000020070300) +#define SH2_PIO_WRITE_STATUS_3 __IA64_UL_CONST(0x0000000020070380) + +/* SH_PIO_WRITE_STATUS_0_WRITE_DEADLOCK */ +/* Description: Deadlock response detected */ +#define SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_SHFT 1 +#define SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK \ + __IA64_UL_CONST(0x0000000000000002) + +/* SH_PIO_WRITE_STATUS_0_PENDING_WRITE_COUNT */ +/* Description: Count of currently pending PIO writes */ +#define SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_SHFT 56 +#define SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK \ + __IA64_UL_CONST(0x3f00000000000000) + +/* ==================================================================== */ +/* Register "SH_PIO_WRITE_STATUS_0_ALIAS" */ +/* ==================================================================== */ +#define SH1_PIO_WRITE_STATUS_0_ALIAS __IA64_UL_CONST(0x0000000120070208) +#define SH2_PIO_WRITE_STATUS_0_ALIAS __IA64_UL_CONST(0x0000000020070208) + +/* ==================================================================== */ +/* Register "SH_EVENT_OCCURRED" */ +/* SHub Interrupt Event Occurred */ +/* ==================================================================== */ +/* SH_EVENT_OCCURRED_UART_INT */ +/* Description: Pending Junk Bus UART Interrupt */ +#define SH_EVENT_OCCURRED_UART_INT_SHFT 20 +#define SH_EVENT_OCCURRED_UART_INT_MASK __IA64_UL_CONST(0x0000000000100000) + +/* SH_EVENT_OCCURRED_IPI_INT */ +/* Description: Pending IPI Interrupt */ +#define SH_EVENT_OCCURRED_IPI_INT_SHFT 28 +#define SH_EVENT_OCCURRED_IPI_INT_MASK __IA64_UL_CONST(0x0000000010000000) + +/* SH_EVENT_OCCURRED_II_INT0 */ +/* 
Description: Pending II 0 Interrupt */ +#define SH_EVENT_OCCURRED_II_INT0_SHFT 29 +#define SH_EVENT_OCCURRED_II_INT0_MASK __IA64_UL_CONST(0x0000000020000000) + +/* SH_EVENT_OCCURRED_II_INT1 */ +/* Description: Pending II 1 Interrupt */ +#define SH_EVENT_OCCURRED_II_INT1_SHFT 30 +#define SH_EVENT_OCCURRED_II_INT1_MASK __IA64_UL_CONST(0x0000000040000000) + +/* SH2_EVENT_OCCURRED_EXTIO_INT2 */ +/* Description: Pending SHUB 2 EXT IO INT2 */ +#define SH2_EVENT_OCCURRED_EXTIO_INT2_SHFT 33 +#define SH2_EVENT_OCCURRED_EXTIO_INT2_MASK __IA64_UL_CONST(0x0000000200000000) + +/* SH2_EVENT_OCCURRED_EXTIO_INT3 */ +/* Description: Pending SHUB 2 EXT IO INT3 */ +#define SH2_EVENT_OCCURRED_EXTIO_INT3_SHFT 34 +#define SH2_EVENT_OCCURRED_EXTIO_INT3_MASK __IA64_UL_CONST(0x0000000400000000) + +#define SH_ALL_INT_MASK \ + (SH_EVENT_OCCURRED_UART_INT_MASK | SH_EVENT_OCCURRED_IPI_INT_MASK | \ + SH_EVENT_OCCURRED_II_INT0_MASK | SH_EVENT_OCCURRED_II_INT1_MASK | \ + SH_EVENT_OCCURRED_II_INT1_MASK | SH2_EVENT_OCCURRED_EXTIO_INT2_MASK | \ + SH2_EVENT_OCCURRED_EXTIO_INT3_MASK) +/* NOTE(review): SH_ALL_INT_MASK ORs SH_EVENT_OCCURRED_II_INT1_MASK in twice; the repeat is redundant but does not change the value. */ + +/* ==================================================================== */ +/* LEDS */ +/* ==================================================================== */ +#define SH1_REAL_JUNK_BUS_LED0 0x7fed00000UL +#define SH1_REAL_JUNK_BUS_LED1 0x7fed10000UL +#define SH1_REAL_JUNK_BUS_LED2 0x7fed20000UL +#define SH1_REAL_JUNK_BUS_LED3 0x7fed30000UL + +#define SH2_REAL_JUNK_BUS_LED0 0xf0000000UL +#define SH2_REAL_JUNK_BUS_LED1 0xf0010000UL +#define SH2_REAL_JUNK_BUS_LED2 0xf0020000UL +#define SH2_REAL_JUNK_BUS_LED3 0xf0030000UL + +/* ==================================================================== */ +/* Register "SH1_PTC_0" */ +/* Purge Translation Cache Message Configuration Information */ +/* ==================================================================== */ +#define SH1_PTC_0 __IA64_UL_CONST(0x00000001101a0000) + +/* SH1_PTC_0_A */ +/* Description: Type */ +#define SH1_PTC_0_A_SHFT 0 + +/* SH1_PTC_0_PS */ +/* 
Description: Page Size */ +#define SH1_PTC_0_PS_SHFT 2 + +/* SH1_PTC_0_RID */ +/* Description: Region ID */ +#define SH1_PTC_0_RID_SHFT 8 + +/* SH1_PTC_0_START */ +/* Description: Start */ +#define SH1_PTC_0_START_SHFT 63 + +/* ==================================================================== */ +/* Register "SH1_PTC_1" */ +/* Purge Translation Cache Message Configuration Information */ +/* ==================================================================== */ +#define SH1_PTC_1 __IA64_UL_CONST(0x00000001101a0080) + +/* SH1_PTC_1_START */ +/* Description: PTC_1 Start */ +#define SH1_PTC_1_START_SHFT 63 + +/* ==================================================================== */ +/* Register "SH2_PTC" */ +/* Purge Translation Cache Message Configuration Information */ +/* ==================================================================== */ +#define SH2_PTC __IA64_UL_CONST(0x0000000170000000) + +/* SH2_PTC_A */ +/* Description: Type */ +#define SH2_PTC_A_SHFT 0 + +/* SH2_PTC_PS */ +/* Description: Page Size */ +#define SH2_PTC_PS_SHFT 2 + +/* SH2_PTC_RID */ +/* Description: Region ID */ +#define SH2_PTC_RID_SHFT 4 + +/* SH2_PTC_START */ +/* Description: Start */ +#define SH2_PTC_START_SHFT 63 + +/* SH2_PTC_ADDR_RID */ +/* Description: Region ID */ +#define SH2_PTC_ADDR_SHFT 4 +#define SH2_PTC_ADDR_MASK __IA64_UL_CONST(0x1ffffffffffff000) + +/* ==================================================================== */ +/* Register "SH_RTC1_INT_CONFIG" */ +/* SHub RTC 1 Interrupt Config Registers */ +/* ==================================================================== */ + +#define SH1_RTC1_INT_CONFIG __IA64_UL_CONST(0x0000000110001480) +#define SH2_RTC1_INT_CONFIG __IA64_UL_CONST(0x0000000010001480) +#define SH_RTC1_INT_CONFIG_MASK __IA64_UL_CONST(0x0ff3ffffffefffff) +#define SH_RTC1_INT_CONFIG_INIT __IA64_UL_CONST(0x0000000000000000) + +/* SH_RTC1_INT_CONFIG_TYPE */ +/* Description: Type of Interrupt: 0=INT, 2=PMI, 4=NMI, 5=INIT */ +#define 
SH_RTC1_INT_CONFIG_TYPE_SHFT 0 +#define SH_RTC1_INT_CONFIG_TYPE_MASK __IA64_UL_CONST(0x0000000000000007) + +/* SH_RTC1_INT_CONFIG_AGT */ +/* Description: Agent, must be 0 for SHub */ +#define SH_RTC1_INT_CONFIG_AGT_SHFT 3 +#define SH_RTC1_INT_CONFIG_AGT_MASK __IA64_UL_CONST(0x0000000000000008) + +/* SH_RTC1_INT_CONFIG_PID */ +/* Description: Processor ID, same setting as on targeted McKinley */ +#define SH_RTC1_INT_CONFIG_PID_SHFT 4 +#define SH_RTC1_INT_CONFIG_PID_MASK __IA64_UL_CONST(0x00000000000ffff0) + +/* SH_RTC1_INT_CONFIG_BASE */ +/* Description: Optional interrupt vector area, 2MB aligned */ +#define SH_RTC1_INT_CONFIG_BASE_SHFT 21 +#define SH_RTC1_INT_CONFIG_BASE_MASK __IA64_UL_CONST(0x0003ffffffe00000) + +/* SH_RTC1_INT_CONFIG_IDX */ +/* Description: Targeted McKinley interrupt vector */ +#define SH_RTC1_INT_CONFIG_IDX_SHFT 52 +#define SH_RTC1_INT_CONFIG_IDX_MASK __IA64_UL_CONST(0x0ff0000000000000) + +/* ==================================================================== */ +/* Register "SH_RTC1_INT_ENABLE" */ +/* SHub RTC 1 Interrupt Enable Registers */ +/* ==================================================================== */ + +#define SH1_RTC1_INT_ENABLE __IA64_UL_CONST(0x0000000110001500) +#define SH2_RTC1_INT_ENABLE __IA64_UL_CONST(0x0000000010001500) +#define SH_RTC1_INT_ENABLE_MASK __IA64_UL_CONST(0x0000000000000001) +#define SH_RTC1_INT_ENABLE_INIT __IA64_UL_CONST(0x0000000000000000) + +/* SH_RTC1_INT_ENABLE_RTC1_ENABLE */ +/* Description: Enable RTC 1 Interrupt */ +#define SH_RTC1_INT_ENABLE_RTC1_ENABLE_SHFT 0 +#define SH_RTC1_INT_ENABLE_RTC1_ENABLE_MASK \ + __IA64_UL_CONST(0x0000000000000001) + +/* ==================================================================== */ +/* Register "SH_RTC2_INT_CONFIG" */ +/* SHub RTC 2 Interrupt Config Registers */ +/* ==================================================================== */ + +#define SH1_RTC2_INT_CONFIG __IA64_UL_CONST(0x0000000110001580) +#define SH2_RTC2_INT_CONFIG 
__IA64_UL_CONST(0x0000000010001580) +#define SH_RTC2_INT_CONFIG_MASK __IA64_UL_CONST(0x0ff3ffffffefffff) +#define SH_RTC2_INT_CONFIG_INIT __IA64_UL_CONST(0x0000000000000000) + +/* SH_RTC2_INT_CONFIG_TYPE */ +/* Description: Type of Interrupt: 0=INT, 2=PMI, 4=NMI, 5=INIT */ +#define SH_RTC2_INT_CONFIG_TYPE_SHFT 0 +#define SH_RTC2_INT_CONFIG_TYPE_MASK __IA64_UL_CONST(0x0000000000000007) + +/* SH_RTC2_INT_CONFIG_AGT */ +/* Description: Agent, must be 0 for SHub */ +#define SH_RTC2_INT_CONFIG_AGT_SHFT 3 +#define SH_RTC2_INT_CONFIG_AGT_MASK __IA64_UL_CONST(0x0000000000000008) + +/* SH_RTC2_INT_CONFIG_PID */ +/* Description: Processor ID, same setting as on targeted McKinley */ +#define SH_RTC2_INT_CONFIG_PID_SHFT 4 +#define SH_RTC2_INT_CONFIG_PID_MASK __IA64_UL_CONST(0x00000000000ffff0) + +/* SH_RTC2_INT_CONFIG_BASE */ +/* Description: Optional interrupt vector area, 2MB aligned */ +#define SH_RTC2_INT_CONFIG_BASE_SHFT 21 +#define SH_RTC2_INT_CONFIG_BASE_MASK __IA64_UL_CONST(0x0003ffffffe00000) + +/* SH_RTC2_INT_CONFIG_IDX */ +/* Description: Targeted McKinley interrupt vector */ +#define SH_RTC2_INT_CONFIG_IDX_SHFT 52 +#define SH_RTC2_INT_CONFIG_IDX_MASK __IA64_UL_CONST(0x0ff0000000000000) + +/* ==================================================================== */ +/* Register "SH_RTC2_INT_ENABLE" */ +/* SHub RTC 2 Interrupt Enable Registers */ +/* ==================================================================== */ + +#define SH1_RTC2_INT_ENABLE __IA64_UL_CONST(0x0000000110001600) +#define SH2_RTC2_INT_ENABLE __IA64_UL_CONST(0x0000000010001600) +#define SH_RTC2_INT_ENABLE_MASK __IA64_UL_CONST(0x0000000000000001) +#define SH_RTC2_INT_ENABLE_INIT __IA64_UL_CONST(0x0000000000000000) + +/* SH_RTC2_INT_ENABLE_RTC2_ENABLE */ +/* Description: Enable RTC 2 Interrupt */ +#define SH_RTC2_INT_ENABLE_RTC2_ENABLE_SHFT 0 +#define SH_RTC2_INT_ENABLE_RTC2_ENABLE_MASK \ + __IA64_UL_CONST(0x0000000000000001) + +/* 
==================================================================== */ +/* Register "SH_RTC3_INT_CONFIG" */ +/* SHub RTC 3 Interrupt Config Registers */ +/* ==================================================================== */ + +#define SH1_RTC3_INT_CONFIG __IA64_UL_CONST(0x0000000110001680) +#define SH2_RTC3_INT_CONFIG __IA64_UL_CONST(0x0000000010001680) +#define SH_RTC3_INT_CONFIG_MASK __IA64_UL_CONST(0x0ff3ffffffefffff) +#define SH_RTC3_INT_CONFIG_INIT __IA64_UL_CONST(0x0000000000000000) + +/* SH_RTC3_INT_CONFIG_TYPE */ +/* Description: Type of Interrupt: 0=INT, 2=PMI, 4=NMI, 5=INIT */ +#define SH_RTC3_INT_CONFIG_TYPE_SHFT 0 +#define SH_RTC3_INT_CONFIG_TYPE_MASK __IA64_UL_CONST(0x0000000000000007) + +/* SH_RTC3_INT_CONFIG_AGT */ +/* Description: Agent, must be 0 for SHub */ +#define SH_RTC3_INT_CONFIG_AGT_SHFT 3 +#define SH_RTC3_INT_CONFIG_AGT_MASK __IA64_UL_CONST(0x0000000000000008) + +/* SH_RTC3_INT_CONFIG_PID */ +/* Description: Processor ID, same setting as on targeted McKinley */ +#define SH_RTC3_INT_CONFIG_PID_SHFT 4 +#define SH_RTC3_INT_CONFIG_PID_MASK __IA64_UL_CONST(0x00000000000ffff0) + +/* SH_RTC3_INT_CONFIG_BASE */ +/* Description: Optional interrupt vector area, 2MB aligned */ +#define SH_RTC3_INT_CONFIG_BASE_SHFT 21 +#define SH_RTC3_INT_CONFIG_BASE_MASK __IA64_UL_CONST(0x0003ffffffe00000) + +/* SH_RTC3_INT_CONFIG_IDX */ +/* Description: Targeted McKinley interrupt vector */ +#define SH_RTC3_INT_CONFIG_IDX_SHFT 52 +#define SH_RTC3_INT_CONFIG_IDX_MASK __IA64_UL_CONST(0x0ff0000000000000) + +/* ==================================================================== */ +/* Register "SH_RTC3_INT_ENABLE" */ +/* SHub RTC 3 Interrupt Enable Registers */ +/* ==================================================================== */ + +#define SH1_RTC3_INT_ENABLE __IA64_UL_CONST(0x0000000110001700) +#define SH2_RTC3_INT_ENABLE __IA64_UL_CONST(0x0000000010001700) +#define SH_RTC3_INT_ENABLE_MASK __IA64_UL_CONST(0x0000000000000001) +#define 
SH_RTC3_INT_ENABLE_INIT __IA64_UL_CONST(0x0000000000000000) + +/* SH_RTC3_INT_ENABLE_RTC3_ENABLE */ +/* Description: Enable RTC 3 Interrupt */ +#define SH_RTC3_INT_ENABLE_RTC3_ENABLE_SHFT 0 +#define SH_RTC3_INT_ENABLE_RTC3_ENABLE_MASK \ + __IA64_UL_CONST(0x0000000000000001) + +/* SH_EVENT_OCCURRED_RTC1_INT */ +/* Description: Pending RTC 1 Interrupt */ +#define SH_EVENT_OCCURRED_RTC1_INT_SHFT 24 +#define SH_EVENT_OCCURRED_RTC1_INT_MASK __IA64_UL_CONST(0x0000000001000000) + +/* SH_EVENT_OCCURRED_RTC2_INT */ +/* Description: Pending RTC 2 Interrupt */ +#define SH_EVENT_OCCURRED_RTC2_INT_SHFT 25 +#define SH_EVENT_OCCURRED_RTC2_INT_MASK __IA64_UL_CONST(0x0000000002000000) + +/* SH_EVENT_OCCURRED_RTC3_INT */ +/* Description: Pending RTC 3 Interrupt */ +#define SH_EVENT_OCCURRED_RTC3_INT_SHFT 26 +#define SH_EVENT_OCCURRED_RTC3_INT_MASK __IA64_UL_CONST(0x0000000004000000) + +/* ==================================================================== */ +/* Register "SH_IPI_ACCESS" */ +/* CPU interrupt Access Permission Bits */ +/* ==================================================================== */ + +#define SH1_IPI_ACCESS __IA64_UL_CONST(0x0000000110060480) +#define SH2_IPI_ACCESS0 __IA64_UL_CONST(0x0000000010060c00) +#define SH2_IPI_ACCESS1 __IA64_UL_CONST(0x0000000010060c80) +#define SH2_IPI_ACCESS2 __IA64_UL_CONST(0x0000000010060d00) +#define SH2_IPI_ACCESS3 __IA64_UL_CONST(0x0000000010060d80) + +/* ==================================================================== */ +/* Register "SH_INT_CMPB" */ +/* RTC Compare Value for Processor B */ +/* ==================================================================== */ + +#define SH1_INT_CMPB __IA64_UL_CONST(0x00000001101b0080) +#define SH2_INT_CMPB __IA64_UL_CONST(0x00000000101b0080) +#define SH_INT_CMPB_MASK __IA64_UL_CONST(0x007fffffffffffff) +#define SH_INT_CMPB_INIT __IA64_UL_CONST(0x0000000000000000) + +/* SH_INT_CMPB_REAL_TIME_CMPB */ +/* Description: Real Time Clock Compare */ +#define 
SH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 +#define SH_INT_CMPB_REAL_TIME_CMPB_MASK __IA64_UL_CONST(0x007fffffffffffff) + +/* ==================================================================== */ +/* Register "SH_INT_CMPC" */ +/* RTC Compare Value for Processor C */ +/* ==================================================================== */ + +#define SH1_INT_CMPC __IA64_UL_CONST(0x00000001101b0100) +#define SH2_INT_CMPC __IA64_UL_CONST(0x00000000101b0100) +#define SH_INT_CMPC_MASK __IA64_UL_CONST(0x007fffffffffffff) +#define SH_INT_CMPC_INIT __IA64_UL_CONST(0x0000000000000000) + +/* SH_INT_CMPC_REAL_TIME_CMPC */ +/* Description: Real Time Clock Compare */ +#define SH_INT_CMPC_REAL_TIME_CMPC_SHFT 0 +#define SH_INT_CMPC_REAL_TIME_CMPC_MASK __IA64_UL_CONST(0x007fffffffffffff) + +/* ==================================================================== */ +/* Register "SH_INT_CMPD" */ +/* RTC Compare Value for Processor D */ +/* ==================================================================== */ + +#define SH1_INT_CMPD __IA64_UL_CONST(0x00000001101b0180) +#define SH2_INT_CMPD __IA64_UL_CONST(0x00000000101b0180) +#define SH_INT_CMPD_MASK __IA64_UL_CONST(0x007fffffffffffff) +#define SH_INT_CMPD_INIT __IA64_UL_CONST(0x0000000000000000) + +/* SH_INT_CMPD_REAL_TIME_CMPD */ +/* Description: Real Time Clock Compare */ +#define SH_INT_CMPD_REAL_TIME_CMPD_SHFT 0 +#define SH_INT_CMPD_REAL_TIME_CMPD_MASK __IA64_UL_CONST(0x007fffffffffffff) + +/* ==================================================================== */ +/* Register "SH_MD_DQLP_MMR_DIR_PRIVEC0" */ +/* privilege vector for acc=0 */ +/* ==================================================================== */ +#define SH1_MD_DQLP_MMR_DIR_PRIVEC0 __IA64_UL_CONST(0x0000000100030300) + +/* ==================================================================== */ +/* Register "SH_MD_DQRP_MMR_DIR_PRIVEC0" */ +/* privilege vector for acc=0 */ +/* ==================================================================== */ +#define 
SH1_MD_DQRP_MMR_DIR_PRIVEC0 __IA64_UL_CONST(0x0000000100050300) + +/* ==================================================================== */ +/* Some MMRs are functionally identical (or close enough) on both SHUB1 */ +/* and SHUB2 that it makes sense to define a generic name for the MMR. */ +/* It is acceptable to use (for example) SH_IPI_INT to reference the */ +/* IPI MMR. The value of SH_IPI_INT is determined at runtime based */ +/* on the type of the SHUB. Do not use these #defines in performance */ +/* critical code or loops - there is a small performance penalty. */ +/* ==================================================================== */ +#define shubmmr(a,b) (is_shub2() ? a##2_##b : a##1_##b) + +#define SH_REAL_JUNK_BUS_LED0 shubmmr(SH, REAL_JUNK_BUS_LED0) +#define SH_IPI_INT shubmmr(SH, IPI_INT) +#define SH_EVENT_OCCURRED shubmmr(SH, EVENT_OCCURRED) +#define SH_EVENT_OCCURRED_ALIAS shubmmr(SH, EVENT_OCCURRED_ALIAS) +#define SH_RTC shubmmr(SH, RTC) +#define SH_RTC1_INT_CONFIG shubmmr(SH, RTC1_INT_CONFIG) +#define SH_RTC1_INT_ENABLE shubmmr(SH, RTC1_INT_ENABLE) +#define SH_RTC2_INT_CONFIG shubmmr(SH, RTC2_INT_CONFIG) +#define SH_RTC2_INT_ENABLE shubmmr(SH, RTC2_INT_ENABLE) +#define SH_RTC3_INT_CONFIG shubmmr(SH, RTC3_INT_CONFIG) +#define SH_RTC3_INT_ENABLE shubmmr(SH, RTC3_INT_ENABLE) +#define SH_INT_CMPB shubmmr(SH, INT_CMPB) +#define SH_INT_CMPC shubmmr(SH, INT_CMPC) +#define SH_INT_CMPD shubmmr(SH, INT_CMPD) + +/* ========================================================================== */ +/* Register "SH2_BT_ENG_CSR_0" */ +/* Engine 0 Control and Status Register */ +/* ========================================================================== */ + +#define SH2_BT_ENG_CSR_0 __IA64_UL_CONST(0x0000000030040000) +#define SH2_BT_ENG_SRC_ADDR_0 __IA64_UL_CONST(0x0000000030040080) +#define SH2_BT_ENG_DEST_ADDR_0 __IA64_UL_CONST(0x0000000030040100) +#define SH2_BT_ENG_NOTIF_ADDR_0 __IA64_UL_CONST(0x0000000030040180) + +/* 
========================================================================== */ +/* BTE interfaces 1-3 */ +/* ========================================================================== */ + +#define SH2_BT_ENG_CSR_1 __IA64_UL_CONST(0x0000000030050000) +#define SH2_BT_ENG_CSR_2 __IA64_UL_CONST(0x0000000030060000) +#define SH2_BT_ENG_CSR_3 __IA64_UL_CONST(0x0000000030070000) + +#endif /* _ASM_IA64_SN_SHUB_MMR_H */ diff --git a/kernel/arch/ia64/include/asm/sn/shubio.h b/kernel/arch/ia64/include/asm/sn/shubio.h new file mode 100644 index 000000000..ecb8a4947 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/shubio.h @@ -0,0 +1,3358 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_IA64_SN_SHUBIO_H +#define _ASM_IA64_SN_SHUBIO_H + +#define HUB_WIDGET_ID_MAX 0xf +#define IIO_NUM_ITTES 7 +#define HUB_NUM_BIG_WINDOW (IIO_NUM_ITTES - 1) + +#define IIO_WID 0x00400000 /* Crosstalk Widget Identification */ + /* This register is also accessible from + * Crosstalk at address 0x0. 
*/ +#define IIO_WSTAT 0x00400008 /* Crosstalk Widget Status */ +#define IIO_WCR 0x00400020 /* Crosstalk Widget Control Register */ +#define IIO_ILAPR 0x00400100 /* IO Local Access Protection Register */ +#define IIO_ILAPO 0x00400108 /* IO Local Access Protection Override */ +#define IIO_IOWA 0x00400110 /* IO Outbound Widget Access */ +#define IIO_IIWA 0x00400118 /* IO Inbound Widget Access */ +#define IIO_IIDEM 0x00400120 /* IO Inbound Device Error Mask */ +#define IIO_ILCSR 0x00400128 /* IO LLP Control and Status Register */ +#define IIO_ILLR 0x00400130 /* IO LLP Log Register */ +#define IIO_IIDSR 0x00400138 /* IO Interrupt Destination */ + +#define IIO_IGFX0 0x00400140 /* IO Graphics Node-Widget Map 0 */ +#define IIO_IGFX1 0x00400148 /* IO Graphics Node-Widget Map 1 */ + +#define IIO_ISCR0 0x00400150 /* IO Scratch Register 0 */ +#define IIO_ISCR1 0x00400158 /* IO Scratch Register 1 */ + +#define IIO_ITTE1 0x00400160 /* IO Translation Table Entry 1 */ +#define IIO_ITTE2 0x00400168 /* IO Translation Table Entry 2 */ +#define IIO_ITTE3 0x00400170 /* IO Translation Table Entry 3 */ +#define IIO_ITTE4 0x00400178 /* IO Translation Table Entry 4 */ +#define IIO_ITTE5 0x00400180 /* IO Translation Table Entry 5 */ +#define IIO_ITTE6 0x00400188 /* IO Translation Table Entry 6 */ +#define IIO_ITTE7 0x00400190 /* IO Translation Table Entry 7 */ + +#define IIO_IPRB0 0x00400198 /* IO PRB Entry 0 */ +#define IIO_IPRB8 0x004001A0 /* IO PRB Entry 8 */ +#define IIO_IPRB9 0x004001A8 /* IO PRB Entry 9 */ +#define IIO_IPRBA 0x004001B0 /* IO PRB Entry A */ +#define IIO_IPRBB 0x004001B8 /* IO PRB Entry B */ +#define IIO_IPRBC 0x004001C0 /* IO PRB Entry C */ +#define IIO_IPRBD 0x004001C8 /* IO PRB Entry D */ +#define IIO_IPRBE 0x004001D0 /* IO PRB Entry E */ +#define IIO_IPRBF 0x004001D8 /* IO PRB Entry F */ + +#define IIO_IXCC 0x004001E0 /* IO Crosstalk Credit Count Timeout */ +#define IIO_IMEM 0x004001E8 /* IO Miscellaneous Error Mask */ +#define IIO_IXTT 0x004001F0 /* IO Crosstalk 
Timeout Threshold */ +#define IIO_IECLR 0x004001F8 /* IO Error Clear Register */ +#define IIO_IBCR 0x00400200 /* IO BTE Control Register */ + +#define IIO_IXSM 0x00400208 /* IO Crosstalk Spurious Message */ +#define IIO_IXSS 0x00400210 /* IO Crosstalk Spurious Sideband */ + +#define IIO_ILCT 0x00400218 /* IO LLP Channel Test */ + +#define IIO_IIEPH1 0x00400220 /* IO Incoming Error Packet Header, Part 1 */ +#define IIO_IIEPH2 0x00400228 /* IO Incoming Error Packet Header, Part 2 */ + +#define IIO_ISLAPR 0x00400230 /* IO SXB Local Access Protection Register */ +#define IIO_ISLAPO 0x00400238 /* IO SXB Local Access Protection Override */ + +#define IIO_IWI 0x00400240 /* IO Wrapper Interrupt Register */ +#define IIO_IWEL 0x00400248 /* IO Wrapper Error Log Register */ +#define IIO_IWC 0x00400250 /* IO Wrapper Control Register */ +#define IIO_IWS 0x00400258 /* IO Wrapper Status Register */ +#define IIO_IWEIM 0x00400260 /* IO Wrapper Error Interrupt Masking Register */ + +#define IIO_IPCA 0x00400300 /* IO PRB Counter Adjust */ + +#define IIO_IPRTE0_A 0x00400308 /* IO PIO Read Address Table Entry 0, Part A */ +#define IIO_IPRTE1_A 0x00400310 /* IO PIO Read Address Table Entry 1, Part A */ +#define IIO_IPRTE2_A 0x00400318 /* IO PIO Read Address Table Entry 2, Part A */ +#define IIO_IPRTE3_A 0x00400320 /* IO PIO Read Address Table Entry 3, Part A */ +#define IIO_IPRTE4_A 0x00400328 /* IO PIO Read Address Table Entry 4, Part A */ +#define IIO_IPRTE5_A 0x00400330 /* IO PIO Read Address Table Entry 5, Part A */ +#define IIO_IPRTE6_A 0x00400338 /* IO PIO Read Address Table Entry 6, Part A */ +#define IIO_IPRTE7_A 0x00400340 /* IO PIO Read Address Table Entry 7, Part A */ + +#define IIO_IPRTE0_B 0x00400348 /* IO PIO Read Address Table Entry 0, Part B */ +#define IIO_IPRTE1_B 0x00400350 /* IO PIO Read Address Table Entry 1, Part B */ +#define IIO_IPRTE2_B 0x00400358 /* IO PIO Read Address Table Entry 2, Part B */ +#define IIO_IPRTE3_B 0x00400360 /* IO PIO Read Address Table Entry 3, 
Part B */ +#define IIO_IPRTE4_B 0x00400368 /* IO PIO Read Address Table Entry 4, Part B */ +#define IIO_IPRTE5_B 0x00400370 /* IO PIO Read Address Table Entry 5, Part B */ +#define IIO_IPRTE6_B 0x00400378 /* IO PIO Read Address Table Entry 6, Part B */ +#define IIO_IPRTE7_B 0x00400380 /* IO PIO Read Address Table Entry 7, Part B */ + +#define IIO_IPDR 0x00400388 /* IO PIO Deallocation Register */ +#define IIO_ICDR 0x00400390 /* IO CRB Entry Deallocation Register */ +#define IIO_IFDR 0x00400398 /* IO IOQ FIFO Depth Register */ +#define IIO_IIAP 0x004003A0 /* IO IIQ Arbitration Parameters */ +#define IIO_ICMR 0x004003A8 /* IO CRB Management Register */ +#define IIO_ICCR 0x004003B0 /* IO CRB Control Register */ +#define IIO_ICTO 0x004003B8 /* IO CRB Timeout */ +#define IIO_ICTP 0x004003C0 /* IO CRB Timeout Prescalar */ + +#define IIO_ICRB0_A 0x00400400 /* IO CRB Entry 0_A */ +#define IIO_ICRB0_B 0x00400408 /* IO CRB Entry 0_B */ +#define IIO_ICRB0_C 0x00400410 /* IO CRB Entry 0_C */ +#define IIO_ICRB0_D 0x00400418 /* IO CRB Entry 0_D */ +#define IIO_ICRB0_E 0x00400420 /* IO CRB Entry 0_E */ + +#define IIO_ICRB1_A 0x00400430 /* IO CRB Entry 1_A */ +#define IIO_ICRB1_B 0x00400438 /* IO CRB Entry 1_B */ +#define IIO_ICRB1_C 0x00400440 /* IO CRB Entry 1_C */ +#define IIO_ICRB1_D 0x00400448 /* IO CRB Entry 1_D */ +#define IIO_ICRB1_E 0x00400450 /* IO CRB Entry 1_E */ + +#define IIO_ICRB2_A 0x00400460 /* IO CRB Entry 2_A */ +#define IIO_ICRB2_B 0x00400468 /* IO CRB Entry 2_B */ +#define IIO_ICRB2_C 0x00400470 /* IO CRB Entry 2_C */ +#define IIO_ICRB2_D 0x00400478 /* IO CRB Entry 2_D */ +#define IIO_ICRB2_E 0x00400480 /* IO CRB Entry 2_E */ + +#define IIO_ICRB3_A 0x00400490 /* IO CRB Entry 3_A */ +#define IIO_ICRB3_B 0x00400498 /* IO CRB Entry 3_B */ +#define IIO_ICRB3_C 0x004004a0 /* IO CRB Entry 3_C */ +#define IIO_ICRB3_D 0x004004a8 /* IO CRB Entry 3_D */ +#define IIO_ICRB3_E 0x004004b0 /* IO CRB Entry 3_E */ + +#define IIO_ICRB4_A 0x004004c0 /* IO CRB Entry 4_A */ 
+#define IIO_ICRB4_B 0x004004c8 /* IO CRB Entry 4_B */ +#define IIO_ICRB4_C 0x004004d0 /* IO CRB Entry 4_C */ +#define IIO_ICRB4_D 0x004004d8 /* IO CRB Entry 4_D */ +#define IIO_ICRB4_E 0x004004e0 /* IO CRB Entry 4_E */ + +#define IIO_ICRB5_A 0x004004f0 /* IO CRB Entry 5_A */ +#define IIO_ICRB5_B 0x004004f8 /* IO CRB Entry 5_B */ +#define IIO_ICRB5_C 0x00400500 /* IO CRB Entry 5_C */ +#define IIO_ICRB5_D 0x00400508 /* IO CRB Entry 5_D */ +#define IIO_ICRB5_E 0x00400510 /* IO CRB Entry 5_E */ + +#define IIO_ICRB6_A 0x00400520 /* IO CRB Entry 6_A */ +#define IIO_ICRB6_B 0x00400528 /* IO CRB Entry 6_B */ +#define IIO_ICRB6_C 0x00400530 /* IO CRB Entry 6_C */ +#define IIO_ICRB6_D 0x00400538 /* IO CRB Entry 6_D */ +#define IIO_ICRB6_E 0x00400540 /* IO CRB Entry 6_E */ + +#define IIO_ICRB7_A 0x00400550 /* IO CRB Entry 7_A */ +#define IIO_ICRB7_B 0x00400558 /* IO CRB Entry 7_B */ +#define IIO_ICRB7_C 0x00400560 /* IO CRB Entry 7_C */ +#define IIO_ICRB7_D 0x00400568 /* IO CRB Entry 7_D */ +#define IIO_ICRB7_E 0x00400570 /* IO CRB Entry 7_E */ + +#define IIO_ICRB8_A 0x00400580 /* IO CRB Entry 8_A */ +#define IIO_ICRB8_B 0x00400588 /* IO CRB Entry 8_B */ +#define IIO_ICRB8_C 0x00400590 /* IO CRB Entry 8_C */ +#define IIO_ICRB8_D 0x00400598 /* IO CRB Entry 8_D */ +#define IIO_ICRB8_E 0x004005a0 /* IO CRB Entry 8_E */ + +#define IIO_ICRB9_A 0x004005b0 /* IO CRB Entry 9_A */ +#define IIO_ICRB9_B 0x004005b8 /* IO CRB Entry 9_B */ +#define IIO_ICRB9_C 0x004005c0 /* IO CRB Entry 9_C */ +#define IIO_ICRB9_D 0x004005c8 /* IO CRB Entry 9_D */ +#define IIO_ICRB9_E 0x004005d0 /* IO CRB Entry 9_E */ + +#define IIO_ICRBA_A 0x004005e0 /* IO CRB Entry A_A */ +#define IIO_ICRBA_B 0x004005e8 /* IO CRB Entry A_B */ +#define IIO_ICRBA_C 0x004005f0 /* IO CRB Entry A_C */ +#define IIO_ICRBA_D 0x004005f8 /* IO CRB Entry A_D */ +#define IIO_ICRBA_E 0x00400600 /* IO CRB Entry A_E */ + +#define IIO_ICRBB_A 0x00400610 /* IO CRB Entry B_A */ +#define IIO_ICRBB_B 0x00400618 /* IO CRB Entry B_B */ 
+#define IIO_ICRBB_C 0x00400620 /* IO CRB Entry B_C */ +#define IIO_ICRBB_D 0x00400628 /* IO CRB Entry B_D */ +#define IIO_ICRBB_E 0x00400630 /* IO CRB Entry B_E */ + +#define IIO_ICRBC_A 0x00400640 /* IO CRB Entry C_A */ +#define IIO_ICRBC_B 0x00400648 /* IO CRB Entry C_B */ +#define IIO_ICRBC_C 0x00400650 /* IO CRB Entry C_C */ +#define IIO_ICRBC_D 0x00400658 /* IO CRB Entry C_D */ +#define IIO_ICRBC_E 0x00400660 /* IO CRB Entry C_E */ + +#define IIO_ICRBD_A 0x00400670 /* IO CRB Entry D_A */ +#define IIO_ICRBD_B 0x00400678 /* IO CRB Entry D_B */ +#define IIO_ICRBD_C 0x00400680 /* IO CRB Entry D_C */ +#define IIO_ICRBD_D 0x00400688 /* IO CRB Entry D_D */ +#define IIO_ICRBD_E 0x00400690 /* IO CRB Entry D_E */ + +#define IIO_ICRBE_A 0x004006a0 /* IO CRB Entry E_A */ +#define IIO_ICRBE_B 0x004006a8 /* IO CRB Entry E_B */ +#define IIO_ICRBE_C 0x004006b0 /* IO CRB Entry E_C */ +#define IIO_ICRBE_D 0x004006b8 /* IO CRB Entry E_D */ +#define IIO_ICRBE_E 0x004006c0 /* IO CRB Entry E_E */ + +#define IIO_ICSML 0x00400700 /* IO CRB Spurious Message Low */ +#define IIO_ICSMM 0x00400708 /* IO CRB Spurious Message Middle */ +#define IIO_ICSMH 0x00400710 /* IO CRB Spurious Message High */ + +#define IIO_IDBSS 0x00400718 /* IO Debug Submenu Select */ + +#define IIO_IBLS0 0x00410000 /* IO BTE Length Status 0 */ +#define IIO_IBSA0 0x00410008 /* IO BTE Source Address 0 */ +#define IIO_IBDA0 0x00410010 /* IO BTE Destination Address 0 */ +#define IIO_IBCT0 0x00410018 /* IO BTE Control Terminate 0 */ +#define IIO_IBNA0 0x00410020 /* IO BTE Notification Address 0 */ +#define IIO_IBIA0 0x00410028 /* IO BTE Interrupt Address 0 */ +#define IIO_IBLS1 0x00420000 /* IO BTE Length Status 1 */ +#define IIO_IBSA1 0x00420008 /* IO BTE Source Address 1 */ +#define IIO_IBDA1 0x00420010 /* IO BTE Destination Address 1 */ +#define IIO_IBCT1 0x00420018 /* IO BTE Control Terminate 1 */ +#define IIO_IBNA1 0x00420020 /* IO BTE Notification Address 1 */ +#define IIO_IBIA1 0x00420028 /* IO BTE Interrupt 
Address 1 */ + +#define IIO_IPCR 0x00430000 /* IO Performance Control */ +#define IIO_IPPR 0x00430008 /* IO Performance Profiling */ + +/************************************************************************ + * * + * Description: This register echoes some information from the * + * LB_REV_ID register. It is available through Crosstalk as described * + * above. The REV_NUM and MFG_NUM fields receive their values from * + * the REVISION and MANUFACTURER fields in the LB_REV_ID register. * + * The PART_NUM field's value is the Crosstalk device ID number that * + * Steve Miller assigned to the SHub chip. * + * * + ************************************************************************/ + +typedef union ii_wid_u { + u64 ii_wid_regval; + struct { /* bitfields overlay ii_wid_regval; widths total 64 */ + u64 w_rsvd_1:1; + u64 w_mfg_num:11; + u64 w_part_num:16; + u64 w_rev_num:4; + u64 w_rsvd:32; + } ii_wid_fld_s; +} ii_wid_u_t; + +/************************************************************************ + * * + * The fields in this register are set upon detection of an error * + * and cleared by various mechanisms, as explained in the * + * description. * + * * + ************************************************************************/ + +typedef union ii_wstat_u { + u64 ii_wstat_regval; + struct { /* bitfields overlay ii_wstat_regval; widths total 64 */ + u64 w_pending:4; + u64 w_xt_crd_to:1; + u64 w_xt_tail_to:1; + u64 w_rsvd_3:3; + u64 w_tx_mx_rty:1; + u64 w_rsvd_2:6; + u64 w_llp_tx_cnt:8; + u64 w_rsvd_1:8; + u64 w_crazy:1; + u64 w_rsvd:31; + } ii_wstat_fld_s; +} ii_wstat_u_t; + +/************************************************************************ + * * + * Description: This is a read-write enabled register. It controls * + * various aspects of the Crosstalk flow control.
* + * * + ************************************************************************/ + +typedef union ii_wcr_u { + u64 ii_wcr_regval; + struct { /* bitfields overlay ii_wcr_regval; widths total 64 */ + u64 w_wid:4; + u64 w_tag:1; + u64 w_rsvd_1:8; + u64 w_dst_crd:3; + u64 w_f_bad_pkt:1; + u64 w_dir_con:1; + u64 w_e_thresh:5; + u64 w_rsvd:41; + } ii_wcr_fld_s; +} ii_wcr_u_t; + +/************************************************************************ + * * + * Description: This register's value is a bit vector that guards * + * access to local registers within the II as well as to external * + * Crosstalk widgets. Each bit in the register corresponds to a * + * particular region in the system; a region consists of one, two or * + * four nodes (depending on the value of the REGION_SIZE field in the * + * LB_REV_ID register, which is documented in Section 8.3.1.1). The * + * protection provided by this register applies to PIO read * + * operations as well as PIO write operations. The II will perform a * + * PIO read or write request only if the bit for the requestor's * + * region is set; otherwise, the II will not perform the requested * + * operation and will return an error response. When a PIO read or * + * write request targets an external Crosstalk widget, then not only * + * must the bit for the requestor's region be set in the ILAPR, but * + * also the target widget's bit in the IOWA register must be set in * + * order for the II to perform the requested operation; otherwise, * + * the II will return an error response. Hence, the protection * + * provided by the IOWA register supplements the protection provided * + * by the ILAPR for requests that target external Crosstalk widgets. * + * This register itself can be accessed only by the nodes whose * + * region ID bits are enabled in this same register. It can also be * + * accessed through the IAlias space by the local processors. * + * The reset value of this register allows access by all nodes.
* + * * + ************************************************************************/ + +typedef union ii_ilapr_u { + u64 ii_ilapr_regval; + struct { /* single 64-bit field overlaying ii_ilapr_regval */ + u64 i_region:64; + } ii_ilapr_fld_s; +} ii_ilapr_u_t; + +/************************************************************************ + * * + * Description: A write to this register of the 64-bit value * + * "SGIrules" in ASCII will cause the bit in the ILAPR register * + * corresponding to the region of the requestor to be set (allow * + * access). A write of any other value will be ignored. Access * + * protection for this register is "SGIrules". * + * This register can also be accessed through the IAlias space. * + * However, this access will not change the access permissions in the * + * ILAPR. * + * * + ************************************************************************/ + +typedef union ii_ilapo_u { + u64 ii_ilapo_regval; + struct { /* single 64-bit field overlaying ii_ilapo_regval */ + u64 i_io_ovrride:64; + } ii_ilapo_fld_s; +} ii_ilapo_u_t; + +/************************************************************************ + * * + * This register qualifies all the PIO and Graphics writes launched * + * from the SHub towards a widget. * + * * + ************************************************************************/ + +typedef union ii_iowa_u { + u64 ii_iowa_regval; + struct { /* bitfields overlay ii_iowa_regval; widths total 64 */ + u64 i_w0_oac:1; + u64 i_rsvd_1:7; + u64 i_wx_oac:8; + u64 i_rsvd:48; + } ii_iowa_fld_s; +} ii_iowa_u_t; + +/************************************************************************ + * * + * Description: This register qualifies all the requests launched * + * from a widget towards the Shub. This register is intended to be * + * used by software in case of misbehaving widgets.
* + * * + * * + ************************************************************************/ + +typedef union ii_iiwa_u { + u64 ii_iiwa_regval; + struct { /* bitfields overlay ii_iiwa_regval; widths total 64 */ + u64 i_w0_iac:1; + u64 i_rsvd_1:7; + u64 i_wx_iac:8; + u64 i_rsvd:48; + } ii_iiwa_fld_s; +} ii_iiwa_u_t; + +/************************************************************************ + * * + * Description: This register qualifies all the operations launched * + * from a widget towards the SHub. It allows individual access * + * control for up to 8 devices per widget. A device refers to an * + * individual DMA master hosted by a widget. * + * The bits in each field of this register are cleared by the SHub * + * upon detection of an error which requires the device to be * + * disabled. These fields assume that 0<=TNUM<=7 (i.e., Bridge-centric * + * Crosstalk). Whether or not a device has access rights to this * + * SHub is determined by an AND of the device enable bit in the * + * appropriate field of this register and the corresponding bit in * + * the Wx_IAC field (for the widget which this device belongs to). * + * The bits in this field are set by writing a 1 to them. Incoming * + * replies from Crosstalk are not subject to this access control * + * mechanism. * + * * + ************************************************************************/ + +typedef union ii_iidem_u { + u64 ii_iidem_regval; + struct { /* eight 8-bit fields overlaying ii_iidem_regval (64 bits) */ + u64 i_w8_dxs:8; + u64 i_w9_dxs:8; + u64 i_wa_dxs:8; + u64 i_wb_dxs:8; + u64 i_wc_dxs:8; + u64 i_wd_dxs:8; + u64 i_we_dxs:8; + u64 i_wf_dxs:8; + } ii_iidem_fld_s; +} ii_iidem_u_t; + +/************************************************************************ + * * + * This register contains the various programmable fields necessary * + * for controlling and observing the LLP signals.
* + * * + ************************************************************************/ + +typedef union ii_ilcsr_u { + u64 ii_ilcsr_regval; + struct { /* bitfields overlay ii_ilcsr_regval; widths total 64 */ + u64 i_nullto:6; + u64 i_rsvd_4:2; + u64 i_wrmrst:1; + u64 i_rsvd_3:1; + u64 i_llp_en:1; + u64 i_bm8:1; + u64 i_llp_stat:2; + u64 i_remote_power:1; + u64 i_rsvd_2:1; + u64 i_maxrtry:10; + u64 i_d_avail_sel:2; + u64 i_rsvd_1:4; + u64 i_maxbrst:10; + u64 i_rsvd:22; + + } ii_ilcsr_fld_s; +} ii_ilcsr_u_t; + +/************************************************************************ + * * + * This is simply a status register that monitors the LLP error * + * rate. * + * * + ************************************************************************/ + +typedef union ii_illr_u { + u64 ii_illr_regval; + struct { /* bitfields overlay ii_illr_regval; widths total 64 */ + u64 i_sn_cnt:16; + u64 i_cb_cnt:16; + u64 i_rsvd:32; + } ii_illr_fld_s; +} ii_illr_u_t; + +/************************************************************************ + * * + * Description: All II-detected non-BTE error interrupts are * + * specified via this register. * + * NOTE: The PI interrupt register address is hardcoded in the II. If * + * PI_ID==0, then the II sends an interrupt request (Duplonet PWRI * + * packet) to address offset 0x0180_0090 within the local register * + * address space of PI0 on the node specified by the NODE field. If * + * PI_ID==1, then the II sends the interrupt request to address * + * offset 0x01A0_0090 within the local register address space of PI1 * + * on the node specified by the NODE field.
* + * * + ************************************************************************/ + +typedef union ii_iidsr_u { + u64 ii_iidsr_regval; + struct { + u64 i_level:8; + u64 i_pi_id:1; + u64 i_node:11; + u64 i_rsvd_3:4; + u64 i_enable:1; + u64 i_rsvd_2:3; + u64 i_int_sent:2; + u64 i_rsvd_1:2; + u64 i_pi0_forward_int:1; + u64 i_pi1_forward_int:1; + u64 i_rsvd:30; + } ii_iidsr_fld_s; +} ii_iidsr_u_t; + +/************************************************************************ + * * + * There are two instances of this register. This register is used * + * for matching up the incoming responses from the graphics widget to * + * the processor that initiated the graphics operation. The * + * write-responses are converted to graphics credits and returned to * + * the processor so that the processor interface can manage the flow * + * control. * + * * + ************************************************************************/ + +typedef union ii_igfx0_u { + u64 ii_igfx0_regval; + struct { + u64 i_w_num:4; + u64 i_pi_id:1; + u64 i_n_num:12; + u64 i_p_num:1; + u64 i_rsvd:46; + } ii_igfx0_fld_s; +} ii_igfx0_u_t; + +/************************************************************************ + * * + * There are two instances of this register. This register is used * + * for matching up the incoming responses from the graphics widget to * + * the processor that initiated the graphics operation. The * + * write-responses are converted to graphics credits and returned to * + * the processor so that the processor interface can manage the flow * + * control. * + * * + ************************************************************************/ + +typedef union ii_igfx1_u { + u64 ii_igfx1_regval; + struct { + u64 i_w_num:4; + u64 i_pi_id:1; + u64 i_n_num:12; + u64 i_p_num:1; + u64 i_rsvd:46; + } ii_igfx1_fld_s; +} ii_igfx1_u_t; + +/************************************************************************ + * * + * There are two instances of this register.
These registers are * + * used as scratch registers for software use. * + * * + ************************************************************************/ + +typedef union ii_iscr0_u { + u64 ii_iscr0_regval; + struct { + u64 i_scratch:64; + } ii_iscr0_fld_s; +} ii_iscr0_u_t; + +/************************************************************************ + * * + * There are two instances of this register. These registers are * + * used as scratch registers for software use. * + * * + ************************************************************************/ + +typedef union ii_iscr1_u { + u64 ii_iscr1_regval; + struct { + u64 i_scratch:64; + } ii_iscr1_fld_s; +} ii_iscr1_u_t; + +/************************************************************************ + * * + * Description: There are seven instances of translation table entry * + * registers. Each register maps a SHub Big Window to a 48-bit * + * address on Crosstalk. * + * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window * + * number) are used to select one of these 7 registers. The Widget * + * number field is then derived from the W_NUM field for synthesizing * + * a Crosstalk packet. The 5 bits of OFFSET are concatenated with * + * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34] * + * are padded with zeros. Although the maximum Crosstalk space * + * addressable by the SHub is thus the lower 16 GBytes per widget * + * (M-mode), only 7/32nds of this * + * space can be accessed. * + * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big * + * Window number) are used to select one of these 7 registers. The * + * Widget number field is then derived from the W_NUM field for * + * synthesizing a Crosstalk packet. The 5 bits of OFFSET are * + * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP * + * field is used as Crosstalk[47], and the remainder of the Crosstalk * + * address bits (Crosstalk[46:34]) are always zero.
While the maximum * + * Crosstalk space addressable by the SHub is thus the lower * + * 8-GBytes per widget (N-mode), only 7/32nds * + * of this space can be accessed. * + * * + ************************************************************************/ + +typedef union ii_itte1_u { + u64 ii_itte1_regval; + struct { + u64 i_offset:5; + u64 i_rsvd_1:3; + u64 i_w_num:4; + u64 i_iosp:1; + u64 i_rsvd:51; + } ii_itte1_fld_s; +} ii_itte1_u_t; + +/************************************************************************ + * * + * Description: There are seven instances of translation table entry * + * registers. Each register maps a SHub Big Window to a 48-bit * + * address on Crosstalk. * + * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window * + * number) are used to select one of these 7 registers. The Widget * + * number field is then derived from the W_NUM field for synthesizing * + * a Crosstalk packet. The 5 bits of OFFSET are concatenated with * + * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34] * + * are padded with zeros. Although the maximum Crosstalk space * + * addressable by the SHub is thus the lower 16 GBytes per widget * + * (M-mode), only 7/32nds of this * + * space can be accessed. * + * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big * + * Window number) are used to select one of these 7 registers. The * + * Widget number field is then derived from the W_NUM field for * + * synthesizing a Crosstalk packet. The 5 bits of OFFSET are * + * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP * + * field is used as Crosstalk[47], and the remainder of the Crosstalk * + * address bits (Crosstalk[46:34]) are always zero. While the maximum * + * Crosstalk space addressable by the SHub is thus the lower * + * 8-GBytes per widget (N-mode), only 7/32nds * + * of this space can be accessed.
* + * * + ************************************************************************/ + +typedef union ii_itte2_u { + u64 ii_itte2_regval; + struct { + u64 i_offset:5; + u64 i_rsvd_1:3; + u64 i_w_num:4; + u64 i_iosp:1; + u64 i_rsvd:51; + } ii_itte2_fld_s; +} ii_itte2_u_t; + +/************************************************************************ + * * + * Description: There are seven instances of translation table entry * + * registers. Each register maps a SHub Big Window to a 48-bit * + * address on Crosstalk. * + * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window * + * number) are used to select one of these 7 registers. The Widget * + * number field is then derived from the W_NUM field for synthesizing * + * a Crosstalk packet. The 5 bits of OFFSET are concatenated with * + * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34] * + * are padded with zeros. Although the maximum Crosstalk space * + * addressable by the SHub is thus the lower 16 GBytes per widget * + * (M-mode), only 7/32nds of this * + * space can be accessed. * + * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big * + * Window number) are used to select one of these 7 registers. The * + * Widget number field is then derived from the W_NUM field for * + * synthesizing a Crosstalk packet. The 5 bits of OFFSET are * + * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP * + * field is used as Crosstalk[47], and the remainder of the Crosstalk * + * address bits (Crosstalk[46:34]) are always zero. While the maximum * + * Crosstalk space addressable by the SHub is thus the lower * + * 8-GBytes per widget (N-mode), only 7/32nds * + * of this space can be accessed.
* + * * + ************************************************************************/ + +typedef union ii_itte3_u { + u64 ii_itte3_regval; + struct { + u64 i_offset:5; + u64 i_rsvd_1:3; + u64 i_w_num:4; + u64 i_iosp:1; + u64 i_rsvd:51; + } ii_itte3_fld_s; +} ii_itte3_u_t; + +/************************************************************************ + * * + * Description: There are seven instances of translation table entry * + * registers. Each register maps a SHub Big Window to a 48-bit * + * address on Crosstalk. * + * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window * + * number) are used to select one of these 7 registers. The Widget * + * number field is then derived from the W_NUM field for synthesizing * + * a Crosstalk packet. The 5 bits of OFFSET are concatenated with * + * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34] * + * are padded with zeros. Although the maximum Crosstalk space * + * addressable by the SHub is thus the lower 16 GBytes per widget * + * (M-mode), only 7/32nds of this * + * space can be accessed. * + * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big * + * Window number) are used to select one of these 7 registers. The * + * Widget number field is then derived from the W_NUM field for * + * synthesizing a Crosstalk packet. The 5 bits of OFFSET are * + * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP * + * field is used as Crosstalk[47], and the remainder of the Crosstalk * + * address bits (Crosstalk[46:34]) are always zero. While the maximum * + * Crosstalk space addressable by the SHub is thus the lower * + * 8-GBytes per widget (N-mode), only 7/32nds * + * of this space can be accessed.
* + * * + ************************************************************************/ + +typedef union ii_itte4_u { + u64 ii_itte4_regval; + struct { + u64 i_offset:5; + u64 i_rsvd_1:3; + u64 i_w_num:4; + u64 i_iosp:1; + u64 i_rsvd:51; + } ii_itte4_fld_s; +} ii_itte4_u_t; + +/************************************************************************ + * * + * Description: There are seven instances of translation table entry * + * registers. Each register maps a SHub Big Window to a 48-bit * + * address on Crosstalk. * + * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window * + * number) are used to select one of these 7 registers. The Widget * + * number field is then derived from the W_NUM field for synthesizing * + * a Crosstalk packet. The 5 bits of OFFSET are concatenated with * + * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34] * + * are padded with zeros. Although the maximum Crosstalk space * + * addressable by the SHub is thus the lower 16 GBytes per widget * + * (M-mode), only 7/32nds of this * + * space can be accessed. * + * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big * + * Window number) are used to select one of these 7 registers. The * + * Widget number field is then derived from the W_NUM field for * + * synthesizing a Crosstalk packet. The 5 bits of OFFSET are * + * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP * + * field is used as Crosstalk[47], and the remainder of the Crosstalk * + * address bits (Crosstalk[46:34]) are always zero. While the maximum * + * Crosstalk space addressable by the SHub is thus the lower * + * 8-GBytes per widget (N-mode), only 7/32nds * + * of this space can be accessed.
* + * * + ************************************************************************/ + +typedef union ii_itte5_u { + u64 ii_itte5_regval; + struct { + u64 i_offset:5; + u64 i_rsvd_1:3; + u64 i_w_num:4; + u64 i_iosp:1; + u64 i_rsvd:51; + } ii_itte5_fld_s; +} ii_itte5_u_t; + +/************************************************************************ + * * + * Description: There are seven instances of translation table entry * + * registers. Each register maps a SHub Big Window to a 48-bit * + * address on Crosstalk. * + * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window * + * number) are used to select one of these 7 registers. The Widget * + * number field is then derived from the W_NUM field for synthesizing * + * a Crosstalk packet. The 5 bits of OFFSET are concatenated with * + * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34] * + * are padded with zeros. Although the maximum Crosstalk space * + * addressable by the SHub is thus the lower 16 GBytes per widget * + * (M-mode), only 7/32nds of this * + * space can be accessed. * + * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big * + * Window number) are used to select one of these 7 registers. The * + * Widget number field is then derived from the W_NUM field for * + * synthesizing a Crosstalk packet. The 5 bits of OFFSET are * + * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP * + * field is used as Crosstalk[47], and the remainder of the Crosstalk * + * address bits (Crosstalk[46:34]) are always zero. While the maximum * + * Crosstalk space addressable by the SHub is thus the lower * + * 8-GBytes per widget (N-mode), only 7/32nds * + * of this space can be accessed.
* + * * + ************************************************************************/ + +typedef union ii_itte6_u { + u64 ii_itte6_regval; + struct { + u64 i_offset:5; + u64 i_rsvd_1:3; + u64 i_w_num:4; + u64 i_iosp:1; + u64 i_rsvd:51; + } ii_itte6_fld_s; +} ii_itte6_u_t; + +/************************************************************************ + * * + * Description: There are seven instances of translation table entry * + * registers. Each register maps a SHub Big Window to a 48-bit * + * address on Crosstalk. * + * For M-mode (128 nodes, 8 GBytes/node), SysAD[31:29] (Big Window * + * number) are used to select one of these 7 registers. The Widget * + * number field is then derived from the W_NUM field for synthesizing * + * a Crosstalk packet. The 5 bits of OFFSET are concatenated with * + * SysAD[28:0] to form Crosstalk[33:0]. The upper Crosstalk[47:34] * + * are padded with zeros. Although the maximum Crosstalk space * + * addressable by the SHub is thus the lower 16 GBytes per widget * + * (M-mode), only 7/32nds of this * + * space can be accessed. * + * For the N-mode (256 nodes, 4 GBytes/node), SysAD[30:28] (Big * + * Window number) are used to select one of these 7 registers. The * + * Widget number field is then derived from the W_NUM field for * + * synthesizing a Crosstalk packet. The 5 bits of OFFSET are * + * concatenated with SysAD[27:0] to form Crosstalk[33:0]. The IOSP * + * field is used as Crosstalk[47], and the remainder of the Crosstalk * + * address bits (Crosstalk[46:34]) are always zero. While the maximum * + * Crosstalk space addressable by the SHub is thus the lower * + * 8-GBytes per widget (N-mode), only 7/32nds * + * of this space can be accessed.
* + * * + ************************************************************************/ + +typedef union ii_itte7_u { + u64 ii_itte7_regval; + struct { + u64 i_offset:5; + u64 i_rsvd_1:3; + u64 i_w_num:4; + u64 i_iosp:1; + u64 i_rsvd:51; + } ii_itte7_fld_s; +} ii_itte7_u_t; + +/************************************************************************ + * * + * Description: There are 9 instances of this register, one per * + * actual widget in this implementation of SHub and Crossbow. * + * Note: Crossbow only has ports for Widgets 8 through F, widget 0 * + * refers to Crossbow's internal space. * + * This register contains the state elements per widget that are * + * necessary to manage the PIO flow control on Crosstalk and on the * + * Router Network. See the PIO Flow Control chapter for a complete * + * description of this register. * + * The SPUR_WR bit requires some explanation. When this register is * + * written, the new value of the C field is captured in an internal * + * register so the hardware can remember what the programmer wrote * + * into the credit counter. The SPUR_WR bit sets whenever the C field * + * increments above this stored value, which indicates that there * + * have been more responses received than requests sent. The SPUR_WR * + * bit cannot be cleared until a value is written to the IPRBx * + * register; the write will correct the C field and capture its new * + * value in the internal register. Even if IECLR[E_PRB_x] is set, the * + * SPUR_WR bit will persist if IPRBx hasn't yet been written. * + *
* + * * + ************************************************************************/ + +typedef union ii_iprb0_u { + u64 ii_iprb0_regval; + struct { + u64 i_c:8; + u64 i_na:14; + u64 i_rsvd_2:2; + u64 i_nb:14; + u64 i_rsvd_1:2; + u64 i_m:2; + u64 i_f:1; + u64 i_of_cnt:5; + u64 i_error:1; + u64 i_rd_to:1; + u64 i_spur_wr:1; + u64 i_spur_rd:1; + u64 i_rsvd:11; + u64 i_mult_err:1; + } ii_iprb0_fld_s; +} ii_iprb0_u_t; + +/************************************************************************ + * * + * Description: There are 9 instances of this register, one per * + * actual widget in this implementation of SHub and Crossbow. * + * Note: Crossbow only has ports for Widgets 8 through F, widget 0 * + * refers to Crossbow's internal space. * + * This register contains the state elements per widget that are * + * necessary to manage the PIO flow control on Crosstalk and on the * + * Router Network. See the PIO Flow Control chapter for a complete * + * description of this register. * + * The SPUR_WR bit requires some explanation. When this register is * + * written, the new value of the C field is captured in an internal * + * register so the hardware can remember what the programmer wrote * + * into the credit counter. The SPUR_WR bit sets whenever the C field * + * increments above this stored value, which indicates that there * + * have been more responses received than requests sent. The SPUR_WR * + * bit cannot be cleared until a value is written to the IPRBx * + * register; the write will correct the C field and capture its new * + * value in the internal register. Even if IECLR[E_PRB_x] is set, the * + * SPUR_WR bit will persist if IPRBx hasn't yet been written. * + *
* + * * + ************************************************************************/ + +typedef union ii_iprb8_u { + u64 ii_iprb8_regval; + struct { + u64 i_c:8; + u64 i_na:14; + u64 i_rsvd_2:2; + u64 i_nb:14; + u64 i_rsvd_1:2; + u64 i_m:2; + u64 i_f:1; + u64 i_of_cnt:5; + u64 i_error:1; + u64 i_rd_to:1; + u64 i_spur_wr:1; + u64 i_spur_rd:1; + u64 i_rsvd:11; + u64 i_mult_err:1; + } ii_iprb8_fld_s; +} ii_iprb8_u_t; + +/************************************************************************ + * * + * Description: There are 9 instances of this register, one per * + * actual widget in this implementation of SHub and Crossbow. * + * Note: Crossbow only has ports for Widgets 8 through F, widget 0 * + * refers to Crossbow's internal space. * + * This register contains the state elements per widget that are * + * necessary to manage the PIO flow control on Crosstalk and on the * + * Router Network. See the PIO Flow Control chapter for a complete * + * description of this register. * + * The SPUR_WR bit requires some explanation. When this register is * + * written, the new value of the C field is captured in an internal * + * register so the hardware can remember what the programmer wrote * + * into the credit counter. The SPUR_WR bit sets whenever the C field * + * increments above this stored value, which indicates that there * + * have been more responses received than requests sent. The SPUR_WR * + * bit cannot be cleared until a value is written to the IPRBx * + * register; the write will correct the C field and capture its new * + * value in the internal register. Even if IECLR[E_PRB_x] is set, the * + * SPUR_WR bit will persist if IPRBx hasn't yet been written. * + *
* + * * + ************************************************************************/ + +typedef union ii_iprb9_u { + u64 ii_iprb9_regval; + struct { + u64 i_c:8; + u64 i_na:14; + u64 i_rsvd_2:2; + u64 i_nb:14; + u64 i_rsvd_1:2; + u64 i_m:2; + u64 i_f:1; + u64 i_of_cnt:5; + u64 i_error:1; + u64 i_rd_to:1; + u64 i_spur_wr:1; + u64 i_spur_rd:1; + u64 i_rsvd:11; + u64 i_mult_err:1; + } ii_iprb9_fld_s; +} ii_iprb9_u_t; + +/************************************************************************ + * * + * Description: There are 9 instances of this register, one per * + * actual widget in this implementation of SHub and Crossbow. * + * Note: Crossbow only has ports for Widgets 8 through F, widget 0 * + * refers to Crossbow's internal space. * + * This register contains the state elements per widget that are * + * necessary to manage the PIO flow control on Crosstalk and on the * + * Router Network. See the PIO Flow Control chapter for a complete * + * description of this register. * + * The SPUR_WR bit requires some explanation. When this register is * + * written, the new value of the C field is captured in an internal * + * register so the hardware can remember what the programmer wrote * + * into the credit counter. The SPUR_WR bit sets whenever the C field * + * increments above this stored value, which indicates that there * + * have been more responses received than requests sent. The SPUR_WR * + * bit cannot be cleared until a value is written to the IPRBx * + * register; the write will correct the C field and capture its new * + * value in the internal register. Even if IECLR[E_PRB_x] is set, the * + * SPUR_WR bit will persist if IPRBx hasn't yet been written.
* + * * + * * + ************************************************************************/ + +typedef union ii_iprba_u { + u64 ii_iprba_regval; + struct { + u64 i_c:8; + u64 i_na:14; + u64 i_rsvd_2:2; + u64 i_nb:14; + u64 i_rsvd_1:2; + u64 i_m:2; + u64 i_f:1; + u64 i_of_cnt:5; + u64 i_error:1; + u64 i_rd_to:1; + u64 i_spur_wr:1; + u64 i_spur_rd:1; + u64 i_rsvd:11; + u64 i_mult_err:1; + } ii_iprba_fld_s; +} ii_iprba_u_t; + +/************************************************************************ + * * + * Description: There are 9 instances of this register, one per * + * actual widget in this implementation of SHub and Crossbow. * + * Note: Crossbow only has ports for Widgets 8 through F, widget 0 * + * refers to Crossbow's internal space. * + * This register contains the state elements per widget that are * + * necessary to manage the PIO flow control on Crosstalk and on the * + * Router Network. See the PIO Flow Control chapter for a complete * + * description of this register. * + * The SPUR_WR bit requires some explanation. When this register is * + * written, the new value of the C field is captured in an internal * + * register so the hardware can remember what the programmer wrote * + * into the credit counter. The SPUR_WR bit sets whenever the C field * + * increments above this stored value, which indicates that there * + * have been more responses received than requests sent. The SPUR_WR * + * bit cannot be cleared until a value is written to the IPRBx * + * register; the write will correct the C field and capture its new * + * value in the internal register. Even if IECLR[E_PRB_x] is set, the * + * SPUR_WR bit will persist if IPRBx hasn't yet been written. * + *
* + * * + ************************************************************************/ + +typedef union ii_iprbb_u { + u64 ii_iprbb_regval; + struct { + u64 i_c:8; + u64 i_na:14; + u64 i_rsvd_2:2; + u64 i_nb:14; + u64 i_rsvd_1:2; + u64 i_m:2; + u64 i_f:1; + u64 i_of_cnt:5; + u64 i_error:1; + u64 i_rd_to:1; + u64 i_spur_wr:1; + u64 i_spur_rd:1; + u64 i_rsvd:11; + u64 i_mult_err:1; + } ii_iprbb_fld_s; +} ii_iprbb_u_t; + +/************************************************************************ + * * + * Description: There are 9 instances of this register, one per * + * actual widget in this implementation of SHub and Crossbow. * + * Note: Crossbow only has ports for Widgets 8 through F, widget 0 * + * refers to Crossbow's internal space. * + * This register contains the state elements per widget that are * + * necessary to manage the PIO flow control on Crosstalk and on the * + * Router Network. See the PIO Flow Control chapter for a complete * + * description of this register. * + * The SPUR_WR bit requires some explanation. When this register is * + * written, the new value of the C field is captured in an internal * + * register so the hardware can remember what the programmer wrote * + * into the credit counter. The SPUR_WR bit sets whenever the C field * + * increments above this stored value, which indicates that there * + * have been more responses received than requests sent. The SPUR_WR * + * bit cannot be cleared until a value is written to the IPRBx * + * register; the write will correct the C field and capture its new * + * value in the internal register. Even if IECLR[E_PRB_x] is set, the * + * SPUR_WR bit will persist if IPRBx hasn't yet been written. * + *
* + * * + ************************************************************************/ + +typedef union ii_iprbc_u { + u64 ii_iprbc_regval; + struct { + u64 i_c:8; + u64 i_na:14; + u64 i_rsvd_2:2; + u64 i_nb:14; + u64 i_rsvd_1:2; + u64 i_m:2; + u64 i_f:1; + u64 i_of_cnt:5; + u64 i_error:1; + u64 i_rd_to:1; + u64 i_spur_wr:1; + u64 i_spur_rd:1; + u64 i_rsvd:11; + u64 i_mult_err:1; + } ii_iprbc_fld_s; +} ii_iprbc_u_t; + +/************************************************************************ + * * + * Description: There are 9 instances of this register, one per * + * actual widget in this implementation of SHub and Crossbow. * + * Note: Crossbow only has ports for Widgets 8 through F, widget 0 * + * refers to Crossbow's internal space. * + * This register contains the state elements per widget that are * + * necessary to manage the PIO flow control on Crosstalk and on the * + * Router Network. See the PIO Flow Control chapter for a complete * + * description of this register * + * The SPUR_WR bit requires some explanation. When this register is * + * written, the new value of the C field is captured in an internal * + * register so the hardware can remember what the programmer wrote * + * into the credit counter. The SPUR_WR bit sets whenever the C field * + * increments above this stored value, which indicates that there * + * have been more responses received than requests sent. The SPUR_WR * + * bit cannot be cleared until a value is written to the IPRBx * + * register; the write will correct the C field and capture its new * + * value in the internal register. Even if IECLR[E_PRB_x] is set, the * + * SPUR_WR bit will persist if IPRBx hasn't yet been written. * + * . 
* + * * + ************************************************************************/ + +typedef union ii_iprbd_u { + u64 ii_iprbd_regval; + struct { + u64 i_c:8; + u64 i_na:14; + u64 i_rsvd_2:2; + u64 i_nb:14; + u64 i_rsvd_1:2; + u64 i_m:2; + u64 i_f:1; + u64 i_of_cnt:5; + u64 i_error:1; + u64 i_rd_to:1; + u64 i_spur_wr:1; + u64 i_spur_rd:1; + u64 i_rsvd:11; + u64 i_mult_err:1; + } ii_iprbd_fld_s; +} ii_iprbd_u_t; + +/************************************************************************ + * * + * Description: There are 9 instances of this register, one per * + * actual widget in this implementation of SHub and Crossbow. * + * Note: Crossbow only has ports for Widgets 8 through F, widget 0 * + * refers to Crossbow's internal space. * + * This register contains the state elements per widget that are * + * necessary to manage the PIO flow control on Crosstalk and on the * + * Router Network. See the PIO Flow Control chapter for a complete * + * description of this register * + * The SPUR_WR bit requires some explanation. When this register is * + * written, the new value of the C field is captured in an internal * + * register so the hardware can remember what the programmer wrote * + * into the credit counter. The SPUR_WR bit sets whenever the C field * + * increments above this stored value, which indicates that there * + * have been more responses received than requests sent. The SPUR_WR * + * bit cannot be cleared until a value is written to the IPRBx * + * register; the write will correct the C field and capture its new * + * value in the internal register. Even if IECLR[E_PRB_x] is set, the * + * SPUR_WR bit will persist if IPRBx hasn't yet been written. * + * . 
* + * * + ************************************************************************/ + +typedef union ii_iprbe_u { + u64 ii_iprbe_regval; + struct { + u64 i_c:8; + u64 i_na:14; + u64 i_rsvd_2:2; + u64 i_nb:14; + u64 i_rsvd_1:2; + u64 i_m:2; + u64 i_f:1; + u64 i_of_cnt:5; + u64 i_error:1; + u64 i_rd_to:1; + u64 i_spur_wr:1; + u64 i_spur_rd:1; + u64 i_rsvd:11; + u64 i_mult_err:1; + } ii_iprbe_fld_s; +} ii_iprbe_u_t; + +/************************************************************************ + * * + * Description: There are 9 instances of this register, one per * + * actual widget in this implementation of Shub and Crossbow. * + * Note: Crossbow only has ports for Widgets 8 through F, widget 0 * + * refers to Crossbow's internal space. * + * This register contains the state elements per widget that are * + * necessary to manage the PIO flow control on Crosstalk and on the * + * Router Network. See the PIO Flow Control chapter for a complete * + * description of this register * + * The SPUR_WR bit requires some explanation. When this register is * + * written, the new value of the C field is captured in an internal * + * register so the hardware can remember what the programmer wrote * + * into the credit counter. The SPUR_WR bit sets whenever the C field * + * increments above this stored value, which indicates that there * + * have been more responses received than requests sent. The SPUR_WR * + * bit cannot be cleared until a value is written to the IPRBx * + * register; the write will correct the C field and capture its new * + * value in the internal register. Even if IECLR[E_PRB_x] is set, the * + * SPUR_WR bit will persist if IPRBx hasn't yet been written. * + * . 
* + * * + ************************************************************************/ + +typedef union ii_iprbf_u { + u64 ii_iprbf_regval; + struct { + u64 i_c:8; + u64 i_na:14; + u64 i_rsvd_2:2; + u64 i_nb:14; + u64 i_rsvd_1:2; + u64 i_m:2; + u64 i_f:1; + u64 i_of_cnt:5; + u64 i_error:1; + u64 i_rd_to:1; + u64 i_spur_wr:1; + u64 i_spur_rd:1; + u64 i_rsvd:11; + u64 i_mult_err:1; + } ii_iprbe_fld_s; +} ii_iprbf_u_t; + +/************************************************************************ + * * + * This register specifies the timeout value to use for monitoring * + * Crosstalk credits which are used outbound to Crosstalk. An * + * internal counter called the Crosstalk Credit Timeout Counter * + * increments every 128 II clocks. The counter starts counting * + * anytime the credit count drops below a threshold, and resets to * + * zero (stops counting) anytime the credit count is at or above the * + * threshold. The threshold is 1 credit in direct connect mode and 2 * + * in Crossbow connect mode. When the internal Crosstalk Credit * + * Timeout Counter reaches the value programmed in this register, a * + * Crosstalk Credit Timeout has occurred. The internal counter is not * + * readable from software, and stops counting at its maximum value, * + * so it cannot cause more than one interrupt. * + * * + ************************************************************************/ + +typedef union ii_ixcc_u { + u64 ii_ixcc_regval; + struct { + u64 i_time_out:26; + u64 i_rsvd:38; + } ii_ixcc_fld_s; +} ii_ixcc_u_t; + +/************************************************************************ + * * + * Description: This register qualifies all the PIO and DMA * + * operations launched from widget 0 towards the SHub. In * + * addition, it also qualifies accesses by the BTE streams. * + * The bits in each field of this register are cleared by the SHub * + * upon detection of an error which requires widget 0 or the BTE * + * streams to be terminated. 
Whether or not widget x has access * + * rights to this SHub is determined by an AND of the device * + * enable bit in the appropriate field of this register and bit 0 in * + * the Wx_IAC field. The bits in this field are set by writing a 1 to * + * them. Incoming replies from Crosstalk are not subject to this * + * access control mechanism. * + * * + ************************************************************************/ + +typedef union ii_imem_u { + u64 ii_imem_regval; + struct { + u64 i_w0_esd:1; + u64 i_rsvd_3:3; + u64 i_b0_esd:1; + u64 i_rsvd_2:3; + u64 i_b1_esd:1; + u64 i_rsvd_1:3; + u64 i_clr_precise:1; + u64 i_rsvd:51; + } ii_imem_fld_s; +} ii_imem_u_t; + +/************************************************************************ + * * + * Description: This register specifies the timeout value to use for * + * monitoring Crosstalk tail flits coming into the Shub in the * + * TAIL_TO field. An internal counter associated with this register * + * is incremented every 128 II internal clocks (7 bits). The counter * + * starts counting anytime a header micropacket is received and stops * + * counting (and resets to zero) any time a micropacket with a Tail * + * bit is received. Once the counter reaches the threshold value * + * programmed in this register, it generates an interrupt to the * + * processor that is programmed into the IIDSR. The counter saturates * + * (does not roll over) at its maximum value, so it cannot cause * + * another interrupt until after it is cleared. * + * The register also contains the Read Response Timeout values. The * + * Prescalar is 23 bits, and counts II clocks. An internal counter * + * increments on every II clock and when it reaches the value in the * + * Prescalar field, all IPRTE registers with their valid bits set * + * have their Read Response timers bumped. 
Whenever any of them match * + * the value in the RRSP_TO field, a Read Response Timeout has * + * occurred, and error handling occurs as described in the Error * + * Handling section of this document. * + * * + ************************************************************************/ + +typedef union ii_ixtt_u { + u64 ii_ixtt_regval; + struct { + u64 i_tail_to:26; + u64 i_rsvd_1:6; + u64 i_rrsp_ps:23; + u64 i_rrsp_to:5; + u64 i_rsvd:4; + } ii_ixtt_fld_s; +} ii_ixtt_u_t; + +/************************************************************************ + * * + * Writing a 1 to the fields of this register clears the appropriate * + * error bits in other areas of SHub. Note that when the * + * E_PRB_x bits are used to clear error bits in PRB registers, * + * SPUR_RD and SPUR_WR may persist, because they require additional * + * action to clear them. See the IPRBx and IXSS Register * + * specifications. * + * * + ************************************************************************/ + +typedef union ii_ieclr_u { + u64 ii_ieclr_regval; + struct { + u64 i_e_prb_0:1; + u64 i_rsvd:7; + u64 i_e_prb_8:1; + u64 i_e_prb_9:1; + u64 i_e_prb_a:1; + u64 i_e_prb_b:1; + u64 i_e_prb_c:1; + u64 i_e_prb_d:1; + u64 i_e_prb_e:1; + u64 i_e_prb_f:1; + u64 i_e_crazy:1; + u64 i_e_bte_0:1; + u64 i_e_bte_1:1; + u64 i_reserved_1:10; + u64 i_spur_rd_hdr:1; + u64 i_cam_intr_to:1; + u64 i_cam_overflow:1; + u64 i_cam_read_miss:1; + u64 i_ioq_rep_underflow:1; + u64 i_ioq_req_underflow:1; + u64 i_ioq_rep_overflow:1; + u64 i_ioq_req_overflow:1; + u64 i_iiq_rep_overflow:1; + u64 i_iiq_req_overflow:1; + u64 i_ii_xn_rep_cred_overflow:1; + u64 i_ii_xn_req_cred_overflow:1; + u64 i_ii_xn_invalid_cmd:1; + u64 i_xn_ii_invalid_cmd:1; + u64 i_reserved_2:21; + } ii_ieclr_fld_s; +} ii_ieclr_u_t; + +/************************************************************************ + * * + * This register controls both BTEs. SOFT_RESET is intended for * + * recovery after an error. 
COUNT controls the total number of CRBs * + * that both BTEs (combined) can use, which affects total BTE * + * bandwidth. * + * * + ************************************************************************/ + +typedef union ii_ibcr_u { + u64 ii_ibcr_regval; + struct { + u64 i_count:4; + u64 i_rsvd_1:4; + u64 i_soft_reset:1; + u64 i_rsvd:55; + } ii_ibcr_fld_s; +} ii_ibcr_u_t; + +/************************************************************************ + * * + * This register contains the header of a spurious read response * + * received from Crosstalk. A spurious read response is defined as a * + * read response received by II from a widget for which (1) the SIDN * + * has a value between 1 and 7, inclusive (II never sends requests to * + * these widgets), (2) there is no valid IPRTE register which * + * corresponds to the TNUM, or (3) the widget indicated in SIDN is * + * not the same as the widget recorded in the IPRTE register * + * referenced by the TNUM. If this condition is true, and if the * + * IXSS[VALID] bit is clear, then the header of the spurious read * + * response is captured in IXSM and IXSS, and IXSS[VALID] is set. The * + * errant header is thereby captured, and no further spurious read * + * responses are captured until IXSS[VALID] is cleared by setting the * + * appropriate bit in IECLR. Every time a spurious read response is * + * detected, the SPUR_RD bit of the PRB corresponding to the incoming * + * message's SIDN field is set. This always happens, regardless of * + * whether a header is captured. The programmer should check * + * IXSM[SIDN] to determine which widget sent the spurious response, * + * because there may be more than one SPUR_RD bit set in the PRB * + * registers. The widget indicated by IXSM[SIDN] sent the first * + * spurious read response to be received since the last time * + * IXSS[VALID] was clear. The SPUR_RD bit of the corresponding PRB * + * will be set.
Any SPUR_RD bits in any other PRB registers indicate * + * spurious messages from other widgets which were detected after the * + * header was captured. * + * * + ************************************************************************/ + +typedef union ii_ixsm_u { + u64 ii_ixsm_regval; + struct { + u64 i_byte_en:32; + u64 i_reserved:1; + u64 i_tag:3; + u64 i_alt_pactyp:4; + u64 i_bo:1; + u64 i_error:1; + u64 i_vbpm:1; + u64 i_gbr:1; + u64 i_ds:2; + u64 i_ct:1; + u64 i_tnum:5; + u64 i_pactyp:4; + u64 i_sidn:4; + u64 i_didn:4; + } ii_ixsm_fld_s; +} ii_ixsm_u_t; + +/************************************************************************ + * * + * This register contains the sideband bits of a spurious read * + * response received from Crosstalk. * + * * + ************************************************************************/ + +typedef union ii_ixss_u { + u64 ii_ixss_regval; + struct { + u64 i_sideband:8; + u64 i_rsvd:55; + u64 i_valid:1; + } ii_ixss_fld_s; +} ii_ixss_u_t; + +/************************************************************************ + * * + * This register enables software to access the II LLP's test port. * + * Refer to the LLP 2.5 documentation for an explanation of the test * + * port. Software can write to this register to program the values * + * for the control fields (TestErrCapture, TestClear, TestFlit, * + * TestMask and TestSeed). Similarly, software can read from this * + * register to obtain the values of the test port's status outputs * + * (TestCBerr, TestValid and TestData).
* + * * + ************************************************************************/ + +typedef union ii_ilct_u { + u64 ii_ilct_regval; + struct { + u64 i_test_seed:20; + u64 i_test_mask:8; + u64 i_test_data:20; + u64 i_test_valid:1; + u64 i_test_cberr:1; + u64 i_test_flit:3; + u64 i_test_clear:1; + u64 i_test_err_capture:1; + u64 i_rsvd:9; + } ii_ilct_fld_s; +} ii_ilct_u_t; + +/************************************************************************ + * * + * If the II detects an illegal incoming Duplonet packet (request or * + * reply) when VALID==0 in the IIEPH1 register, then it saves the * + * contents of the packet's header flit in the IIEPH1 and IIEPH2 * + * registers, sets the VALID bit in IIEPH1, clears the OVERRUN bit, * + * and assigns a value to the ERR_TYPE field which indicates the * + * specific nature of the error. The II recognizes four different * + * types of errors: short request packets (ERR_TYPE==2), short reply * + * packets (ERR_TYPE==3), long request packets (ERR_TYPE==4) and long * + * reply packets (ERR_TYPE==5). The encodings for these types of * + * errors were chosen to be consistent with the same types of errors * + * indicated by the ERR_TYPE field in the LB_ERROR_HDR1 register (in * + * the LB unit). If the II detects an illegal incoming Duplonet * + * packet when VALID==1 in the IIEPH1 register, then it merely sets * + * the OVERRUN bit to indicate that a subsequent error has happened, * + * and does nothing further. 
* + * * + ************************************************************************/ + +typedef union ii_iieph1_u { + u64 ii_iieph1_regval; + struct { + u64 i_command:7; + u64 i_rsvd_5:1; + u64 i_suppl:14; + u64 i_rsvd_4:1; + u64 i_source:14; + u64 i_rsvd_3:1; + u64 i_err_type:4; + u64 i_rsvd_2:4; + u64 i_overrun:1; + u64 i_rsvd_1:3; + u64 i_valid:1; + u64 i_rsvd:13; + } ii_iieph1_fld_s; +} ii_iieph1_u_t; + +/************************************************************************ + * * + * This register holds the Address field from the header flit of an * + * incoming erroneous Duplonet packet, along with the tail bit which * + * accompanied this header flit. This register is essentially an * + * extension of IIEPH1. Two registers were necessary because the 64 * + * bits available in only a single register were insufficient to * + * capture the entire header flit of an erroneous packet. * + * * + ************************************************************************/ + +typedef union ii_iieph2_u { + u64 ii_iieph2_regval; + struct { + u64 i_rsvd_0:3; + u64 i_address:47; + u64 i_rsvd_1:10; + u64 i_tail:1; + u64 i_rsvd:3; + } ii_iieph2_fld_s; +} ii_iieph2_u_t; + +/******************************/ + +/************************************************************************ + * * + * This register's value is a bit vector that guards access from SXBs * + * to local registers within the II as well as to external Crosstalk * + * widgets * + * * + ************************************************************************/ + +typedef union ii_islapr_u { + u64 ii_islapr_regval; + struct { + u64 i_region:64; + } ii_islapr_fld_s; +} ii_islapr_u_t; + +/************************************************************************ + * * + * A write to this register of the 56-bit value "Pup+Bun" will cause * + * the bit in the ISLAPR register corresponding to the region of the * + * requestor to be set (access allowed). 
* + * * + ************************************************************************/ + +typedef union ii_islapo_u { + u64 ii_islapo_regval; + struct { + u64 i_io_sbx_ovrride:56; + u64 i_rsvd:8; + } ii_islapo_fld_s; +} ii_islapo_u_t; + +/************************************************************************ + * * + * Determines how long the wrapper will wait after an interrupt is * + * initially issued from the II before it times out the outstanding * + * interrupt and drops it from the interrupt queue. * + * * + ************************************************************************/ + +typedef union ii_iwi_u { + u64 ii_iwi_regval; + struct { + u64 i_prescale:24; + u64 i_rsvd:8; + u64 i_timeout:8; + u64 i_rsvd1:8; + u64 i_intrpt_retry_period:8; + u64 i_rsvd2:8; + } ii_iwi_fld_s; +} ii_iwi_u_t; + +/************************************************************************ + * * + * Log errors which have occurred in the II wrapper. The errors are * + * cleared by writing to the IECLR register. * + * * + ************************************************************************/ + +typedef union ii_iwel_u { + u64 ii_iwel_regval; + struct { + u64 i_intr_timed_out:1; + u64 i_rsvd:7; + u64 i_cam_overflow:1; + u64 i_cam_read_miss:1; + u64 i_rsvd1:2; + u64 i_ioq_rep_underflow:1; + u64 i_ioq_req_underflow:1; + u64 i_ioq_rep_overflow:1; + u64 i_ioq_req_overflow:1; + u64 i_iiq_rep_overflow:1; + u64 i_iiq_req_overflow:1; + u64 i_rsvd2:6; + u64 i_ii_xn_rep_cred_over_under:1; + u64 i_ii_xn_req_cred_over_under:1; + u64 i_rsvd3:6; + u64 i_ii_xn_invalid_cmd:1; + u64 i_xn_ii_invalid_cmd:1; + u64 i_rsvd4:30; + } ii_iwel_fld_s; +} ii_iwel_u_t; + +/************************************************************************ + * * + * Controls the II wrapper.
* + * * + ************************************************************************/ + +typedef union ii_iwc_u { + u64 ii_iwc_regval; + struct { + u64 i_dma_byte_swap:1; + u64 i_rsvd:3; + u64 i_cam_read_lines_reset:1; + u64 i_rsvd1:3; + u64 i_ii_xn_cred_over_under_log:1; + u64 i_rsvd2:19; + u64 i_xn_rep_iq_depth:5; + u64 i_rsvd3:3; + u64 i_xn_req_iq_depth:5; + u64 i_rsvd4:3; + u64 i_iiq_depth:6; + u64 i_rsvd5:12; + u64 i_force_rep_cred:1; + u64 i_force_req_cred:1; + } ii_iwc_fld_s; +} ii_iwc_u_t; + +/************************************************************************ + * * + * Status in the II wrapper. * + * * + ************************************************************************/ + +typedef union ii_iws_u { + u64 ii_iws_regval; + struct { + u64 i_xn_rep_iq_credits:5; + u64 i_rsvd:3; + u64 i_xn_req_iq_credits:5; + u64 i_rsvd1:51; + } ii_iws_fld_s; +} ii_iws_u_t; + +/************************************************************************ + * * + * Masks errors in the IWEL register. * + * * + ************************************************************************/ + +typedef union ii_iweim_u { + u64 ii_iweim_regval; + struct { + u64 i_intr_timed_out:1; + u64 i_rsvd:7; + u64 i_cam_overflow:1; + u64 i_cam_read_miss:1; + u64 i_rsvd1:2; + u64 i_ioq_rep_underflow:1; + u64 i_ioq_req_underflow:1; + u64 i_ioq_rep_overflow:1; + u64 i_ioq_req_overflow:1; + u64 i_iiq_rep_overflow:1; + u64 i_iiq_req_overflow:1; + u64 i_rsvd2:6; + u64 i_ii_xn_rep_cred_overflow:1; + u64 i_ii_xn_req_cred_overflow:1; + u64 i_rsvd3:6; + u64 i_ii_xn_invalid_cmd:1; + u64 i_xn_ii_invalid_cmd:1; + u64 i_rsvd4:30; + } ii_iweim_fld_s; +} ii_iweim_u_t; + +/************************************************************************ + * * + * A write to this register causes a particular field in the * + * corresponding widget's PRB entry to be adjusted up or down by 1. * + * This counter should be used when recovering from error and reset * + * conditions. 
Note that software would be capable of causing * + * inadvertent overflow or underflow of these counters. * + * * + ************************************************************************/ + +typedef union ii_ipca_u { + u64 ii_ipca_regval; + struct { + u64 i_wid:4; + u64 i_adjust:1; + u64 i_rsvd_1:3; + u64 i_field:2; + u64 i_rsvd:54; + } ii_ipca_fld_s; +} ii_ipca_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte0a_u { + u64 ii_iprte0a_regval; + struct { + u64 i_rsvd_1:54; + u64 i_widget:4; + u64 i_to_cnt:5; + u64 i_vld:1; + } ii_iprte0a_fld_s; +} ii_iprte0a_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte1a_u { + u64 ii_iprte1a_regval; + struct { + u64 i_rsvd_1:54; + u64 i_widget:4; + u64 i_to_cnt:5; + u64 i_vld:1; + } ii_iprte1a_fld_s; +} ii_iprte1a_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. 
The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte2a_u { + u64 ii_iprte2a_regval; + struct { + u64 i_rsvd_1:54; + u64 i_widget:4; + u64 i_to_cnt:5; + u64 i_vld:1; + } ii_iprte2a_fld_s; +} ii_iprte2a_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte3a_u { + u64 ii_iprte3a_regval; + struct { + u64 i_rsvd_1:54; + u64 i_widget:4; + u64 i_to_cnt:5; + u64 i_vld:1; + } ii_iprte3a_fld_s; +} ii_iprte3a_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte4a_u { + u64 ii_iprte4a_regval; + struct { + u64 i_rsvd_1:54; + u64 i_widget:4; + u64 i_to_cnt:5; + u64 i_vld:1; + } ii_iprte4a_fld_s; +} ii_iprte4a_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. 
The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte5a_u { + u64 ii_iprte5a_regval; + struct { + u64 i_rsvd_1:54; + u64 i_widget:4; + u64 i_to_cnt:5; + u64 i_vld:1; + } ii_iprte5a_fld_s; +} ii_iprte5a_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte6a_u { + u64 ii_iprte6a_regval; + struct { + u64 i_rsvd_1:54; + u64 i_widget:4; + u64 i_to_cnt:5; + u64 i_vld:1; + } ii_iprte6a_fld_s; +} ii_iprte6a_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte7a_u { + u64 ii_iprte7a_regval; + struct { + u64 i_rsvd_1:54; + u64 i_widget:4; + u64 i_to_cnt:5; + u64 i_vld:1; + } ii_iprte7a_fld_s, /* bit-field view, named like the ii_iprte0a..6a siblings */ + ii_iprtea7_fld_s; /* original (transposed) member name, kept as an alias of the same storage so existing users still build */ +} ii_iprte7a_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation.
The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte0b_u { + u64 ii_iprte0b_regval; + struct { + u64 i_rsvd_1:3; + u64 i_address:47; + u64 i_init:3; + u64 i_source:11; + } ii_iprte0b_fld_s; +} ii_iprte0b_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte1b_u { + u64 ii_iprte1b_regval; + struct { + u64 i_rsvd_1:3; + u64 i_address:47; + u64 i_init:3; + u64 i_source:11; + } ii_iprte1b_fld_s; +} ii_iprte1b_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte2b_u { + u64 ii_iprte2b_regval; + struct { + u64 i_rsvd_1:3; + u64 i_address:47; + u64 i_init:3; + u64 i_source:11; + } ii_iprte2b_fld_s; +} ii_iprte2b_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. 
The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte3b_u { + u64 ii_iprte3b_regval; + struct { + u64 i_rsvd_1:3; + u64 i_address:47; + u64 i_init:3; + u64 i_source:11; + } ii_iprte3b_fld_s; +} ii_iprte3b_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte4b_u { + u64 ii_iprte4b_regval; + struct { + u64 i_rsvd_1:3; + u64 i_address:47; + u64 i_init:3; + u64 i_source:11; + } ii_iprte4b_fld_s; +} ii_iprte4b_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte5b_u { + u64 ii_iprte5b_regval; + struct { + u64 i_rsvd_1:3; + u64 i_address:47; + u64 i_init:3; + u64 i_source:11; + } ii_iprte5b_fld_s; +} ii_iprte5b_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. 
The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte6b_u { + u64 ii_iprte6b_regval; + struct { + u64 i_rsvd_1:3; + u64 i_address:47; + u64 i_init:3; + u64 i_source:11; + + } ii_iprte6b_fld_s; +} ii_iprte6b_u_t; + +/************************************************************************ + * * + * There are 8 instances of this register. This register contains * + * the information that the II has to remember once it has launched a * + * PIO Read operation. The contents are used to form the correct * + * Router Network packet and direct the Crosstalk reply to the * + * appropriate processor. * + * * + ************************************************************************/ + +typedef union ii_iprte7b_u { + u64 ii_iprte7b_regval; + struct { + u64 i_rsvd_1:3; + u64 i_address:47; + u64 i_init:3; + u64 i_source:11; + } ii_iprte7b_fld_s; +} ii_iprte7b_u_t; + +/************************************************************************ + * * + * Description: SHub II contains a feature which did not exist in * + * the Hub which automatically cleans up after a Read Response * + * timeout, including deallocation of the IPRTE and recovery of IBuf * + * space. The inclusion of this register in SHub is for backward * + * compatibility * + * A write to this register causes an entry from the table of * + * outstanding PIO Read Requests to be freed and returned to the * + * stack of free entries. This register is used in handling the * + * timeout errors that result in a PIO Reply never returning from * + * Crosstalk. * + * Note that this register does not affect the contents of the IPRTE * + * registers. The Valid bits in those registers have to be * + * specifically turned off by software. 
* + * * + ************************************************************************/ + +typedef union ii_ipdr_u { + u64 ii_ipdr_regval; + struct { + u64 i_te:3; + u64 i_rsvd_1:1; + u64 i_pnd:1; + u64 i_init_rpcnt:1; + u64 i_rsvd:58; + } ii_ipdr_fld_s; +} ii_ipdr_u_t; + +/************************************************************************ + * * + * A write to this register causes a CRB entry to be returned to the * + * queue of free CRBs. The entry should have previously been cleared * + * (mark bit) via backdoor access to the pertinent CRB entry. This * + * register is used in the last step of handling the errors that are * + * captured and marked in CRB entries. Briefly: 1) first error for * + * DMA write from a particular device, and first error for a * + * particular BTE stream, lead to a marked CRB entry, and processor * + * interrupt, 2) software reads the error information captured in the * + * CRB entry, and presumably takes some corrective action, 3) * + * software clears the mark bit, and finally 4) software writes to * + * the ICDR register to return the CRB entry to the list of free CRB * + * entries. * + * * + ************************************************************************/ + +typedef union ii_icdr_u { + u64 ii_icdr_regval; + struct { + u64 i_crb_num:4; + u64 i_pnd:1; + u64 i_rsvd:59; + } ii_icdr_fld_s; +} ii_icdr_u_t; + +/************************************************************************ + * * + * This register provides debug access to two FIFOs inside of II. * + * Both IOQ_MAX* fields of this register contain the instantaneous * + * depth (in units of the number of available entries) of the * + * associated IOQ FIFO. A read of this register will return the * + * number of free entries on each FIFO at the time of the read. So * + * when a FIFO is idle, the associated field contains the maximum * + * depth of the FIFO. 
This register is writable for debug reasons * + * and is intended to be written with the maximum desired FIFO depth * + * while the FIFO is idle. Software must assure that II is idle when * + * this register is written. If there are any active entries in any * + * of these FIFOs when this register is written, the results are * + * undefined. * + * * + ************************************************************************/ + +typedef union ii_ifdr_u { + u64 ii_ifdr_regval; + struct { + u64 i_ioq_max_rq:7; + u64 i_set_ioq_rq:1; + u64 i_ioq_max_rp:7; + u64 i_set_ioq_rp:1; + u64 i_rsvd:48; + } ii_ifdr_fld_s; +} ii_ifdr_u_t; + +/************************************************************************ + * * + * This register allows the II to become sluggish in removing * + * messages from its inbound queue (IIQ). This will cause messages to * + * back up in either virtual channel. Disabling the "molasses" mode * + * subsequently allows the II to be tested under stress. In the * + * sluggish ("Molasses") mode, the localized effects of congestion * + * can be observed. * + * * + ************************************************************************/ + +typedef union ii_iiap_u { + u64 ii_iiap_regval; + struct { + u64 i_rq_mls:6; + u64 i_rsvd_1:2; + u64 i_rp_mls:6; + u64 i_rsvd:50; + } ii_iiap_fld_s; +} ii_iiap_u_t; + +/************************************************************************ + * * + * This register allows several parameters of CRB operation to be * + * set. Note that writing to this register can have catastrophic side * + * effects, if the CRB is not quiescent, i.e. if the CRB is * + * processing protocol messages when the write occurs. 
* + * * + ************************************************************************/ + +typedef union ii_icmr_u { + u64 ii_icmr_regval; + struct { + u64 i_sp_msg:1; + u64 i_rd_hdr:1; + u64 i_rsvd_4:2; + u64 i_c_cnt:4; + u64 i_rsvd_3:4; + u64 i_clr_rqpd:1; + u64 i_clr_rppd:1; + u64 i_rsvd_2:2; + u64 i_fc_cnt:4; + u64 i_crb_vld:15; + u64 i_crb_mark:15; + u64 i_rsvd_1:2; + u64 i_precise:1; + u64 i_rsvd:11; + } ii_icmr_fld_s; +} ii_icmr_u_t; + +/************************************************************************ + * * + * This register allows control of the table portion of the CRB * + * logic via software. Control operations from this register have * + * priority over all incoming Crosstalk or BTE requests. * + * * + ************************************************************************/ + +typedef union ii_iccr_u { + u64 ii_iccr_regval; + struct { + u64 i_crb_num:4; + u64 i_rsvd_1:4; + u64 i_cmd:8; + u64 i_pending:1; + u64 i_rsvd:47; + } ii_iccr_fld_s; +} ii_iccr_u_t; + +/************************************************************************ + * * + * This register allows the maximum timeout value to be programmed. * + * * + ************************************************************************/ + +typedef union ii_icto_u { + u64 ii_icto_regval; + struct { + u64 i_timeout:8; + u64 i_rsvd:56; + } ii_icto_fld_s; +} ii_icto_u_t; + +/************************************************************************ + * * + * This register allows the timeout prescalar to be programmed. An * + * internal counter is associated with this register. When the * + * internal counter reaches the value of the PRESCALE field, the * + * timer registers in all valid CRBs are incremented (CRBx_D[TIMEOUT] * + * field). The internal counter resets to zero, and then continues * + * counting. 
* + * * + ************************************************************************/ + +typedef union ii_ictp_u { + u64 ii_ictp_regval; + struct { + u64 i_prescale:24; + u64 i_rsvd:40; + } ii_ictp_fld_s; +} ii_ictp_u_t; + +/************************************************************************ + * * + * Description: There are 15 CRB Entries (ICRB0 to ICRBE) that are * + * used for Crosstalk operations (both cacheline and partial * + * operations) or BTE/IO. Because the CRB entries are very wide, five * + * registers (_A to _E) are required to read and write each entry. * + * The CRB Entry registers can be conceptualized as rows and columns * + * (illustrated in the table above). Each row contains the 4 * + * registers required for a single CRB Entry. The first doubleword * + * (column) for each entry is labeled A, and the second doubleword * + * (higher address) is labeled B, the third doubleword is labeled C, * + * the fourth doubleword is labeled D and the fifth doubleword is * + * labeled E. All CRB entries have their addresses on a quarter * + * cacheline aligned boundary. * + * Upon reset, only the following fields are initialized: valid * + * (VLD), priority count, timeout, timeout valid, and context valid. * + * All other bits should be cleared by software before use (after * + * recovering any potential error state from before the reset). * + * The following four tables summarize the format for the four * + * registers that are used for each ICRB# Entry. 
* + * * + ************************************************************************/ + +typedef union ii_icrb0_a_u { + u64 ii_icrb0_a_regval; + struct { + u64 ia_iow:1; + u64 ia_vld:1; + u64 ia_addr:47; + u64 ia_tnum:5; + u64 ia_sidn:4; + u64 ia_rsvd:6; + } ii_icrb0_a_fld_s; +} ii_icrb0_a_u_t; + +/************************************************************************ + * * + * Description: There are 15 CRB Entries (ICRB0 to ICRBE) that are * + * used for Crosstalk operations (both cacheline and partial * + * operations) or BTE/IO. Because the CRB entries are very wide, five * + * registers (_A to _E) are required to read and write each entry. * + * * + ************************************************************************/ + +typedef union ii_icrb0_b_u { + u64 ii_icrb0_b_regval; + struct { + u64 ib_xt_err:1; + u64 ib_mark:1; + u64 ib_ln_uce:1; + u64 ib_errcode:3; + u64 ib_error:1; + u64 ib_stall__bte_1:1; + u64 ib_stall__bte_0:1; + u64 ib_stall__intr:1; + u64 ib_stall_ib:1; + u64 ib_intvn:1; + u64 ib_wb:1; + u64 ib_hold:1; + u64 ib_ack:1; + u64 ib_resp:1; + u64 ib_ack_cnt:11; + u64 ib_rsvd:7; + u64 ib_exc:5; + u64 ib_init:3; + u64 ib_imsg:8; + u64 ib_imsgtype:2; + u64 ib_use_old:1; + u64 ib_rsvd_1:11; + } ii_icrb0_b_fld_s; +} ii_icrb0_b_u_t; + +/************************************************************************ + * * + * Description: There are 15 CRB Entries (ICRB0 to ICRBE) that are * + * used for Crosstalk operations (both cacheline and partial * + * operations) or BTE/IO. Because the CRB entries are very wide, five * + * registers (_A to _E) are required to read and write each entry. 
* + * * + ************************************************************************/ + +typedef union ii_icrb0_c_u { + u64 ii_icrb0_c_regval; + struct { + u64 ic_source:15; + u64 ic_size:2; + u64 ic_ct:1; + u64 ic_bte_num:1; + u64 ic_gbr:1; + u64 ic_resprqd:1; + u64 ic_bo:1; + u64 ic_suppl:15; + u64 ic_rsvd:27; + } ii_icrb0_c_fld_s; +} ii_icrb0_c_u_t; + +/************************************************************************ + * * + * Description: There are 15 CRB Entries (ICRB0 to ICRBE) that are * + * used for Crosstalk operations (both cacheline and partial * + * operations) or BTE/IO. Because the CRB entries are very wide, five * + * registers (_A to _E) are required to read and write each entry. * + * * + ************************************************************************/ + +typedef union ii_icrb0_d_u { + u64 ii_icrb0_d_regval; + struct { + u64 id_pa_be:43; + u64 id_bte_op:1; + u64 id_pr_psc:4; + u64 id_pr_cnt:4; + u64 id_sleep:1; + u64 id_rsvd:11; + } ii_icrb0_d_fld_s; +} ii_icrb0_d_u_t; + +/************************************************************************ + * * + * Description: There are 15 CRB Entries (ICRB0 to ICRBE) that are * + * used for Crosstalk operations (both cacheline and partial * + * operations) or BTE/IO. Because the CRB entries are very wide, five * + * registers (_A to _E) are required to read and write each entry. * + * * + ************************************************************************/ + +typedef union ii_icrb0_e_u { + u64 ii_icrb0_e_regval; + struct { + u64 ie_timeout:8; + u64 ie_context:15; + u64 ie_rsvd:1; + u64 ie_tvld:1; + u64 ie_cvld:1; + u64 ie_rsvd_0:38; + } ii_icrb0_e_fld_s; +} ii_icrb0_e_u_t; + +/************************************************************************ + * * + * This register contains the lower 64 bits of the header of the * + * spurious message captured by II. Valid when the SP_MSG bit in ICMR * + * register is set. 
* + * * + ************************************************************************/ + +typedef union ii_icsml_u { + u64 ii_icsml_regval; + struct { + u64 i_tt_addr:47; + u64 i_newsuppl_ex:14; + u64 i_reserved:2; + u64 i_overflow:1; + } ii_icsml_fld_s; +} ii_icsml_u_t; + +/************************************************************************ + * * + * This register contains the middle 64 bits of the header of the * + * spurious message captured by II. Valid when the SP_MSG bit in ICMR * + * register is set. * + * * + ************************************************************************/ + +typedef union ii_icsmm_u { + u64 ii_icsmm_regval; + struct { + u64 i_tt_ack_cnt:11; + u64 i_reserved:53; + } ii_icsmm_fld_s; +} ii_icsmm_u_t; + +/************************************************************************ + * * + * This register contains the microscopic state, all the inputs to * + * the protocol table, captured with the spurious message. Valid when * + * the SP_MSG bit in the ICMR register is set. 
* + * * + ************************************************************************/ + +typedef union ii_icsmh_u { + u64 ii_icsmh_regval; + struct { + u64 i_tt_vld:1; + u64 i_xerr:1; + u64 i_ft_cwact_o:1; + u64 i_ft_wact_o:1; + u64 i_ft_active_o:1; + u64 i_sync:1; + u64 i_mnusg:1; + u64 i_mnusz:1; + u64 i_plusz:1; + u64 i_plusg:1; + u64 i_tt_exc:5; + u64 i_tt_wb:1; + u64 i_tt_hold:1; + u64 i_tt_ack:1; + u64 i_tt_resp:1; + u64 i_tt_intvn:1; + u64 i_g_stall_bte1:1; + u64 i_g_stall_bte0:1; + u64 i_g_stall_il:1; + u64 i_g_stall_ib:1; + u64 i_tt_imsg:8; + u64 i_tt_imsgtype:2; + u64 i_tt_use_old:1; + u64 i_tt_respreqd:1; + u64 i_tt_bte_num:1; + u64 i_cbn:1; + u64 i_match:1; + u64 i_rpcnt_lt_34:1; + u64 i_rpcnt_ge_34:1; + u64 i_rpcnt_lt_18:1; + u64 i_rpcnt_ge_18:1; + u64 i_rpcnt_lt_2:1; + u64 i_rpcnt_ge_2:1; + u64 i_rqcnt_lt_18:1; + u64 i_rqcnt_ge_18:1; + u64 i_rqcnt_lt_2:1; + u64 i_rqcnt_ge_2:1; + u64 i_tt_device:7; + u64 i_tt_init:3; + u64 i_reserved:5; + } ii_icsmh_fld_s; +} ii_icsmh_u_t; + +/************************************************************************ + * * + * The Shub DEBUG unit provides a 3-bit selection signal to the * + * II core and a 3-bit selection signal to the fsbclk domain in the II * + * wrapper. * + * * + ************************************************************************/ + +typedef union ii_idbss_u { + u64 ii_idbss_regval; + struct { + u64 i_iioclk_core_submenu:3; + u64 i_rsvd:5; + u64 i_fsbclk_wrapper_submenu:3; + u64 i_rsvd_1:5; + u64 i_iioclk_menu:5; + u64 i_rsvd_2:43; + } ii_idbss_fld_s; +} ii_idbss_u_t; + +/************************************************************************ + * * + * Description: This register is used to set up the length for a * + * transfer and then to monitor the progress of that transfer. This * + * register needs to be initialized before a transfer is started. A * + * legitimate write to this register will set the Busy bit, clear the * + * Error bit, and initialize the length to the value desired. 
* + * While the transfer is in progress, hardware will decrement the * + * length field with each successful block that is copied. Once the * + * transfer completes, hardware will clear the Busy bit. The length * + * field will also contain the number of cache lines left to be * + * transferred. * + * * + ************************************************************************/ + +typedef union ii_ibls0_u { + u64 ii_ibls0_regval; + struct { + u64 i_length:16; + u64 i_error:1; + u64 i_rsvd_1:3; + u64 i_busy:1; + u64 i_rsvd:43; + } ii_ibls0_fld_s; +} ii_ibls0_u_t; + +/************************************************************************ + * * + * This register should be loaded before a transfer is started. The * + * address to be loaded in bits 39:0 is the 40-bit TRex+ physical * + * address as described in Section 1.3, Figure2 and Figure3. Since * + * the bottom 7 bits of the address are always taken to be zero, BTE * + * transfers are always cacheline-aligned. * + * * + ************************************************************************/ + +typedef union ii_ibsa0_u { + u64 ii_ibsa0_regval; + struct { + u64 i_rsvd_1:7; + u64 i_addr:42; + u64 i_rsvd:15; + } ii_ibsa0_fld_s; +} ii_ibsa0_u_t; + +/************************************************************************ + * * + * This register should be loaded before a transfer is started. The * + * address to be loaded in bits 39:0 is the 40-bit TRex+ physical * + * address as described in Section 1.3, Figure2 and Figure3. Since * + * the bottom 7 bits of the address are always taken to be zero, BTE * + * transfers are always cacheline-aligned. 
* + * * + ************************************************************************/ + +typedef union ii_ibda0_u { + u64 ii_ibda0_regval; + struct { + u64 i_rsvd_1:7; + u64 i_addr:42; + u64 i_rsvd:15; + } ii_ibda0_fld_s; +} ii_ibda0_u_t; + +/************************************************************************ + * * + * Writing to this register sets up the attributes of the transfer * + * and initiates the transfer operation. Reading this register has * + * the side effect of terminating any transfer in progress. Note: * + * stopping a transfer midstream could have an adverse impact on the * + * other BTE. If a BTE stream has to be stopped (due to error * + * handling for example), both BTE streams should be stopped and * + * their transfers discarded. * + * * + ************************************************************************/ + +typedef union ii_ibct0_u { + u64 ii_ibct0_regval; + struct { + u64 i_zerofill:1; + u64 i_rsvd_2:3; + u64 i_notify:1; + u64 i_rsvd_1:3; + u64 i_poison:1; + u64 i_rsvd:55; + } ii_ibct0_fld_s; +} ii_ibct0_u_t; + +/************************************************************************ + * * + * This register contains the address to which the WINV is sent. * + * This address has to be cache line aligned. * + * * + ************************************************************************/ + +typedef union ii_ibna0_u { + u64 ii_ibna0_regval; + struct { + u64 i_rsvd_1:7; + u64 i_addr:42; + u64 i_rsvd:15; + } ii_ibna0_fld_s; +} ii_ibna0_u_t; + +/************************************************************************ + * * + * This register contains the programmable level as well as the node * + * ID and PI unit of the processor to which the interrupt will be * + * sent. 
* + * * + ************************************************************************/ + +typedef union ii_ibia0_u { + u64 ii_ibia0_regval; + struct { + u64 i_rsvd_2:1; + u64 i_node_id:11; + u64 i_rsvd_1:4; + u64 i_level:7; + u64 i_rsvd:41; + } ii_ibia0_fld_s; +} ii_ibia0_u_t; + +/************************************************************************ + * * + * Description: This register is used to set up the length for a * + * transfer and then to monitor the progress of that transfer. This * + * register needs to be initialized before a transfer is started. A * + * legitimate write to this register will set the Busy bit, clear the * + * Error bit, and initialize the length to the value desired. * + * While the transfer is in progress, hardware will decrement the * + * length field with each successful block that is copied. Once the * + * transfer completes, hardware will clear the Busy bit. The length * + * field will also contain the number of cache lines left to be * + * transferred. * + * * + ************************************************************************/ + +typedef union ii_ibls1_u { + u64 ii_ibls1_regval; + struct { + u64 i_length:16; + u64 i_error:1; + u64 i_rsvd_1:3; + u64 i_busy:1; + u64 i_rsvd:43; + } ii_ibls1_fld_s; +} ii_ibls1_u_t; + +/************************************************************************ + * * + * This register should be loaded before a transfer is started. The * + * address to be loaded in bits 39:0 is the 40-bit TRex+ physical * + * address as described in Section 1.3, Figure2 and Figure3. Since * + * the bottom 7 bits of the address are always taken to be zero, BTE * + * transfers are always cacheline-aligned. 
* + * * + ************************************************************************/ + +typedef union ii_ibsa1_u { + u64 ii_ibsa1_regval; + struct { + u64 i_rsvd_1:7; + u64 i_addr:33; + u64 i_rsvd:24; + } ii_ibsa1_fld_s; +} ii_ibsa1_u_t; + +/************************************************************************ + * * + * This register should be loaded before a transfer is started. The * + * address to be loaded in bits 39:0 is the 40-bit TRex+ physical * + * address as described in Section 1.3, Figure2 and Figure3. Since * + * the bottom 7 bits of the address are always taken to be zero, BTE * + * transfers are always cacheline-aligned. * + * * + ************************************************************************/ + +typedef union ii_ibda1_u { + u64 ii_ibda1_regval; + struct { + u64 i_rsvd_1:7; + u64 i_addr:33; + u64 i_rsvd:24; + } ii_ibda1_fld_s; +} ii_ibda1_u_t; + +/************************************************************************ + * * + * Writing to this register sets up the attributes of the transfer * + * and initiates the transfer operation. Reading this register has * + * the side effect of terminating any transfer in progress. Note: * + * stopping a transfer midstream could have an adverse impact on the * + * other BTE. If a BTE stream has to be stopped (due to error * + * handling for example), both BTE streams should be stopped and * + * their transfers discarded. * + * * + ************************************************************************/ + +typedef union ii_ibct1_u { + u64 ii_ibct1_regval; + struct { + u64 i_zerofill:1; + u64 i_rsvd_2:3; + u64 i_notify:1; + u64 i_rsvd_1:3; + u64 i_poison:1; + u64 i_rsvd:55; + } ii_ibct1_fld_s; +} ii_ibct1_u_t; + +/************************************************************************ + * * + * This register contains the address to which the WINV is sent. * + * This address has to be cache line aligned. 
* + * * + ************************************************************************/ + +typedef union ii_ibna1_u { + u64 ii_ibna1_regval; + struct { + u64 i_rsvd_1:7; + u64 i_addr:33; + u64 i_rsvd:24; + } ii_ibna1_fld_s; +} ii_ibna1_u_t; + +/************************************************************************ + * * + * This register contains the programmable level as well as the node * + * ID and PI unit of the processor to which the interrupt will be * + * sent. * + * * + ************************************************************************/ + +typedef union ii_ibia1_u { + u64 ii_ibia1_regval; + struct { + u64 i_pi_id:1; + u64 i_node_id:8; + u64 i_rsvd_1:7; + u64 i_level:7; + u64 i_rsvd:41; + } ii_ibia1_fld_s; +} ii_ibia1_u_t; + +/************************************************************************ + * * + * This register defines the resources that feed information into * + * the two performance counters located in the IO Performance * + * Profiling Register. There are 17 different quantities that can be * + * measured. Given these 17 different options, the two performance * + * counters have 15 of them in common; menu selections 0 through 0xE * + * are identical for each performance counter. As for the other two * + * options, one is available from one performance counter and the * + * other is available from the other performance counter. Hence, the * + * II supports all 17*16=272 possible combinations of quantities to * + * measure. 
* + * * + ************************************************************************/ + +typedef union ii_ipcr_u { + u64 ii_ipcr_regval; + struct { + u64 i_ippr0_c:4; + u64 i_ippr1_c:4; + u64 i_icct:8; + u64 i_rsvd:48; + } ii_ipcr_fld_s; +} ii_ipcr_u_t; + +/************************************************************************ + * * + * * + * * + ************************************************************************/ + +typedef union ii_ippr_u { + u64 ii_ippr_regval; + struct { + u64 i_ippr0:32; + u64 i_ippr1:32; + } ii_ippr_fld_s; +} ii_ippr_u_t; + +/************************************************************************ + * * + * The following defines which were not formed into structures are * + * probably identical to another register, and the name of the * + * register is provided against each of these registers. This * + * information needs to be checked carefully * + * * + * IIO_ICRB1_A IIO_ICRB0_A * + * IIO_ICRB1_B IIO_ICRB0_B * + * IIO_ICRB1_C IIO_ICRB0_C * + * IIO_ICRB1_D IIO_ICRB0_D * + * IIO_ICRB1_E IIO_ICRB0_E * + * IIO_ICRB2_A IIO_ICRB0_A * + * IIO_ICRB2_B IIO_ICRB0_B * + * IIO_ICRB2_C IIO_ICRB0_C * + * IIO_ICRB2_D IIO_ICRB0_D * + * IIO_ICRB2_E IIO_ICRB0_E * + * IIO_ICRB3_A IIO_ICRB0_A * + * IIO_ICRB3_B IIO_ICRB0_B * + * IIO_ICRB3_C IIO_ICRB0_C * + * IIO_ICRB3_D IIO_ICRB0_D * + * IIO_ICRB3_E IIO_ICRB0_E * + * IIO_ICRB4_A IIO_ICRB0_A * + * IIO_ICRB4_B IIO_ICRB0_B * + * IIO_ICRB4_C IIO_ICRB0_C * + * IIO_ICRB4_D IIO_ICRB0_D * + * IIO_ICRB4_E IIO_ICRB0_E * + * IIO_ICRB5_A IIO_ICRB0_A * + * IIO_ICRB5_B IIO_ICRB0_B * + * IIO_ICRB5_C IIO_ICRB0_C * + * IIO_ICRB5_D IIO_ICRB0_D * + * IIO_ICRB5_E IIO_ICRB0_E * + * IIO_ICRB6_A IIO_ICRB0_A * + * IIO_ICRB6_B IIO_ICRB0_B * + * IIO_ICRB6_C IIO_ICRB0_C * + * IIO_ICRB6_D IIO_ICRB0_D * + * IIO_ICRB6_E IIO_ICRB0_E * + * IIO_ICRB7_A IIO_ICRB0_A * + * IIO_ICRB7_B IIO_ICRB0_B * + * IIO_ICRB7_C IIO_ICRB0_C * + * IIO_ICRB7_D IIO_ICRB0_D * + * IIO_ICRB7_E IIO_ICRB0_E * + * IIO_ICRB8_A IIO_ICRB0_A * + * IIO_ICRB8_B 
IIO_ICRB0_B * + * IIO_ICRB8_C IIO_ICRB0_C * + * IIO_ICRB8_D IIO_ICRB0_D * + * IIO_ICRB8_E IIO_ICRB0_E * + * IIO_ICRB9_A IIO_ICRB0_A * + * IIO_ICRB9_B IIO_ICRB0_B * + * IIO_ICRB9_C IIO_ICRB0_C * + * IIO_ICRB9_D IIO_ICRB0_D * + * IIO_ICRB9_E IIO_ICRB0_E * + * IIO_ICRBA_A IIO_ICRB0_A * + * IIO_ICRBA_B IIO_ICRB0_B * + * IIO_ICRBA_C IIO_ICRB0_C * + * IIO_ICRBA_D IIO_ICRB0_D * + * IIO_ICRBA_E IIO_ICRB0_E * + * IIO_ICRBB_A IIO_ICRB0_A * + * IIO_ICRBB_B IIO_ICRB0_B * + * IIO_ICRBB_C IIO_ICRB0_C * + * IIO_ICRBB_D IIO_ICRB0_D * + * IIO_ICRBB_E IIO_ICRB0_E * + * IIO_ICRBC_A IIO_ICRB0_A * + * IIO_ICRBC_B IIO_ICRB0_B * + * IIO_ICRBC_C IIO_ICRB0_C * + * IIO_ICRBC_D IIO_ICRB0_D * + * IIO_ICRBC_E IIO_ICRB0_E * + * IIO_ICRBD_A IIO_ICRB0_A * + * IIO_ICRBD_B IIO_ICRB0_B * + * IIO_ICRBD_C IIO_ICRB0_C * + * IIO_ICRBD_D IIO_ICRB0_D * + * IIO_ICRBD_E IIO_ICRB0_E * + * IIO_ICRBE_A IIO_ICRB0_A * + * IIO_ICRBE_B IIO_ICRB0_B * + * IIO_ICRBE_C IIO_ICRB0_C * + * IIO_ICRBE_D IIO_ICRB0_D * + * IIO_ICRBE_E IIO_ICRB0_E * + * * + ************************************************************************/ + +/* + * Slightly friendlier names for some common registers. 
+ */
+#define IIO_WIDGET	IIO_WID	/* Widget identification */
+#define IIO_WIDGET_STAT	IIO_WSTAT	/* Widget status register */
+#define IIO_WIDGET_CTRL	IIO_WCR	/* Widget control register */
+#define IIO_PROTECT	IIO_ILAPR	/* IO interface protection */
+#define IIO_PROTECT_OVRRD	IIO_ILAPO	/* IO protect override */
+#define IIO_OUTWIDGET_ACCESS	IIO_IOWA	/* Outbound widget access */
+#define IIO_INWIDGET_ACCESS	IIO_IIWA	/* Inbound widget access */
+#define IIO_INDEV_ERR_MASK	IIO_IIDEM	/* Inbound device error mask */
+#define IIO_LLP_CSR	IIO_ILCSR	/* LLP control and status */
+#define IIO_LLP_LOG	IIO_ILLR	/* LLP log */
+#define IIO_XTALKCC_TOUT	IIO_IXCC	/* Xtalk credit count timeout */
+#define IIO_XTALKTT_TOUT	IIO_IXTT	/* Xtalk tail timeout */
+#define IIO_IO_ERR_CLR	IIO_IECLR	/* IO error clear */
+#define IIO_IGFX_0	IIO_IGFX0	/* from here on: "_0"/"_1" spellings of the numbered registers */
+#define IIO_IGFX_1	IIO_IGFX1
+#define IIO_IBCT_0	IIO_IBCT0
+#define IIO_IBCT_1	IIO_IBCT1
+#define IIO_IBLS_0	IIO_IBLS0
+#define IIO_IBLS_1	IIO_IBLS1
+#define IIO_IBSA_0	IIO_IBSA0
+#define IIO_IBSA_1	IIO_IBSA1
+#define IIO_IBDA_0	IIO_IBDA0
+#define IIO_IBDA_1	IIO_IBDA1
+#define IIO_IBNA_0	IIO_IBNA0
+#define IIO_IBNA_1	IIO_IBNA1
+#define IIO_IBIA_0	IIO_IBIA0
+#define IIO_IBIA_1	IIO_IBIA1
+#define IIO_IOPRB_0	IIO_IPRB0
+
+#define IIO_PRTE_A(_x)	(IIO_IPRTE0_A + (8 * (_x)))	/* PRTE _x, "A" register: entries are 8 bytes apart */
+#define IIO_PRTE_B(_x)	(IIO_IPRTE0_B + (8 * (_x)))	/* PRTE _x, "B" register: entries are 8 bytes apart */
+#define IIO_NUM_PRTES	8	/* Total number of PRB table entries */
+#define IIO_WIDPRTE_A(x)	IIO_PRTE_A(((x) - 8))	/* widget ID to its PRTE num: entry index is x - 8 */
+#define IIO_WIDPRTE_B(x)	IIO_PRTE_B(((x) - 8))	/* widget ID to its PRTE num: entry index is x - 8 */
+
+#define IIO_NUM_IPRBS	9
+
+#define IIO_LLP_CSR_IS_UP	0x00002000	/* bit 13; lies inside IIO_LLP_CSR_LLP_STAT_MASK */
+#define IIO_LLP_CSR_LLP_STAT_MASK	0x00003000	/* bits 13:12 */
+#define IIO_LLP_CSR_LLP_STAT_SHFT	12
+
+#define IIO_LLP_CB_MAX	0xffff	/* in ILLR CB_CNT, Max Check Bit errors */
+#define IIO_LLP_SN_MAX	0xffff	/* in ILLR SN_CNT, Max Sequence Number errors */
+
+/* key to IIO_PROTECT_OVRRD */
+#define IIO_PROTECT_OVRRD_KEY	0x53474972756c6573ull	/* the ASCII bytes of "SGIrules" */
+
+/* BTE register names */ +#define IIO_BTE_STAT_0 IIO_IBLS_0 /* Also BTE length/status 0 */ +#define IIO_BTE_SRC_0 IIO_IBSA_0 /* Also BTE source address 0 */ +#define IIO_BTE_DEST_0 IIO_IBDA_0 /* Also BTE dest. address 0 */ +#define IIO_BTE_CTRL_0 IIO_IBCT_0 /* Also BTE control/terminate 0 */ +#define IIO_BTE_NOTIFY_0 IIO_IBNA_0 /* Also BTE notification 0 */ +#define IIO_BTE_INT_0 IIO_IBIA_0 /* Also BTE interrupt 0 */ +#define IIO_BTE_OFF_0 0 /* Base offset from BTE 0 regs. */ +#define IIO_BTE_OFF_1 (IIO_IBLS_1 - IIO_IBLS_0) /* Offset from base to BTE 1 */ + +/* BTE register offsets from base */ +#define BTEOFF_STAT 0 +#define BTEOFF_SRC (IIO_BTE_SRC_0 - IIO_BTE_STAT_0) +#define BTEOFF_DEST (IIO_BTE_DEST_0 - IIO_BTE_STAT_0) +#define BTEOFF_CTRL (IIO_BTE_CTRL_0 - IIO_BTE_STAT_0) +#define BTEOFF_NOTIFY (IIO_BTE_NOTIFY_0 - IIO_BTE_STAT_0) +#define BTEOFF_INT (IIO_BTE_INT_0 - IIO_BTE_STAT_0) + +/* names used in shub diags */ +#define IIO_BASE_BTE0 IIO_IBLS_0 +#define IIO_BASE_BTE1 IIO_IBLS_1 + +/* + * Macro which takes the widget number, and returns the + * IO PRB address of that widget. + * value _x is expected to be a widget number in the range + * 0, 8 - 0xF + */ +#define IIO_IOPRB(_x) (IIO_IOPRB_0 + ( ( (_x) < HUB_WIDGET_ID_MIN ? \ + (_x) : \ + (_x) - (HUB_WIDGET_ID_MIN-1)) << 3) ) + +/* GFX Flow Control Node/Widget Register */ +#define IIO_IGFX_W_NUM_BITS 4 /* size of widget num field */ +#define IIO_IGFX_W_NUM_MASK ((1<> IIO_WSTAT_TXRETRY_SHFT) & \ + IIO_WSTAT_TXRETRY_MASK) + +/* Number of II perf. counters we can multiplex at once */ + +#define IO_PERF_SETS 32 + +/* Bit for the widget in inbound access register */ +#define IIO_IIWA_WIDGET(_w) ((u64)(1ULL << _w)) +/* Bit for the widget in outbound access register */ +#define IIO_IOWA_WIDGET(_w) ((u64)(1ULL << _w)) + +/* NOTE: The following define assumes that we are going to get + * widget numbers from 8 thru F and the device numbers within + * widget from 0 thru 7. 
+ */
+#define IIO_IIDEM_WIDGETDEV_MASK(w, d)	((u64)(1ULL << (8 * ((w) - 8) + (d))))
+
+/* IO Interrupt Destination Register */
+#define IIO_IIDSR_SENT_SHIFT	28
+#define IIO_IIDSR_SENT_MASK	0x30000000
+#define IIO_IIDSR_ENB_SHIFT	24
+#define IIO_IIDSR_ENB_MASK	0x01000000
+#define IIO_IIDSR_NODE_SHIFT	9
+#define IIO_IIDSR_NODE_MASK	0x000ff700	/* NOTE(review): sets bits 8-10 and 12-19, so it overlaps the PI_ID bit (8) and skips bit 11 although NODE_SHIFT is 9 -- confirm against the SHub spec */
+#define IIO_IIDSR_PI_ID_SHIFT	8
+#define IIO_IIDSR_PI_ID_MASK	0x00000100
+#define IIO_IIDSR_LVL_SHIFT	0
+#define IIO_IIDSR_LVL_MASK	0x000000ff
+
+/* Xtalk timeout threshold register (IIO_IXTT) */
+#define IXTT_RRSP_TO_SHFT	55	/* read response timeout */
+#define IXTT_RRSP_TO_MASK	(0x1FULL << IXTT_RRSP_TO_SHFT)
+#define IXTT_RRSP_PS_SHFT	32	/* read response timeout prescaler */
+#define IXTT_RRSP_PS_MASK	(0x7FFFFFULL << IXTT_RRSP_PS_SHFT)
+#define IXTT_TAIL_TO_SHFT	0	/* tail timeout counter threshold */
+#define IXTT_TAIL_TO_MASK	(0x3FFFFFFULL << IXTT_TAIL_TO_SHFT)
+
+/*
+ * The IO LLP control status register and widget control register
+ */
+
+typedef union hubii_wcr_u {
+	u64 wcr_reg_value;	/* the whole register as one 64-bit value */
+	struct {
+		u64 wcr_widget_id:4,	/* widget ID, going by the field name (this comment used to repeat the wcr_xbar_crd text) */
+		 wcr_tag_mode:1,	/* Tag mode */
+		 wcr_rsvd1:8,	/* Reserved */
+		 wcr_xbar_crd:3,	/* LLP crossbar credit */
+		 wcr_f_bad_pkt:1,	/* Force bad llp pkt enable */
+		 wcr_dir_con:1,	/* widget direct connect */
+		 wcr_e_thresh:5,	/* elasticity threshold */
+		 wcr_rsvd:41;	/* unused */
+	} wcr_fields_s;	/* field widths total 64 bits */
+} hubii_wcr_t;
+
+#define iwcr_dir_con	wcr_fields_s.wcr_dir_con	/* member shorthand for the direct-connect bit of a hubii_wcr_t */
+
+/* The structures below are defined to extract and modify the ii
+performance registers */
+
+/* io_perf_sel allows the caller to specify what tests will be
+ performed */
+
+typedef union io_perf_sel {
+	u64 perf_sel_reg;	/* the whole register as one 64-bit value */
+	struct {
+		u64 perf_ippr0:4, perf_ippr1:4, perf_icct:8, perf_rsvd:48;
+	} perf_sel_bits;	/* same field widths as ii_ipcr_u: 4, 4, 8, 48 */
+} io_perf_sel_t;
+
+/* io_perf_cnt is to extract the count from the shub registers. Due to
+ hardware problems there is only one counter, not two. */
+
+typedef union io_perf_cnt {
+	u64 perf_cnt;	/* the whole register as one 64-bit value */
+	struct {
+		u64 perf_cnt:20, perf_rsvd2:12, perf_rsvd1:32;	/* one 20-bit count; the other 44 bits are reserved */
+	} perf_cnt_bits;
+
+} io_perf_cnt_t;
+
+typedef union iprte_a {
+	u64 entry;	/* the whole entry as one 64-bit value */
+	struct {
+		u64 i_rsvd_1:3;
+		u64 i_addr:38;
+		u64 i_init:3;
+		u64 i_source:8;
+		u64 i_rsvd:2;
+		u64 i_widget:4;
+		u64 i_to_cnt:5;
+		u64 i_vld:1;
+	} iprte_fields;	/* field widths total 64 bits */
+} iprte_a_t;
+
+#endif /* _ASM_IA64_SN_SHUBIO_H */
diff --git a/kernel/arch/ia64/include/asm/sn/simulator.h b/kernel/arch/ia64/include/asm/sn/simulator.h
new file mode 100644
index 000000000..c2611f6cf
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/sn/simulator.h
@@ -0,0 +1,25 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_SIMULATOR_H
+#define _ASM_IA64_SN_SIMULATOR_H
+
+#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_SGI_UV)
+#define SNMAGIC 0xaeeeeeee8badbeefL	/* value IS_MEDUSA() compares cpuid[2] against */
+#define IS_MEDUSA()	({long sn; asm("mov %0=cpuid[%1]" : "=r"(sn) : "r"(2)); sn == SNMAGIC;})	/* reads cpuid[2]; nonzero when it equals SNMAGIC */
+
+#define SIMULATOR_SLEEP()	asm("nop.i 0x8beef")	/* emits a nop.i with immediate 0x8beef; presumably recognised by the simulator -- confirm */
+#define IS_RUNNING_ON_SIMULATOR() (sn_prom_type)	/* nonzero for either medusa prom type */
+#define IS_RUNNING_ON_FAKE_PROM() (sn_prom_type == 2)
+extern int sn_prom_type;	/* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
+#else	/* none of the SN configs above: constant stubs. NOTE(review): no IS_RUNNING_ON_FAKE_PROM() stub is provided here */
+#define IS_MEDUSA() 0
+#define SIMULATOR_SLEEP()	/* expands to nothing */
+#define IS_RUNNING_ON_SIMULATOR() 0
+#endif
+
+#endif /* _ASM_IA64_SN_SIMULATOR_H */
diff --git a/kernel/arch/ia64/include/asm/sn/sn2/sn_hwperf.h b/kernel/arch/ia64/include/asm/sn/sn2/sn_hwperf.h
new file mode 100644
index 000000000..e61ebac38
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/sn/sn2/sn_hwperf.h
@@ -0,0 +1,242 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
+ *
+ * Data types used by the SN_SAL_HWPERF_OP SAL call for monitoring
+ * SGI Altix node and router hardware
+ *
+ * Mark Goodwin Mon Aug 30 12:23:46 EST 2004
+ */
+
+#ifndef SN_HWPERF_H
+#define SN_HWPERF_H
+
+/*
+ * object structure. SN_HWPERF_ENUM_OBJECTS and SN_HWPERF_GET_CPU_INFO
+ * return an array of these. Do not change this without also
+ * changing the corresponding SAL code.
+ *
+ * The 'f' union lets the same storage be read either as the single-bit
+ * flags this_part / is_shared or as the 'flags' word; use the sn_hwp_*
+ * accessor macros defined after the structure instead of spelling out
+ * the union path.
+ */
+#define SN_HWPERF_MAXSTRING		128	/* size of name[] and location[] */
+struct sn_hwperf_object_info {
+	u32 id;
+	union {
+		struct {
+			u64 this_part:1;
+			u64 is_shared:1;
+		} fields;
+		struct {
+			u64 flags;
+			u64 reserved;
+		} b;
+	} f;
+	char name[SN_HWPERF_MAXSTRING];
+	char location[SN_HWPERF_MAXSTRING];
+	u32 ports;
+};
+
+#define sn_hwp_this_part	f.fields.this_part
+#define sn_hwp_is_shared	f.fields.is_shared
+#define sn_hwp_flags		f.b.flags
+
+/*
+ * macros for object classification
+ *
+ * Each macro takes a possibly-NULL pointer to struct sn_hwperf_object_info
+ * and classifies the object by looking for a substring in its name, so
+ * strstr() must be declared wherever they are used, and the argument is
+ * evaluated more than once.  Because "Router" is a substring of both
+ * "NL3Router" and "NL4Router", SN_HWPERF_IS_OLDROUTER() is also true for
+ * those objects.  SN_HWPERF_FOREIGN() is true for an object that is neither
+ * flagged this_part nor is_shared.
+ */
+#define SN_HWPERF_IS_NODE(x)		((x) && strstr((x)->name, "SHub"))
+#define SN_HWPERF_IS_NODE_SHUB2(x)	((x) && strstr((x)->name, "SHub 2."))
+#define SN_HWPERF_IS_IONODE(x)		((x) && strstr((x)->name, "TIO"))
+#define SN_HWPERF_IS_NL3ROUTER(x)	((x) && strstr((x)->name, "NL3Router"))
+#define SN_HWPERF_IS_NL4ROUTER(x)	((x) && strstr((x)->name, "NL4Router"))
+#define SN_HWPERF_IS_OLDROUTER(x)	((x) && strstr((x)->name, "Router"))
+#define SN_HWPERF_IS_ROUTER(x)		(SN_HWPERF_IS_NL3ROUTER(x) || \
+					SN_HWPERF_IS_NL4ROUTER(x) || \
+					SN_HWPERF_IS_OLDROUTER(x))
+#define SN_HWPERF_FOREIGN(x)		((x) && !(x)->sn_hwp_this_part && !(x)->sn_hwp_is_shared)
+#define SN_HWPERF_SAME_OBJTYPE(x,y)	((SN_HWPERF_IS_NODE(x) && SN_HWPERF_IS_NODE(y)) ||\
+					(SN_HWPERF_IS_IONODE(x) && SN_HWPERF_IS_IONODE(y)) ||\
+					(SN_HWPERF_IS_ROUTER(x) && SN_HWPERF_IS_ROUTER(y)))
+
+/* numa port structure, SN_HWPERF_ENUM_PORTS returns an array of these */
+struct sn_hwperf_port_info {
+	u32 port;
+	u32 conn_id;
+	u32 conn_port;
+};
+
+/* for SN_HWPERF_{GET,SET}_MMRS: one address/data pair per array element */
+struct sn_hwperf_data {
+	u64 addr;
+	u64 data;
+};
+
+/*
+ * user ioctl() argument, see below.  The per-request comments that follow
+ * refer to the 'ptr' member as 'p'.
+ */
+struct sn_hwperf_ioctl_args {
+	u64 arg;		/* argument, usually an object id */
+	u64 sz;			/* size of transfer */
+	void *ptr;		/* pointer to source/target */
+	u32 v0;			/* second return value */
+};
+
+/*
+ * For SN_HWPERF_{GET,SET}_MMRS and SN_HWPERF_OBJECT_DISTANCE,
+ * sn_hwperf_ioctl_args.arg can be used to specify a CPU on which
+ * to call SAL, and whether to use an interprocessor interrupt
+ * or task migration in order to do so. If the CPU specified is
+ * SN_HWPERF_ARG_ANY_CPU, then the current CPU will be used.
+ */
+#define SN_HWPERF_ARG_ANY_CPU		0x7fffffffUL
+#define SN_HWPERF_ARG_CPU_MASK		0x7fffffff00000000ULL	/* bits 32-62 of arg */
+#define SN_HWPERF_ARG_USE_IPI_MASK	0x8000000000000000ULL	/* bit 63 of arg */
+#define SN_HWPERF_ARG_OBJID_MASK	0x00000000ffffffffULL	/* bits 0-31 of arg */
+
+/*
+ * ioctl requests on the "sn_hwperf" misc device that call SAL.
+ *
+ * A request code is a request number in the low 12 bits
+ * (SN_HWPERF_OP_MASK), optionally OR'd with the MEM_COPYIN and/or
+ * MEM_COPYOUT flag bits.  NOTE(review): the flags presumably tell the
+ * driver in which direction to copy the user buffer -- confirm in the
+ * ioctl implementation.
+ */
+#define SN_HWPERF_OP_MEM_COPYIN		0x1000
+#define SN_HWPERF_OP_MEM_COPYOUT	0x2000
+#define SN_HWPERF_OP_MASK		0x0fff
+
+/*
+ * Determine mem requirement.
+ * arg	don't care
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define SN_HWPERF_GET_HEAPSIZE		1
+
+/*
+ * Install mem for SAL drvr
+ * arg	don't care
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to buffer for scratch area
+ */
+#define SN_HWPERF_INSTALL_HEAP		2
+
+/*
+ * Determine number of objects
+ * arg	don't care
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define SN_HWPERF_OBJECT_COUNT		(10|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Determine object "distance", relative to a cpu. This operation can
+ * execute on a designated logical cpu number, using either an IPI or
+ * via task migration. If the cpu number is SN_HWPERF_ARG_ANY_CPU, then
+ * the current CPU is used. See the SN_HWPERF_ARG_* macros above.
+ *
+ * arg	bitmap of IPI flag, cpu number and object id
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define SN_HWPERF_OBJECT_DISTANCE	(11|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Enumerate objects. Special case if sz == 8, returns the required
+ * buffer size.
+ * arg	don't care
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to array of struct sn_hwperf_object_info
+ */
+#define SN_HWPERF_ENUM_OBJECTS		(12|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Enumerate NumaLink ports for an object. Special case if sz == 8,
+ * returns the required buffer size.
+ * arg	object id
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to array of struct sn_hwperf_port_info
+ */
+#define SN_HWPERF_ENUM_PORTS		(13|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * SET/GET memory mapped registers. These operations can execute
+ * on a designated logical cpu number, using either an IPI or via
+ * task migration. If the cpu number is SN_HWPERF_ARG_ANY_CPU, then
+ * the current CPU is used. See the SN_HWPERF_ARG_* macros above.
+ *
+ * arg	bitmap of ipi flag, cpu number and object id
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to array of struct sn_hwperf_data
+ *
+ * SET carries only the MEM_COPYIN flag; GET carries both MEM_COPYIN and
+ * MEM_COPYOUT.
+ */
+#define SN_HWPERF_SET_MMRS		(14|SN_HWPERF_OP_MEM_COPYIN)
+#define SN_HWPERF_GET_MMRS		(15|SN_HWPERF_OP_MEM_COPYOUT| \
+					    SN_HWPERF_OP_MEM_COPYIN)
+/*
+ * Lock a shared object
+ * arg	object id
+ * sz	don't care
+ * p	don't care
+ */
+#define SN_HWPERF_ACQUIRE		16
+
+/*
+ * Unlock a shared object
+ * arg	object id
+ * sz	don't care
+ * p	don't care
+ */
+#define SN_HWPERF_RELEASE		17
+
+/*
+ * Break a lock on a shared object
+ * arg	object id
+ * sz	don't care
+ * p	don't care
+ */
+#define SN_HWPERF_FORCE_RELEASE		18
+
+/*
+ * ioctl requests on "sn_hwperf" that do not call SAL
+ */
+
+/*
+ * get cpu info as an array of struct sn_hwperf_object_info.
+ * id is logical CPU number, name is description, location
+ * is geoid (e.g. 001c04#1c). Special case if sz == 8,
+ * returns the required buffer size.
+ *
+ * arg	don't care
+ * sz	sizeof buffer pointed to by p
+ * p	pointer to array of struct sn_hwperf_object_info
+ */
+#define SN_HWPERF_GET_CPU_INFO		(100|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Given an object id, return it's node number (aka cnode).
+ * arg	object id
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define SN_HWPERF_GET_OBJ_NODE		(101|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Given a node number (cnode), return its nasid.
+ * arg	ordinal node number (aka cnodeid)
+ * sz	8
+ * p	pointer to u64 integer
+ */
+#define SN_HWPERF_GET_NODE_NASID	(102|SN_HWPERF_OP_MEM_COPYOUT)
+
+/*
+ * Given a node id, determine the id of the nearest node with CPUs
+ * and the id of the nearest node that has memory. The argument
+ * node would normally be a "headless" node, e.g. an "IO node".
+ * Return 0 on success.
+ *
+ * Note the parameter order: the memory result pointer (near_mem) comes
+ * before the CPU result pointer (near_cpu).
+ */
+extern int sn_hwperf_get_nearest_node(cnodeid_t node,
+	cnodeid_t *near_mem, cnodeid_t *near_cpu);
+
+/* return codes */
+#define SN_HWPERF_OP_OK			0
+#define SN_HWPERF_OP_NOMEM		1
+#define SN_HWPERF_OP_NO_PERM		2
+#define SN_HWPERF_OP_IO_ERROR		3
+#define SN_HWPERF_OP_BUSY		4
+#define SN_HWPERF_OP_RECONFIGURE	253
+#define SN_HWPERF_OP_INVAL		254
+
+int sn_topology_open(struct inode *inode, struct file *file);
+int sn_topology_release(struct inode *inode, struct file *file);
+#endif				/* SN_HWPERF_H */
diff --git a/kernel/arch/ia64/include/asm/sn/sn_cpuid.h b/kernel/arch/ia64/include/asm/sn/sn_cpuid.h
new file mode 100644
index 000000000..a676dd9ac
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/sn/sn_cpuid.h
@@ -0,0 +1,132 @@
+/*
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ */
+
+
+#ifndef _ASM_IA64_SN_SN_CPUID_H
+#define _ASM_IA64_SN_SN_CPUID_H
+
+#include
+#include
+#include
+#include
+
+
+/*
+ * Functions for converting between cpuids, nodeids and NASIDs.
+ *
+ * These are for SGI platforms only.
+ *
+ * NOTE(review): the four #include directives above carry no header names
+ * in this copy of the patch; the targets appear to have been lost when the
+ * text was extracted and must be restored from the original source.
+ */
+
+
+
+
+/*
+ * Definitions of terms (these definitions are for IA64 ONLY.
Other architectures
+ * use cpuid/cpunum quite differently):
+ *
+ *	   CPUID - a number in range of 0..NR_CPUS-1 that uniquely identifies
+ *		the cpu. The value cpuid has no significance on IA64 other than
+ *		the boot cpu is 0.
+ *			smp_processor_id() returns the cpuid of the current cpu.
+ *
+ *	   CPU_PHYSICAL_ID (also known as HARD_PROCESSOR_ID)
+ *		This is the same as bits 31:24 of the processor LID register
+ *			hard_smp_processor_id()- cpu_physical_id of current processor
+ *			cpu_physical_id(cpuid) - convert a cpuid to a cpu_physical_id
+ *			cpu_logical_id(phy_id) - convert a cpu_physical_id to a cpuid
+ *				* not very efficient - don't use in perf critical code
+ *
+ *	   SLICE - a number in the range of 0 - 3 (typically) that represents the
+ *		cpu number on a brick.
+ *
+ *	   SUBNODE - (almost obsolete) the number of the FSB that a cpu is
+ *		connected to. This is also the same as the PI number. Usually 0 or 1.
+ *
+ *	NOTE!!!: the value of the bits in the cpu physical id (SAPICid or LID) of a cpu has no
+ *	significance. The SAPIC id (LID) is a 16-bit cookie that has meaning only to the PROM.
+ *
+ *
+ * The macros convert between cpu physical ids & slice/nasid/cnodeid.
+ * These terms are described below:
+ *
+ *
+ *          Brick
+ *            -----   -----           -----   -----       CPU
+ *            | 0 |   | 1 |           | 0 |   | 1 |       SLICE
+ *            -----   -----           -----   -----
+ *              |       |               |       |
+ *              |       |               |       |
+ *            0 |       | 2           0 |       | 2       FSB SLOT
+ *               -------                 -------
+ *                  |                       |
+ *                  |                       |
+ *                  |                       |
+ *             ------------           -------------
+ *            |            |         |             |
+ *            |    SHUB    |         |    SHUB     |      NASID   (0..MAX_NASIDS)
+ *            |            |-----    |             |      CNODEID (0..num_compact_nodes-1)
+ *            |            |         |             |
+ *            |            |         |             |
+ *             ------------           -------------
+ *                   |                      |
+ *
+ *
+ */
+
+#define get_node_number(addr)			NASID_GET(addr)	/* plain alias for NASID_GET() */
+
+/*
+ * NOTE: on non-MP systems, only cpuid 0 exists
+ */
+
+extern short physical_node_map[];	/* indexed by nasid to get cnode */
+
+/*
+ * Macros for retrieving info about current cpu
+ *
+ * get_nasid/get_subnode/get_slice/get_cnode read the entry of
+ * sn_nodepda->phys_cpuid[] selected by smp_processor_id().
+ * get_sapicid() instead extracts bits 31:16 of the LID control register.
+ */
+#define get_nasid()	(sn_nodepda->phys_cpuid[smp_processor_id()].nasid)
+#define get_subnode()	(sn_nodepda->phys_cpuid[smp_processor_id()].subnode)
+#define get_slice()	(sn_nodepda->phys_cpuid[smp_processor_id()].slice)
+#define get_cnode()	(sn_nodepda->phys_cpuid[smp_processor_id()].cnode)
+#define get_sapicid()	((ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff)
+
+/*
+ * Macros for retrieving info about an arbitrary cpu
+ *	cpuid - logical cpu id
+ *
+ * Same table lookup as the get_*() macros above, indexed by the cpuid
+ * argument instead of smp_processor_id().  No range check is performed.
+ */
+#define cpuid_to_nasid(cpuid)		(sn_nodepda->phys_cpuid[cpuid].nasid)
+#define cpuid_to_subnode(cpuid)		(sn_nodepda->phys_cpuid[cpuid].subnode)
+#define cpuid_to_slice(cpuid)		(sn_nodepda->phys_cpuid[cpuid].slice)
+
+
+/*
+ * Dont use the following in performance critical code. They require scans
+ * of potentially large tables.
+ */
+extern int nasid_slice_to_cpuid(int, int);	/* NOTE(review): parameters are unnamed; presumably (nasid, slice) -- confirm at the definition */
+
+/*
+ * cnodeid_to_nasid - convert a cnodeid to a NASID
+ *
+ * Plain array lookup in sn_cnodeid_to_nasid[]; no range check.
+ */
+#define cnodeid_to_nasid(cnodeid)	(sn_cnodeid_to_nasid[cnodeid])
+
+/*
+ * nasid_to_cnodeid - convert a NASID to a cnodeid
+ *
+ * Plain array lookup in physical_node_map[]; no range check.
+ */
+#define nasid_to_cnodeid(nasid)		(physical_node_map[nasid])
+
+/*
+ * partition_coherence_id - get the coherence ID of the current partition
+ *
+ * Expands to the sn_coherency_id variable declared below.
+ */
+extern u8 sn_coherency_id;
+#define partition_coherence_id()	(sn_coherency_id)
+
+#endif /* _ASM_IA64_SN_SN_CPUID_H */
+
diff --git a/kernel/arch/ia64/include/asm/sn/sn_feature_sets.h b/kernel/arch/ia64/include/asm/sn/sn_feature_sets.h
new file mode 100644
index 000000000..8e83ac117
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/sn/sn_feature_sets.h
@@ -0,0 +1,58 @@
+#ifndef _ASM_IA64_SN_FEATURE_SETS_H
+#define _ASM_IA64_SN_FEATURE_SETS_H
+
+/*
+ * SN PROM Features
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+
+/* --------------------- PROM Features -----------------------------*/
+extern int sn_prom_feature_available(int id);	/* id is one of the PRF_* values below */
+
+#define MAX_PROM_FEATURE_SETS			2
+
+/*
+ * The following defines features that may or may not be supported by the
+ * current PROM. The OS uses sn_prom_feature_available(feature) to test for
+ * the presence of a PROM feature. Down rev (old) PROMs will always test
+ * "false" for new features.
+ *
+ * Use:
+ *		if (sn_prom_feature_available(PRF_XXX))
+ *			...
+ */
+
+#define PRF_PAL_CACHE_FLUSH_SAFE	0
+#define PRF_DEVICE_FLUSH_LIST		1
+#define PRF_HOTPLUG_SUPPORT		2
+#define PRF_CPU_DISABLE_SUPPORT		3
+
+/* --------------------- OS Features -------------------------------*/
+
+/*
+ * The following defines OS features that are optionally present in
+ * the operating system.
+ * During boot, PROM is notified of these features via a series of calls:
+ *
+ *			ia64_sn_set_os_feature(feature1);
+ *
+ * Once enabled, a feature cannot be disabled.
+ *
+ * By default, features are disabled unless explicitly enabled.
+ *
+ * These defines must be kept in sync with the corresponding
+ * PROM definitions in feature_sets.h.
+ */
+#define  OSF_MCA_SLV_TO_OS_INIT_SLV	0	/* OSF_* values are the feature ids handed to ia64_sn_set_os_feature() */
+#define  OSF_FEAT_LOG_SBES		1
+#define  OSF_ACPI_ENABLE		2
+#define  OSF_PCISEGMENT_ENABLE		3
+
+
+#endif /* _ASM_IA64_SN_FEATURE_SETS_H */
diff --git a/kernel/arch/ia64/include/asm/sn/sn_sal.h b/kernel/arch/ia64/include/asm/sn/sn_sal.h
new file mode 100644
index 000000000..1f5ff470a
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/sn/sn_sal.h
@@ -0,0 +1,1233 @@
+#ifndef _ASM_IA64_SN_SN_SAL_H
+#define _ASM_IA64_SN_SN_SAL_H
+
+/*
+ * System Abstraction Layer definitions for IA64
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * SGI Specific Calls
+ *
+ * Function numbers passed as the first SAL argument by the wrappers in
+ * this header.  The trailing "reentrant", "lock" and "deprecated" tags are
+ * the original author's annotations.  NOTE(review): "reentrant" presumably
+ * marks calls that may be issued through SAL_CALL_REENTRANT; that is not
+ * uniformly how this header issues them (SN_SAL_SYSCTL_IOBRICK_PCI_OP is
+ * issued with SAL_CALL_NOLOCK), so confirm before relying on the tags.
+ *
+ * NOTE(review): the six #include directives above carry no header names in
+ * this copy of the patch; the targets appear to have been lost when the
+ * text was extracted and must be restored from the original source.
+ */
+#define  SN_SAL_POD_MODE                           0x02000001
+#define  SN_SAL_SYSTEM_RESET                       0x02000002
+#define  SN_SAL_PROBE                              0x02000003
+#define  SN_SAL_GET_MASTER_NASID                   0x02000004
+#define	 SN_SAL_GET_KLCONFIG_ADDR		   0x02000005
+#define  SN_SAL_LOG_CE				   0x02000006
+#define  SN_SAL_REGISTER_CE			   0x02000007
+#define  SN_SAL_GET_PARTITION_ADDR		   0x02000009
+#define  SN_SAL_XP_ADDR_REGION			   0x0200000f
+#define  SN_SAL_NO_FAULT_ZONE_VIRTUAL		   0x02000010
+#define  SN_SAL_NO_FAULT_ZONE_PHYSICAL		   0x02000011
+#define  SN_SAL_PRINT_ERROR			   0x02000012
+#define  SN_SAL_REGISTER_PMI_HANDLER		   0x02000014
+#define  SN_SAL_SET_ERROR_HANDLING_FEATURES	   0x0200001a	// reentrant
+#define  SN_SAL_GET_FIT_COMPT			   0x0200001b	// reentrant
+#define  SN_SAL_GET_SAPIC_INFO                     0x0200001d
+#define  SN_SAL_GET_SN_INFO                        0x0200001e
+#define  SN_SAL_CONSOLE_PUTC                       0x02000021
+#define  SN_SAL_CONSOLE_GETC                       0x02000022
+#define  SN_SAL_CONSOLE_PUTS                       0x02000023
+#define  SN_SAL_CONSOLE_GETS                       0x02000024
+#define  SN_SAL_CONSOLE_GETS_TIMEOUT               0x02000025
+#define  SN_SAL_CONSOLE_POLL                       0x02000026
+#define  SN_SAL_CONSOLE_INTR                       0x02000027
+#define  SN_SAL_CONSOLE_PUTB                       0x02000028
+#define  SN_SAL_CONSOLE_XMIT_CHARS		   0x0200002a
+#define  SN_SAL_CONSOLE_READC			   0x0200002b
+#define  SN_SAL_SYSCTL_OP			   0x02000030
+#define  SN_SAL_SYSCTL_MODID_GET	           0x02000031
+#define  SN_SAL_SYSCTL_GET                         0x02000032
+#define  SN_SAL_SYSCTL_IOBRICK_MODULE_GET          0x02000033
+#define  SN_SAL_SYSCTL_IO_PORTSPEED_GET            0x02000035
+#define  SN_SAL_SYSCTL_SLAB_GET                    0x02000036
+#define  SN_SAL_BUS_CONFIG		   	   0x02000037
+#define  SN_SAL_SYS_SERIAL_GET			   0x02000038
+#define  SN_SAL_PARTITION_SERIAL_GET		   0x02000039
+#define  SN_SAL_SYSCTL_PARTITION_GET               0x0200003a
+#define  SN_SAL_SYSTEM_POWER_DOWN		   0x0200003b
+#define  SN_SAL_GET_MASTER_BASEIO_NASID		   0x0200003c
+#define  SN_SAL_COHERENCE                          0x0200003d
+#define  SN_SAL_MEMPROTECT                         0x0200003e
+#define  SN_SAL_SYSCTL_FRU_CAPTURE		   0x0200003f
+
+#define  SN_SAL_SYSCTL_IOBRICK_PCI_OP		   0x02000042	// reentrant
+#define	 SN_SAL_IROUTER_OP			   0x02000043
+#define  SN_SAL_SYSCTL_EVENT                       0x02000044
+#define  SN_SAL_IOIF_INTERRUPT			   0x0200004a
+#define  SN_SAL_HWPERF_OP			   0x02000050   // lock
+#define  SN_SAL_IOIF_ERROR_INTERRUPT		   0x02000051
+#define  SN_SAL_IOIF_PCI_SAFE			   0x02000052
+#define  SN_SAL_IOIF_SLOT_ENABLE		   0x02000053
+#define  SN_SAL_IOIF_SLOT_DISABLE		   0x02000054
+#define  SN_SAL_IOIF_GET_HUBDEV_INFO		   0x02000055
+#define  SN_SAL_IOIF_GET_PCIBUS_INFO		   0x02000056
+#define  SN_SAL_IOIF_GET_PCIDEV_INFO		   0x02000057
+#define  SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST	   0x02000058	// deprecated
+#define  SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST	   0x0200005a
+
+#define SN_SAL_IOIF_INIT			   0x0200005f
+#define SN_SAL_HUB_ERROR_INTERRUPT		   0x02000060
+#define SN_SAL_BTE_RECOVER			   0x02000061
+#define SN_SAL_RESERVED_DO_NOT_USE		   0x02000062
+#define SN_SAL_IOIF_GET_PCI_TOPOLOGY		   0x02000064
+
+#define  SN_SAL_GET_PROM_FEATURE_SET		   0x02000065
+#define  SN_SAL_SET_OS_FEATURE_SET		   0x02000066
+#define  SN_SAL_INJECT_ERROR			   0x02000067
+#define  SN_SAL_SET_CPU_NUMBER			   0x02000068
+
+#define  SN_SAL_KERNEL_LAUNCH_EVENT		   0x02000069
+#define  SN_SAL_WATCHLIST_ALLOC			   0x02000070
+#define  SN_SAL_WATCHLIST_FREE			   0x02000071
+
+/*
+ * Service-specific constants
+ */
+
+/* Console interrupt manipulation (arguments to SN_SAL_CONSOLE_INTR) */
+	/* action codes */
+#define SAL_CONSOLE_INTR_OFF    0       /* turn the interrupt off */
+#define SAL_CONSOLE_INTR_ON     1       /* turn the interrupt on */
+#define SAL_CONSOLE_INTR_STATUS 2	/* retrieve the interrupt status */
+	/* interrupt specification & status return codes */
+#define SAL_CONSOLE_INTR_XMIT	1	/* output interrupt */
+#define SAL_CONSOLE_INTR_RECV	2	/* input interrupt */
+
+/* interrupt handling */
+#define SAL_INTR_ALLOC		1
+#define SAL_INTR_FREE		2
+#define SAL_INTR_REDIRECT	3
+
+/*
+ * operations available on the generic SN_SAL_SYSCTL_OP
+ * runtime service
+ */
+#define SAL_SYSCTL_OP_IOBOARD		0x0001  /*  retrieve board type */
+#define SAL_SYSCTL_OP_TIO_JLCK_RST      0x0002  /* issue TIO clock reset */
+
+/*
+ * IRouter (i.e.
generalized system controller) operations
+ */
+#define SAL_IROUTER_OPEN	0	/* open a subchannel */
+#define SAL_IROUTER_CLOSE	1	/* close a subchannel */
+#define SAL_IROUTER_SEND	2	/* send part of an IRouter packet */
+#define SAL_IROUTER_RECV	3	/* receive part of an IRouter packet */
+#define SAL_IROUTER_INTR_STATUS	4	/* check the interrupt status for
+					 * an open subchannel
+					 */
+#define SAL_IROUTER_INTR_ON	5	/* enable an interrupt */
+#define SAL_IROUTER_INTR_OFF	6	/* disable an interrupt */
+#define SAL_IROUTER_INIT	7	/* initialize IRouter driver */
+
+/* IRouter interrupt mask bits (same values as the console interrupt bits) */
+#define SAL_IROUTER_INTR_XMIT	SAL_CONSOLE_INTR_XMIT
+#define SAL_IROUTER_INTR_RECV	SAL_CONSOLE_INTR_RECV
+
+/*
+ * Error Handling Features
+ */
+#define SAL_ERR_FEAT_MCA_SLV_TO_OS_INIT_SLV	0x1	// obsolete
+#define SAL_ERR_FEAT_LOG_SBES			0x2	// obsolete
+#define SAL_ERR_FEAT_MFR_OVERRIDE		0x4
+#define SAL_ERR_FEAT_SBE_THRESHOLD		0xffff0000
+
+/*
+ * SAL Error Codes
+ *
+ * Values compared against the 'status' member of struct ia64_sal_retval.
+ */
+#define SALRET_MORE_PASSES	1
+#define SALRET_OK		0
+#define SALRET_NOT_IMPLEMENTED	(-1)
+#define SALRET_INVALID_ARG	(-2)
+#define SALRET_ERROR		(-3)
+
+#define SN_SAL_FAKE_PROM			   0x02009999
+
+/**
+ * sn_sal_rev - get the SGI SAL revision number
+ *
+ * The SGI PROM stores its version in the sal_[ab]_rev_(major|minor).
+ * This routine simply extracts the major and minor values and
+ * presents them in a u32 format.  It reads the sal_b_rev_* pair from the
+ * SAL system table located through efi.sal_systab.
+ *
+ * For example, version 4.05 would be represented as 0x0405.
+ */
+static inline u32
+sn_sal_rev(void)
+{
+	struct ia64_sal_systab *systab = __va(efi.sal_systab);
+
+	return (u32)(systab->sal_b_rev_major << 8 | systab->sal_b_rev_minor);	/* major in bits 8 and up, minor in the low byte */
+}
+
+/*
+ * Returns the master console nasid, if the call fails, return an illegal
+ * value.
+ */
+static inline u64
+ia64_sn_get_console_nasid(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL(ret_stuff, SN_SAL_GET_MASTER_NASID, 0, 0, 0, 0, 0, 0, 0);
+
+	if (ret_stuff.status < 0)	/* the "illegal value" is the negative status, converted to u64 */
+		return ret_stuff.status;
+
+	/* Master console nasid is in 'v0' */
+	return ret_stuff.v0;
+}
+
+/*
+ * Returns the master baseio nasid, if the call fails, return an illegal
+ * value.
+ *
+ * Same shape as ia64_sn_get_console_nasid(): a negative SAL status is
+ * returned converted to u64, otherwise the nasid from 'v0'.
+ */
+static inline u64
+ia64_sn_get_master_baseio_nasid(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL(ret_stuff, SN_SAL_GET_MASTER_BASEIO_NASID, 0, 0, 0, 0, 0, 0, 0);
+
+	if (ret_stuff.status < 0)
+		return ret_stuff.status;
+
+	/* Master baseio nasid is in 'v0' */
+	return ret_stuff.v0;
+}
+/*
+ * Issues SN_SAL_GET_KLCONFIG_ADDR for 'nasid'.  Returns the address from
+ * 'v0' translated with __va(), or NULL when 'v0' is zero.  The SAL status
+ * is not examined.
+ */
+static inline void *
+ia64_sn_get_klconfig_addr(nasid_t nasid)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL(ret_stuff, SN_SAL_GET_KLCONFIG_ADDR, (u64)nasid, 0, 0, 0, 0, 0, 0);
+	return ret_stuff.v0 ? __va(ret_stuff.v0) : NULL;
+}
+
+/*
+ * Returns the next console character.
+ *
+ * The character ('v0' truncated to int) is stored through 'ch' whether or
+ * not the call succeeded; the function's return value is the SAL status.
+ */
+static inline u64
+ia64_sn_console_getc(int *ch)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_GETC, 0, 0, 0, 0, 0, 0, 0);
+
+	/* character is in 'v0' */
+	*ch = (int)ret_stuff.v0;
+
+	return ret_stuff.status;
+}
+
+/*
+ * Read a character from the SAL console device, after a previous interrupt
+ * or poll operation has given us to know that a character is available
+ * to be read.
+ */
+static inline u64
+ia64_sn_console_readc(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_READC, 0, 0, 0, 0, 0, 0, 0);
+
+	/* character is in 'v0' */
+	return ret_stuff.v0;	/* the SAL status is not examined */
+}
+
+/*
+ * Sends the given character to the console.
+ *
+ * Returns the SAL status of the SN_SAL_CONSOLE_PUTC call.
+ */
+static inline u64
+ia64_sn_console_putc(char ch)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_PUTC, (u64)ch, 0, 0, 0, 0, 0, 0);
+
+	return ret_stuff.status;
+}
+
+/*
+ * Sends the given buffer to the console.
+ *
+ * Passes the buffer address and length to SN_SAL_CONSOLE_PUTB.  Returns
+ * 'v0' when the SAL status is 0 and 0 otherwise, so a failed call is
+ * indistinguishable from one that returned 0 in 'v0'.
+ */
+static inline u64
+ia64_sn_console_putb(const char *buf, int len)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_PUTB, (u64)buf, (u64)len, 0, 0, 0, 0, 0);
+
+	if ( ret_stuff.status == 0 ) {
+		return ret_stuff.v0;
+	}
+	return (u64)0;
+}
+
+/*
+ * Print a platform error record
+ *
+ * Hands the printf-style callback 'hook' and the record pointer 'rec' to
+ * SN_SAL_PRINT_ERROR, both as 64-bit integers.  Returns the SAL status.
+ */
+static inline u64
+ia64_sn_plat_specific_err_print(int (*hook)(const char*, ...), char *rec)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_REENTRANT(ret_stuff, SN_SAL_PRINT_ERROR, (u64)hook, (u64)rec, 0, 0, 0, 0, 0);
+
+	return ret_stuff.status;
+}
+
+/*
+ * Check for Platform errors
+ *
+ * Issues SN_SAL_LOG_CE with no arguments and returns the SAL status.
+ */
+static inline u64
+ia64_sn_plat_cpei_handler(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_LOG_CE, 0, 0, 0, 0, 0, 0, 0);
+
+	return ret_stuff.status;
+}
+
+/*
+ * Set Error Handling Features (Obsolete)
+ *
+ * Always requests SAL_ERR_FEAT_LOG_SBES; returns the SAL status.
+ */
+static inline u64
+ia64_sn_plat_set_error_handling_features(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_REENTRANT(ret_stuff, SN_SAL_SET_ERROR_HANDLING_FEATURES,
+		SAL_ERR_FEAT_LOG_SBES,
+		0, 0, 0, 0, 0, 0);
+
+	return ret_stuff.status;
+}
+
+/*
+ * Checks for console input.
+ *
+ * Stores 'v0' of the SN_SAL_CONSOLE_POLL call (truncated to int) through
+ * 'result' unconditionally and returns the SAL status.
+ */
+static inline u64
+ia64_sn_console_check(int *result)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_POLL, 0, 0, 0, 0, 0, 0, 0);
+
+	/* result is in 'v0' */
+	*result = (int)ret_stuff.v0;
+
+	return ret_stuff.status;
+}
+
+/*
+ * Checks console interrupt status
+ *
+ * Issues SN_SAL_CONSOLE_INTR with action SAL_CONSOLE_INTR_STATUS.  Returns
+ * 'v0' when the SAL status is 0, otherwise 0.
+ */
+static inline u64
+ia64_sn_console_intr_status(void)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_INTR,
+		 0, SAL_CONSOLE_INTR_STATUS,
+		 0, 0, 0, 0, 0);
+
+	if (ret_stuff.status == 0) {
+	    return ret_stuff.v0;
+	}
+
+	return 0;
+}
+
+/*
+ * Enable an interrupt on the SAL console device.
+ *
+ * 'intr' is forwarded as the interrupt specification together with the
+ * action SAL_CONSOLE_INTR_ON.  The result of the call is discarded.
+ */
+static inline void
+ia64_sn_console_intr_enable(u64 intr)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_INTR,
+		 intr, SAL_CONSOLE_INTR_ON,
+		 0, 0, 0, 0, 0);
+}
+
+/*
+ * Disable an interrupt on the SAL console device.
+ *
+ * 'intr' is forwarded as the interrupt specification together with the
+ * action SAL_CONSOLE_INTR_OFF.  The result of the call is discarded.
+ */
+static inline void
+ia64_sn_console_intr_disable(u64 intr)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_INTR,
+		 intr, SAL_CONSOLE_INTR_OFF,
+		 0, 0, 0, 0, 0);
+}
+
+/*
+ * Sends a character buffer to the console asynchronously.
+ */
+static inline u64
+ia64_sn_console_xmit_chars(char *buf, int len)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_CONSOLE_XMIT_CHARS,
+		 (u64)buf, (u64)len,
+		 0, 0, 0, 0, 0);
+
+	if (ret_stuff.status == 0) {	/* 'v0' is only returned on success; failures yield 0 */
+	    return ret_stuff.v0;
+	}
+
+	return 0;
+}
+
+/*
+ * Returns the iobrick module Id
+ *
+ * The id ('v0' truncated to int) is stored through 'result' whether or not
+ * the call succeeded; the function's return value is the SAL status.
+ */
+static inline u64
+ia64_sn_sysctl_iobrick_module_get(nasid_t nasid, int *result)
+{
+	struct ia64_sal_retval ret_stuff;
+
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+	ret_stuff.v1 = 0;
+	ret_stuff.v2 = 0;
+	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_SYSCTL_IOBRICK_MODULE_GET, nasid, 0, 0, 0, 0, 0, 0);
+
+	/* result is in 'v0' */
+	*result = (int)ret_stuff.v0;
+
+	return ret_stuff.status;
+}
+
+/**
+ * ia64_sn_pod_mode - call the SN_SAL_POD_MODE function
+ *
+ * SN_SAL_POD_MODE actually takes an argument, but it's always
+ * 0 when we call it from the kernel, so we don't have to expose
+ * it to the caller.
+ *
+ * Returns 'v0' of the call when the SAL status is 0, and 0 when the
+ * status is non-zero.
+ */
+static inline u64
+ia64_sn_pod_mode(void)
+{
+	struct ia64_sal_retval isrv;
+	SAL_CALL_REENTRANT(isrv, SN_SAL_POD_MODE, 0, 0, 0, 0, 0, 0, 0);
+	if (isrv.status)
+		return 0;
+	return isrv.v0;
+}
+
+/**
+ * ia64_sn_probe_mem - read from memory safely
+ * @addr: address to probe
+ * @size: number bytes to read (1,2,4,8)
+ * @data_ptr: address to store value read by probe (-1 returned if probe fails)
+ *
+ * Call into the SAL to do a memory read. If the read generates a machine
+ * check, this routine will recover gracefully and return -1 to the caller.
+ * @addr is usually a kernel virtual address in uncached space (i.e. the
+ * address starts with 0xc), but if called in physical mode, @addr should
+ * be a physical address.
+ * + * Return values: + * 0 - probe successful + * 1 - probe failed (generated MCA) + * 2 - Bad arg + * <0 - PAL error + */ +static inline u64 +ia64_sn_probe_mem(long addr, long size, void *data_ptr) +{ + struct ia64_sal_retval isrv; + + SAL_CALL(isrv, SN_SAL_PROBE, addr, size, 0, 0, 0, 0, 0); + + if (data_ptr) { + switch (size) { + case 1: + *((u8*)data_ptr) = (u8)isrv.v0; + break; + case 2: + *((u16*)data_ptr) = (u16)isrv.v0; + break; + case 4: + *((u32*)data_ptr) = (u32)isrv.v0; + break; + case 8: + *((u64*)data_ptr) = (u64)isrv.v0; + break; + default: + isrv.status = 2; + } + } + return isrv.status; +} + +/* + * Retrieve the system serial number as an ASCII string. + */ +static inline u64 +ia64_sn_sys_serial_get(char *buf) +{ + struct ia64_sal_retval ret_stuff; + SAL_CALL_NOLOCK(ret_stuff, SN_SAL_SYS_SERIAL_GET, buf, 0, 0, 0, 0, 0, 0); + return ret_stuff.status; +} + +extern char sn_system_serial_number_string[]; +extern u64 sn_partition_serial_number; + +static inline char * +sn_system_serial_number(void) { + if (sn_system_serial_number_string[0]) { + return(sn_system_serial_number_string); + } else { + ia64_sn_sys_serial_get(sn_system_serial_number_string); + return(sn_system_serial_number_string); + } +} + + +/* + * Returns a unique id number for this system and partition (suitable for + * use with license managers), based in part on the system serial number. 
+ */
+static inline u64
+ia64_sn_partition_serial_get(void)
+{
+	struct ia64_sal_retval ret_stuff;
+	ia64_sal_oemcall_reentrant(&ret_stuff, SN_SAL_PARTITION_SERIAL_GET, 0,
+				   0, 0, 0, 0, 0, 0);
+	if (ret_stuff.status != 0)	/* failure is reported as id 0 */
+	    return 0;
+	return ret_stuff.v0;
+}
+/*
+ * Cached variant of ia64_sn_partition_serial_get(): the SAL call is made
+ * while sn_partition_serial_number is still 0, so a failed lookup (which
+ * yields 0) is retried on the next call.
+ */
+static inline u64
+sn_partition_serial_number_val(void) {
+	if (unlikely(sn_partition_serial_number == 0)) {
+		sn_partition_serial_number = ia64_sn_partition_serial_get();
+	}
+	return sn_partition_serial_number;
+}
+
+/*
+ * Returns the partition id of the nasid passed in as an argument,
+ * or INVALID_PARTID if the partition id cannot be retrieved.
+ *
+ * NOTE(review): the failure path returns the literal -1; presumably
+ * INVALID_PARTID has that value -- confirm at its definition.
+ */
+static inline partid_t
+ia64_sn_sysctl_partition_get(nasid_t nasid)
+{
+	struct ia64_sal_retval ret_stuff;
+	SAL_CALL(ret_stuff, SN_SAL_SYSCTL_PARTITION_GET, nasid,
+		0, 0, 0, 0, 0, 0);
+	if (ret_stuff.status != 0)
+	    return -1;
+	return ((partid_t)ret_stuff.v0);
+}
+
+/*
+ * Returns the physical address of the partition's reserved page through
+ * an iterative number of calls.
+ *
+ * On first call, 'cookie' and 'len' should be set to 0, and 'addr'
+ * set to the nasid of the partition whose reserved page's address is
+ * being sought.
+ * On subsequent calls, pass the values, that were passed back on the
+ * previous call.
+ *
+ * While the return status equals SALRET_MORE_PASSES, keep calling
+ * this function after first copying 'len' bytes starting at 'addr'
+ * into 'buf'. Once the return status equals SALRET_OK, 'addr' will
+ * be the physical address of the partition's reserved page. If the
+ * return status equals neither of these, an error as occurred.
+ */
+static inline s64
+sn_partition_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
+{
+	struct ia64_sal_retval rv;
+	ia64_sal_oemcall_reentrant(&rv, SN_SAL_GET_PARTITION_ADDR, *cookie,
+				   *addr, buf, *len, 0, 0, 0);
+	*cookie = rv.v0;	/* all three in/out values are overwritten, whatever the status */
+	*addr = rv.v1;
+	*len = rv.v2;
+	return rv.status;
+}
+
+/*
+ * Register or unregister a physical address range being referenced across
+ * a partition boundary for which certain SAL errors should be scanned for,
+ * cleaned up and ignored.  This is of value for kernel partitioning code only.
+ * Values for the operation argument:
+ *	1 = register this address range with SAL
+ *	0 = unregister this address range with SAL
+ *
+ * SAL maintains a reference count on an address range in case it is registered
+ * multiple times.
+ *
+ * On success, returns the reference count of the address range after the SAL
+ * call has performed the current registration/unregistration.  Returns a
+ * negative value if an error occurred.
+ *
+ * The value returned is the SAL status narrowed to int; 'operation' is
+ * forwarded to SAL unchanged.
+ */
+static inline int
+sn_register_xp_addr_region(u64 paddr, u64 len, int operation)
+{
+	struct ia64_sal_retval ret_stuff;
+	ia64_sal_oemcall(&ret_stuff, SN_SAL_XP_ADDR_REGION, paddr, len,
+			 (u64)operation, 0, 0, 0, 0);
+	return ret_stuff.status;
+}
+
+/*
+ * Register or unregister an instruction range for which SAL errors should
+ * be ignored.  If an error occurs while in the registered range, SAL jumps
+ * to return_addr after ignoring the error.  Values for the operation argument:
+ *	1 = register this instruction range with SAL
+ *	0 = unregister this instruction range with SAL
+ *
+ * Returns 0 on success, or a negative value if an error occurred.
+ */ +static inline int +sn_register_nofault_code(u64 start_addr, u64 end_addr, u64 return_addr, + int virtual, int operation) +{ + struct ia64_sal_retval ret_stuff; + u64 call; + if (virtual) { + call = SN_SAL_NO_FAULT_ZONE_VIRTUAL; + } else { + call = SN_SAL_NO_FAULT_ZONE_PHYSICAL; + } + ia64_sal_oemcall(&ret_stuff, call, start_addr, end_addr, return_addr, + (u64)1, 0, 0, 0); + return ret_stuff.status; +} + +/* + * Register or unregister a function to handle a PMI received by a CPU. + * Before calling the registered handler, SAL sets r1 to the value that + * was passed in as the global_pointer. + * + * If the handler pointer is NULL, then the currently registered handler + * will be unregistered. + * + * Returns 0 on success, or a negative value if an error occurred. + */ +static inline int +sn_register_pmi_handler(u64 handler, u64 global_pointer) +{ + struct ia64_sal_retval ret_stuff; + ia64_sal_oemcall(&ret_stuff, SN_SAL_REGISTER_PMI_HANDLER, handler, + global_pointer, 0, 0, 0, 0, 0); + return ret_stuff.status; +} + +/* + * Change or query the coherence domain for this partition. Each cpu-based + * nasid is represented by a bit in an array of 64-bit words: + * 0 = not in this partition's coherency domain + * 1 = in this partition's coherency domain + * + * It is not possible for the local system's nasids to be removed from + * the coherency domain. Purpose of the domain arguments: + * new_domain = set the coherence domain to the given nasids + * old_domain = return the current coherence domain + * + * Returns 0 on success, or a negative value if an error occurred. + */ +static inline int +sn_change_coherence(u64 *new_domain, u64 *old_domain) +{ + struct ia64_sal_retval ret_stuff; + ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_COHERENCE, (u64)new_domain, + (u64)old_domain, 0, 0, 0, 0, 0); + return ret_stuff.status; +} + +/* + * Change memory access protections for a physical address range. + * nasid_array is not used on Altix, but may be in future architectures. 
+ * Available memory protection access classes are defined after the function. + */ +static inline int +sn_change_memprotect(u64 paddr, u64 len, u64 perms, u64 *nasid_array) +{ + struct ia64_sal_retval ret_stuff; + + ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_MEMPROTECT, paddr, len, + (u64)nasid_array, perms, 0, 0, 0); + return ret_stuff.status; +} +#define SN_MEMPROT_ACCESS_CLASS_0 0x14a080 +#define SN_MEMPROT_ACCESS_CLASS_1 0x2520c2 +#define SN_MEMPROT_ACCESS_CLASS_2 0x14a1ca +#define SN_MEMPROT_ACCESS_CLASS_3 0x14a290 +#define SN_MEMPROT_ACCESS_CLASS_6 0x084080 +#define SN_MEMPROT_ACCESS_CLASS_7 0x021080 + +/* + * Turns off system power. If the SAL call comes back, spins in cpu_relax() forever. + */ +static inline void +ia64_sn_power_down(void) +{ + struct ia64_sal_retval ret_stuff; + SAL_CALL(ret_stuff, SN_SAL_SYSTEM_POWER_DOWN, 0, 0, 0, 0, 0, 0, 0); + while(1) + cpu_relax(); + /* never returns */ +} + +/** + * ia64_sn_fru_capture - tell the system controller to capture hw state + * + * This routine will call the SAL which will tell the system controller(s) + * to capture hw mmr information from each SHub in the system. Returns SAL's v0, or 0 when SAL reports a non-zero status. + */ +static inline u64 +ia64_sn_fru_capture(void) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SN_SAL_SYSCTL_FRU_CAPTURE, 0, 0, 0, 0, 0, 0, 0); + if (isrv.status) + return 0; + return isrv.v0; +} + +/* + * Performs an operation on a PCI bus or slot -- power up, power down + * or reset. Returns 0 when the SAL status is 0, otherwise SAL's v0. + */ +static inline u64 +ia64_sn_sysctl_iobrick_pci_op(nasid_t n, u64 connection_type, + u64 bus, char slot, + u64 action) +{ + struct ia64_sal_retval rv = {0, 0, 0, 0}; + + SAL_CALL_NOLOCK(rv, SN_SAL_SYSCTL_IOBRICK_PCI_OP, connection_type, n, action, + bus, (u64) slot, 0, 0); + if (rv.status) + return rv.v0; + return 0; +} + + +/* + * Open a subchannel for sending arbitrary data to the system + * controller network via the system controller device associated with + * 'nasid'. Return the subchannel number or a negative error code.
+ */ +static inline int +ia64_sn_irtr_open(nasid_t nasid) +{ + struct ia64_sal_retval rv; + SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_OPEN, nasid, + 0, 0, 0, 0, 0); + return (int) rv.v0; +} + +/* + * Close system controller subchannel 'subch' previously opened on 'nasid'. Returns the SAL status. + */ +static inline int +ia64_sn_irtr_close(nasid_t nasid, int subch) +{ + struct ia64_sal_retval rv; + SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_CLOSE, + (u64) nasid, (u64) subch, 0, 0, 0, 0); + return (int) rv.status; +} + +/* + * Read data from system controller associated with 'nasid' on + * subchannel 'subch'. The buffer to be filled is pointed to by + * 'buf', and its capacity is in the integer pointed to by 'len'. The + * referent of 'len' is set to the number of bytes read by the SAL + * call. The return value is either SALRET_OK (for bytes read) or + * SALRET_ERROR (for error or "no data available"). + */ +static inline int +ia64_sn_irtr_recv(nasid_t nasid, int subch, char *buf, int *len) +{ + struct ia64_sal_retval rv; + SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_RECV, + (u64) nasid, (u64) subch, (u64) buf, (u64) len, + 0, 0); + return (int) rv.status; +} + +/* + * Write data to the system controller network via the system + * controller associated with 'nasid' on subchannel 'subch'. The + * buffer to be written out is pointed to by 'buf', and 'len' is the + * number of bytes to be written. The return value is either the + * number of bytes written (which could be zero) or a negative error + * code. + */ +static inline int +ia64_sn_irtr_send(nasid_t nasid, int subch, char *buf, int len) +{ + struct ia64_sal_retval rv; + SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_SEND, + (u64) nasid, (u64) subch, (u64) buf, (u64) len, + 0, 0); + return (int) rv.v0; +} + +/* + * Check whether any interrupts are pending for the system controller + * associated with 'nasid' and its subchannel 'subch'.
The return + * value is a mask of pending interrupts (SAL_IROUTER_INTR_XMIT and/or + * SAL_IROUTER_INTR_RECV). + */ +static inline int +ia64_sn_irtr_intr(nasid_t nasid, int subch) +{ + struct ia64_sal_retval rv; + SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_INTR_STATUS, + (u64) nasid, (u64) subch, 0, 0, 0, 0); + return (int) rv.v0; +} + +/* + * Enable the interrupt indicated by the intr parameter (either + * SAL_IROUTER_INTR_XMIT or SAL_IROUTER_INTR_RECV). Returns SAL's v0 cast to int. + */ +static inline int +ia64_sn_irtr_intr_enable(nasid_t nasid, int subch, u64 intr) +{ + struct ia64_sal_retval rv; + SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_INTR_ON, + (u64) nasid, (u64) subch, intr, 0, 0, 0); + return (int) rv.v0; +} + +/* + * Disable the interrupt indicated by the intr parameter (either + * SAL_IROUTER_INTR_XMIT or SAL_IROUTER_INTR_RECV). Returns SAL's v0 cast to int. + */ +static inline int +ia64_sn_irtr_intr_disable(nasid_t nasid, int subch, u64 intr) +{ + struct ia64_sal_retval rv; + SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_INTR_OFF, + (u64) nasid, (u64) subch, intr, 0, 0, 0); + return (int) rv.v0; +} + +/* + * Set up a node as the point of contact for system controller + * environmental event delivery. Returns SAL's v0 cast to int. + */ +static inline int +ia64_sn_sysctl_event_init(nasid_t nasid) +{ + struct ia64_sal_retval rv; + SAL_CALL_REENTRANT(rv, SN_SAL_SYSCTL_EVENT, (u64) nasid, + 0, 0, 0, 0, 0, 0); + return (int) rv.v0; +} + +/* + * Ask the system controller on the specified nasid to reset + * the CX corelet clock. Only valid on TIO nodes. Returns the SAL status if it is non-zero, else SAL's v0 if that is non-zero, else 0. + */ +static inline int +ia64_sn_sysctl_tio_clock_reset(nasid_t nasid) +{ + struct ia64_sal_retval rv; + SAL_CALL_REENTRANT(rv, SN_SAL_SYSCTL_OP, SAL_SYSCTL_OP_TIO_JLCK_RST, + nasid, 0, 0, 0, 0, 0); + if (rv.status != 0) + return (int)rv.status; + if (rv.v0 != 0) + return (int)rv.v0; + + return 0; +} + +/* + * Get the associated ioboard type for a given nasid.
+ */ +static inline long +ia64_sn_sysctl_ioboard_get(nasid_t nasid, u16 *ioboard) +{ + struct ia64_sal_retval isrv; + SAL_CALL_REENTRANT(isrv, SN_SAL_SYSCTL_OP, SAL_SYSCTL_OP_IOBOARD, + nasid, 0, 0, 0, 0, 0); + if (isrv.v0 != 0) { /* a non-zero v0 takes precedence over v1 */ + *ioboard = isrv.v0; + return isrv.status; + } + if (isrv.v1 != 0) { + *ioboard = isrv.v1; + return isrv.status; + } + + return isrv.status; /* v0 and v1 both zero: *ioboard is left untouched */ +} + +/** + * ia64_sn_get_fit_compt - read a FIT entry from the PROM header + * @nasid: NASID of node to read + * @index: FIT entry index to be retrieved (0..n) + * @fitentry: 16 byte buffer where FIT entry will be stored. + * @banbuf: optional buffer for retrieving banner + * @banlen: length of banner buffer + * + * Access to the physical PROM chips needs to be serialized since reads and + * writes can't occur at the same time, so we need to call into the SAL when + * we want to look at the FIT entries on the chips. + * + * Returns: + * %SALRET_OK if ok + * %SALRET_INVALID_ARG if index too big + * %SALRET_NOT_IMPLEMENTED if running on older PROM + * ??? if nasid invalid OR banner buffer not large enough + */ +static inline int +ia64_sn_get_fit_compt(u64 nasid, u64 index, void *fitentry, void *banbuf, + u64 banlen) +{ + struct ia64_sal_retval rv; + SAL_CALL_NOLOCK(rv, SN_SAL_GET_FIT_COMPT, nasid, index, fitentry, + banbuf, banlen, 0, 0); + return (int) rv.status; +} + +/* + * Initialize the SAL components of the system controller + * communication driver; specifically pass in a sizable buffer that + * can be used for allocation of subchannel queues as new subchannels + * are opened. "buf" points to the buffer, and "len" specifies its + * length.
+ */ +static inline int +ia64_sn_irtr_init(nasid_t nasid, void *buf, int len) +{ + struct ia64_sal_retval rv; + SAL_CALL_REENTRANT(rv, SN_SAL_IROUTER_OP, SAL_IROUTER_INIT, + (u64) nasid, (u64) buf, (u64) len, 0, 0, 0); + return (int) rv.status; +} + +/* + * Returns the nasid, subnode & slice corresponding to a SAPIC ID. Each output pointer may be NULL; the return value is 0 on success or the negative SAL status. + * + * In: + * arg0 - SN_SAL_GET_SAPIC_INFO + * arg1 - sapicid (lid >> 16) + * Out: + * v0 - nasid + * v1 - subnode + * v2 - slice + */ +static inline u64 +ia64_sn_get_sapic_info(int sapicid, int *nasid, int *subnode, int *slice) +{ + struct ia64_sal_retval ret_stuff; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + ret_stuff.v1 = 0; + ret_stuff.v2 = 0; + SAL_CALL_NOLOCK(ret_stuff, SN_SAL_GET_SAPIC_INFO, sapicid, 0, 0, 0, 0, 0, 0); + +/***** BEGIN HACK - temp til old proms no longer supported ********/ + if (ret_stuff.status == SALRET_NOT_IMPLEMENTED) { + if (nasid) *nasid = sapicid & 0xfff; + if (subnode) *subnode = (sapicid >> 13) & 1; + if (slice) *slice = (sapicid >> 12) & 3; + return 0; + } +/***** END HACK *******/ + + if (ret_stuff.status < 0) + return ret_stuff.status; + + if (nasid) *nasid = (int) ret_stuff.v0; + if (subnode) *subnode = (int) ret_stuff.v1; + if (slice) *slice = (int) ret_stuff.v2; + return 0; +} + +/* + * Returns information about the HUB/SHUB.
+ * In: + * arg0 - SN_SAL_GET_SN_INFO + * arg1 - 0 (other values reserved for future use) + * Out: + * v0 + * [7:0] - shub type (0=shub1, 1=shub2) + * [15:8] - Log2 max number of nodes in entire system (includes + * C-bricks, I-bricks, etc) + * [23:16] - Log2 of nodes per sharing domain + * [31:24] - partition ID + * [39:32] - coherency_id + * [47:40] - regionsize + * v1 + * [15:0] - nasid mask (ex., 0x7ff for 11 bit nasid) + * [23:16] - bit position of low nasid bit + */ +static inline u64 +ia64_sn_get_sn_info(int fc, u8 *shubtype, u16 *nasid_bitmask, u8 *nasid_shift, + u8 *systemsize, u8 *sharing_domain_size, u8 *partid, u8 *coher, u8 *reg) +{ + struct ia64_sal_retval ret_stuff; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + ret_stuff.v1 = 0; + ret_stuff.v2 = 0; + SAL_CALL_NOLOCK(ret_stuff, SN_SAL_GET_SN_INFO, fc, 0, 0, 0, 0, 0, 0); + +/***** BEGIN HACK - temp til old proms no longer supported ********/ + if (ret_stuff.status == SALRET_NOT_IMPLEMENTED) { + int nasid = get_sapicid() & 0xfff; +#define SH_SHUB_ID_NODES_PER_BIT_MASK 0x001f000000000000UL +#define SH_SHUB_ID_NODES_PER_BIT_SHFT 48 + if (shubtype) *shubtype = 0; + if (nasid_bitmask) *nasid_bitmask = 0x7ff; + if (nasid_shift) *nasid_shift = 38; + if (systemsize) *systemsize = 10; + if (sharing_domain_size) *sharing_domain_size = 8; + if (partid) *partid = ia64_sn_sysctl_partition_get(nasid); + if (coher) *coher = nasid >> 9; + if (reg) *reg = (HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_SHUB_ID)) & SH_SHUB_ID_NODES_PER_BIT_MASK) >> + SH_SHUB_ID_NODES_PER_BIT_SHFT; + return 0; + } +/***** END HACK *******/ + + if (ret_stuff.status < 0) + return ret_stuff.status; + + if (shubtype) *shubtype = ret_stuff.v0 & 0xff; + if (systemsize) *systemsize = (ret_stuff.v0 >> 8) & 0xff; + if (sharing_domain_size) *sharing_domain_size = (ret_stuff.v0 >> 16) & 0xff; + if (partid) *partid = (ret_stuff.v0 >> 24) & 0xff; + if (coher) *coher = (ret_stuff.v0 >> 32) & 0xff; + if (reg) *reg = (ret_stuff.v0 >> 40) & 0xff; + if
(nasid_bitmask) *nasid_bitmask = (ret_stuff.v1 & 0xffff); + if (nasid_shift) *nasid_shift = (ret_stuff.v1 >> 16) & 0xff; + return 0; +} + +/* + * This is the access point to the Altix PROM hardware performance + * and status monitoring interface. For info on using this, see + * arch/ia64/include/asm/sn/sn2/sn_hwperf.h. v0 may be NULL; otherwise it receives SAL's v0 cast to int. Returns the SAL status. + */ +static inline int +ia64_sn_hwperf_op(nasid_t nasid, u64 opcode, u64 a0, u64 a1, u64 a2, + u64 a3, u64 a4, int *v0) +{ + struct ia64_sal_retval rv; + SAL_CALL_NOLOCK(rv, SN_SAL_HWPERF_OP, (u64)nasid, + opcode, a0, a1, a2, a3, a4); + if (v0) + *v0 = (int) rv.v0; + return (int) rv.status; +} + +static inline int +ia64_sn_ioif_get_pci_topology(u64 buf, u64 len) +{ + struct ia64_sal_retval rv; + SAL_CALL_NOLOCK(rv, SN_SAL_IOIF_GET_PCI_TOPOLOGY, buf, len, 0, 0, 0, 0, 0); + return (int) rv.status; +} + +/* + * BTE error recovery is implemented in SAL. A SALRET_NOT_IMPLEMENTED status is reported to the caller as 0. + */ +static inline int +ia64_sn_bte_recovery(nasid_t nasid) +{ + struct ia64_sal_retval rv; + + rv.status = 0; + SAL_CALL_NOLOCK(rv, SN_SAL_BTE_RECOVER, (u64)nasid, 0, 0, 0, 0, 0, 0); + if (rv.status == SALRET_NOT_IMPLEMENTED) + return 0; + return (int) rv.status; +} + +static inline int +ia64_sn_is_fake_prom(void) +{ + struct ia64_sal_retval rv; + SAL_CALL_NOLOCK(rv, SN_SAL_FAKE_PROM, 0, 0, 0, 0, 0, 0, 0); + return (rv.status == 0); /* 1 when the SAL call reports status 0, otherwise 0 */ +} + +static inline int +ia64_sn_get_prom_feature_set(int set, unsigned long *feature_set) +{ + struct ia64_sal_retval rv; + + SAL_CALL_NOLOCK(rv, SN_SAL_GET_PROM_FEATURE_SET, set, 0, 0, 0, 0, 0, 0); + if (rv.status != 0) + return rv.status; + *feature_set = rv.v0; /* written only when the SAL status is 0 */ + return 0; +} + +static inline int +ia64_sn_set_os_feature(int feature) +{ + struct ia64_sal_retval rv; + + SAL_CALL_NOLOCK(rv, SN_SAL_SET_OS_FEATURE_SET, feature, 0, 0, 0, 0, 0, 0); + return rv.status; +} + +static inline int +sn_inject_error(u64 paddr, u64 *data, u64 *ecc) +{ + struct ia64_sal_retval ret_stuff; + + ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_INJECT_ERROR, paddr, (u64)data, + (u64)ecc, 0, 0, 0,
0); + return ret_stuff.status; +} + +static inline int +ia64_sn_set_cpu_number(int cpu) +{ + struct ia64_sal_retval rv; + + SAL_CALL_NOLOCK(rv, SN_SAL_SET_CPU_NUMBER, cpu, 0, 0, 0, 0, 0, 0); + return rv.status; +} +static inline int +ia64_sn_kernel_launch_event(void) +{ + struct ia64_sal_retval rv; + SAL_CALL_NOLOCK(rv, SN_SAL_KERNEL_LAUNCH_EVENT, 0, 0, 0, 0, 0, 0, 0); + return rv.status; +} + +union sn_watchlist_u { + u64 val; + struct { + u64 blade : 16, + size : 32, + filler : 16; + }; +}; + +static inline int +sn_mq_watchlist_alloc(int blade, void *mq, unsigned int mq_size, + unsigned long *intr_mmr_offset) +{ + struct ia64_sal_retval rv; + unsigned long addr; + union sn_watchlist_u size_blade; + int watchlist; + + addr = (unsigned long)mq; + size_blade.size = mq_size; + size_blade.blade = blade; + + /* + * bios returns watchlist number or negative error number. + */ + ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_ALLOC, addr, + size_blade.val, (u64)intr_mmr_offset, + (u64)&watchlist, 0, 0, 0); + if (rv.status < 0) + return rv.status; + + return watchlist; +} + +static inline int +sn_mq_watchlist_free(int blade, int watchlist_num) +{ + struct ia64_sal_retval rv; + ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_FREE, blade, + watchlist_num, 0, 0, 0, 0, 0); + return rv.status; +} +#endif /* _ASM_IA64_SN_SN_SAL_H */ diff --git a/kernel/arch/ia64/include/asm/sn/tioca.h b/kernel/arch/ia64/include/asm/sn/tioca.h new file mode 100644 index 000000000..666222d7f --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/tioca.h @@ -0,0 +1,596 @@ +#ifndef _ASM_IA64_SN_TIO_TIOCA_H +#define _ASM_IA64_SN_TIO_TIOCA_H + +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved. 
+ */ + + +#define TIOCA_PART_NUM 0xE020 +#define TIOCA_MFGR_NUM 0x24 +#define TIOCA_REV_A 0x1 + +/* + * Register layout for TIO:CA. See below for bitmasks for each register. + */ + +struct tioca { + u64 ca_id; /* 0x000000 */ + u64 ca_control1; /* 0x000008 */ + u64 ca_control2; /* 0x000010 */ + u64 ca_status1; /* 0x000018 */ + u64 ca_status2; /* 0x000020 */ + u64 ca_gart_aperature; /* 0x000028 */ + u64 ca_gfx_detach; /* 0x000030 */ + u64 ca_inta_dest_addr; /* 0x000038 */ + u64 ca_intb_dest_addr; /* 0x000040 */ + u64 ca_err_int_dest_addr; /* 0x000048 */ + u64 ca_int_status; /* 0x000050 */ + u64 ca_int_status_alias; /* 0x000058 */ + u64 ca_mult_error; /* 0x000060 */ + u64 ca_mult_error_alias; /* 0x000068 */ + u64 ca_first_error; /* 0x000070 */ + u64 ca_int_mask; /* 0x000078 */ + u64 ca_crm_pkterr_type; /* 0x000080 */ + u64 ca_crm_pkterr_type_alias; /* 0x000088 */ + u64 ca_crm_ct_error_detail_1; /* 0x000090 */ + u64 ca_crm_ct_error_detail_2; /* 0x000098 */ + u64 ca_crm_tnumto; /* 0x0000A0 */ + u64 ca_gart_err; /* 0x0000A8 */ + u64 ca_pcierr_type; /* 0x0000B0 */ + u64 ca_pcierr_addr; /* 0x0000B8 */ + + u64 ca_pad_0000C0[3]; /* 0x0000{C0..D0} */ + + u64 ca_pci_rd_buf_flush; /* 0x0000D8 */ + u64 ca_pci_dma_addr_extn; /* 0x0000E0 */ + u64 ca_agp_dma_addr_extn; /* 0x0000E8 */ + u64 ca_force_inta; /* 0x0000F0 */ + u64 ca_force_intb; /* 0x0000F8 */ + u64 ca_debug_vector_sel; /* 0x000100 */ + u64 ca_debug_mux_core_sel; /* 0x000108 */ + u64 ca_debug_mux_pci_sel; /* 0x000110 */ + u64 ca_debug_domain_sel; /* 0x000118 */ + + u64 ca_pad_000120[28]; /* 0x0001{20..F8} */ + + u64 ca_gart_ptr_table; /* 0x000200 */ + u64 ca_gart_tlb_addr[8]; /* 0x0002{08..40} */ +}; + +/* + * Mask/shift definitions for TIO:CA registers. The convention here is + * to mainly use the names as they appear in the "TIO AEGIS Programmers' + * Reference" with a CA_ prefix added.
Some exceptions were made to fix + * duplicate field names or to generalize fields that are common to + * different registers (ca_debug_mux_core_sel and ca_debug_mux_pci_sel for + * example). + * + * Fields consisting of a single bit have a single + * macro declaration to mask the bit. Fields consisting of multiple bits + * have two declarations: one to mask the proper bits in a register, and + * a second with the suffix "_SHFT" to identify how far the mask needs to + * be shifted right to get its base value. + */ + +/* ==== ca_control1 */ +#define CA_SYS_BIG_END (1ull << 0) +#define CA_DMA_AGP_SWAP (1ull << 1) +#define CA_DMA_PCI_SWAP (1ull << 2) +#define CA_PIO_IO_SWAP (1ull << 3) +#define CA_PIO_MEM_SWAP (1ull << 4) +#define CA_GFX_WR_SWAP (1ull << 5) +#define CA_AGP_FW_ENABLE (1ull << 6) +#define CA_AGP_CAL_CYCLE (0x7ull << 7) +#define CA_AGP_CAL_CYCLE_SHFT 7 +#define CA_AGP_CAL_PRSCL_BYP (1ull << 10) +#define CA_AGP_INIT_CAL_ENB (1ull << 11) +#define CA_INJ_ADDR_PERR (1ull << 12) +#define CA_INJ_DATA_PERR (1ull << 13) + /* bits 15:14 unused */ +#define CA_PCIM_IO_NBE_AD (0x7ull << 16) +#define CA_PCIM_IO_NBE_AD_SHFT 16 +#define CA_PCIM_FAST_BTB_ENB (1ull << 19) + /* bits 23:20 unused */ +#define CA_PIO_ADDR_OFFSET (0xffull << 24) +#define CA_PIO_ADDR_OFFSET_SHFT 24 + /* bits 35:32 unused */ +#define CA_AGPDMA_OP_COMBDELAY (0x1full << 36) +#define CA_AGPDMA_OP_COMBDELAY_SHFT 36 + /* bit 41 unused */ +#define CA_AGPDMA_OP_ENB_COMBDELAY (1ull << 42) +#define CA_PCI_INT_LPCNT (0xffull << 44) +#define CA_PCI_INT_LPCNT_SHFT 44 + /* bits 63:52 unused */ + +/* ==== ca_control2 */ +#define CA_AGP_LATENCY_TO (0xffull << 0) +#define CA_AGP_LATENCY_TO_SHFT 0 +#define CA_PCI_LATENCY_TO (0xffull << 8) +#define CA_PCI_LATENCY_TO_SHFT 8 +#define CA_PCI_MAX_RETRY (0x3ffull << 16) +#define CA_PCI_MAX_RETRY_SHFT 16 + /* bits 27:26 unused */ +#define CA_RT_INT_EN (0x3ull << 28) +#define CA_RT_INT_EN_SHFT 28 +#define CA_MSI_INT_ENB (1ull << 30) +#define
CA_PCI_ARB_ERR_ENB (1ull << 31) +#define CA_GART_MEM_PARAM (0x3ull << 32) +#define CA_GART_MEM_PARAM_SHFT 32 +#define CA_GART_RD_PREFETCH_ENB (1ull << 34) +#define CA_GART_WR_PREFETCH_ENB (1ull << 35) +#define CA_GART_FLUSH_TLB (1ull << 36) + /* bits 39:37 unused */ +#define CA_CRM_TNUMTO_PERIOD (0x1fffull << 40) +#define CA_CRM_TNUMTO_PERIOD_SHFT 40 + /* bits 55:53 unused */ +#define CA_CRM_TNUMTO_ENB (1ull << 56) +#define CA_CRM_PRESCALER_BYP (1ull << 57) + /* bits 59:58 unused */ +#define CA_CRM_MAX_CREDIT (0x7ull << 60) +#define CA_CRM_MAX_CREDIT_SHFT 60 + /* bit 63 unused */ + +/* ==== ca_status1 */ +#define CA_CORELET_ID (0x3ull << 0) +#define CA_CORELET_ID_SHFT 0 +#define CA_INTA_N (1ull << 2) +#define CA_INTB_N (1ull << 3) +#define CA_CRM_CREDIT_AVAIL (0x7ull << 4) +#define CA_CRM_CREDIT_AVAIL_SHFT 4 + /* bit 7 unused */ +#define CA_CRM_SPACE_AVAIL (0x7full << 8) +#define CA_CRM_SPACE_AVAIL_SHFT 8 + /* bit 15 unused */ +#define CA_GART_TLB_VAL (0xffull << 16) +#define CA_GART_TLB_VAL_SHFT 16 + /* bits 63:24 unused */ + +/* ==== ca_status2 */ +#define CA_GFX_CREDIT_AVAIL (0xffull << 0) +#define CA_GFX_CREDIT_AVAIL_SHFT 0 +#define CA_GFX_OPQ_AVAIL (0xffull << 8) +#define CA_GFX_OPQ_AVAIL_SHFT 8 +#define CA_GFX_WRBUFF_AVAIL (0xffull << 16) +#define CA_GFX_WRBUFF_AVAIL_SHFT 16 +#define CA_ADMA_OPQ_AVAIL (0xffull << 24) +#define CA_ADMA_OPQ_AVAIL_SHFT 24 +#define CA_ADMA_WRBUFF_AVAIL (0xffull << 32) +#define CA_ADMA_WRBUFF_AVAIL_SHFT 32 +#define CA_ADMA_RDBUFF_AVAIL (0x7full << 40) +#define CA_ADMA_RDBUFF_AVAIL_SHFT 40 +#define CA_PCI_PIO_OP_STAT (1ull << 47) +#define CA_PDMA_OPQ_AVAIL (0xfull << 48) +#define CA_PDMA_OPQ_AVAIL_SHFT 48 +#define CA_PDMA_WRBUFF_AVAIL (0xfull << 52) +#define CA_PDMA_WRBUFF_AVAIL_SHFT 52 +#define CA_PDMA_RDBUFF_AVAIL (0x3ull << 56) +#define CA_PDMA_RDBUFF_AVAIL_SHFT 56 + /* bits 63:58 unused */ + +/* ==== ca_gart_aperature */ +#define CA_GART_AP_ENB_AGP (1ull << 0) +#define CA_GART_PAGE_SIZE (1ull << 1) +#define CA_GART_AP_ENB_PCI 
(1ull << 2) + /* bits 11:3 unused */ +#define CA_GART_AP_SIZE (0x3ffull << 12) +#define CA_GART_AP_SIZE_SHFT 12 +#define CA_GART_AP_BASE (0x3ffffffffffull << 22) +#define CA_GART_AP_BASE_SHFT 22 + +/* ==== ca_inta_dest_addr + ==== ca_intb_dest_addr + ==== ca_err_int_dest_addr */ + /* bits 2:0 unused */ +#define CA_INT_DEST_ADDR (0x7ffffffffffffull << 3) +#define CA_INT_DEST_ADDR_SHFT 3 + /* bits 55:54 unused */ +#define CA_INT_DEST_VECT (0xffull << 56) +#define CA_INT_DEST_VECT_SHFT 56 + +/* ==== ca_int_status */ +/* ==== ca_int_status_alias */ +/* ==== ca_mult_error */ +/* ==== ca_mult_error_alias */ +/* ==== ca_first_error */ +/* ==== ca_int_mask */ +#define CA_PCI_ERR (1ull << 0) + /* bits 3:1 unused */ +#define CA_GART_FETCH_ERR (1ull << 4) +#define CA_GFX_WR_OVFLW (1ull << 5) +#define CA_PIO_REQ_OVFLW (1ull << 6) +#define CA_CRM_PKTERR (1ull << 7) +#define CA_CRM_DVERR (1ull << 8) +#define CA_TNUMTO (1ull << 9) +#define CA_CXM_RSP_CRED_OVFLW (1ull << 10) +#define CA_CXM_REQ_CRED_OVFLW (1ull << 11) +#define CA_PIO_INVALID_ADDR (1ull << 12) +#define CA_PCI_ARB_TO (1ull << 13) +#define CA_AGP_REQ_OFLOW (1ull << 14) +#define CA_SBA_TYPE1_ERR (1ull << 15) + /* bit 16 unused */ +#define CA_INTA (1ull << 17) +#define CA_INTB (1ull << 18) +#define CA_MULT_INTA (1ull << 19) +#define CA_MULT_INTB (1ull << 20) +#define CA_GFX_CREDIT_OVFLW (1ull << 21) + /* bits 63:22 unused */ + +/* ==== ca_crm_pkterr_type */ +/* ==== ca_crm_pkterr_type_alias */ +#define CA_CRM_PKTERR_SBERR_HDR (1ull << 0) +#define CA_CRM_PKTERR_DIDN (1ull << 1) +#define CA_CRM_PKTERR_PACTYPE (1ull << 2) +#define CA_CRM_PKTERR_INV_TNUM (1ull << 3) +#define CA_CRM_PKTERR_ADDR_RNG (1ull << 4) +#define CA_CRM_PKTERR_ADDR_ALGN (1ull << 5) +#define CA_CRM_PKTERR_HDR_PARAM (1ull << 6) +#define CA_CRM_PKTERR_CW_ERR (1ull << 7) +#define CA_CRM_PKTERR_SBERR_NH (1ull << 8) +#define CA_CRM_PKTERR_EARLY_TERM (1ull << 9) +#define CA_CRM_PKTERR_EARLY_TAIL (1ull << 10) +#define CA_CRM_PKTERR_MSSNG_TAIL (1ull << 11) 
+#define CA_CRM_PKTERR_MSSNG_HDR (1ull << 12) + /* bits 15:13 unused */ +#define CA_FIRST_CRM_PKTERR_SBERR_HDR (1ull << 16) +#define CA_FIRST_CRM_PKTERR_DIDN (1ull << 17) +#define CA_FIRST_CRM_PKTERR_PACTYPE (1ull << 18) +#define CA_FIRST_CRM_PKTERR_INV_TNUM (1ull << 19) +#define CA_FIRST_CRM_PKTERR_ADDR_RNG (1ull << 20) +#define CA_FIRST_CRM_PKTERR_ADDR_ALGN (1ull << 21) +#define CA_FIRST_CRM_PKTERR_HDR_PARAM (1ull << 22) +#define CA_FIRST_CRM_PKTERR_CW_ERR (1ull << 23) +#define CA_FIRST_CRM_PKTERR_SBERR_NH (1ull << 24) +#define CA_FIRST_CRM_PKTERR_EARLY_TERM (1ull << 25) +#define CA_FIRST_CRM_PKTERR_EARLY_TAIL (1ull << 26) +#define CA_FIRST_CRM_PKTERR_MSSNG_TAIL (1ull << 27) +#define CA_FIRST_CRM_PKTERR_MSSNG_HDR (1ull << 28) + /* bits 63:29 unused */ + +/* ==== ca_crm_ct_error_detail_1 */ +#define CA_PKT_TYPE (0xfull << 0) +#define CA_PKT_TYPE_SHFT 0 +#define CA_SRC_ID (0x3ull << 4) +#define CA_SRC_ID_SHFT 4 +#define CA_DATA_SZ (0x3ull << 6) +#define CA_DATA_SZ_SHFT 6 +#define CA_TNUM (0xffull << 8) +#define CA_TNUM_SHFT 8 +#define CA_DW_DATA_EN (0xffull << 16) +#define CA_DW_DATA_EN_SHFT 16 +#define CA_GFX_CRED (0xffull << 24) +#define CA_GFX_CRED_SHFT 24 +#define CA_MEM_RD_PARAM (0x3ull << 32) +#define CA_MEM_RD_PARAM_SHFT 32 +#define CA_PIO_OP (1ull << 34) +#define CA_CW_ERR (1ull << 35) + /* bits 62:36 unused */ +#define CA_VALID (1ull << 63) + +/* ==== ca_crm_ct_error_detail_2 */ + /* bits 2:0 unused */ +#define CA_PKT_ADDR (0x1fffffffffffffull << 3) +#define CA_PKT_ADDR_SHFT 3 + /* bits 63:56 unused */ + +/* ==== ca_crm_tnumto */ +#define CA_CRM_TNUMTO_VAL (0xffull << 0) +#define CA_CRM_TNUMTO_VAL_SHFT 0 +#define CA_CRM_TNUMTO_WR (1ull << 8) + /* bits 63:9 unused */ + +/* ==== ca_gart_err */ +#define CA_GART_ERR_SOURCE (0x3ull << 0) +#define CA_GART_ERR_SOURCE_SHFT 0 + /* bits 3:2 unused */ +#define CA_GART_ERR_ADDR (0xfffffffffull << 4) +#define CA_GART_ERR_ADDR_SHFT 4 + /* bits 63:40 unused */ + +/* ==== ca_pcierr_type */ +#define CA_PCIERR_DATA 
(0xffffffffull << 0) +#define CA_PCIERR_DATA_SHFT 0 +#define CA_PCIERR_ENB (0xfull << 32) +#define CA_PCIERR_ENB_SHFT 32 +#define CA_PCIERR_CMD (0xfull << 36) +#define CA_PCIERR_CMD_SHFT 36 +#define CA_PCIERR_A64 (1ull << 40) +#define CA_PCIERR_SLV_SERR (1ull << 41) +#define CA_PCIERR_SLV_WR_PERR (1ull << 42) +#define CA_PCIERR_SLV_RD_PERR (1ull << 43) +#define CA_PCIERR_MST_SERR (1ull << 44) +#define CA_PCIERR_MST_WR_PERR (1ull << 45) +#define CA_PCIERR_MST_RD_PERR (1ull << 46) +#define CA_PCIERR_MST_MABT (1ull << 47) +#define CA_PCIERR_MST_TABT (1ull << 48) +#define CA_PCIERR_MST_RETRY_TOUT (1ull << 49) + +#define CA_PCIERR_TYPES \ + (CA_PCIERR_A64|CA_PCIERR_SLV_SERR| \ + CA_PCIERR_SLV_WR_PERR|CA_PCIERR_SLV_RD_PERR| \ + CA_PCIERR_MST_SERR|CA_PCIERR_MST_WR_PERR|CA_PCIERR_MST_RD_PERR| \ + CA_PCIERR_MST_MABT|CA_PCIERR_MST_TABT|CA_PCIERR_MST_RETRY_TOUT) + + /* bits 63:50 unused */ + +/* ==== ca_pci_dma_addr_extn */ +#define CA_UPPER_NODE_OFFSET (0x3full << 0) +#define CA_UPPER_NODE_OFFSET_SHFT 0 + /* bits 7:6 unused */ +#define CA_CHIPLET_ID (0x3ull << 8) +#define CA_CHIPLET_ID_SHFT 8 + /* bits 11:10 unused */ +#define CA_PCI_DMA_NODE_ID (0xffffull << 12) +#define CA_PCI_DMA_NODE_ID_SHFT 12 + /* bits 27:26 unused -- NOTE(review): the 0xffff NODE_ID mask above spans bits 27:12 and so includes them; confirm the field width */ +#define CA_PCI_DMA_PIO_MEM_TYPE (1ull << 28) + /* bits 63:29 unused */ + + +/* ==== ca_agp_dma_addr_extn */ + /* bits 19:0 unused */ +#define CA_AGP_DMA_NODE_ID (0xffffull << 20) +#define CA_AGP_DMA_NODE_ID_SHFT 20 + /* bits 27:26 unused -- NOTE(review): the 0xffff NODE_ID mask above spans bits 35:20, covering these bits and bit 28 (CA_AGP_DMA_PIO_MEM_TYPE); confirm the field width */ +#define CA_AGP_DMA_PIO_MEM_TYPE (1ull << 28) + /* bits 63:29 unused */ + +/* ==== ca_debug_vector_sel */ +#define CA_DEBUG_MN_VSEL (0xfull << 0) +#define CA_DEBUG_MN_VSEL_SHFT 0 +#define CA_DEBUG_PP_VSEL (0xfull << 4) +#define CA_DEBUG_PP_VSEL_SHFT 4 +#define CA_DEBUG_GW_VSEL (0xfull << 8) +#define CA_DEBUG_GW_VSEL_SHFT 8 +#define CA_DEBUG_GT_VSEL (0xfull << 12) +#define CA_DEBUG_GT_VSEL_SHFT 12 +#define CA_DEBUG_PD_VSEL (0xfull << 16) +#define CA_DEBUG_PD_VSEL_SHFT 16 +#define CA_DEBUG_AD_VSEL (0xfull << 20)
+#define CA_DEBUG_AD_VSEL_SHFT 20 +#define CA_DEBUG_CX_VSEL (0xfull << 24) +#define CA_DEBUG_CX_VSEL_SHFT 24 +#define CA_DEBUG_CR_VSEL (0xfull << 28) +#define CA_DEBUG_CR_VSEL_SHFT 28 +#define CA_DEBUG_BA_VSEL (0xfull << 32) +#define CA_DEBUG_BA_VSEL_SHFT 32 +#define CA_DEBUG_PE_VSEL (0xfull << 36) +#define CA_DEBUG_PE_VSEL_SHFT 36 +#define CA_DEBUG_BO_VSEL (0xfull << 40) +#define CA_DEBUG_BO_VSEL_SHFT 40 +#define CA_DEBUG_BI_VSEL (0xfull << 44) +#define CA_DEBUG_BI_VSEL_SHFT 44 +#define CA_DEBUG_AS_VSEL (0xfull << 48) +#define CA_DEBUG_AS_VSEL_SHFT 48 +#define CA_DEBUG_PS_VSEL (0xfull << 52) +#define CA_DEBUG_PS_VSEL_SHFT 52 +#define CA_DEBUG_PM_VSEL (0xfull << 56) +#define CA_DEBUG_PM_VSEL_SHFT 56 + /* bits 63:60 unused */ + +/* ==== ca_debug_mux_core_sel */ +/* ==== ca_debug_mux_pci_sel */ +#define CA_DEBUG_MSEL0 (0x7ull << 0) +#define CA_DEBUG_MSEL0_SHFT 0 + /* bit 3 unused */ +#define CA_DEBUG_NSEL0 (0x7ull << 4) +#define CA_DEBUG_NSEL0_SHFT 4 + /* bit 7 unused */ +#define CA_DEBUG_MSEL1 (0x7ull << 8) +#define CA_DEBUG_MSEL1_SHFT 8 + /* bit 11 unused */ +#define CA_DEBUG_NSEL1 (0x7ull << 12) +#define CA_DEBUG_NSEL1_SHFT 12 + /* bit 15 unused */ +#define CA_DEBUG_MSEL2 (0x7ull << 16) +#define CA_DEBUG_MSEL2_SHFT 16 + /* bit 19 unused */ +#define CA_DEBUG_NSEL2 (0x7ull << 20) +#define CA_DEBUG_NSEL2_SHFT 20 + /* bit 23 unused */ +#define CA_DEBUG_MSEL3 (0x7ull << 24) +#define CA_DEBUG_MSEL3_SHFT 24 + /* bit 27 unused */ +#define CA_DEBUG_NSEL3 (0x7ull << 28) +#define CA_DEBUG_NSEL3_SHFT 28 + /* bit 31 unused */ +#define CA_DEBUG_MSEL4 (0x7ull << 32) +#define CA_DEBUG_MSEL4_SHFT 32 + /* bit 35 unused */ +#define CA_DEBUG_NSEL4 (0x7ull << 36) +#define CA_DEBUG_NSEL4_SHFT 36 + /* bit 39 unused */ +#define CA_DEBUG_MSEL5 (0x7ull << 40) +#define CA_DEBUG_MSEL5_SHFT 40 + /* bit 43 unused */ +#define CA_DEBUG_NSEL5 (0x7ull << 44) +#define CA_DEBUG_NSEL5_SHFT 44 + /* bit 47 unused */ +#define CA_DEBUG_MSEL6 (0x7ull << 48) +#define CA_DEBUG_MSEL6_SHFT 48 + /* bit 51 
unused */ +#define CA_DEBUG_NSEL6 (0x7ull << 52) +#define CA_DEBUG_NSEL6_SHFT 52 + /* bit 55 unused */ +#define CA_DEBUG_MSEL7 (0x7ull << 56) +#define CA_DEBUG_MSEL7_SHFT 56 + /* bit 59 unused */ +#define CA_DEBUG_NSEL7 (0x7ull << 60) +#define CA_DEBUG_NSEL7_SHFT 60 + /* bit 63 unused */ + + +/* ==== ca_debug_domain_sel */ +#define CA_DEBUG_DOMAIN_L (1ull << 0) +#define CA_DEBUG_DOMAIN_H (1ull << 1) + /* bits 63:2 unused */ + +/* ==== ca_gart_ptr_table */ +#define CA_GART_PTR_VAL (1ull << 0) + /* bits 11:1 unused */ +#define CA_GART_PTR_ADDR (0xfffffffffffull << 12) +#define CA_GART_PTR_ADDR_SHFT 12 + /* bits 63:56 unused */ + +/* ==== ca_gart_tlb_addr[0-7] */ +#define CA_GART_TLB_ADDR (0xffffffffffffffull << 0) +#define CA_GART_TLB_ADDR_SHFT 0 + /* bits 62:56 unused */ +#define CA_GART_TLB_ENTRY_VAL (1ull << 63) + +/* + * PIO address space ranges for TIO:CA + */ + +/* CA internal registers */ +#define CA_PIO_ADMIN 0x00000000 +#define CA_PIO_ADMIN_LEN 0x00010000 + +/* GFX Write Buffer - Diagnostics */ +#define CA_PIO_GFX 0x00010000 +#define CA_PIO_GFX_LEN 0x00010000 + +/* AGP DMA Write Buffer - Diagnostics */ +#define CA_PIO_AGP_DMAWRITE 0x00020000 +#define CA_PIO_AGP_DMAWRITE_LEN 0x00010000 + +/* AGP DMA READ Buffer - Diagnostics */ +#define CA_PIO_AGP_DMAREAD 0x00030000 +#define CA_PIO_AGP_DMAREAD_LEN 0x00010000 + +/* PCI Config Type 0 */ +#define CA_PIO_PCI_TYPE0_CONFIG 0x01000000 +#define CA_PIO_PCI_TYPE0_CONFIG_LEN 0x01000000 + +/* PCI Config Type 1 */ +#define CA_PIO_PCI_TYPE1_CONFIG 0x02000000 +#define CA_PIO_PCI_TYPE1_CONFIG_LEN 0x01000000 + +/* PCI I/O Cycles - mapped to PCI Address 0x00000000-0x04ffffff */ +#define CA_PIO_PCI_IO 0x03000000 +#define CA_PIO_PCI_IO_LEN 0x05000000 + +/* PCI MEM Cycles - mapped to PCI with CA_PIO_ADDR_OFFSET of ca_control1 */ +/* use Fast Write if enabled and coretalk packet type is a GFX request */ +#define CA_PIO_PCI_MEM_OFFSET 0x08000000 +#define CA_PIO_PCI_MEM_OFFSET_LEN 0x08000000 + +/* PCI MEM Cycles - mapped to PCI 
Address 0x00000000-0xbfffffff */ +/* use Fast Write if enabled and coretalk packet type is a GFX request */ +#define CA_PIO_PCI_MEM 0x40000000 +#define CA_PIO_PCI_MEM_LEN 0xc0000000 + +/* + * DMA space + * + * The CA aperature (i.e. bus address range) mapped by the GART is segmented into + * two parts. The lower portion of the aperature is used for mapping 32 bit + * PCI addresses which are managed by the dma interfaces in this file. The + * upper portion of the aperature is used for mapping 48 bit AGP addresses. + * The AGP portion of the aperature is managed by the agpgart_be.c driver + * in drivers/linux/agp. There are ca-specific hooks in that driver to + * manipulate the gart, but management of the AGP portion of the aperature + * is the responsibility of that driver. + * + * CA allows three main types of DMA mapping: + * + * PCI 64-bit Managed by this driver + * PCI 32-bit Managed by this driver + * AGP 48-bit Managed by hooks in the /dev/agpgart driver + * + * All of the above can optionally be remapped through the GART. The following + * table lists the combinations of addressing types and GART remapping that + * are currently supported by the driver (h/w supports all, s/w limits this): + * + * PCI64 PCI32 AGP48 + * GART no yes yes + * Direct yes yes no + * + * GART remapping of PCI64 is not done because there is no need to. The + * 64 bit PCI address holds all of the information necessary to target any + * memory in the system. + * + * AGP48 is always mapped through the GART. Management of the AGP48 portion + * of the aperature is the responsibility of code in the agpgart_be driver.
+ * + * The non-64 bit bus address space will currently be partitioned like this: + * + * 0xffff_ffff_ffff +-------- + * | AGP48 direct + * | Space managed by this driver + * CA_AGP_DIRECT_BASE +-------- + * | AGP GART mapped (gfx aperature) + * | Space managed by /dev/agpgart driver + * | This range is exposed to the agpgart + * | driver as the "graphics aperature" + * CA_AGP_MAPPED_BASE +----- + * | PCI GART mapped + * | Space managed by this driver + * CA_PCI32_MAPPED_BASE +---- + * | PCI32 direct + * | Space managed by this driver + * 0xC000_0000 +-------- + * (CA_PCI32_DIRECT_BASE) + * + * The bus address range CA_PCI32_MAPPED_BASE through CA_AGP_DIRECT_BASE + * is what we call the CA aperature. Addresses falling in this range will + * be remapped using the GART. + * + * The bus address range CA_AGP_MAPPED_BASE through CA_AGP_DIRECT_BASE + * is what we call the graphics aperature. This is a subset of the CA + * aperature and is under the control of the agpgart_be driver. + * + * CA_PCI32_MAPPED_BASE, CA_AGP_MAPPED_BASE, and CA_AGP_DIRECT_BASE are + * somewhat arbitrary values. The known constraints on choosing these is: + * + * 1) CA_AGP_DIRECT_BASE-CA_PCI32_MAPPED_BASE+1 (the CA aperature size) + * must be one of the values supported by the ca_gart_aperature register. + * Currently valid values are: 4MB through 4096MB in powers of 2 increments + * + * 2) CA_AGP_DIRECT_BASE-CA_AGP_MAPPED_BASE+1 (the gfx aperature size) + * must be in MB units since that's what the agpgart driver assumes. + */ + +/* + * Define Bus DMA ranges. These are configurable (see constraints above) + * and will probably need tuning based on experience. + */ + + +/* + * 11/24/03 + * CA has an addressing glitch w.r.t. PCI direct 32 bit DMA that makes it + * generally unusable. The problem is that for PCI direct 32 + * DMA's, all 32 bits of the bus address are used to form the lower 32 bits + * of the coretalk address, and coretalk bits 38:32 come from a register. 
+ * Since only PCI bus addresses 0xC0000000-0xFFFFFFFF (1GB) are available + * for DMA (the rest is allocated to PIO), host node addresses need to be + * such that their lower 32 bits fall in the 0xC0000000-0xffffffff range + * as well. So there can be no PCI32 direct DMA below 3GB!! For this + * reason we set the CA_PCI32_DIRECT_SIZE to 0 which essentially makes + * tioca_dma_direct32() a noop but preserves the code flow should this issue + * be fixed in a respin. + * + * For now, all PCI32 DMA's must be mapped through the GART. + */ + +#define CA_PCI32_DIRECT_BASE 0xC0000000UL /* BASE not configurable */ +#define CA_PCI32_DIRECT_SIZE 0x00000000UL /* 0 MB */ + +#define CA_PCI32_MAPPED_BASE 0xC0000000UL +#define CA_PCI32_MAPPED_SIZE 0x40000000UL /* 1GB */ + +#define CA_AGP_MAPPED_BASE 0x80000000UL +#define CA_AGP_MAPPED_SIZE 0x40000000UL /* 1GB */ + +#define CA_AGP_DIRECT_BASE 0x40000000UL /* 1GB */ +#define CA_AGP_DIRECT_SIZE 0x40000000UL + +#define CA_APERATURE_BASE (CA_AGP_MAPPED_BASE) +#define CA_APERATURE_SIZE (CA_AGP_MAPPED_SIZE+CA_PCI32_MAPPED_SIZE) + +#endif /* _ASM_IA64_SN_TIO_TIOCA_H */ diff --git a/kernel/arch/ia64/include/asm/sn/tioca_provider.h b/kernel/arch/ia64/include/asm/sn/tioca_provider.h new file mode 100644 index 000000000..9a820ac61 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/tioca_provider.h @@ -0,0 +1,207 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H +#define _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H + +#include + +/* + * WAR enables + * Defines for individual WARs. Each is a bitmask of applicable + * part revision numbers. 
(1 << 1) == rev A, (1 << 2) == rev B, + * (3 << 1) == (rev A or rev B), etc + */ + +#define TIOCA_WAR_ENABLED(pv, tioca_common) \ + ((1 << (tioca_common)->ca_rev) & (pv)) /* nonzero when WAR mask pv covers this part's ca_rev */ + + /* TIO:ICE:FRZ:Freezer loses a PIO data ucred on PIO RD RSP with CW error */ +#define PV907908 (1 << 1) + /* ATI config space problems after BIOS execution starts */ +#define PV908234 (1 << 1) + /* CA:AGPDMA write request data mismatch with ABC1CL merge */ +#define PV895469 (1 << 1) + /* TIO:CA TLB invalidate of written GART entries possibly not occurring in CA*/ +#define PV910244 (1 << 1) + +struct tioca_dmamap{ + struct list_head cad_list; /* headed by ca_list */ + + dma_addr_t cad_dma_addr; /* Linux dma handle */ + uint cad_gart_entry; /* start entry in ca_gart_pagemap */ + uint cad_gart_size; /* #entries for this map */ +}; + +/* + * Kernel only fields. Prom may look at this stuff for debugging only. + * Access this structure through the ca_kernel_private ptr. + */ + +struct tioca_common ; + +struct tioca_kernel { + struct tioca_common *ca_common; /* tioca this belongs to */ + struct list_head ca_list; /* list of all ca's */ + struct list_head ca_dmamaps; + spinlock_t ca_lock; /* Kernel lock */ + cnodeid_t ca_closest_node; + struct list_head *ca_devices; /* bus->devices */ + + /* + * General GART stuff + */ + u64 ca_ap_size; /* size of aperature in bytes */ + u32 ca_gart_entries; /* # u64 entries in gart */ + u32 ca_ap_pagesize; /* aperature page size in bytes */ + u64 ca_ap_bus_base; /* bus address of CA aperature */ + u64 ca_gart_size; /* gart size in bytes */ + u64 *ca_gart; /* gart table vaddr */ + u64 ca_gart_coretalk_addr; /* gart coretalk addr */ + u8 ca_gart_iscoherent; /* used in tioca_tlbflush */ + + /* PCI GART convenience values */ + u64 ca_pciap_base; /* pci aperature bus base address */ + u64 ca_pciap_size; /* pci aperature size (bytes) */ + u64 ca_pcigart_base; /* pci GART bus base address */ + u64 *ca_pcigart; /* pci GART vm address */ + u32 ca_pcigart_entries; + u32 
ca_pcigart_start; /* PCI start index in ca_gart */ + void *ca_pcigart_pagemap; + + /* AGP GART convenience values */ + u64 ca_gfxap_base; /* gfx aperature bus base address */ + u64 ca_gfxap_size; /* gfx aperature size (bytes) */ + u64 ca_gfxgart_base; /* gfx GART bus base address */ + u64 *ca_gfxgart; /* gfx GART vm address */ + u32 ca_gfxgart_entries; + u32 ca_gfxgart_start; /* agpgart start index in ca_gart */ +}; + +/* + * Common tioca info shared between kernel and prom + * + * DO NOT CHANGE THIS STRUCT WITHOUT MAKING CORRESPONDING CHANGES + * TO THE PROM VERSION. + */ + +struct tioca_common { + struct pcibus_bussoft ca_common; /* common pciio header */ + + u32 ca_rev; + u32 ca_closest_nasid; + + u64 ca_prom_private; + u64 ca_kernel_private; +}; + +/** + * tioca_paddr_to_gart - Convert an SGI coretalk address to a CA GART entry + * @paddr: page address to convert + * + * Convert a system [coretalk] address to a GART entry. GART entries are + * formed using the following: + * + * data = ( (1<<63) | ( (REMAP_NODE_ID << 40) | (MD_CHIPLET_ID << 38) | + * (REMAP_SYS_ADDR) ) >> 12 ) + * + * DATA written to 1 GART TABLE Entry in system memory is remapped system + * addr for 1 page + * + * The data is for coretalk address format right shifted 12 bits with a + * valid bit. + * + * GART_TABLE_ENTRY [ 25:0 ] -- REMAP_SYS_ADDRESS[37:12]. + * GART_TABLE_ENTRY [ 27:26 ] -- SHUB MD chiplet id. + * GART_TABLE_ENTRY [ 41:28 ] -- REMAP_NODE_ID. + * GART_TABLE_ENTRY [ 63 ] -- Valid Bit + */ +static inline u64 +tioca_paddr_to_gart(unsigned long paddr) +{ + /* + * We are assuming right now that paddr already has the correct + * format since the address from xtalk_dmaXXX should already have + * NODE_ID, CHIPLET_ID, and SYS_ADDR in the correct locations. 
+ */ + + return ((paddr) >> 12) | (1UL << 63); +} + +/** + * tioca_physpage_to_gart - Map a host physical page for SGI CA based DMA + * @page_addr: system page address to map + */ + +static inline unsigned long +tioca_physpage_to_gart(u64 page_addr) +{ + u64 coretalk_addr; + + coretalk_addr = PHYS_TO_TIODMA(page_addr); + if (!coretalk_addr) { + return 0; + } + + return tioca_paddr_to_gart(coretalk_addr); +} + +/** + * tioca_tlbflush - invalidate cached SGI CA GART TLB entries + * @tioca_kernel: CA context + * + * Invalidate tlb entries for a given CA GART. Main complexity is to account + * for revA bug. + */ +static inline void +tioca_tlbflush(struct tioca_kernel *tioca_kernel) +{ + volatile u64 tmp; + volatile struct tioca __iomem *ca_base; + struct tioca_common *tioca_common; + + tioca_common = tioca_kernel->ca_common; + ca_base = (struct tioca __iomem *)tioca_common->ca_common.bs_base; + + /* + * Explicit flushes not needed if GART is in cached mode + */ + if (tioca_kernel->ca_gart_iscoherent) { + if (TIOCA_WAR_ENABLED(PV910244, tioca_common)) { + /* + * PV910244: RevA CA needs explicit flushes. + * Need to put GART into uncached mode before + * flushing otherwise the explicit flush is ignored. + * + * Alternate WAR would be to leave GART cached and + * touch every CL aligned GART entry. + */ + + __sn_clrq_relaxed(&ca_base->ca_control2, CA_GART_MEM_PARAM); + __sn_setq_relaxed(&ca_base->ca_control2, CA_GART_FLUSH_TLB); + __sn_setq_relaxed(&ca_base->ca_control2, + (0x2ull << CA_GART_MEM_PARAM_SHFT)); + tmp = __sn_readq_relaxed(&ca_base->ca_control2); + } + + return; + } + + /* + * Gart in uncached mode ... need an explicit flush. 
+ */ + + __sn_setq_relaxed(&ca_base->ca_control2, CA_GART_FLUSH_TLB); + tmp = __sn_readq_relaxed(&ca_base->ca_control2); +} + +extern u32 tioca_gart_found; +extern struct list_head tioca_list; +extern int tioca_init_provider(void); +extern void tioca_fastwrite_enable(struct tioca_kernel *tioca_kern); +#endif /* _ASM_IA64_SN_TIO_CA_AGP_PROVIDER_H */ diff --git a/kernel/arch/ia64/include/asm/sn/tioce.h b/kernel/arch/ia64/include/asm/sn/tioce.h new file mode 100644 index 000000000..6eae8ada9 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/tioce.h @@ -0,0 +1,760 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef __ASM_IA64_SN_TIOCE_H__ +#define __ASM_IA64_SN_TIOCE_H__ + +/* CE ASIC part & mfgr information */ +#define TIOCE_PART_NUM 0xCE00 +#define TIOCE_SRC_ID 0x01 +#define TIOCE_REV_A 0x1 + +/* CE Virtual PPB Vendor/Device IDs */ +#define CE_VIRT_PPB_VENDOR_ID 0x10a9 +#define CE_VIRT_PPB_DEVICE_ID 0x4002 + +/* CE Host Bridge Vendor/Device IDs */ +#define CE_HOST_BRIDGE_VENDOR_ID 0x10a9 +#define CE_HOST_BRIDGE_DEVICE_ID 0x4001 + + +#define TIOCE_NUM_M40_ATES 4096 +#define TIOCE_NUM_M3240_ATES 2048 +#define TIOCE_NUM_PORTS 2 + +/* + * Register layout for TIOCE. MMR offsets are shown at the far right of the + * structure definition. 
+ */ +typedef volatile struct tioce { + /* + * ADMIN : Administration Registers + */ + u64 ce_adm_id; /* 0x000000 */ + u64 ce_pad_000008; /* 0x000008 */ + u64 ce_adm_dyn_credit_status; /* 0x000010 */ + u64 ce_adm_last_credit_status; /* 0x000018 */ + u64 ce_adm_credit_limit; /* 0x000020 */ + u64 ce_adm_force_credit; /* 0x000028 */ + u64 ce_adm_control; /* 0x000030 */ + u64 ce_adm_mmr_chn_timeout; /* 0x000038 */ + u64 ce_adm_ssp_ure_timeout; /* 0x000040 */ + u64 ce_adm_ssp_dre_timeout; /* 0x000048 */ + u64 ce_adm_ssp_debug_sel; /* 0x000050 */ + u64 ce_adm_int_status; /* 0x000058 */ + u64 ce_adm_int_status_alias; /* 0x000060 */ + u64 ce_adm_int_mask; /* 0x000068 */ + u64 ce_adm_int_pending; /* 0x000070 */ + u64 ce_adm_force_int; /* 0x000078 */ + u64 ce_adm_ure_ups_buf_barrier_flush; /* 0x000080 */ + u64 ce_adm_int_dest[15]; /* 0x000088 -- 0x0000F8 */ + u64 ce_adm_error_summary; /* 0x000100 */ + u64 ce_adm_error_summary_alias; /* 0x000108 */ + u64 ce_adm_error_mask; /* 0x000110 */ + u64 ce_adm_first_error; /* 0x000118 */ + u64 ce_adm_error_overflow; /* 0x000120 */ + u64 ce_adm_error_overflow_alias; /* 0x000128 */ + u64 ce_pad_000130[2]; /* 0x000130 -- 0x000138 */ + u64 ce_adm_tnum_error; /* 0x000140 */ + u64 ce_adm_mmr_err_detail; /* 0x000148 */ + u64 ce_adm_msg_sram_perr_detail; /* 0x000150 */ + u64 ce_adm_bap_sram_perr_detail; /* 0x000158 */ + u64 ce_adm_ce_sram_perr_detail; /* 0x000160 */ + u64 ce_adm_ce_credit_oflow_detail; /* 0x000168 */ + u64 ce_adm_tx_link_idle_max_timer; /* 0x000170 */ + u64 ce_adm_pcie_debug_sel; /* 0x000178 */ + u64 ce_pad_000180[16]; /* 0x000180 -- 0x0001F8 */ + + u64 ce_adm_pcie_debug_sel_top; /* 0x000200 */ + u64 ce_adm_pcie_debug_lat_sel_lo_top; /* 0x000208 */ + u64 ce_adm_pcie_debug_lat_sel_hi_top; /* 0x000210 */ + u64 ce_adm_pcie_debug_trig_sel_top; /* 0x000218 */ + u64 ce_adm_pcie_debug_trig_lat_sel_lo_top; /* 0x000220 */ + u64 ce_adm_pcie_debug_trig_lat_sel_hi_top; /* 0x000228 */ + u64 ce_adm_pcie_trig_compare_top; /* 0x000230 */ + 
u64 ce_adm_pcie_trig_compare_en_top; /* 0x000238 */ + u64 ce_adm_ssp_debug_sel_top; /* 0x000240 */ + u64 ce_adm_ssp_debug_lat_sel_lo_top; /* 0x000248 */ + u64 ce_adm_ssp_debug_lat_sel_hi_top; /* 0x000250 */ + u64 ce_adm_ssp_debug_trig_sel_top; /* 0x000258 */ + u64 ce_adm_ssp_debug_trig_lat_sel_lo_top; /* 0x000260 */ + u64 ce_adm_ssp_debug_trig_lat_sel_hi_top; /* 0x000268 */ + u64 ce_adm_ssp_trig_compare_top; /* 0x000270 */ + u64 ce_adm_ssp_trig_compare_en_top; /* 0x000278 */ + u64 ce_pad_000280[48]; /* 0x000280 -- 0x0003F8 */ + + u64 ce_adm_bap_ctrl; /* 0x000400 */ + u64 ce_pad_000408[127]; /* 0x000408 -- 0x0007F8 */ + + u64 ce_msg_buf_data63_0[35]; /* 0x000800 -- 0x000918 */ + u64 ce_pad_000920[29]; /* 0x000920 -- 0x0009F8 */ + + u64 ce_msg_buf_data127_64[35]; /* 0x000A00 -- 0x000B18 */ + u64 ce_pad_000B20[29]; /* 0x000B20 -- 0x000BF8 */ + + u64 ce_msg_buf_parity[35]; /* 0x000C00 -- 0x000D18 */ + u64 ce_pad_000D20[29]; /* 0x000D20 -- 0x000DF8 */ + + u64 ce_pad_000E00[576]; /* 0x000E00 -- 0x001FF8 */ + + /* + * LSI : LSI's PCI Express Link Registers (Link#1 and Link#2) + * Link#1 MMRs at start at 0x002000, Link#2 MMRs at 0x003000 + * NOTE: the comment offsets at far right: let 'z' = {2 or 3} + */ + #define ce_lsi(link_num) ce_lsi[(link_num)-1] /* link_num is 1-based: Link#1 -> index 0 */ + struct ce_lsi_reg { + u64 ce_lsi_lpu_id; /* 0x00z000 */ + u64 ce_lsi_rst; /* 0x00z008 */ + u64 ce_lsi_dbg_stat; /* 0x00z010 */ + u64 ce_lsi_dbg_cfg; /* 0x00z018 */ + u64 ce_lsi_ltssm_ctrl; /* 0x00z020 */ + u64 ce_lsi_lk_stat; /* 0x00z028 */ + u64 ce_pad_00z030[2]; /* 0x00z030 -- 0x00z038 */ + u64 ce_lsi_int_and_stat; /* 0x00z040 */ + u64 ce_lsi_int_mask; /* 0x00z048 */ + u64 ce_pad_00z050[22]; /* 0x00z050 -- 0x00z0F8 */ + u64 ce_lsi_lk_perf_cnt_sel; /* 0x00z100 */ + u64 ce_pad_00z108; /* 0x00z108 */ + u64 ce_lsi_lk_perf_cnt_ctrl; /* 0x00z110 */ + u64 ce_pad_00z118; /* 0x00z118 */ + u64 ce_lsi_lk_perf_cnt1; /* 0x00z120 */ + u64 ce_lsi_lk_perf_cnt1_test; /* 0x00z128 */ + u64 ce_lsi_lk_perf_cnt2; /* 0x00z130 */ + u64 
ce_lsi_lk_perf_cnt2_test; /* 0x00z138 */ + u64 ce_pad_00z140[24]; /* 0x00z140 -- 0x00z1F8 */ + u64 ce_lsi_lk_lyr_cfg; /* 0x00z200 */ + u64 ce_lsi_lk_lyr_status; /* 0x00z208 */ + u64 ce_lsi_lk_lyr_int_stat; /* 0x00z210 */ + u64 ce_lsi_lk_ly_int_stat_test; /* 0x00z218 */ + u64 ce_lsi_lk_ly_int_stat_mask; /* 0x00z220 */ + u64 ce_pad_00z228[3]; /* 0x00z228 -- 0x00z238 */ + u64 ce_lsi_fc_upd_ctl; /* 0x00z240 */ + u64 ce_pad_00z248[3]; /* 0x00z248 -- 0x00z258 */ + u64 ce_lsi_flw_ctl_upd_to_timer; /* 0x00z260 */ + u64 ce_lsi_flw_ctl_upd_timer0; /* 0x00z268 */ + u64 ce_lsi_flw_ctl_upd_timer1; /* 0x00z270 */ + u64 ce_pad_00z278[49]; /* 0x00z278 -- 0x00z3F8 */ + u64 ce_lsi_freq_nak_lat_thrsh; /* 0x00z400 */ + u64 ce_lsi_ack_nak_lat_tmr; /* 0x00z408 */ + u64 ce_lsi_rply_tmr_thr; /* 0x00z410 */ + u64 ce_lsi_rply_tmr; /* 0x00z418 */ + u64 ce_lsi_rply_num_stat; /* 0x00z420 */ + u64 ce_lsi_rty_buf_max_addr; /* 0x00z428 */ + u64 ce_lsi_rty_fifo_ptr; /* 0x00z430 */ + u64 ce_lsi_rty_fifo_rd_wr_ptr; /* 0x00z438 */ + u64 ce_lsi_rty_fifo_cred; /* 0x00z440 */ + u64 ce_lsi_seq_cnt; /* 0x00z448 */ + u64 ce_lsi_ack_sent_seq_num; /* 0x00z450 */ + u64 ce_lsi_seq_cnt_fifo_max_addr; /* 0x00z458 */ + u64 ce_lsi_seq_cnt_fifo_ptr; /* 0x00z460 */ + u64 ce_lsi_seq_cnt_rd_wr_ptr; /* 0x00z468 */ + u64 ce_lsi_tx_lk_ts_ctl; /* 0x00z470 */ + u64 ce_pad_00z478; /* 0x00z478 */ + u64 ce_lsi_mem_addr_ctl; /* 0x00z480 */ + u64 ce_lsi_mem_d_ld0; /* 0x00z488 */ + u64 ce_lsi_mem_d_ld1; /* 0x00z490 */ + u64 ce_lsi_mem_d_ld2; /* 0x00z498 */ + u64 ce_lsi_mem_d_ld3; /* 0x00z4A0 */ + u64 ce_lsi_mem_d_ld4; /* 0x00z4A8 */ + u64 ce_pad_00z4B0[2]; /* 0x00z4B0 -- 0x00z4B8 */ + u64 ce_lsi_rty_d_cnt; /* 0x00z4C0 */ + u64 ce_lsi_seq_buf_cnt; /* 0x00z4C8 */ + u64 ce_lsi_seq_buf_bt_d; /* 0x00z4D0 */ + u64 ce_pad_00z4D8; /* 0x00z4D8 */ + u64 ce_lsi_ack_lat_thr; /* 0x00z4E0 */ + u64 ce_pad_00z4E8[3]; /* 0x00z4E8 -- 0x00z4F8 */ + u64 ce_lsi_nxt_rcv_seq_1_cntr; /* 0x00z500 */ + u64 ce_lsi_unsp_dllp_rcvd; /* 0x00z508 */ + u64 
ce_lsi_rcv_lk_ts_ctl; /* 0x00z510 */ + u64 ce_pad_00z518[29]; /* 0x00z518 -- 0x00z5F8 */ + u64 ce_lsi_phy_lyr_cfg; /* 0x00z600 */ + u64 ce_pad_00z608; /* 0x00z608 */ + u64 ce_lsi_phy_lyr_int_stat; /* 0x00z610 */ + u64 ce_lsi_phy_lyr_int_stat_test; /* 0x00z618 */ + u64 ce_lsi_phy_lyr_int_mask; /* 0x00z620 */ + u64 ce_pad_00z628[11]; /* 0x00z628 -- 0x00z678 */ + u64 ce_lsi_rcv_phy_cfg; /* 0x00z680 */ + u64 ce_lsi_rcv_phy_stat1; /* 0x00z688 */ + u64 ce_lsi_rcv_phy_stat2; /* 0x00z690 */ + u64 ce_lsi_rcv_phy_stat3; /* 0x00z698 */ + u64 ce_lsi_rcv_phy_int_stat; /* 0x00z6A0 */ + u64 ce_lsi_rcv_phy_int_stat_test; /* 0x00z6A8 */ + u64 ce_lsi_rcv_phy_int_mask; /* 0x00z6B0 */ + u64 ce_pad_00z6B8[9]; /* 0x00z6B8 -- 0x00z6F8 */ + u64 ce_lsi_tx_phy_cfg; /* 0x00z700 */ + u64 ce_lsi_tx_phy_stat; /* 0x00z708 */ + u64 ce_lsi_tx_phy_int_stat; /* 0x00z710 */ + u64 ce_lsi_tx_phy_int_stat_test; /* 0x00z718 */ + u64 ce_lsi_tx_phy_int_mask; /* 0x00z720 */ + u64 ce_lsi_tx_phy_stat2; /* 0x00z728 */ + u64 ce_pad_00z730[10]; /* 0x00z730 -- 0x00z778 */ + u64 ce_lsi_ltssm_cfg1; /* 0x00z780 */ + u64 ce_lsi_ltssm_cfg2; /* 0x00z788 */ + u64 ce_lsi_ltssm_cfg3; /* 0x00z790 */ + u64 ce_lsi_ltssm_cfg4; /* 0x00z798 */ + u64 ce_lsi_ltssm_cfg5; /* 0x00z7A0 */ + u64 ce_lsi_ltssm_stat1; /* 0x00z7A8 */ + u64 ce_lsi_ltssm_stat2; /* 0x00z7B0 */ + u64 ce_lsi_ltssm_int_stat; /* 0x00z7B8 */ + u64 ce_lsi_ltssm_int_stat_test; /* 0x00z7C0 */ + u64 ce_lsi_ltssm_int_mask; /* 0x00z7C8 */ + u64 ce_lsi_ltssm_stat_wr_en; /* 0x00z7D0 */ + u64 ce_pad_00z7D8[5]; /* 0x00z7D8 -- 0x00z7F8 */ + u64 ce_lsi_gb_cfg1; /* 0x00z800 */ + u64 ce_lsi_gb_cfg2; /* 0x00z808 */ + u64 ce_lsi_gb_cfg3; /* 0x00z810 */ + u64 ce_lsi_gb_cfg4; /* 0x00z818 */ + u64 ce_lsi_gb_stat; /* 0x00z820 */ + u64 ce_lsi_gb_int_stat; /* 0x00z828 */ + u64 ce_lsi_gb_int_stat_test; /* 0x00z830 */ + u64 ce_lsi_gb_int_mask; /* 0x00z838 */ + u64 ce_lsi_gb_pwr_dn1; /* 0x00z840 */ + u64 ce_lsi_gb_pwr_dn2; /* 0x00z848 */ + u64 ce_pad_00z850[246]; /* 0x00z850 -- 0x00zFF8 
*/ + } ce_lsi[2]; + + u64 ce_pad_004000[10]; /* 0x004000 -- 0x004048 */ + + /* + * CRM: Coretalk Receive Module Registers + */ + u64 ce_crm_debug_mux; /* 0x004050 */ + u64 ce_pad_004058; /* 0x004058 */ + u64 ce_crm_ssp_err_cmd_wrd; /* 0x004060 */ + u64 ce_crm_ssp_err_addr; /* 0x004068 */ + u64 ce_crm_ssp_err_syn; /* 0x004070 */ + + u64 ce_pad_004078[499]; /* 0x004078 -- 0x005008 */ + + /* + * CXM: Coretalk Xmit Module Registers + */ + u64 ce_cxm_dyn_credit_status; /* 0x005010 */ + u64 ce_cxm_last_credit_status; /* 0x005018 */ + u64 ce_cxm_credit_limit; /* 0x005020 */ + u64 ce_cxm_force_credit; /* 0x005028 */ + u64 ce_cxm_disable_bypass; /* 0x005030 */ + u64 ce_pad_005038[3]; /* 0x005038 -- 0x005048 */ + u64 ce_cxm_debug_mux; /* 0x005050 */ + + u64 ce_pad_005058[501]; /* 0x005058 -- 0x005FF8 */ + + /* + * DTL: Downstream Transaction Layer Regs (Link#1 and Link#2) + * DTL: Link#1 MMRs at start at 0x006000, Link#2 MMRs at 0x008000 + * DTL: the comment offsets at far right: let 'y' = {6 or 8} + * + * UTL: Upstream Transaction Layer Regs (Link#1 and Link#2) + * UTL: Link#1 MMRs at start at 0x007000, Link#2 MMRs at 0x009000 + * UTL: the comment offsets at far right: let 'z' = {7 or 9} + */ + #define ce_dtl(link_num) ce_dtl_utl[(link_num)-1] /* link_num is 1-based: Link#1 -> index 0 */ + #define ce_utl(link_num) ce_dtl_utl[(link_num)-1] /* same array: DTL and UTL regs share one element per link */ + struct ce_dtl_utl_reg { + /* DTL */ + u64 ce_dtl_dtdr_credit_limit; /* 0x00y000 */ + u64 ce_dtl_dtdr_credit_force; /* 0x00y008 */ + u64 ce_dtl_dyn_credit_status; /* 0x00y010 */ + u64 ce_dtl_dtl_last_credit_stat; /* 0x00y018 */ + u64 ce_dtl_dtl_ctrl; /* 0x00y020 */ + u64 ce_pad_00y028[5]; /* 0x00y028 -- 0x00y048 */ + u64 ce_dtl_debug_sel; /* 0x00y050 */ + u64 ce_pad_00y058[501]; /* 0x00y058 -- 0x00yFF8 */ + + /* UTL */ + u64 ce_utl_utl_ctrl; /* 0x00z000 */ + u64 ce_utl_debug_sel; /* 0x00z008 */ + u64 ce_pad_00z010[510]; /* 0x00z010 -- 0x00zFF8 */ + } ce_dtl_utl[2]; + + u64 ce_pad_00A000[514]; /* 0x00A000 -- 0x00B008 */ + + /* + * URE: Upstream Request Engine + */ + u64 
ce_ure_dyn_credit_status; /* 0x00B010 */ + u64 ce_ure_last_credit_status; /* 0x00B018 */ + u64 ce_ure_credit_limit; /* 0x00B020 */ + u64 ce_pad_00B028; /* 0x00B028 */ + u64 ce_ure_control; /* 0x00B030 */ + u64 ce_ure_status; /* 0x00B038 */ + u64 ce_pad_00B040[2]; /* 0x00B040 -- 0x00B048 */ + u64 ce_ure_debug_sel; /* 0x00B050 */ + u64 ce_ure_pcie_debug_sel; /* 0x00B058 */ + u64 ce_ure_ssp_err_cmd_wrd; /* 0x00B060 */ + u64 ce_ure_ssp_err_addr; /* 0x00B068 */ + u64 ce_ure_page_map; /* 0x00B070 */ + u64 ce_ure_dir_map[TIOCE_NUM_PORTS]; /* 0x00B078 */ + u64 ce_ure_pipe_sel1; /* 0x00B088 */ + u64 ce_ure_pipe_mask1; /* 0x00B090 */ + u64 ce_ure_pipe_sel2; /* 0x00B098 */ + u64 ce_ure_pipe_mask2; /* 0x00B0A0 */ + u64 ce_ure_pcie1_credits_sent; /* 0x00B0A8 */ + u64 ce_ure_pcie1_credits_used; /* 0x00B0B0 */ + u64 ce_ure_pcie1_credit_limit; /* 0x00B0B8 */ + u64 ce_ure_pcie2_credits_sent; /* 0x00B0C0 */ + u64 ce_ure_pcie2_credits_used; /* 0x00B0C8 */ + u64 ce_ure_pcie2_credit_limit; /* 0x00B0D0 */ + u64 ce_ure_pcie_force_credit; /* 0x00B0D8 */ + u64 ce_ure_rd_tnum_val; /* 0x00B0E0 */ + u64 ce_ure_rd_tnum_rsp_rcvd; /* 0x00B0E8 */ + u64 ce_ure_rd_tnum_esent_timer; /* 0x00B0F0 */ + u64 ce_ure_rd_tnum_error; /* 0x00B0F8 */ + u64 ce_ure_rd_tnum_first_cl; /* 0x00B100 */ + u64 ce_ure_rd_tnum_link_buf; /* 0x00B108 */ + u64 ce_ure_wr_tnum_val; /* 0x00B110 */ + u64 ce_ure_sram_err_addr0; /* 0x00B118 */ + u64 ce_ure_sram_err_addr1; /* 0x00B120 */ + u64 ce_ure_sram_err_addr2; /* 0x00B128 */ + u64 ce_ure_sram_rd_addr0; /* 0x00B130 */ + u64 ce_ure_sram_rd_addr1; /* 0x00B138 */ + u64 ce_ure_sram_rd_addr2; /* 0x00B140 */ + u64 ce_ure_sram_wr_addr0; /* 0x00B148 */ + u64 ce_ure_sram_wr_addr1; /* 0x00B150 */ + u64 ce_ure_sram_wr_addr2; /* 0x00B158 */ + u64 ce_ure_buf_flush10; /* 0x00B160 */ + u64 ce_ure_buf_flush11; /* 0x00B168 */ + u64 ce_ure_buf_flush12; /* 0x00B170 */ + u64 ce_ure_buf_flush13; /* 0x00B178 */ + u64 ce_ure_buf_flush20; /* 0x00B180 */ + u64 ce_ure_buf_flush21; /* 0x00B188 */ + u64 
ce_ure_buf_flush22; /* 0x00B190 */ + u64 ce_ure_buf_flush23; /* 0x00B198 */ + u64 ce_ure_pcie_control1; /* 0x00B1A0 */ + u64 ce_ure_pcie_control2; /* 0x00B1A8 */ + + u64 ce_pad_00B1B0[458]; /* 0x00B1B0 -- 0x00BFF8 */ + + /* Upstream Data Buffer, Port1 */ + struct ce_ure_maint_ups_dat1_data { + u64 data63_0[512]; /* 0x00C000 -- 0x00CFF8 */ + u64 data127_64[512]; /* 0x00D000 -- 0x00DFF8 */ + u64 parity[512]; /* 0x00E000 -- 0x00EFF8 */ + } ce_ure_maint_ups_dat1; + + /* Upstream Header Buffer, Port1 */ + struct ce_ure_maint_ups_hdr1_data { + u64 data63_0[512]; /* 0x00F000 -- 0x00FFF8 */ + u64 data127_64[512]; /* 0x010000 -- 0x010FF8 */ + u64 parity[512]; /* 0x011000 -- 0x011FF8 */ + } ce_ure_maint_ups_hdr1; + + /* Upstream Data Buffer, Port2 */ + struct ce_ure_maint_ups_dat2_data { + u64 data63_0[512]; /* 0x012000 -- 0x012FF8 */ + u64 data127_64[512]; /* 0x013000 -- 0x013FF8 */ + u64 parity[512]; /* 0x014000 -- 0x014FF8 */ + } ce_ure_maint_ups_dat2; + + /* Upstream Header Buffer, Port2 */ + struct ce_ure_maint_ups_hdr2_data { + u64 data63_0[512]; /* 0x015000 -- 0x015FF8 */ + u64 data127_64[512]; /* 0x016000 -- 0x016FF8 */ + u64 parity[512]; /* 0x017000 -- 0x017FF8 */ + } ce_ure_maint_ups_hdr2; + + /* Downstream Data Buffer */ + struct ce_ure_maint_dns_dat_data { + u64 data63_0[512]; /* 0x018000 -- 0x018FF8 */ + u64 data127_64[512]; /* 0x019000 -- 0x019FF8 */ + u64 parity[512]; /* 0x01A000 -- 0x01AFF8 */ + } ce_ure_maint_dns_dat; + + /* Downstream Header Buffer */ + struct ce_ure_maint_dns_hdr_data { + u64 data31_0[64]; /* 0x01B000 -- 0x01B1F8 */ + u64 data95_32[64]; /* 0x01B200 -- 0x01B3F8 */ + u64 parity[64]; /* 0x01B400 -- 0x01B5F8 */ + } ce_ure_maint_dns_hdr; + + /* RCI Buffer Data */ + struct ce_ure_maint_rci_data { + u64 data41_0[64]; /* 0x01B600 -- 0x01B7F8 */ + u64 data69_42[64]; /* 0x01B800 -- 0x01B9F8 */ + } ce_ure_maint_rci; + + /* Response Queue */ + u64 ce_ure_maint_rspq[64]; /* 0x01BA00 -- 0x01BBF8 */ + + u64 ce_pad_01C000[4224]; /* 0x01BC00 -- 0x023FF8 */ 
+ + /* Admin Build-a-Packet Buffer */ + struct ce_adm_maint_bap_buf_data { + u64 data63_0[258]; /* 0x024000 -- 0x024808 */ + u64 data127_64[258]; /* 0x024810 -- 0x025018 */ + u64 parity[258]; /* 0x025020 -- 0x025828 */ + } ce_adm_maint_bap_buf; + + u64 ce_pad_025830[5370]; /* 0x025830 -- 0x02FFF8 */ + + /* URE: 40bit PMU ATE Buffer */ /* 0x030000 -- 0x037FF8 */ + u64 ce_ure_ate40[TIOCE_NUM_M40_ATES]; + + /* URE: 32/40bit PMU ATE Buffer */ /* 0x038000 -- 0x03BFF8 */ + u64 ce_ure_ate3240[TIOCE_NUM_M3240_ATES]; + + u64 ce_pad_03C000[2050]; /* 0x03C000 -- 0x040008 */ + + /* + * DRE: Down Stream Request Engine + */ + u64 ce_dre_dyn_credit_status1; /* 0x040010 */ + u64 ce_dre_dyn_credit_status2; /* 0x040018 */ + u64 ce_dre_last_credit_status1; /* 0x040020 */ + u64 ce_dre_last_credit_status2; /* 0x040028 */ + u64 ce_dre_credit_limit1; /* 0x040030 */ + u64 ce_dre_credit_limit2; /* 0x040038 */ + u64 ce_dre_force_credit1; /* 0x040040 */ + u64 ce_dre_force_credit2; /* 0x040048 */ + u64 ce_dre_debug_mux1; /* 0x040050 */ + u64 ce_dre_debug_mux2; /* 0x040058 */ + u64 ce_dre_ssp_err_cmd_wrd; /* 0x040060 */ + u64 ce_dre_ssp_err_addr; /* 0x040068 */ + u64 ce_dre_comp_err_cmd_wrd; /* 0x040070 */ + u64 ce_dre_comp_err_addr; /* 0x040078 */ + u64 ce_dre_req_status; /* 0x040080 */ + u64 ce_dre_config1; /* 0x040088 */ + u64 ce_dre_config2; /* 0x040090 */ + u64 ce_dre_config_req_status; /* 0x040098 */ + u64 ce_pad_0400A0[12]; /* 0x0400A0 -- 0x0400F8 */ + u64 ce_dre_dyn_fifo; /* 0x040100 */ + u64 ce_pad_040108[3]; /* 0x040108 -- 0x040118 */ + u64 ce_dre_last_fifo; /* 0x040120 */ + + u64 ce_pad_040128[27]; /* 0x040128 -- 0x0401F8 */ + + /* DRE Downstream Head Queue */ + struct ce_dre_maint_ds_head_queue { + u64 data63_0[32]; /* 0x040200 -- 0x0402F8 */ + u64 data127_64[32]; /* 0x040300 -- 0x0403F8 */ + u64 parity[32]; /* 0x040400 -- 0x0404F8 */ + } ce_dre_maint_ds_head_q; + + u64 ce_pad_040500[352]; /* 0x040500 -- 0x040FF8 */ + + /* DRE Downstream Data Queue */ + struct 
ce_dre_maint_ds_data_queue { + u64 data63_0[256]; /* 0x041000 -- 0x0417F8 */ + u64 ce_pad_041800[256]; /* 0x041800 -- 0x041FF8 */ + u64 data127_64[256]; /* 0x042000 -- 0x0427F8 */ + u64 ce_pad_042800[256]; /* 0x042800 -- 0x042FF8 */ + u64 parity[256]; /* 0x043000 -- 0x0437F8 */ + u64 ce_pad_043800[256]; /* 0x043800 -- 0x043FF8 */ + } ce_dre_maint_ds_data_q; + + /* DRE URE Upstream Response Queue */ + struct ce_dre_maint_ure_us_rsp_queue { + u64 data63_0[8]; /* 0x044000 -- 0x044038 */ + u64 ce_pad_044040[24]; /* 0x044040 -- 0x0440F8 */ + u64 data127_64[8]; /* 0x044100 -- 0x044138 */ + u64 ce_pad_044140[24]; /* 0x044140 -- 0x0441F8 */ + u64 parity[8]; /* 0x044200 -- 0x044238 */ + u64 ce_pad_044240[24]; /* 0x044240 -- 0x0442F8 */ + } ce_dre_maint_ure_us_rsp_q; + + u64 ce_dre_maint_us_wrt_rsp[32];/* 0x044300 -- 0x0443F8 */ + + u64 ce_end_of_struct; /* 0x044400 */ +} tioce_t; + +/* ce_lsiX_gb_cfg1 register bit masks & shifts */ +#define CE_LSI_GB_CFG1_RXL0S_THS_SHFT 0 +#define CE_LSI_GB_CFG1_RXL0S_THS_MASK (0xffULL << 0) +#define CE_LSI_GB_CFG1_RXL0S_SMP_SHFT 8 +#define CE_LSI_GB_CFG1_RXL0S_SMP_MASK (0xfULL << 8) +#define CE_LSI_GB_CFG1_RXL0S_ADJ_SHFT 12 +#define CE_LSI_GB_CFG1_RXL0S_ADJ_MASK (0x7ULL << 12) +#define CE_LSI_GB_CFG1_RXL0S_FLT_SHFT 15 +#define CE_LSI_GB_CFG1_RXL0S_FLT_MASK (0x1ULL << 15) +#define CE_LSI_GB_CFG1_LPBK_SEL_SHFT 16 +#define CE_LSI_GB_CFG1_LPBK_SEL_MASK (0x3ULL << 16) +#define CE_LSI_GB_CFG1_LPBK_EN_SHFT 18 +#define CE_LSI_GB_CFG1_LPBK_EN_MASK (0x1ULL << 18) +#define CE_LSI_GB_CFG1_RVRS_LB_SHFT 19 +#define CE_LSI_GB_CFG1_RVRS_LB_MASK (0x1ULL << 19) +#define CE_LSI_GB_CFG1_RVRS_CLK_SHFT 20 +#define CE_LSI_GB_CFG1_RVRS_CLK_MASK (0x3ULL << 20) +#define CE_LSI_GB_CFG1_SLF_TS_SHFT 24 +#define CE_LSI_GB_CFG1_SLF_TS_MASK (0xfULL << 24) + +/* ce_adm_int_mask/ce_adm_int_status register bit defines */ +#define CE_ADM_INT_CE_ERROR_SHFT 0 +#define CE_ADM_INT_LSI1_IP_ERROR_SHFT 1 +#define CE_ADM_INT_LSI2_IP_ERROR_SHFT 2 +#define CE_ADM_INT_PCIE_ERROR_SHFT 3 
+#define CE_ADM_INT_PORT1_HOTPLUG_EVENT_SHFT 4 +#define CE_ADM_INT_PORT2_HOTPLUG_EVENT_SHFT 5 +#define CE_ADM_INT_PCIE_PORT1_DEV_A_SHFT 6 +#define CE_ADM_INT_PCIE_PORT1_DEV_B_SHFT 7 +#define CE_ADM_INT_PCIE_PORT1_DEV_C_SHFT 8 +#define CE_ADM_INT_PCIE_PORT1_DEV_D_SHFT 9 +#define CE_ADM_INT_PCIE_PORT2_DEV_A_SHFT 10 +#define CE_ADM_INT_PCIE_PORT2_DEV_B_SHFT 11 +#define CE_ADM_INT_PCIE_PORT2_DEV_C_SHFT 12 +#define CE_ADM_INT_PCIE_PORT2_DEV_D_SHFT 13 +#define CE_ADM_INT_PCIE_MSG_SHFT 14 /*see int_dest_14*/ +#define CE_ADM_INT_PCIE_MSG_SLOT_0_SHFT 14 +#define CE_ADM_INT_PCIE_MSG_SLOT_1_SHFT 15 +#define CE_ADM_INT_PCIE_MSG_SLOT_2_SHFT 16 +#define CE_ADM_INT_PCIE_MSG_SLOT_3_SHFT 17 +#define CE_ADM_INT_PORT1_PM_PME_MSG_SHFT 22 +#define CE_ADM_INT_PORT2_PM_PME_MSG_SHFT 23 + +/* ce_adm_force_int register bit defines */ +#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_A_SHFT 0 +#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_B_SHFT 1 +#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_C_SHFT 2 +#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_D_SHFT 3 +#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_A_SHFT 4 +#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_B_SHFT 5 +#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_C_SHFT 6 +#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_D_SHFT 7 +#define CE_ADM_FORCE_INT_ALWAYS_SHFT 8 + +/* ce_adm_int_dest register bit masks & shifts */ +#define INTR_VECTOR_SHFT 56 + +/* ce_adm_error_mask and ce_adm_error_summary register bit masks */ +#define CE_ADM_ERR_CRM_SSP_REQ_INVALID (0x1ULL << 0) +#define CE_ADM_ERR_SSP_REQ_HEADER (0x1ULL << 1) +#define CE_ADM_ERR_SSP_RSP_HEADER (0x1ULL << 2) +#define CE_ADM_ERR_SSP_PROTOCOL_ERROR (0x1ULL << 3) +#define CE_ADM_ERR_SSP_SBE (0x1ULL << 4) +#define CE_ADM_ERR_SSP_MBE (0x1ULL << 5) +#define CE_ADM_ERR_CXM_CREDIT_OFLOW (0x1ULL << 6) +#define CE_ADM_ERR_DRE_SSP_REQ_INVAL (0x1ULL << 7) +#define CE_ADM_ERR_SSP_REQ_LONG (0x1ULL << 8) +#define CE_ADM_ERR_SSP_REQ_OFLOW (0x1ULL << 9) +#define CE_ADM_ERR_SSP_REQ_SHORT (0x1ULL << 10) +#define CE_ADM_ERR_SSP_REQ_SIDEBAND (0x1ULL << 
11) +#define CE_ADM_ERR_SSP_REQ_ADDR_ERR (0x1ULL << 12) +#define CE_ADM_ERR_SSP_REQ_BAD_BE (0x1ULL << 13) +#define CE_ADM_ERR_PCIE_COMPL_TIMEOUT (0x1ULL << 14) +#define CE_ADM_ERR_PCIE_UNEXP_COMPL (0x1ULL << 15) +#define CE_ADM_ERR_PCIE_ERR_COMPL (0x1ULL << 16) +#define CE_ADM_ERR_DRE_CREDIT_OFLOW (0x1ULL << 17) +#define CE_ADM_ERR_DRE_SRAM_PE (0x1ULL << 18) +#define CE_ADM_ERR_SSP_RSP_INVALID (0x1ULL << 19) +#define CE_ADM_ERR_SSP_RSP_LONG (0x1ULL << 20) +#define CE_ADM_ERR_SSP_RSP_SHORT (0x1ULL << 21) +#define CE_ADM_ERR_SSP_RSP_SIDEBAND (0x1ULL << 22) +#define CE_ADM_ERR_URE_SSP_RSP_UNEXP (0x1ULL << 23) +#define CE_ADM_ERR_URE_SSP_WR_REQ_TIMEOUT (0x1ULL << 24) +#define CE_ADM_ERR_URE_SSP_RD_REQ_TIMEOUT (0x1ULL << 25) +#define CE_ADM_ERR_URE_ATE3240_PAGE_FAULT (0x1ULL << 26) +#define CE_ADM_ERR_URE_ATE40_PAGE_FAULT (0x1ULL << 27) +#define CE_ADM_ERR_URE_CREDIT_OFLOW (0x1ULL << 28) +#define CE_ADM_ERR_URE_SRAM_PE (0x1ULL << 29) +#define CE_ADM_ERR_ADM_SSP_RSP_UNEXP (0x1ULL << 30) +#define CE_ADM_ERR_ADM_SSP_REQ_TIMEOUT (0x1ULL << 31) +#define CE_ADM_ERR_MMR_ACCESS_ERROR (0x1ULL << 32) +#define CE_ADM_ERR_MMR_ADDR_ERROR (0x1ULL << 33) +#define CE_ADM_ERR_ADM_CREDIT_OFLOW (0x1ULL << 34) +#define CE_ADM_ERR_ADM_SRAM_PE (0x1ULL << 35) +#define CE_ADM_ERR_DTL1_MIN_PDATA_CREDIT_ERR (0x1ULL << 36) +#define CE_ADM_ERR_DTL1_INF_COMPL_CRED_UPDT_ERR (0x1ULL << 37) +#define CE_ADM_ERR_DTL1_INF_POSTED_CRED_UPDT_ERR (0x1ULL << 38) +#define CE_ADM_ERR_DTL1_INF_NPOSTED_CRED_UPDT_ERR (0x1ULL << 39) +#define CE_ADM_ERR_DTL1_COMP_HD_CRED_MAX_ERR (0x1ULL << 40) +#define CE_ADM_ERR_DTL1_COMP_D_CRED_MAX_ERR (0x1ULL << 41) +#define CE_ADM_ERR_DTL1_NPOSTED_HD_CRED_MAX_ERR (0x1ULL << 42) +#define CE_ADM_ERR_DTL1_NPOSTED_D_CRED_MAX_ERR (0x1ULL << 43) +#define CE_ADM_ERR_DTL1_POSTED_HD_CRED_MAX_ERR (0x1ULL << 44) +#define CE_ADM_ERR_DTL1_POSTED_D_CRED_MAX_ERR (0x1ULL << 45) +#define CE_ADM_ERR_DTL2_MIN_PDATA_CREDIT_ERR (0x1ULL << 46) +#define CE_ADM_ERR_DTL2_INF_COMPL_CRED_UPDT_ERR (0x1ULL 
<< 47) +#define CE_ADM_ERR_DTL2_INF_POSTED_CRED_UPDT_ERR (0x1ULL << 48) +#define CE_ADM_ERR_DTL2_INF_NPOSTED_CRED_UPDT_ERR (0x1ULL << 49) +#define CE_ADM_ERR_DTL2_COMP_HD_CRED_MAX_ERR (0x1ULL << 50) +#define CE_ADM_ERR_DTL2_COMP_D_CRED_MAX_ERR (0x1ULL << 51) +#define CE_ADM_ERR_DTL2_NPOSTED_HD_CRED_MAX_ERR (0x1ULL << 52) +#define CE_ADM_ERR_DTL2_NPOSTED_D_CRED_MAX_ERR (0x1ULL << 53) +#define CE_ADM_ERR_DTL2_POSTED_HD_CRED_MAX_ERR (0x1ULL << 54) +#define CE_ADM_ERR_DTL2_POSTED_D_CRED_MAX_ERR (0x1ULL << 55) +#define CE_ADM_ERR_PORT1_PCIE_COR_ERR (0x1ULL << 56) +#define CE_ADM_ERR_PORT1_PCIE_NFAT_ERR (0x1ULL << 57) +#define CE_ADM_ERR_PORT1_PCIE_FAT_ERR (0x1ULL << 58) +#define CE_ADM_ERR_PORT2_PCIE_COR_ERR (0x1ULL << 59) +#define CE_ADM_ERR_PORT2_PCIE_NFAT_ERR (0x1ULL << 60) +#define CE_ADM_ERR_PORT2_PCIE_FAT_ERR (0x1ULL << 61) + +/* ce_adm_ure_ups_buf_barrier_flush register bit masks and shifts */ +#define FLUSH_SEL_PORT1_PIPE0_SHFT 0 +#define FLUSH_SEL_PORT1_PIPE1_SHFT 4 +#define FLUSH_SEL_PORT1_PIPE2_SHFT 8 +#define FLUSH_SEL_PORT1_PIPE3_SHFT 12 +#define FLUSH_SEL_PORT2_PIPE0_SHFT 16 +#define FLUSH_SEL_PORT2_PIPE1_SHFT 20 +#define FLUSH_SEL_PORT2_PIPE2_SHFT 24 +#define FLUSH_SEL_PORT2_PIPE3_SHFT 28 + +/* ce_dre_config1 register bit masks and shifts */ +#define CE_DRE_RO_ENABLE (0x1ULL << 0) +#define CE_DRE_DYN_RO_ENABLE (0x1ULL << 1) +#define CE_DRE_SUP_CONFIG_COMP_ERROR (0x1ULL << 2) +#define CE_DRE_SUP_IO_COMP_ERROR (0x1ULL << 3) +#define CE_DRE_ADDR_MODE_SHFT 4 + +/* ce_dre_config_req_status register bit masks */ +#define CE_DRE_LAST_CONFIG_COMPLETION (0x7ULL << 0) +#define CE_DRE_DOWNSTREAM_CONFIG_ERROR (0x1ULL << 3) +#define CE_DRE_CONFIG_COMPLETION_VALID (0x1ULL << 4) +#define CE_DRE_CONFIG_REQUEST_ACTIVE (0x1ULL << 5) + +/* ce_ure_control register bit masks & shifts */ +#define CE_URE_RD_MRG_ENABLE (0x1ULL << 0) +#define CE_URE_WRT_MRG_ENABLE1 (0x1ULL << 4) +#define CE_URE_WRT_MRG_ENABLE2 (0x1ULL << 5) +#define CE_URE_WRT_MRG_TIMER_SHFT 12 +#define 
CE_URE_WRT_MRG_TIMER_MASK (0x7FFULL << CE_URE_WRT_MRG_TIMER_SHFT) +#define CE_URE_WRT_MRG_TIMER(x) (((u64)(x) << \ + CE_URE_WRT_MRG_TIMER_SHFT) & \ + CE_URE_WRT_MRG_TIMER_MASK) +#define CE_URE_RSPQ_BYPASS_DISABLE (0x1ULL << 24) +#define CE_URE_UPS_DAT1_PAR_DISABLE (0x1ULL << 32) +#define CE_URE_UPS_HDR1_PAR_DISABLE (0x1ULL << 33) +#define CE_URE_UPS_DAT2_PAR_DISABLE (0x1ULL << 34) +#define CE_URE_UPS_HDR2_PAR_DISABLE (0x1ULL << 35) +#define CE_URE_ATE_PAR_DISABLE (0x1ULL << 36) +#define CE_URE_RCI_PAR_DISABLE (0x1ULL << 37) +#define CE_URE_RSPQ_PAR_DISABLE (0x1ULL << 38) +#define CE_URE_DNS_DAT_PAR_DISABLE (0x1ULL << 39) +#define CE_URE_DNS_HDR_PAR_DISABLE (0x1ULL << 40) +#define CE_URE_MALFORM_DISABLE (0x1ULL << 44) +#define CE_URE_UNSUP_DISABLE (0x1ULL << 45) + +/* ce_ure_page_map register bit masks & shifts */ +#define CE_URE_ATE3240_ENABLE (0x1ULL << 0) +#define CE_URE_ATE40_ENABLE (0x1ULL << 1) +#define CE_URE_PAGESIZE_SHFT 4 +#define CE_URE_PAGESIZE_MASK (0x7ULL << CE_URE_PAGESIZE_SHFT) +#define CE_URE_4K_PAGESIZE (0x0ULL << CE_URE_PAGESIZE_SHFT) +#define CE_URE_16K_PAGESIZE (0x1ULL << CE_URE_PAGESIZE_SHFT) +#define CE_URE_64K_PAGESIZE (0x2ULL << CE_URE_PAGESIZE_SHFT) +#define CE_URE_128K_PAGESIZE (0x3ULL << CE_URE_PAGESIZE_SHFT) +#define CE_URE_256K_PAGESIZE (0x4ULL << CE_URE_PAGESIZE_SHFT) + +/* ce_ure_pipe_sel register bit masks & shifts */ +#define PKT_TRAFIC_SHRT 16 +#define BUS_SRC_ID_SHFT 8 +#define DEV_SRC_ID_SHFT 3 +#define FNC_SRC_ID_SHFT 0 +#define CE_URE_TC_MASK (0x07ULL << PKT_TRAFIC_SHRT) +#define CE_URE_BUS_MASK (0xFFULL << BUS_SRC_ID_SHFT) +#define CE_URE_DEV_MASK (0x1FULL << DEV_SRC_ID_SHFT) +#define CE_URE_FNC_MASK (0x07ULL << FNC_SRC_ID_SHFT) +#define CE_URE_PIPE_BUS(b) (((u64)(b) << BUS_SRC_ID_SHFT) & \ + CE_URE_BUS_MASK) +#define CE_URE_PIPE_DEV(d) (((u64)(d) << DEV_SRC_ID_SHFT) & \ + CE_URE_DEV_MASK) +#define CE_URE_PIPE_FNC(f) (((u64)(f) << FNC_SRC_ID_SHFT) & \ + CE_URE_FNC_MASK) + +#define CE_URE_SEL1_SHFT 0 +#define CE_URE_SEL2_SHFT 
20 +#define CE_URE_SEL3_SHFT 40 +#define CE_URE_SEL1_MASK (0x7FFFFULL << CE_URE_SEL1_SHFT) +#define CE_URE_SEL2_MASK (0x7FFFFULL << CE_URE_SEL2_SHFT) +#define CE_URE_SEL3_MASK (0x7FFFFULL << CE_URE_SEL3_SHFT) + + +/* ce_ure_pipe_mask register bit masks & shifts */ +#define CE_URE_MASK1_SHFT 0 +#define CE_URE_MASK2_SHFT 20 +#define CE_URE_MASK3_SHFT 40 +#define CE_URE_MASK1_MASK (0x7FFFFULL << CE_URE_MASK1_SHFT) +#define CE_URE_MASK2_MASK (0x7FFFFULL << CE_URE_MASK2_SHFT) +#define CE_URE_MASK3_MASK (0x7FFFFULL << CE_URE_MASK3_SHFT) + + +/* ce_ure_pcie_control1 register bit masks & shifts */ +#define CE_URE_SI (0x1ULL << 0) +#define CE_URE_ELAL_SHFT 4 +#define CE_URE_ELAL_MASK (0x7ULL << CE_URE_ELAL_SHFT) +#define CE_URE_ELAL_SET(n) (((u64)(n) << CE_URE_ELAL_SHFT) & \ + CE_URE_ELAL_MASK) +#define CE_URE_ELAL1_SHFT 8 +#define CE_URE_ELAL1_MASK (0x7ULL << CE_URE_ELAL1_SHFT) +#define CE_URE_ELAL1_SET(n) (((u64)(n) << CE_URE_ELAL1_SHFT) & \ + CE_URE_ELAL1_MASK) +#define CE_URE_SCC (0x1ULL << 12) +#define CE_URE_PN1_SHFT 16 +#define CE_URE_PN1_MASK (0xFFULL << CE_URE_PN1_SHFT) +#define CE_URE_PN2_SHFT 24 +#define CE_URE_PN2_MASK (0xFFULL << CE_URE_PN2_SHFT) +#define CE_URE_PN1_SET(n) (((u64)(n) << CE_URE_PN1_SHFT) & \ + CE_URE_PN1_MASK) +#define CE_URE_PN2_SET(n) (((u64)(n) << CE_URE_PN2_SHFT) & \ + CE_URE_PN2_MASK) + +/* ce_ure_pcie_control2 register bit masks & shifts */ +#define CE_URE_ABP (0x1ULL << 0) +#define CE_URE_PCP (0x1ULL << 1) +#define CE_URE_MSP (0x1ULL << 2) +#define CE_URE_AIP (0x1ULL << 3) +#define CE_URE_PIP (0x1ULL << 4) +#define CE_URE_HPS (0x1ULL << 5) +#define CE_URE_HPC (0x1ULL << 6) +#define CE_URE_SPLV_SHFT 7 +#define CE_URE_SPLV_MASK (0xFFULL << CE_URE_SPLV_SHFT) +#define CE_URE_SPLV_SET(n) (((u64)(n) << CE_URE_SPLV_SHFT) & \ + CE_URE_SPLV_MASK) +#define CE_URE_SPLS_SHFT 15 +#define CE_URE_SPLS_MASK (0x3ULL << CE_URE_SPLS_SHFT) +#define CE_URE_SPLS_SET(n) (((u64)(n) << CE_URE_SPLS_SHFT) & \ + CE_URE_SPLS_MASK) +#define CE_URE_PSN1_SHFT 19 
+#define CE_URE_PSN1_MASK (0x1FFFULL << CE_URE_PSN1_SHFT) +#define CE_URE_PSN2_SHFT 32 +#define CE_URE_PSN2_MASK (0x1FFFULL << CE_URE_PSN2_SHFT) +#define CE_URE_PSN1_SET(n) (((u64)(n) << CE_URE_PSN1_SHFT) & \ + CE_URE_PSN1_MASK) +#define CE_URE_PSN2_SET(n) (((u64)(n) << CE_URE_PSN2_SHFT) & \ + CE_URE_PSN2_MASK) + +/* + * PIO address space ranges for CE + */ + +/* Local CE Registers Space */ +#define CE_PIO_MMR 0x00000000 +#define CE_PIO_MMR_LEN 0x04000000 + +/* PCI Compatible Config Space */ +#define CE_PIO_CONFIG_SPACE 0x04000000 +#define CE_PIO_CONFIG_SPACE_LEN 0x04000000 + +/* PCI I/O Space Alias */ +#define CE_PIO_IO_SPACE_ALIAS 0x08000000 +#define CE_PIO_IO_SPACE_ALIAS_LEN 0x08000000 + +/* PCI Enhanced Config Space */ +#define CE_PIO_E_CONFIG_SPACE 0x10000000 +#define CE_PIO_E_CONFIG_SPACE_LEN 0x10000000 + +/* PCI I/O Space */ +#define CE_PIO_IO_SPACE 0x100000000 +#define CE_PIO_IO_SPACE_LEN 0x100000000 + +/* PCI MEM Space */ +#define CE_PIO_MEM_SPACE 0x200000000 +#define CE_PIO_MEM_SPACE_LEN TIO_HWIN_SIZE + + +/* + * CE PCI Enhanced Config Space shifts & masks + */ +#define CE_E_CONFIG_BUS_SHFT 20 +#define CE_E_CONFIG_BUS_MASK (0xFF << CE_E_CONFIG_BUS_SHFT) +#define CE_E_CONFIG_DEVICE_SHFT 15 +#define CE_E_CONFIG_DEVICE_MASK (0x1F << CE_E_CONFIG_DEVICE_SHFT) +#define CE_E_CONFIG_FUNC_SHFT 12 +#define CE_E_CONFIG_FUNC_MASK (0x7 << CE_E_CONFIG_FUNC_SHFT) + +#endif /* __ASM_IA64_SN_TIOCE_H__ */ diff --git a/kernel/arch/ia64/include/asm/sn/tioce_provider.h b/kernel/arch/ia64/include/asm/sn/tioce_provider.h new file mode 100644 index 000000000..32c32f30b --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/tioce_provider.h @@ -0,0 +1,63 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2003-2005 Silicon Graphics, Inc. All rights reserved. 
+ */ + +#ifndef _ASM_IA64_SN_CE_PROVIDER_H +#define _ASM_IA64_SN_CE_PROVIDER_H + +#include +#include + +/* + * Common TIOCE structure shared between the prom and kernel + * + * DO NOT CHANGE THIS STRUCT WITHOUT MAKING CORRESPONDING CHANGES TO THE + * PROM VERSION. + */ +struct tioce_common { + struct pcibus_bussoft ce_pcibus; /* common pciio header */ + + u32 ce_rev; + u64 ce_kernel_private; + u64 ce_prom_private; +}; + +struct tioce_kernel { + struct tioce_common *ce_common; + spinlock_t ce_lock; + struct list_head ce_dmamap_list; + + u64 ce_ate40_shadow[TIOCE_NUM_M40_ATES]; + u64 ce_ate3240_shadow[TIOCE_NUM_M3240_ATES]; + u32 ce_ate3240_pagesize; + + u8 ce_port1_secondary; + + /* per-port resources */ + struct { + int dirmap_refcnt; + u64 dirmap_shadow; + } ce_port[TIOCE_NUM_PORTS]; +}; + +struct tioce_dmamap { + struct list_head ce_dmamap_list; /* headed by tioce_kernel */ + u32 refcnt; + + u64 nbytes; /* # bytes mapped */ + + u64 ct_start; /* coretalk start address */ + u64 pci_start; /* bus start address */ + + u64 __iomem *ate_hw;/* hw ptr of first ate in map */ + u64 *ate_shadow; /* shadow ptr of first ate */ + u16 ate_count; /* # ate's in the map */ +}; + +extern int tioce_init_provider(void); + +#endif /* _ASM_IA64_SN_CE_PROVIDER_H */ diff --git a/kernel/arch/ia64/include/asm/sn/tiocp.h b/kernel/arch/ia64/include/asm/sn/tiocp.h new file mode 100644 index 000000000..e8ad0bb5b --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/tiocp.h @@ -0,0 +1,257 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003-2005 Silicon Graphics, Inc. All rights reserved. 
+ */ +#ifndef _ASM_IA64_SN_PCI_TIOCP_H +#define _ASM_IA64_SN_PCI_TIOCP_H + +#define TIOCP_HOST_INTR_ADDR 0x003FFFFFFFFFFFFFUL +#define TIOCP_PCI64_CMDTYPE_MEM (0x1ull << 60) +#define TIOCP_PCI64_CMDTYPE_MSI (0x3ull << 60) + + +/***************************************************************************** + *********************** TIOCP MMR structure mapping *************************** + *****************************************************************************/ + +struct tiocp{ + + /* 0x000000-0x00FFFF -- Local Registers */ + + /* 0x000000-0x000057 -- (Legacy Widget Space) Configuration */ + u64 cp_id; /* 0x000000 */ + u64 cp_stat; /* 0x000008 */ + u64 cp_err_upper; /* 0x000010 */ + u64 cp_err_lower; /* 0x000018 */ + #define cp_err cp_err_lower + u64 cp_control; /* 0x000020 */ + u64 cp_req_timeout; /* 0x000028 */ + u64 cp_intr_upper; /* 0x000030 */ + u64 cp_intr_lower; /* 0x000038 */ + #define cp_intr cp_intr_lower + u64 cp_err_cmdword; /* 0x000040 */ + u64 _pad_000048; /* 0x000048 */ + u64 cp_tflush; /* 0x000050 */ + + /* 0x000058-0x00007F -- Bridge-specific Configuration */ + u64 cp_aux_err; /* 0x000058 */ + u64 cp_resp_upper; /* 0x000060 */ + u64 cp_resp_lower; /* 0x000068 */ + #define cp_resp cp_resp_lower + u64 cp_tst_pin_ctrl; /* 0x000070 */ + u64 cp_addr_lkerr; /* 0x000078 */ + + /* 0x000080-0x00008F -- PMU & MAP */ + u64 cp_dir_map; /* 0x000080 */ + u64 _pad_000088; /* 0x000088 */ + + /* 0x000090-0x00009F -- SSRAM */ + u64 cp_map_fault; /* 0x000090 */ + u64 _pad_000098; /* 0x000098 */ + + /* 0x0000A0-0x0000AF -- Arbitration */ + u64 cp_arb; /* 0x0000A0 */ + u64 _pad_0000A8; /* 0x0000A8 */ + + /* 0x0000B0-0x0000BF -- Number In A Can or ATE Parity Error */ + u64 cp_ate_parity_err; /* 0x0000B0 */ + u64 _pad_0000B8; /* 0x0000B8 */ + + /* 0x0000C0-0x0000FF -- PCI/GIO */ + u64 cp_bus_timeout; /* 0x0000C0 */ + u64 cp_pci_cfg; /* 0x0000C8 */ + u64 cp_pci_err_upper; /* 0x0000D0 */ + u64 cp_pci_err_lower; /* 0x0000D8 */ + #define cp_pci_err cp_pci_err_lower + u64 
_pad_0000E0[4]; /* 0x0000{E0..F8} */ + + /* 0x000100-0x0001FF -- Interrupt */ + u64 cp_int_status; /* 0x000100 */ + u64 cp_int_enable; /* 0x000108 */ + u64 cp_int_rst_stat; /* 0x000110 */ + u64 cp_int_mode; /* 0x000118 */ + u64 cp_int_device; /* 0x000120 */ + u64 cp_int_host_err; /* 0x000128 */ + u64 cp_int_addr[8]; /* 0x0001{30,,,68} */ + u64 cp_err_int_view; /* 0x000170 */ + u64 cp_mult_int; /* 0x000178 */ + u64 cp_force_always[8]; /* 0x0001{80,,,B8} */ + u64 cp_force_pin[8]; /* 0x0001{C0,,,F8} */ + + /* 0x000200-0x000298 -- Device */ + u64 cp_device[4]; /* 0x0002{00,,,18} */ + u64 _pad_000220[4]; /* 0x0002{20,,,38} */ + u64 cp_wr_req_buf[4]; /* 0x0002{40,,,58} */ + u64 _pad_000260[4]; /* 0x0002{60,,,78} */ + u64 cp_rrb_map[2]; /* 0x0002{80,,,88} */ + #define cp_even_resp cp_rrb_map[0] /* 0x000280 */ + #define cp_odd_resp cp_rrb_map[1] /* 0x000288 */ + u64 cp_resp_status; /* 0x000290 */ + u64 cp_resp_clear; /* 0x000298 */ + + u64 _pad_0002A0[12]; /* 0x0002{A0..F8} */ + + /* 0x000300-0x0003F8 -- Buffer Address Match Registers */ + struct { + u64 upper; /* 0x0003{00,,,F0} */ + u64 lower; /* 0x0003{08,,,F8} */ + } cp_buf_addr_match[16]; + + /* 0x000400-0x0005FF -- Performance Monitor Registers (even only) */ + struct { + u64 flush_w_touch; /* 0x000{400,,,5C0} */ + u64 flush_wo_touch; /* 0x000{408,,,5C8} */ + u64 inflight; /* 0x000{410,,,5D0} */ + u64 prefetch; /* 0x000{418,,,5D8} */ + u64 total_pci_retry; /* 0x000{420,,,5E0} */ + u64 max_pci_retry; /* 0x000{428,,,5E8} */ + u64 max_latency; /* 0x000{430,,,5F0} */ + u64 clear_all; /* 0x000{438,,,5F8} */ + } cp_buf_count[8]; + + + /* 0x000600-0x0009FF -- PCI/X registers */ + u64 cp_pcix_bus_err_addr; /* 0x000600 */ + u64 cp_pcix_bus_err_attr; /* 0x000608 */ + u64 cp_pcix_bus_err_data; /* 0x000610 */ + u64 cp_pcix_pio_split_addr; /* 0x000618 */ + u64 cp_pcix_pio_split_attr; /* 0x000620 */ + u64 cp_pcix_dma_req_err_attr; /* 0x000628 */ + u64 cp_pcix_dma_req_err_addr; /* 0x000630 */ + u64 cp_pcix_timeout; /* 0x000638 */ + 
+ u64 _pad_000640[24]; /* 0x000{640,,,6F8} */ + + /* 0x000700-0x000737 -- Debug Registers */ + u64 cp_ct_debug_ctl; /* 0x000700 */ + u64 cp_br_debug_ctl; /* 0x000708 */ + u64 cp_mux3_debug_ctl; /* 0x000710 */ + u64 cp_mux4_debug_ctl; /* 0x000718 */ + u64 cp_mux5_debug_ctl; /* 0x000720 */ + u64 cp_mux6_debug_ctl; /* 0x000728 */ + u64 cp_mux7_debug_ctl; /* 0x000730 */ + + u64 _pad_000738[89]; /* 0x000{738,,,9F8} */ + + /* 0x000A00-0x000BFF -- PCI/X Read&Write Buffer */ + struct { + u64 cp_buf_addr; /* 0x000{A00,,,AF0} */ + u64 cp_buf_attr; /* 0X000{A08,,,AF8} */ + } cp_pcix_read_buf_64[16]; + + struct { + u64 cp_buf_addr; /* 0x000{B00,,,BE0} */ + u64 cp_buf_attr; /* 0x000{B08,,,BE8} */ + u64 cp_buf_valid; /* 0x000{B10,,,BF0} */ + u64 __pad1; /* 0x000{B18,,,BF8} */ + } cp_pcix_write_buf_64[8]; + + /* End of Local Registers -- Start of Address Map space */ + + char _pad_000c00[0x010000 - 0x000c00]; + + /* 0x010000-0x011FF8 -- Internal ATE RAM (Auto Parity Generation) */ + u64 cp_int_ate_ram[1024]; /* 0x010000-0x011FF8 */ + + char _pad_012000[0x14000 - 0x012000]; + + /* 0x014000-0x015FF8 -- Internal ATE RAM (Manual Parity Generation) */ + u64 cp_int_ate_ram_mp[1024]; /* 0x014000-0x015FF8 */ + + char _pad_016000[0x18000 - 0x016000]; + + /* 0x18000-0x197F8 -- TIOCP Write Request Ram */ + u64 cp_wr_req_lower[256]; /* 0x18000 - 0x187F8 */ + u64 cp_wr_req_upper[256]; /* 0x18800 - 0x18FF8 */ + u64 cp_wr_req_parity[256]; /* 0x19000 - 0x197F8 */ + + char _pad_019800[0x1C000 - 0x019800]; + + /* 0x1C000-0x1EFF8 -- TIOCP Read Response Ram */ + u64 cp_rd_resp_lower[512]; /* 0x1C000 - 0x1CFF8 */ + u64 cp_rd_resp_upper[512]; /* 0x1D000 - 0x1DFF8 */ + u64 cp_rd_resp_parity[512]; /* 0x1E000 - 0x1EFF8 */ + + char _pad_01F000[0x20000 - 0x01F000]; + + /* 0x020000-0x021FFF -- Host Device (CP) Configuration Space (not used) */ + char _pad_020000[0x021000 - 0x20000]; + + /* 0x021000-0x027FFF -- PCI Device Configuration Spaces */ + union { + u8 c[0x1000 / 1]; /* 0x02{0000,,,7FFF} */ + u16 
s[0x1000 / 2]; /* 0x02{0000,,,7FFF} */ + u32 l[0x1000 / 4]; /* 0x02{0000,,,7FFF} */ + u64 d[0x1000 / 8]; /* 0x02{0000,,,7FFF} */ + union { + u8 c[0x100 / 1]; + u16 s[0x100 / 2]; + u32 l[0x100 / 4]; + u64 d[0x100 / 8]; + } f[8]; + } cp_type0_cfg_dev[7]; /* 0x02{1000,,,7FFF} */ + + /* 0x028000-0x028FFF -- PCI Type 1 Configuration Space */ + union { + u8 c[0x1000 / 1]; /* 0x028000-0x029000 */ + u16 s[0x1000 / 2]; /* 0x028000-0x029000 */ + u32 l[0x1000 / 4]; /* 0x028000-0x029000 */ + u64 d[0x1000 / 8]; /* 0x028000-0x029000 */ + union { + u8 c[0x100 / 1]; + u16 s[0x100 / 2]; + u32 l[0x100 / 4]; + u64 d[0x100 / 8]; + } f[8]; + } cp_type1_cfg; /* 0x028000-0x029000 */ + + char _pad_029000[0x030000-0x029000]; + + /* 0x030000-0x030007 -- PCI Interrupt Acknowledge Cycle */ + union { + u8 c[8 / 1]; + u16 s[8 / 2]; + u32 l[8 / 4]; + u64 d[8 / 8]; + } cp_pci_iack; /* 0x030000-0x030007 */ + + char _pad_030007[0x040000-0x030008]; + + /* 0x040000-0x040007 -- PCIX Special Cycle */ + union { + u8 c[8 / 1]; + u16 s[8 / 2]; + u32 l[8 / 4]; + u64 d[8 / 8]; + } cp_pcix_cycle; /* 0x040000-0x040007 */ + + char _pad_040007[0x200000-0x040008]; + + /* 0x200000-0x7FFFFF -- PCI/GIO Device Spaces */ + union { + u8 c[0x100000 / 1]; + u16 s[0x100000 / 2]; + u32 l[0x100000 / 4]; + u64 d[0x100000 / 8]; + } cp_devio_raw[6]; /* 0x200000-0x7FFFFF */ + + #define cp_devio(n) cp_devio_raw[((n)<2)?(n*2):(n+2)] + + char _pad_800000[0xA00000-0x800000]; + + /* 0xA00000-0xBFFFFF -- PCI/GIO Device Spaces w/flush */ + union { + u8 c[0x100000 / 1]; + u16 s[0x100000 / 2]; + u32 l[0x100000 / 4]; + u64 d[0x100000 / 8]; + } cp_devio_raw_flush[6]; /* 0xA00000-0xBFFFFF */ + + #define cp_devio_flush(n) cp_devio_raw_flush[((n)<2)?(n*2):(n+2)] + +}; + +#endif /* _ASM_IA64_SN_PCI_TIOCP_H */ diff --git a/kernel/arch/ia64/include/asm/sn/tiocx.h b/kernel/arch/ia64/include/asm/sn/tiocx.h new file mode 100644 index 000000000..d29728492 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/tiocx.h @@ -0,0 +1,72 @@ +/* + * This 
file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2005 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_IA64_SN_TIO_TIOCX_H +#define _ASM_IA64_SN_TIO_TIOCX_H + +#ifdef __KERNEL__ + +struct cx_id_s { + unsigned int part_num; + unsigned int mfg_num; + int nasid; +}; + +struct cx_dev { + struct cx_id_s cx_id; + int bt; /* board/blade type */ + void *soft; /* driver specific */ + struct hubdev_info *hubdev; + struct device dev; + struct cx_drv *driver; +}; + +struct cx_device_id { + unsigned int part_num; + unsigned int mfg_num; +}; + +struct cx_drv { + char *name; + const struct cx_device_id *id_table; + struct device_driver driver; + int (*probe) (struct cx_dev * dev, const struct cx_device_id * id); + int (*remove) (struct cx_dev * dev); +}; + +/* create DMA address by stripping AS bits */ +#define TIOCX_DMA_ADDR(a) (u64)((u64)(a) & 0xffffcfffffffffUL) + +#define TIOCX_TO_TIOCX_DMA_ADDR(a) (u64)(((u64)(a) & 0xfffffffff) | \ + ((((u64)(a)) & 0xffffc000000000UL) <<2)) + +#define TIO_CE_ASIC_PARTNUM 0xce00 +#define TIOCX_CORELET 3 + +/* These are taken from tio_mmr_as.h */ +#define TIO_ICE_FRZ_CFG TIO_MMR_ADDR_MOD(0x00000000b0008100UL) +#define TIO_ICE_PMI_TX_CFG TIO_MMR_ADDR_MOD(0x00000000b000b100UL) +#define TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3 TIO_MMR_ADDR_MOD(0x00000000b000be18UL) +#define TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK 0x000000000000000fUL + +#define to_cx_dev(n) container_of(n, struct cx_dev, dev) +#define to_cx_driver(drv) container_of(drv, struct cx_drv, driver) + +extern struct sn_irq_info *tiocx_irq_alloc(nasid_t, int, int, nasid_t, int); +extern void tiocx_irq_free(struct sn_irq_info *); +extern int cx_device_unregister(struct cx_dev *); +extern int cx_device_register(nasid_t, int, int, struct hubdev_info *, int); +extern int cx_driver_unregister(struct cx_drv *); +extern int 
cx_driver_register(struct cx_drv *); +extern u64 tiocx_dma_addr(u64 addr); +extern u64 tiocx_swin_base(int nasid); +extern void tiocx_mmr_store(int nasid, u64 offset, u64 value); +extern u64 tiocx_mmr_load(int nasid, u64 offset); + +#endif // __KERNEL__ +#endif // _ASM_IA64_SN_TIO_TIOCX_H diff --git a/kernel/arch/ia64/include/asm/sn/types.h b/kernel/arch/ia64/include/asm/sn/types.h new file mode 100644 index 000000000..8e04ee211 --- /dev/null +++ b/kernel/arch/ia64/include/asm/sn/types.h @@ -0,0 +1,26 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999,2001-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (C) 1999 by Ralf Baechle + */ +#ifndef _ASM_IA64_SN_TYPES_H +#define _ASM_IA64_SN_TYPES_H + +#include + +typedef unsigned long cpuid_t; +typedef signed short nasid_t; /* node id in numa-as-id space */ +typedef signed char partid_t; /* partition ID type */ +typedef unsigned int moduleid_t; /* user-visible module number type */ +typedef unsigned int cmoduleid_t; /* kernel compact module id type */ +typedef unsigned char slotid_t; /* slot (blade) within module */ +typedef unsigned char slabid_t; /* slab (asic) within slot */ +typedef u64 nic_t; +typedef unsigned long iopaddr_t; +typedef unsigned long paddr_t; +typedef short cnodeid_t; + +#endif /* _ASM_IA64_SN_TYPES_H */ diff --git a/kernel/arch/ia64/include/asm/sparsemem.h b/kernel/arch/ia64/include/asm/sparsemem.h new file mode 100644 index 000000000..67a7c40ec --- /dev/null +++ b/kernel/arch/ia64/include/asm/sparsemem.h @@ -0,0 +1,20 @@ +#ifndef _ASM_IA64_SPARSEMEM_H +#define _ASM_IA64_SPARSEMEM_H + +#ifdef CONFIG_SPARSEMEM +/* + * SECTION_SIZE_BITS 2^N: how big each section will be + * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space + */ + +#define SECTION_SIZE_BITS (30) +#define MAX_PHYSMEM_BITS (50) +#ifdef 
CONFIG_FORCE_MAX_ZONEORDER +#if ((CONFIG_FORCE_MAX_ZONEORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS) +#undef SECTION_SIZE_BITS +#define SECTION_SIZE_BITS (CONFIG_FORCE_MAX_ZONEORDER - 1 + PAGE_SHIFT) +#endif +#endif + +#endif /* CONFIG_SPARSEMEM */ +#endif /* _ASM_IA64_SPARSEMEM_H */ diff --git a/kernel/arch/ia64/include/asm/spinlock.h b/kernel/arch/ia64/include/asm/spinlock.h new file mode 100644 index 000000000..45698cd15 --- /dev/null +++ b/kernel/arch/ia64/include/asm/spinlock.h @@ -0,0 +1,296 @@ +#ifndef _ASM_IA64_SPINLOCK_H +#define _ASM_IA64_SPINLOCK_H + +/* + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Walt Drummond + * + * This file is used for SMP configurations only. + */ + +#include +#include +#include + +#include +#include + +#define arch_spin_lock_init(x) ((x)->lock = 0) + +/* + * Ticket locks are conceptually two parts, one indicating the current head of + * the queue, and the other indicating the current tail. The lock is acquired + * by atomically noting the tail and incrementing it by one (thus adding + * ourself to the queue and noting our position), then waiting until the head + * becomes equal to the initial value of the tail. + * The pad bits in the middle are used to prevent the next_ticket number + * overflowing into the now_serving number. 
+ * + * 31 17 16 15 14 0 + * +----------------------------------------------------+ + * | now_serving | padding | next_ticket | + * +----------------------------------------------------+ + */ + +#define TICKET_SHIFT 17 +#define TICKET_BITS 15 +#define TICKET_MASK ((1 << TICKET_BITS) - 1) + +static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) +{ + int *p = (int *)&lock->lock, ticket, serve; + + ticket = ia64_fetchadd(1, p, acq); + + if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) + return; + + ia64_invala(); + + for (;;) { + asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(p) : "memory"); + + if (!(((serve >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) + return; + cpu_relax(); + } +} + +static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) +{ + int tmp = ACCESS_ONCE(lock->lock); + + if (!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK)) + return ia64_cmpxchg(acq, &lock->lock, tmp, tmp + 1, sizeof (tmp)) == tmp; + return 0; +} + +static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) +{ + unsigned short *p = (unsigned short *)&lock->lock + 1, tmp; + + asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p)); + ACCESS_ONCE(*p) = (tmp + 2) & ~1; +} + +static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock) +{ + int *p = (int *)&lock->lock, ticket; + + ia64_invala(); + + for (;;) { + asm volatile ("ld4.c.nc %0=[%1]" : "=r"(ticket) : "r"(p) : "memory"); + if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) + return; + cpu_relax(); + } +} + +static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) +{ + long tmp = ACCESS_ONCE(lock->lock); + + return !!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK); +} + +static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) +{ + long tmp = ACCESS_ONCE(lock->lock); + + return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1; +} + +static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return !(((lock.lock >> 
TICKET_SHIFT) ^ lock.lock) & TICKET_MASK); +} + +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + return __ticket_spin_is_locked(lock); +} + +static inline int arch_spin_is_contended(arch_spinlock_t *lock) +{ + return __ticket_spin_is_contended(lock); +} +#define arch_spin_is_contended arch_spin_is_contended + +static __always_inline void arch_spin_lock(arch_spinlock_t *lock) +{ + __ticket_spin_lock(lock); +} + +static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + return __ticket_spin_trylock(lock); +} + +static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + __ticket_spin_unlock(lock); +} + +static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, + unsigned long flags) +{ + arch_spin_lock(lock); +} + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + __ticket_spin_unlock_wait(lock); +} + +#define arch_read_can_lock(rw) (*(volatile int *)(rw) >= 0) +#define arch_write_can_lock(rw) (*(volatile int *)(rw) == 0) + +#ifdef ASM_SUPPORTED + +static __always_inline void +arch_read_lock_flags(arch_rwlock_t *lock, unsigned long flags) +{ + __asm__ __volatile__ ( + "tbit.nz p6, p0 = %1,%2\n" + "br.few 3f\n" + "1:\n" + "fetchadd4.rel r2 = [%0], -1;;\n" + "(p6) ssm psr.i\n" + "2:\n" + "hint @pause\n" + "ld4 r2 = [%0];;\n" + "cmp4.lt p7,p0 = r2, r0\n" + "(p7) br.cond.spnt.few 2b\n" + "(p6) rsm psr.i\n" + ";;\n" + "3:\n" + "fetchadd4.acq r2 = [%0], 1;;\n" + "cmp4.lt p7,p0 = r2, r0\n" + "(p7) br.cond.spnt.few 1b\n" + : : "r"(lock), "r"(flags), "i"(IA64_PSR_I_BIT) + : "p6", "p7", "r2", "memory"); +} + +#define arch_read_lock(lock) arch_read_lock_flags(lock, 0) + +#else /* !ASM_SUPPORTED */ + +#define arch_read_lock_flags(rw, flags) arch_read_lock(rw) + +#define arch_read_lock(rw) \ +do { \ + arch_rwlock_t *__read_lock_ptr = (rw); \ + \ + while (unlikely(ia64_fetchadd(1, (int *) __read_lock_ptr, acq) < 0)) { \ + ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \ + while (*(volatile int 
*)__read_lock_ptr < 0) \ + cpu_relax(); \ + } \ +} while (0) + +#endif /* !ASM_SUPPORTED */ + +#define arch_read_unlock(rw) \ +do { \ + arch_rwlock_t *__read_lock_ptr = (rw); \ + ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \ +} while (0) + +#ifdef ASM_SUPPORTED + +static __always_inline void +arch_write_lock_flags(arch_rwlock_t *lock, unsigned long flags) +{ + __asm__ __volatile__ ( + "tbit.nz p6, p0 = %1, %2\n" + "mov ar.ccv = r0\n" + "dep r29 = -1, r0, 31, 1\n" + "br.few 3f;;\n" + "1:\n" + "(p6) ssm psr.i\n" + "2:\n" + "hint @pause\n" + "ld4 r2 = [%0];;\n" + "cmp4.eq p0,p7 = r0, r2\n" + "(p7) br.cond.spnt.few 2b\n" + "(p6) rsm psr.i\n" + ";;\n" + "3:\n" + "cmpxchg4.acq r2 = [%0], r29, ar.ccv;;\n" + "cmp4.eq p0,p7 = r0, r2\n" + "(p7) br.cond.spnt.few 1b;;\n" + : : "r"(lock), "r"(flags), "i"(IA64_PSR_I_BIT) + : "ar.ccv", "p6", "p7", "r2", "r29", "memory"); +} + +#define arch_write_lock(rw) arch_write_lock_flags(rw, 0) + +#define arch_write_trylock(rw) \ +({ \ + register long result; \ + \ + __asm__ __volatile__ ( \ + "mov ar.ccv = r0\n" \ + "dep r29 = -1, r0, 31, 1;;\n" \ + "cmpxchg4.acq %0 = [%1], r29, ar.ccv\n" \ + : "=r"(result) : "r"(rw) : "ar.ccv", "r29", "memory"); \ + (result == 0); \ +}) + +static inline void arch_write_unlock(arch_rwlock_t *x) +{ + u8 *y = (u8 *)x; + barrier(); + asm volatile ("st1.rel.nta [%0] = r0\n\t" :: "r"(y+3) : "memory" ); +} + +#else /* !ASM_SUPPORTED */ + +#define arch_write_lock_flags(l, flags) arch_write_lock(l) + +#define arch_write_lock(l) \ +({ \ + __u64 ia64_val, ia64_set_val = ia64_dep_mi(-1, 0, 31, 1); \ + __u32 *ia64_write_lock_ptr = (__u32 *) (l); \ + do { \ + while (*ia64_write_lock_ptr) \ + ia64_barrier(); \ + ia64_val = ia64_cmpxchg4_acq(ia64_write_lock_ptr, ia64_set_val, 0); \ + } while (ia64_val); \ +}) + +#define arch_write_trylock(rw) \ +({ \ + __u64 ia64_val; \ + __u64 ia64_set_val = ia64_dep_mi(-1, 0, 31,1); \ + ia64_val = ia64_cmpxchg4_acq((__u32 *)(rw), ia64_set_val, 0); \ + (ia64_val == 0); \ +}) + 
+static inline void arch_write_unlock(arch_rwlock_t *x) +{ + barrier(); + x->write_lock = 0; +} + +#endif /* !ASM_SUPPORTED */ + +static inline int arch_read_trylock(arch_rwlock_t *x) +{ + union { + arch_rwlock_t lock; + __u32 word; + } old, new; + old.lock = new.lock = *x; + old.lock.write_lock = new.lock.write_lock = 0; + ++new.lock.read_counter; + return (u32)ia64_cmpxchg4_acq((__u32 *)(x), new.word, old.word) == old.word; +} + +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() + +#endif /* _ASM_IA64_SPINLOCK_H */ diff --git a/kernel/arch/ia64/include/asm/spinlock_types.h b/kernel/arch/ia64/include/asm/spinlock_types.h new file mode 100644 index 000000000..e2b42a52a --- /dev/null +++ b/kernel/arch/ia64/include/asm/spinlock_types.h @@ -0,0 +1,21 @@ +#ifndef _ASM_IA64_SPINLOCK_TYPES_H +#define _ASM_IA64_SPINLOCK_TYPES_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +# error "please don't include this file directly" +#endif + +typedef struct { + volatile unsigned int lock; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } + +typedef struct { + volatile unsigned int read_counter : 31; + volatile unsigned int write_lock : 1; +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED { 0, 0 } + +#endif diff --git a/kernel/arch/ia64/include/asm/string.h b/kernel/arch/ia64/include/asm/string.h new file mode 100644 index 000000000..85fd65c52 --- /dev/null +++ b/kernel/arch/ia64/include/asm/string.h @@ -0,0 +1,21 @@ +#ifndef _ASM_IA64_STRING_H +#define _ASM_IA64_STRING_H + +/* + * Here is where we want to put optimized versions of the string + * routines. 
+ * + * Copyright (C) 1998-2000, 2002 Hewlett-Packard Co + * David Mosberger-Tang + */ + + +#define __HAVE_ARCH_STRLEN 1 /* see arch/ia64/lib/strlen.S */ +#define __HAVE_ARCH_MEMSET 1 /* see arch/ia64/lib/memset.S */ +#define __HAVE_ARCH_MEMCPY 1 /* see arch/ia64/lib/memcpy.S */ + +extern __kernel_size_t strlen (const char *); +extern void *memcpy (void *, const void *, __kernel_size_t); +extern void *memset (void *, int, __kernel_size_t); + +#endif /* _ASM_IA64_STRING_H */ diff --git a/kernel/arch/ia64/include/asm/swiotlb.h b/kernel/arch/ia64/include/asm/swiotlb.h new file mode 100644 index 000000000..f0acde68a --- /dev/null +++ b/kernel/arch/ia64/include/asm/swiotlb.h @@ -0,0 +1,17 @@ +#ifndef ASM_IA64__SWIOTLB_H +#define ASM_IA64__SWIOTLB_H + +#include +#include + +#ifdef CONFIG_SWIOTLB +extern int swiotlb; +extern void pci_swiotlb_init(void); +#else +#define swiotlb 0 +static inline void pci_swiotlb_init(void) +{ +} +#endif + +#endif /* ASM_IA64__SWIOTLB_H */ diff --git a/kernel/arch/ia64/include/asm/switch_to.h b/kernel/arch/ia64/include/asm/switch_to.h new file mode 100644 index 000000000..e8f3585e7 --- /dev/null +++ b/kernel/arch/ia64/include/asm/switch_to.h @@ -0,0 +1,79 @@ +/* + * Low-level task switching. This is based on information published in + * the Processor Abstraction Layer and the System Abstraction Layer + * manual. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + */ +#ifndef _ASM_IA64_SWITCH_TO_H +#define _ASM_IA64_SWITCH_TO_H + +#include + +struct task_struct; + +/* + * Context switch from one thread to another. If the two threads have + * different address spaces, schedule() has already taken care of + * switching to the new address space by calling switch_mm(). 
+ *
+ * Disabling access to the fph partition and the debug-register
+ * context switch MUST be done before calling ia64_switch_to() since a
+ * newly created thread returns directly to
+ * ia64_ret_from_syscall_clear_r8.
+ */
+extern struct task_struct *ia64_switch_to (void *next_task);
+
+extern void ia64_save_extra (struct task_struct *task);
+extern void ia64_load_extra (struct task_struct *task);
+
+/*
+ * PERFMON_IS_SYSWIDE() tests bit 0 of this CPU's pfm_syst_info when
+ * CONFIG_PERFMON is set, and is the constant 0 otherwise.
+ */
+#ifdef CONFIG_PERFMON
+  DECLARE_PER_CPU(unsigned long, pfm_syst_info);
+# define PERFMON_IS_SYSWIDE() (__this_cpu_read(pfm_syst_info) & 0x1)
+#else
+# define PERFMON_IS_SYSWIDE() (0)
+#endif
+
+/*
+ * True when task T has either of the DBG_VALID/PM_VALID thread flags set,
+ * or when PERFMON_IS_SYSWIDE() is true.  Note that '&' binds tighter than
+ * '||', so the flag test is evaluated first.
+ */
+#define IA64_HAS_EXTRA_STATE(t)							\
+	((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID)	\
+	 || PERFMON_IS_SYSWIDE())
+
+/*
+ * Common part of the context switch: save/load the "extra" state for the
+ * outgoing/incoming task when IA64_HAS_EXTRA_STATE() says so, set psr.dfh
+ * for NEXT unless NEXT is the local FPU owner, then call ia64_switch_to()
+ * and store its result in LAST.
+ */
+#define __switch_to(prev,next,last) do {					\
+	if (IA64_HAS_EXTRA_STATE(prev))						\
+		ia64_save_extra(prev);						\
+	if (IA64_HAS_EXTRA_STATE(next))						\
+		ia64_load_extra(next);						\
+	ia64_psr(task_pt_regs(next))->dfh = !ia64_is_local_fpu_owner(next);	\
+	(last) = ia64_switch_to((next));					\
+} while (0)
+
+#ifdef CONFIG_SMP
+/*
+ * In the SMP case, we save the fph state when context-switching away from a thread that
+ * modified fph.  This way, when the thread gets scheduled on another CPU, the CPU can
+ * pick up the state from task->thread.fph, avoiding the complication of having to fetch
+ * the latest fph state from another CPU.  In other words: eager save, lazy restore.
+ */ +# define switch_to(prev,next,last) do { \ + if (ia64_psr(task_pt_regs(prev))->mfh && ia64_is_local_fpu_owner(prev)) { \ + ia64_psr(task_pt_regs(prev))->mfh = 0; \ + (prev)->thread.flags |= IA64_THREAD_FPH_VALID; \ + __ia64_save_fpu((prev)->thread.fph); \ + } \ + __switch_to(prev, next, last); \ + /* "next" in old context is "current" in new context */ \ + if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) && \ + (task_cpu(current) != \ + task_thread_info(current)->last_cpu))) { \ + platform_migrate(current); \ + task_thread_info(current)->last_cpu = task_cpu(current); \ + } \ +} while (0) +#else +# define switch_to(prev,next,last) __switch_to(prev, next, last) +#endif + +#endif /* _ASM_IA64_SWITCH_TO_H */ diff --git a/kernel/arch/ia64/include/asm/syscall.h b/kernel/arch/ia64/include/asm/syscall.h new file mode 100644 index 000000000..1d0b875fe --- /dev/null +++ b/kernel/arch/ia64/include/asm/syscall.h @@ -0,0 +1,88 @@ +/* + * Access to user system call parameters and results + * + * Copyright (C) 2008 Intel Corp. Shaohua Li + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * See asm-generic/syscall.h for descriptions of what we must do here. + */ + +#ifndef _ASM_SYSCALL_H +#define _ASM_SYSCALL_H 1 + +#include +#include +#include + +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + if ((long)regs->cr_ifs < 0) /* Not a syscall */ + return -1; + + return regs->r15; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + /* do nothing */ +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r10 == -1 ? 
regs->r8:0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r8; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + if (error) { + /* error < 0, but ia64 uses > 0 return value */ + regs->r8 = -error; + regs->r10 = -1; + } else { + regs->r8 = val; + regs->r10 = 0; + } +} + +extern void ia64_syscall_get_set_arguments(struct task_struct *task, + struct pt_regs *regs, unsigned int i, unsigned int n, + unsigned long *args, int rw); +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + BUG_ON(i + n > 6); + + ia64_syscall_get_set_arguments(task, regs, i, n, args, 0); +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + BUG_ON(i + n > 6); + + ia64_syscall_get_set_arguments(task, regs, i, n, args, 1); +} + +static inline int syscall_get_arch(void) +{ + return AUDIT_ARCH_IA64; +} +#endif /* _ASM_SYSCALL_H */ diff --git a/kernel/arch/ia64/include/asm/termios.h b/kernel/arch/ia64/include/asm/termios.h new file mode 100644 index 000000000..a42f870ca --- /dev/null +++ b/kernel/arch/ia64/include/asm/termios.h @@ -0,0 +1,57 @@ +/* + * Modified 1999 + * David Mosberger-Tang , Hewlett-Packard Co + * + * 99/01/28 Added N_IRDA and N_SMSBLOCK + */ +#ifndef _ASM_IA64_TERMIOS_H +#define _ASM_IA64_TERMIOS_H + +#include + + +/* intr=^C quit=^\ erase=del kill=^U + eof=^D vtime=\0 vmin=\1 sxtc=\0 + start=^Q stop=^S susp=^Z eol=\0 + reprint=^R discard=^U werase=^W lnext=^V + eol2=\0 +*/ +#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" + +/* + * Translate a "termio" structure into a "termios". Ugh. 
+ */ +#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \ + unsigned short __tmp; \ + get_user(__tmp,&(termio)->x); \ + *(unsigned short *) &(termios)->x = __tmp; \ +} + +#define user_termio_to_kernel_termios(termios, termio) \ +({ \ + SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); \ + SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); \ + SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); \ + SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); \ + copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \ +}) + +/* + * Translate a "termios" structure into a "termio". Ugh. + */ +#define kernel_termios_to_user_termio(termio, termios) \ +({ \ + put_user((termios)->c_iflag, &(termio)->c_iflag); \ + put_user((termios)->c_oflag, &(termio)->c_oflag); \ + put_user((termios)->c_cflag, &(termio)->c_cflag); \ + put_user((termios)->c_lflag, &(termio)->c_lflag); \ + put_user((termios)->c_line, &(termio)->c_line); \ + copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \ +}) + +#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2)) +#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2)) +#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios)) +#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios)) + +#endif /* _ASM_IA64_TERMIOS_H */ diff --git a/kernel/arch/ia64/include/asm/thread_info.h b/kernel/arch/ia64/include/asm/thread_info.h new file mode 100644 index 000000000..aa995b67c --- /dev/null +++ b/kernel/arch/ia64/include/asm/thread_info.h @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +#ifndef _ASM_IA64_THREAD_INFO_H +#define _ASM_IA64_THREAD_INFO_H + +#ifndef ASM_OFFSETS_C +#include +#endif +#include +#include + +#ifndef __ASSEMBLY__ + +/* + * On IA-64, we want to keep the task structure and kernel stack together, so they can be + * mapped by a single TLB entry and so they can be addressed by the 
"current" pointer + * without having to do pointer masking. + */ +struct thread_info { + struct task_struct *task; /* XXX not really needed, except for dup_task_struct() */ + __u32 flags; /* thread_info flags (see TIF_*) */ + __u32 cpu; /* current CPU */ + __u32 last_cpu; /* Last CPU thread ran on */ + __u32 status; /* Thread synchronous flags */ + mm_segment_t addr_limit; /* user-level address space limit */ + int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + __u64 ac_stamp; + __u64 ac_leave; + __u64 ac_stime; + __u64 ac_utime; +#endif +}; + +#define THREAD_SIZE KERNEL_STACK_SIZE + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .flags = 0, \ + .cpu = 0, \ + .addr_limit = KERNEL_DS, \ + .preempt_count = INIT_PREEMPT_COUNT, \ +} + +#ifndef ASM_OFFSETS_C +/* how to get the thread information struct from C */ +#define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) +#define alloc_thread_info_node(tsk, node) \ + ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) +#define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) +#else +#define current_thread_info() ((struct thread_info *) 0) +#define alloc_thread_info_node(tsk, node) ((struct thread_info *) 0) +#define task_thread_info(tsk) ((struct thread_info *) 0) +#endif +#define free_thread_info(ti) /* nothing */ +#define task_stack_page(tsk) ((void *)(tsk)) + +#define __HAVE_THREAD_FUNCTIONS +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +#define setup_thread_stack(p, org) \ + *task_thread_info(p) = *task_thread_info(org); \ + task_thread_info(p)->ac_stime = 0; \ + task_thread_info(p)->ac_utime = 0; \ + task_thread_info(p)->task = (p); +#else +#define setup_thread_stack(p, org) \ + *task_thread_info(p) = *task_thread_info(org); \ + task_thread_info(p)->task = (p); +#endif +#define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET) + +#define 
alloc_task_struct_node(node) \ +({ \ + struct page *page = alloc_pages_node(node, GFP_KERNEL | __GFP_COMP, \ + KERNEL_STACK_SIZE_ORDER); \ + struct task_struct *ret = page ? page_address(page) : NULL; \ + \ + ret; \ +}) +#define free_task_struct(tsk) free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER) + +#endif /* !__ASSEMBLY */ + +/* + * thread information flags + * - these are process state flags that various assembly files may need to access + * - pending work-to-be-done flags are in least-significant 16 bits, other flags + * in top 16 bits + */ +#define TIF_SIGPENDING 0 /* signal pending */ +#define TIF_NEED_RESCHED 1 /* rescheduling necessary */ +#define TIF_SYSCALL_TRACE 2 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */ +#define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ +#define TIF_NOTIFY_RESUME 6 /* resumption notification requested */ +#define TIF_MEMDIE 17 /* is terminating due to OOM killer */ +#define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */ +#define TIF_DB_DISABLED 19 /* debug trap disabled for fsyscall */ +#define TIF_RESTORE_RSE 21 /* user RBS is newer than kernel RBS */ +#define TIF_POLLING_NRFLAG 22 /* idle is polling for TIF_NEED_RESCHED */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_MCA_INIT (1 << TIF_MCA_INIT) +#define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED) +#define _TIF_RESTORE_RSE (1 << TIF_RESTORE_RSE) +#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) + +/* "work to do on user-return" bits */ +#define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\ + 
_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE) +/* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ +#define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) + +#define TS_RESTORE_SIGMASK 2 /* restore signal mask in do_signal() */ + +#ifndef __ASSEMBLY__ +#define HAVE_SET_RESTORE_SIGMASK 1 +static inline void set_restore_sigmask(void) +{ + struct thread_info *ti = current_thread_info(); + ti->status |= TS_RESTORE_SIGMASK; + WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags)); +} +static inline void clear_restore_sigmask(void) +{ + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; +} +static inline bool test_restore_sigmask(void) +{ + return current_thread_info()->status & TS_RESTORE_SIGMASK; +} +static inline bool test_and_clear_restore_sigmask(void) +{ + struct thread_info *ti = current_thread_info(); + if (!(ti->status & TS_RESTORE_SIGMASK)) + return false; + ti->status &= ~TS_RESTORE_SIGMASK; + return true; +} +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_IA64_THREAD_INFO_H */ diff --git a/kernel/arch/ia64/include/asm/timex.h b/kernel/arch/ia64/include/asm/timex.h new file mode 100644 index 000000000..86c7db861 --- /dev/null +++ b/kernel/arch/ia64/include/asm/timex.h @@ -0,0 +1,45 @@ +#ifndef _ASM_IA64_TIMEX_H +#define _ASM_IA64_TIMEX_H + +/* + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +/* + * 2001/01/18 davidm Removed CLOCK_TICK_RATE. It makes no sense on IA-64. + * Also removed cacheflush_time as it's entirely unused. + */ + +#include +#include + +typedef unsigned long cycles_t; + +extern void (*ia64_udelay)(unsigned long usecs); + +/* + * For performance reasons, we don't want to define CLOCK_TICK_TRATE as + * local_cpu_data->itc_rate. Fortunately, we don't have to, either: according to George + * Anzinger, 1/CLOCK_TICK_RATE is taken as the resolution of the timer clock. The time + * calculation assumes that you will use enough of these so that your tick size <= 1/HZ. 
+ * If the calculation shows that your CLOCK_TICK_RATE can not supply exactly 1/HZ ticks,
+ * the actual value is calculated and used to update the wall clock each jiffy.  Setting
+ * the CLOCK_TICK_RATE to x*HZ ensures that the calculation will find no errors.  Hence we
+ * pick a multiple of HZ which gives us a (totally virtual) CLOCK_TICK_RATE of about
+ * 100MHz.
+ */
+#define CLOCK_TICK_RATE		(HZ * 100000UL)
+
+/* Return the current value of the AR.ITC register as a cycle count. */
+static inline cycles_t
+get_cycles (void)
+{
+	cycles_t ret;
+
+	ret = ia64_getreg(_IA64_REG_AR_ITC);
+	return ret;
+}
+
+extern void ia64_cpu_local_tick (void);
+extern unsigned long long ia64_native_sched_clock (void);
+
+#endif /* _ASM_IA64_TIMEX_H */
diff --git a/kernel/arch/ia64/include/asm/tlb.h b/kernel/arch/ia64/include/asm/tlb.h
new file mode 100644
index 000000000..39d64e0df
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/tlb.h
@@ -0,0 +1,283 @@
+#ifndef _ASM_IA64_TLB_H
+#define _ASM_IA64_TLB_H
+/*
+ * Based on .
+ *
+ * Copyright (C) 2002-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang
+ */
+/*
+ * Removing a translation from a page table (including TLB-shootdown) is a four-step
+ * procedure:
+ *
+ *	(1) Flush (virtual) caches --- ensures virtual memory is coherent with kernel memory
+ *	    (this is a no-op on ia64).
+ *	(2) Clear the relevant portions of the page-table
+ *	(3) Flush the TLBs --- ensures that stale content is gone from CPU TLBs
+ *	(4) Release the pages that were freed up in step (2).
+ *
+ * Note that the ordering of these steps is crucial to avoid races on MP machines.
+ *
+ * The Linux kernel defines several platform-specific hooks for TLB-shootdown.
When + * unmapping a portion of the virtual address space, these hooks are called according to + * the following template: + * + * tlb <- tlb_gather_mmu(mm, start, end); // start unmap for address space MM + * { + * for each vma that needs a shootdown do { + * tlb_start_vma(tlb, vma); + * for each page-table-entry PTE that needs to be removed do { + * tlb_remove_tlb_entry(tlb, pte, address); + * if (pte refers to a normal page) { + * tlb_remove_page(tlb, page); + * } + * } + * tlb_end_vma(tlb, vma); + * } + * } + * tlb_finish_mmu(tlb, start, end); // finish unmap for address space MM + */ +#include +#include +#include + +#include +#include +#include +#include + +/* + * If we can't allocate a page to make a big batch of page pointers + * to work on, then just handle a few from the on-stack structure. + */ +#define IA64_GATHER_BUNDLE 8 + +struct mmu_gather { + struct mm_struct *mm; + unsigned int nr; + unsigned int max; + unsigned char fullmm; /* non-zero means full mm flush */ + unsigned char need_flush; /* really unmapped some PTEs? 
*/ + unsigned long start, end; + unsigned long start_addr; + unsigned long end_addr; + struct page **pages; + struct page *local[IA64_GATHER_BUNDLE]; +}; + +struct ia64_tr_entry { + u64 ifa; + u64 itir; + u64 pte; + u64 rr; +}; /*Record for tr entry!*/ + +extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size); +extern void ia64_ptr_entry(u64 target_mask, int slot); + +extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; + +/* + region register macros +*/ +#define RR_TO_VE(val) (((val) >> 0) & 0x0000000000000001) +#define RR_VE(val) (((val) & 0x0000000000000001) << 0) +#define RR_VE_MASK 0x0000000000000001L +#define RR_VE_SHIFT 0 +#define RR_TO_PS(val) (((val) >> 2) & 0x000000000000003f) +#define RR_PS(val) (((val) & 0x000000000000003f) << 2) +#define RR_PS_MASK 0x00000000000000fcL +#define RR_PS_SHIFT 2 +#define RR_RID_MASK 0x00000000ffffff00L +#define RR_TO_RID(val) ((val >> 8) & 0xffffff) + +static inline void +ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned long end) +{ + tlb->need_flush = 0; + + if (tlb->fullmm) { + /* + * Tearing down the entire address space. This happens both as a result + * of exit() and execve(). The latter case necessitates the call to + * flush_tlb_mm() here. + */ + flush_tlb_mm(tlb->mm); + } else if (unlikely (end - start >= 1024*1024*1024*1024UL + || REGION_NUMBER(start) != REGION_NUMBER(end - 1))) + { + /* + * If we flush more than a tera-byte or across regions, we're probably + * better off just flushing the entire TLB(s). This should be very rare + * and is not worth optimizing for. + */ + flush_tlb_all(); + } else { + /* + * XXX fix me: flush_tlb_range() should take an mm pointer instead of a + * vma pointer. + */ + struct vm_area_struct vma; + + vma.vm_mm = tlb->mm; + /* flush the address range from the tlb: */ + flush_tlb_range(&vma, start, end); + /* now flush the virt. 
page-table area mapping the address range: */ + flush_tlb_range(&vma, ia64_thash(start), ia64_thash(end)); + } + +} + +static inline void +ia64_tlb_flush_mmu_free(struct mmu_gather *tlb) +{ + unsigned long i; + unsigned int nr; + + /* lastly, release the freed pages */ + nr = tlb->nr; + + tlb->nr = 0; + tlb->start_addr = ~0UL; + for (i = 0; i < nr; ++i) + free_page_and_swap_cache(tlb->pages[i]); +} + +/* + * Flush the TLB for address range START to END and, if not in fast mode, release the + * freed pages that where gathered up to this point. + */ +static inline void +ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end) +{ + if (!tlb->need_flush) + return; + ia64_tlb_flush_mmu_tlbonly(tlb, start, end); + ia64_tlb_flush_mmu_free(tlb); +} + +static inline void __tlb_alloc_page(struct mmu_gather *tlb) +{ + unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); + + if (addr) { + tlb->pages = (void *)addr; + tlb->max = PAGE_SIZE / sizeof(void *); + } +} + + +static inline void +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +{ + tlb->mm = mm; + tlb->max = ARRAY_SIZE(tlb->local); + tlb->pages = tlb->local; + tlb->nr = 0; + tlb->fullmm = !(start | (end+1)); + tlb->start = start; + tlb->end = end; + tlb->start_addr = ~0UL; +} + +/* + * Called at the end of the shootdown operation to free up any resources that were + * collected. + */ +static inline void +tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +{ + /* + * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and + * tlb->end_addr. + */ + ia64_tlb_flush_mmu(tlb, start, end); + + /* keep the page table cache within bounds */ + check_pgt_cache(); + + if (tlb->pages != tlb->local) + free_pages((unsigned long)tlb->pages, 0); +} + +/* + * Logically, this routine frees PAGE. 
On MP machines, the actual freeing of the page
+ * must be delayed until after the TLB has been flushed (see comments at the beginning of
+ * this file).
+ *
+ * Returns the number of free slots left in the batch; 0 means the caller
+ * must flush before queueing another page (see tlb_remove_page()).
+ */
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	tlb->need_flush = 1;
+
+	/* first page of a batch still on the stack: try to get a bigger batch */
+	if (!tlb->nr && tlb->pages == tlb->local)
+		__tlb_alloc_page(tlb);
+
+	tlb->pages[tlb->nr++] = page;
+	VM_BUG_ON(tlb->nr > tlb->max);
+
+	return tlb->max - tlb->nr;
+}
+
+/* Flush the TLB for the address range recorded so far, without freeing pages. */
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+	ia64_tlb_flush_mmu_tlbonly(tlb, tlb->start_addr, tlb->end_addr);
+}
+
+/* Free the gathered pages without flushing. */
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+	ia64_tlb_flush_mmu_free(tlb);
+}
+
+/* Flush the recorded address range and free the gathered pages. */
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	ia64_tlb_flush_mmu(tlb, tlb->start_addr, tlb->end_addr);
+}
+
+/* Queue PAGE for freeing; flush right away once the batch has filled up. */
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	if (!__tlb_remove_page(tlb, page))
+		tlb_flush_mmu(tlb);
+}
+
+/*
+ * Remove TLB entry for PTE mapped at virtual address ADDRESS.  This is called for any
+ * PTE, not just those pointing to (normal) physical memory.
+ */ +static inline void +__tlb_remove_tlb_entry (struct mmu_gather *tlb, pte_t *ptep, unsigned long address) +{ + if (tlb->start_addr == ~0UL) + tlb->start_addr = address; + tlb->end_addr = address + PAGE_SIZE; +} + +#define tlb_migrate_finish(mm) platform_tlb_migrate_finish(mm) + +#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) + +#define tlb_remove_tlb_entry(tlb, ptep, addr) \ +do { \ + tlb->need_flush = 1; \ + __tlb_remove_tlb_entry(tlb, ptep, addr); \ +} while (0) + +#define pte_free_tlb(tlb, ptep, address) \ +do { \ + tlb->need_flush = 1; \ + __pte_free_tlb(tlb, ptep, address); \ +} while (0) + +#define pmd_free_tlb(tlb, ptep, address) \ +do { \ + tlb->need_flush = 1; \ + __pmd_free_tlb(tlb, ptep, address); \ +} while (0) + +#define pud_free_tlb(tlb, pudp, address) \ +do { \ + tlb->need_flush = 1; \ + __pud_free_tlb(tlb, pudp, address); \ +} while (0) + +#endif /* _ASM_IA64_TLB_H */ diff --git a/kernel/arch/ia64/include/asm/tlbflush.h b/kernel/arch/ia64/include/asm/tlbflush.h new file mode 100644 index 000000000..3be25dfed --- /dev/null +++ b/kernel/arch/ia64/include/asm/tlbflush.h @@ -0,0 +1,102 @@ +#ifndef _ASM_IA64_TLBFLUSH_H +#define _ASM_IA64_TLBFLUSH_H + +/* + * Copyright (C) 2002 Hewlett-Packard Co + * David Mosberger-Tang + */ + + +#include + +#include +#include +#include + +/* + * Now for some TLB flushing routines. This is the kind of stuff that + * can be very expensive, so try to avoid them whenever possible. + */ +extern void setup_ptcg_sem(int max_purges, int from_palo); + +/* + * Flush everything (kernel mapping may also have changed due to + * vmalloc/vfree). 
+ */
+extern void local_flush_tlb_all (void);
+
+/*
+ * On SMP flush_tlb_all() is smp_flush_tlb_all(); on UP both flush_tlb_all()
+ * and smp_flush_tlb_cpumask() collapse to local_flush_tlb_all().
+ */
+#ifdef CONFIG_SMP
+  extern void smp_flush_tlb_all (void);
+  extern void smp_flush_tlb_mm (struct mm_struct *mm);
+  extern void smp_flush_tlb_cpumask (cpumask_t xcpumask);
+# define flush_tlb_all()	smp_flush_tlb_all()
+#else
+# define flush_tlb_all()	local_flush_tlb_all()
+# define smp_flush_tlb_cpumask(m) local_flush_tlb_all()
+#endif
+
+/* Re-activate the context of MM if it is the one currently active on this CPU. */
+static inline void
+local_finish_flush_tlb_mm (struct mm_struct *mm)
+{
+	if (mm == current->active_mm)
+		activate_context(mm);
+}
+
+/*
+ * Flush a specified user mapping.  This is called, e.g., as a result of fork() and
+ * exit().  fork() ends up here because the copy-on-write mechanism needs to write-protect
+ * the PTEs of the parent task.
+ */
+static inline void
+flush_tlb_mm (struct mm_struct *mm)
+{
+	if (!mm)
+		return;
+
+	/* record the old context number in the flush map and drop it from the mm */
+	set_bit(mm->context, ia64_ctx.flushmap);
+	mm->context = 0;
+
+	if (atomic_read(&mm->mm_users) == 0)
+		return;		/* happens as a result of exit_mmap() */
+
+#ifdef CONFIG_SMP
+	smp_flush_tlb_mm(mm);
+#else
+	local_finish_flush_tlb_mm(mm);
+#endif
+}
+
+extern void flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end);
+
+/*
+ * Page-granular tlb flush.
+ *
+ * SMP: flush the one-page range containing ADDR via flush_tlb_range().
+ * UP: purge the page locally if the vma's mm is the active one, otherwise
+ * just reset the mm's context to 0.
+ */
+static inline void
+flush_tlb_page (struct vm_area_struct *vma, unsigned long addr)
+{
+#ifdef CONFIG_SMP
+	flush_tlb_range(vma, (addr & PAGE_MASK), (addr & PAGE_MASK) + PAGE_SIZE);
+#else
+	if (vma->vm_mm == current->active_mm)
+		ia64_ptcl(addr, (PAGE_SHIFT << 2));
+	else
+		vma->vm_mm->context = 0;
+#endif
+}
+
+/*
+ * Flush the local TLB. Invoked from another cpu using an IPI.
+ */
+#ifdef CONFIG_SMP
+void smp_local_flush_tlb(void);
+#else
+#define smp_local_flush_tlb()
+#endif
+
+/*
+ * Flush a kernel address range.  START and END are currently ignored: the
+ * whole TLB is flushed.
+ */
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	flush_tlb_all();	/* XXX fix me */
+}
+
+#endif /* _ASM_IA64_TLBFLUSH_H */
diff --git a/kernel/arch/ia64/include/asm/topology.h b/kernel/arch/ia64/include/asm/topology.h
new file mode 100644
index 000000000..6437ca21f
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/topology.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2002, Erich Focht, NEC
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef _ASM_IA64_TOPOLOGY_H
+#define _ASM_IA64_TOPOLOGY_H
+
+#include
+#include
+#include
+
+#ifdef CONFIG_NUMA
+
+/* Nodes w/o CPUs are preferred for memory allocations, see build_zonelists */
+#define PENALTY_FOR_NODE_WITH_CPUS 255
+
+/*
+ * Nodes within this distance are eligible for reclaim by zone_reclaim() when
+ * zone_reclaim_mode is enabled.
+ */
+#define RECLAIM_DISTANCE 15
+
+/*
+ * Returns a bitmask of CPUs on Node 'node'.
+ * A node of -1 selects cpu_all_mask.
+ */
+#define cpumask_of_node(node) ((node) == -1 ?				\
+			       cpu_all_mask :				\
+			       &node_to_cpu_mask[node])
+
+/*
+ * Returns the number of the node containing Node 'nid'.
+ * Not implemented here. Multi-level hierarchies detected with
+ * the help of node_distance().
+ */ +#define parent_node(nid) (nid) + +/* + * Determines the node for a given pci bus + */ +#define pcibus_to_node(bus) PCI_CONTROLLER(bus)->node + +void build_cpu_to_node_map(void); + +#endif /* CONFIG_NUMA */ + +#ifdef CONFIG_SMP +#define topology_physical_package_id(cpu) (cpu_data(cpu)->socket_id) +#define topology_core_id(cpu) (cpu_data(cpu)->core_id) +#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) +#endif + +extern void arch_fix_phys_package_id(int num, u32 slot); + +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ + cpu_all_mask : \ + cpumask_of_node(pcibus_to_node(bus))) + +#include + +#endif /* _ASM_IA64_TOPOLOGY_H */ diff --git a/kernel/arch/ia64/include/asm/types.h b/kernel/arch/ia64/include/asm/types.h new file mode 100644 index 000000000..4c351b169 --- /dev/null +++ b/kernel/arch/ia64/include/asm/types.h @@ -0,0 +1,31 @@ +/* + * This file is never included by application software unless explicitly + * requested (e.g., via linux/types.h) in which case the application is + * Linux specific so (user-) name space pollution is not a major issue. + * However, for interoperability, libraries still need to be careful to + * avoid naming clashes. + * + * Based on . 
+ *
+ * Modified 1998-2000, 2002
+ *	David Mosberger-Tang , Hewlett-Packard Co
+ */
+#ifndef _ASM_IA64_TYPES_H
+#define _ASM_IA64_TYPES_H
+
+#include
+#include
+
+#ifdef __ASSEMBLY__
+#else
+/*
+ * These aren't exported outside the kernel to avoid name space clashes
+ */
+
+/* NOTE(review): presumably a function descriptor (entry address + gp) -- confirm. */
+struct fnptr {
+	unsigned long ip;
+	unsigned long gp;
+};
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_IA64_TYPES_H */
diff --git a/kernel/arch/ia64/include/asm/uaccess.h b/kernel/arch/ia64/include/asm/uaccess.h
new file mode 100644
index 000000000..4f3fb6ccb
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/uaccess.h
@@ -0,0 +1,402 @@
+#ifndef _ASM_IA64_UACCESS_H
+#define _ASM_IA64_UACCESS_H
+
+/*
+ * This file defines various macros to transfer memory areas across
+ * the user/kernel boundary.  This needs to be done carefully because
+ * this code is executed in kernel mode and uses user-specified
+ * addresses.  Thus, we need to be careful not to let the user
+ * trick us into accessing kernel memory that would normally be
+ * inaccessible.  This code is also fairly performance sensitive,
+ * so we want to spend as little time doing safety checks as
+ * possible.
+ *
+ * To make matters a bit more interesting, these macros are sometimes also
+ * called from within the kernel itself, in which case the address
+ * validity check must be skipped.  The get_fs() macro tells us what
+ * to do: if get_fs()==USER_DS, checking is performed, if
+ * get_fs()==KERNEL_DS, checking is bypassed.
+ *
+ * Note that even if the memory area specified by the user is in a
+ * valid address range, it is still possible that we'll get a page
+ * fault while accessing it.  This is handled by filling out an
+ * exception handler fixup entry for each instruction that has the
+ * potential to fault.
When such a fault occurs, the page fault
+ * handler checks to see whether the faulting instruction has a fixup
+ * associated and, if so, sets r8 to -EFAULT and clears r9 to 0 and
+ * then resumes execution at the continuation point.
+ *
+ * Based on .
+ *
+ * Copyright (C) 1998, 1999, 2001-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+/*
+ * For historical reasons, the following macros are grossly misnamed:
+ */
+#define KERNEL_DS	((mm_segment_t) { ~0UL })		/* cf. access_ok() */
+#define USER_DS		((mm_segment_t) { TASK_SIZE-1 })	/* cf. access_ok() */
+
+#define VERIFY_READ	0
+#define VERIFY_WRITE	1
+
+/* The current address limit lives in thread_info->addr_limit. */
+#define get_ds()  (KERNEL_DS)
+#define get_fs()  (current_thread_info()->addr_limit)
+#define set_fs(x) (current_thread_info()->addr_limit = (x))
+
+#define segment_eq(a, b)	((a).seg == (b).seg)
+
+/*
+ * When accessing user memory, we need to make sure the entire area really is in
+ * user-level space.  In order to do this efficiently, we make sure that the page at
+ * address TASK_SIZE is never valid.  We also need to make sure that the address doesn't
+ * point inside the virtually mapped linear page table.
+ *
+ * Only the start address is tested: the SIZE argument of __access_ok() and
+ * the TYPE argument of access_ok() are not used by the expansions.
+ */
+#define __access_ok(addr, size, segment)						\
+({											\
+	__chk_user_ptr(addr);								\
+	(likely((unsigned long) (addr) <= (segment).seg)				\
+	 && ((segment).seg == KERNEL_DS.seg						\
+	     || likely(REGION_OFFSET((unsigned long) (addr)) < RGN_MAP_LIMIT)));	\
+})
+#define access_ok(type, addr, size)	__access_ok((addr), (size), get_fs())
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ * + * Careful to not + * (a) re-use the arguments for side effects (sizeof/typeof is ok) + * (b) require any knowledge of processes at this stage + */ +#define put_user(x, ptr) __put_user_check((__typeof__(*(ptr))) (x), (ptr), sizeof(*(ptr)), get_fs()) +#define get_user(x, ptr) __get_user_check((x), (ptr), sizeof(*(ptr)), get_fs()) + +/* + * The "__xxx" versions do not do address space checking, useful when + * doing multiple accesses to the same area (the programmer has to do the + * checks by hand with "access_ok()") + */ +#define __put_user(x, ptr) __put_user_nocheck((__typeof__(*(ptr))) (x), (ptr), sizeof(*(ptr))) +#define __get_user(x, ptr) __get_user_nocheck((x), (ptr), sizeof(*(ptr))) + +extern long __put_user_unaligned_unknown (void); + +#define __put_user_unaligned(x, ptr) \ +({ \ + long __ret; \ + switch (sizeof(*(ptr))) { \ + case 1: __ret = __put_user((x), (ptr)); break; \ + case 2: __ret = (__put_user((x), (u8 __user *)(ptr))) \ + | (__put_user((x) >> 8, ((u8 __user *)(ptr) + 1))); break; \ + case 4: __ret = (__put_user((x), (u16 __user *)(ptr))) \ + | (__put_user((x) >> 16, ((u16 __user *)(ptr) + 1))); break; \ + case 8: __ret = (__put_user((x), (u32 __user *)(ptr))) \ + | (__put_user((x) >> 32, ((u32 __user *)(ptr) + 1))); break; \ + default: __ret = __put_user_unaligned_unknown(); \ + } \ + __ret; \ +}) + +extern long __get_user_unaligned_unknown (void); + +#define __get_user_unaligned(x, ptr) \ +({ \ + long __ret; \ + switch (sizeof(*(ptr))) { \ + case 1: __ret = __get_user((x), (ptr)); break; \ + case 2: __ret = (__get_user((x), (u8 __user *)(ptr))) \ + | (__get_user((x) >> 8, ((u8 __user *)(ptr) + 1))); break; \ + case 4: __ret = (__get_user((x), (u16 __user *)(ptr))) \ + | (__get_user((x) >> 16, ((u16 __user *)(ptr) + 1))); break; \ + case 8: __ret = (__get_user((x), (u32 __user *)(ptr))) \ + | (__get_user((x) >> 32, ((u32 __user *)(ptr) + 1))); break; \ + default: __ret = __get_user_unaligned_unknown(); \ + } \ + __ret; \ +}) + +#ifdef 
ASM_SUPPORTED
+  struct __large_struct { unsigned long buf[100]; };
+# define __m(x) (*(struct __large_struct __user *)(x))	/* used as the "m" operand of the asm below */
+
+/* We need to declare the __ex_table section before we can use it in .xdata.  */
+asm (".section \"__ex_table\", \"a\"\n\t.previous");
+
+# define __get_user_size(val, addr, n, err)							\
+do {												\
+	register long __gu_r8 asm ("r8") = 0;							\
+	register long __gu_r9 asm ("r9");							\
+	asm ("\n[1:]\tld"#n" %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n"	\
+	     "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n"						\
+	     "[1:]"										\
+	     : "=r"(__gu_r9), "=r"(__gu_r8) : "m"(__m(addr)), "1"(__gu_r8));			\
+	(err) = __gu_r8;	/* still the initial 0 unless the exception handler overwrote r8 */	\
+	(val) = __gu_r9;									\
+} while (0)
+
+/*
+ * The "__put_user_size()" macro tells gcc it reads from memory instead of writing it.  This
+ * is because it does not write to any memory gcc knows about, so there are no aliasing
+ * issues.
+ */
+# define __put_user_size(val, addr, n, err)							\
+do {												\
+	register long __pu_r8 asm ("r8") = 0;							\
+	asm volatile ("\n[1:]\tst"#n" %1=%r2%P1\t// %0 gets overwritten by exception handler\n"	\
+		      "\t.xdata4 \"__ex_table\", 1b-., 1f-.\n"					\
+		      "[1:]"									\
+		      : "=r"(__pu_r8) : "m"(__m(addr)), "rO"(val), "0"(__pu_r8));		\
+	(err) = __pu_r8;	/* still the initial 0 unless the exception handler overwrote r8 */	\
+} while (0)
+
+#else /* !ASM_SUPPORTED */
+# define RELOC_TYPE	2	/* ip-rel */
+# define __get_user_size(val, addr, n, err)				\
+do {									\
+	__ld_user("__ex_table", (unsigned long) addr, n, RELOC_TYPE);	\
+	(err) = ia64_getreg(_IA64_REG_R8);	/* status comes back in r8, value in r9, as in the asm variant */	\
+	(val) = ia64_getreg(_IA64_REG_R9);				\
+} while (0)
+# define __put_user_size(val, addr, n, err)				\
+do {									\
+	__st_user("__ex_table", (unsigned long) addr, n, RELOC_TYPE,	\
+		  (__force unsigned long) (val));			\
+	(err) = ia64_getreg(_IA64_REG_R8);				\
+} while (0)
+#endif /* !ASM_SUPPORTED */
+
+extern void __get_user_unknown (void);	/* referenced by __do_get_user() for sizes other than 1, 2, 4, 8 */
+
+/*
+ * Evaluating arguments X, PTR, SIZE, and SEGMENT may involve subroutine-calls, which
+ * could clobber r8 and r9 (among others).  Thus, be careful not to evaluate them while
+ * using r8/r9.
+ */ +#define __do_get_user(check, x, ptr, size, segment) \ +({ \ + const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ + __typeof__ (size) __gu_size = (size); \ + long __gu_err = -EFAULT; \ + unsigned long __gu_val = 0; \ + if (!check || __access_ok(__gu_ptr, size, segment)) \ + switch (__gu_size) { \ + case 1: __get_user_size(__gu_val, __gu_ptr, 1, __gu_err); break; \ + case 2: __get_user_size(__gu_val, __gu_ptr, 2, __gu_err); break; \ + case 4: __get_user_size(__gu_val, __gu_ptr, 4, __gu_err); break; \ + case 8: __get_user_size(__gu_val, __gu_ptr, 8, __gu_err); break; \ + default: __get_user_unknown(); break; \ + } \ + (x) = (__force __typeof__(*(__gu_ptr))) __gu_val; \ + __gu_err; \ +}) + +#define __get_user_nocheck(x, ptr, size) __do_get_user(0, x, ptr, size, KERNEL_DS) +#define __get_user_check(x, ptr, size, segment) __do_get_user(1, x, ptr, size, segment) + +extern void __put_user_unknown (void); + +/* + * Evaluating arguments X, PTR, SIZE, and SEGMENT may involve subroutine-calls, which + * could clobber r8 (among others). Thus, be careful not to evaluate them while using r8. 
+ */
+#define __do_put_user(check, x, ptr, size, segment)					\
+({											\
+	__typeof__ (x) __pu_x = (x);							\
+	__typeof__ (*(ptr)) __user *__pu_ptr = (ptr);					\
+	__typeof__ (size) __pu_size = (size);						\
+	long __pu_err = -EFAULT;	/* result when the check fails or the size is unsupported */	\
+											\
+	if (!check || __access_ok(__pu_ptr, __pu_size, segment))			\
+		switch (__pu_size) {							\
+		      case 1: __put_user_size(__pu_x, __pu_ptr, 1, __pu_err); break;	\
+		      case 2: __put_user_size(__pu_x, __pu_ptr, 2, __pu_err); break;	\
+		      case 4: __put_user_size(__pu_x, __pu_ptr, 4, __pu_err); break;	\
+		      case 8: __put_user_size(__pu_x, __pu_ptr, 8, __pu_err); break;	\
+		      default: __put_user_unknown(); break;				\
+		}									\
+	__pu_err;									\
+})
+
+#define __put_user_nocheck(x, ptr, size)	__do_put_user(0, x, ptr, size, KERNEL_DS)
+#define __put_user_check(x, ptr, size, segment)	__do_put_user(1, x, ptr, size, segment)
+
+/*
+ * Complex access routines.  copy_to_user() and copy_from_user() evaluate to
+ * whatever __copy_user() returns, or to N unchanged when __access_ok() rejects
+ * the user pointer (presumably the number of bytes left uncopied - confirm
+ * against the __copy_user() implementation).
+ */
+extern unsigned long __must_check __copy_user (void __user *to, const void __user *from,
+					       unsigned long count);
+
+static inline unsigned long
+__copy_to_user (void __user *to, const void *from, unsigned long count)
+{
+	return __copy_user(to, (__force void __user *) from, count);
+}
+
+static inline unsigned long
+__copy_from_user (void *to, const void __user *from, unsigned long count)
+{
+	return __copy_user((__force void __user *) to, from, count);
+}
+
+#define __copy_to_user_inatomic		__copy_to_user
+#define __copy_from_user_inatomic	__copy_from_user
+#define copy_to_user(to, from, n)							\
+({											\
+	void __user *__cu_to = (to);							\
+	const void *__cu_from = (from);							\
+	long __cu_len = (n);								\
+											\
+	if (__access_ok(__cu_to, __cu_len, get_fs()))					\
+		__cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len);	\
+	__cu_len;									\
+})
+
+#define copy_from_user(to, from, n)							\
+({											\
+	void *__cu_to = (to);								\
+	const void __user *__cu_from = (from);						\
+	long __cu_len = (n);								\
+											\
+	__chk_user_ptr(__cu_from);							\
+	if (__access_ok(__cu_from, __cu_len, get_fs()))					\
+		__cu_len = __copy_user((__force void __user *) __cu_to,
__cu_from, __cu_len); \ + __cu_len; \ +}) + +#define __copy_in_user(to, from, size) __copy_user((to), (from), (size)) + +static inline unsigned long +copy_in_user (void __user *to, const void __user *from, unsigned long n) +{ + if (likely(access_ok(VERIFY_READ, from, n) && access_ok(VERIFY_WRITE, to, n))) + n = __copy_user(to, from, n); + return n; +} + +extern unsigned long __do_clear_user (void __user *, unsigned long); + +#define __clear_user(to, n) __do_clear_user(to, n) + +#define clear_user(to, n) \ +({ \ + unsigned long __cu_len = (n); \ + if (__access_ok(to, __cu_len, get_fs())) \ + __cu_len = __do_clear_user(to, __cu_len); \ + __cu_len; \ +}) + + +/* + * Returns: -EFAULT if exception before terminator, N if the entire buffer filled, else + * strlen. + */ +extern long __must_check __strncpy_from_user (char *to, const char __user *from, long to_len); + +#define strncpy_from_user(to, from, n) \ +({ \ + const char __user * __sfu_from = (from); \ + long __sfu_ret = -EFAULT; \ + if (__access_ok(__sfu_from, 0, get_fs())) \ + __sfu_ret = __strncpy_from_user((to), __sfu_from, (n)); \ + __sfu_ret; \ +}) + +/* Returns: 0 if bad, string length+1 (memory size) of string if ok */ +extern unsigned long __strlen_user (const char __user *); + +#define strlen_user(str) \ +({ \ + const char __user *__su_str = (str); \ + unsigned long __su_ret = 0; \ + if (__access_ok(__su_str, 0, get_fs())) \ + __su_ret = __strlen_user(__su_str); \ + __su_ret; \ +}) + +/* + * Returns: 0 if exception before NUL or reaching the supplied limit + * (N), a value greater than N if the limit would be exceeded, else + * strlen. 
+ */
+extern unsigned long __strnlen_user (const char __user *, long);
+
+#define strnlen_user(str, len)					\
+({								\
+	const char __user *__su_str = (str);			\
+	unsigned long __su_ret = 0;	/* result when __access_ok() rejects STR */	\
+	if (__access_ok(__su_str, 0, get_fs()))			\
+		__su_ret = __strnlen_user(__su_str, len);	\
+	__su_ret;						\
+})
+
+/* Generic code can't deal with the location-relative format that we use for compactness.  */
+#define ARCH_HAS_SORT_EXTABLE
+#define ARCH_HAS_SEARCH_EXTABLE
+
+struct exception_table_entry {
+	int addr;	/* location-relative address of insn this fixup is for */
+	int cont;	/* location-relative continuation addr.; if bit 2 is set, r9 is set to 0 */
+};
+
+extern void ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e);
+extern const struct exception_table_entry *search_exception_tables (unsigned long addr);
+
+static inline int
+ia64_done_with_exception (struct pt_regs *regs)
+{
+	const struct exception_table_entry *e;
+	e = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);	/* look up the faulting location */
+	if (e) {
+		ia64_handle_exception(regs, e);
+		return 1;	/* an entry was found and handed to ia64_handle_exception() */
+	}
+	return 0;	/* no exception-table entry for this location */
+}
+
+#define ARCH_HAS_TRANSLATE_MEM_PTR	1
+static __inline__ void *
+xlate_dev_mem_ptr(phys_addr_t p)
+{
+	struct page *page;
+	void *ptr;
+
+	page = pfn_to_page(p >> PAGE_SHIFT);
+	if (PageUncached(page))
+		ptr = (void *)p + __IA64_UNCACHED_OFFSET;	/* page is marked uncached: use the uncached offset */
+	else
+		ptr = __va(p);	/* otherwise the regular __va() mapping */
+
+	return ptr;
+}
+
+/*
+ * Convert a virtual cached kernel memory pointer to an uncached pointer
+ * if its page is marked uncached; otherwise the pointer is returned as is.
+ */
+static __inline__ void *
+xlate_dev_kmem_ptr(void *p)
+{
+	struct page *page;
+	void *ptr;
+
+	page = virt_to_page((unsigned long)p);
+	if (PageUncached(page))
+		ptr = (void *)__pa(p) + __IA64_UNCACHED_OFFSET;
+	else
+		ptr = p;
+
+	return ptr;
+}
+
+#endif /* _ASM_IA64_UACCESS_H */
diff --git a/kernel/arch/ia64/include/asm/unaligned.h b/kernel/arch/ia64/include/asm/unaligned.h
new file mode 100644
index 000000000..7bddc7f58
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/unaligned.h
@@ -0,0 +1,11 @@
+#ifndef
_ASM_IA64_UNALIGNED_H
+#define _ASM_IA64_UNALIGNED_H
+
+#include
+#include
+#include
+
+#define get_unaligned	__get_unaligned_le	/* generic name maps onto the _le helper */
+#define put_unaligned	__put_unaligned_le	/* likewise */
+
+#endif /* _ASM_IA64_UNALIGNED_H */
diff --git a/kernel/arch/ia64/include/asm/uncached.h b/kernel/arch/ia64/include/asm/uncached.h
new file mode 100644
index 000000000..13d7e65ca
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/uncached.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2001-2008 Silicon Graphics, Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * Prototypes for the uncached page allocator (this header has no include guard; it only declares functions)
+ */
+
+extern unsigned long uncached_alloc_page(int starting_nid, int n_pages);
+extern void uncached_free_page(unsigned long uc_addr, int n_pages);
diff --git a/kernel/arch/ia64/include/asm/unistd.h b/kernel/arch/ia64/include/asm/unistd.h
new file mode 100644
index 000000000..95c39b95e
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/unistd.h
@@ -0,0 +1,50 @@
+/*
+ * IA-64 Linux syscall numbers and inline-functions.
+ *
+ * Copyright (C) 1998-2005 Hewlett-Packard Co
+ *	David Mosberger-Tang
+ */
+#ifndef _ASM_IA64_UNISTD_H
+#define _ASM_IA64_UNISTD_H
+
+#include
+
+
+
+#define NR_syscalls			319 /* length of syscall table */
+
+/*
+ * The following defines stop scripts/checksyscalls.sh from complaining about
+ * unimplemented system calls.  Glibc provides for each of these by using
+ * more modern equivalent system calls.
+ */
+#define __IGNORE_fork		/* clone() */
+#define __IGNORE_time		/* gettimeofday() */
+#define __IGNORE_alarm		/* setitimer(ITIMER_REAL, ...
*/
+#define __IGNORE_pause		/* rt_sigprocmask(), rt_sigsuspend() */
+#define __IGNORE_utime		/* utimes() */
+#define __IGNORE_getpgrp	/* getpgid() */
+#define __IGNORE_vfork		/* clone() */
+#define __IGNORE_umount2	/* umount() */
+
+#if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
+
+#include
+#include
+#include
+
+extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr);
+
+asmlinkage unsigned long sys_mmap(
+				unsigned long addr, unsigned long len,
+				int prot, int flags,
+				int fd, long off);
+asmlinkage unsigned long sys_mmap2(
+				unsigned long addr, unsigned long len,
+				int prot, int flags,
+				int fd, long pgoff);
+struct pt_regs;	/* forward declaration */
+asmlinkage long sys_ia64_pipe(void);
+
+#endif /* !__ASSEMBLY__ && !ASSEMBLER */
+#endif /* _ASM_IA64_UNISTD_H */
diff --git a/kernel/arch/ia64/include/asm/unwind.h b/kernel/arch/ia64/include/asm/unwind.h
new file mode 100644
index 000000000..1af3875f1
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/unwind.h
@@ -0,0 +1,233 @@
+#ifndef _ASM_IA64_UNWIND_H
+#define _ASM_IA64_UNWIND_H
+
+/*
+ * Copyright (C) 1999-2000, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang
+ *
+ * A simple API for unwinding kernel stacks.  This is used for
+ * debugging and error reporting purposes.  The kernel doesn't need
+ * full-blown stack unwinding with all the bells and whistles, so there
+ * is not much point in implementing the full IA-64 unwind API (though
+ * it would of course be possible to implement the kernel API on top
+ * of it).
+ */
+
+struct task_struct;	/* forward declaration */
+struct switch_stack;	/* forward declaration */
+
+enum unw_application_register {
+	UNW_AR_BSP,
+	UNW_AR_BSPSTORE,
+	UNW_AR_PFS,
+	UNW_AR_RNAT,
+	UNW_AR_UNAT,
+	UNW_AR_LC,
+	UNW_AR_EC,
+	UNW_AR_FPSR,
+	UNW_AR_RSC,
+	UNW_AR_CCV,
+	UNW_AR_CSD,
+	UNW_AR_SSD
+};
+
+/*
+ * The following declarations are private to the unwind
+ * implementation:
+ */
+
+struct unw_stack {
+	unsigned long limit;
+	unsigned long top;
+};
+
+#define UNW_FLAG_INTERRUPT_FRAME	(1UL << 0)	/* bit in unw_frame_info.flags, tested by unw_is_intr_frame() */
+
+/*
+ * No user of this module should ever access this structure directly
+ * as it is subject to change.  It is declared here solely so we can
+ * use automatic variables.
+ */
+struct unw_frame_info {
+	struct unw_stack regstk;
+	struct unw_stack memstk;
+	unsigned int flags;	/* UNW_FLAG_* bits */
+	short hint;
+	short prev_script;
+
+	/* current frame info: */
+	unsigned long bsp;		/* backing store pointer value */
+	unsigned long sp;		/* stack pointer value */
+	unsigned long psp;		/* previous sp value */
+	unsigned long ip;		/* instruction pointer value */
+	unsigned long pr;		/* current predicate values */
+	unsigned long *cfm_loc;		/* cfm save location (or NULL) */
+	unsigned long pt;		/* struct pt_regs location */
+
+	struct task_struct *task;
+	struct switch_stack *sw;
+
+	/* preserved state: */
+	unsigned long *bsp_loc;		/* previous bsp save location */
+	unsigned long *bspstore_loc;
+	unsigned long *pfs_loc;
+	unsigned long *rnat_loc;
+	unsigned long *rp_loc;		/* may be NULL: unw_get_rp() checks before dereferencing */
+	unsigned long *pri_unat_loc;
+	unsigned long *unat_loc;
+	unsigned long *pr_loc;
+	unsigned long *lc_loc;
+	unsigned long *fpsr_loc;
+	struct unw_ireg {
+		unsigned long *loc;
+		struct unw_ireg_nat {
+			unsigned long type : 3;		/* enum unw_nat_type */
+			signed long off : 61;		/* NaT word is at loc+nat.off */
+		} nat;
+	} r4, r5, r6, r7;
+	unsigned long *b1_loc, *b2_loc, *b3_loc, *b4_loc, *b5_loc;
+	struct ia64_fpreg *f2_loc, *f3_loc, *f4_loc, *f5_loc, *fr_loc[16];
+};
+
+/*
+ * The official API follows below:
+ */
+
+struct unw_table_entry {
+	u64 start_offset;
+	u64 end_offset;
+	u64 info_offset;
+};
+
+/*
+ * Initialize unwind support.
+ */
+extern void unw_init (void);
+
+extern void *unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp,
+				   const void *table_start, const void *table_end);
+
+extern void unw_remove_unwind_table (void *handle);
+
+/*
+ * Prepare to unwind blocked task t.
+ */
+extern void unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t);
+
+extern void unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t,
+				 struct switch_stack *sw);
+
+/*
+ * Prepare to unwind the currently running thread.
+ */
+extern void unw_init_running (void (*callback)(struct unw_frame_info *info, void *arg), void *arg);
+
+/*
+ * Unwind to the previous frame.  Returns 0 if successful, negative
+ * number in case of an error.
+ */
+extern int unw_unwind (struct unw_frame_info *info);
+
+/*
+ * Unwind until the return pointer is in user-land (or until an error
+ * occurs).  Returns 0 if successful, negative number in case of
+ * error.
+ */
+extern int unw_unwind_to_user (struct unw_frame_info *info);
+
+/* True iff UNW_FLAG_INTERRUPT_FRAME is set in the flags of INFO. */
+#define unw_is_intr_frame(info)	(((info)->flags & UNW_FLAG_INTERRUPT_FRAME) != 0)
+
+/* Copy the frame's instruction pointer into *VALP; always returns 0. */
+static inline int
+unw_get_ip (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = info->ip;
+	return 0;
+}
+
+/* Copy the frame's stack pointer into *VALP; always returns 0. */
+static inline int
+unw_get_sp (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = info->sp;
+	return 0;
+}
+
+/* Copy the frame's previous stack pointer into *VALP; always returns 0. */
+static inline int
+unw_get_psp (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = info->psp;
+	return 0;
+}
+
+/* Copy the frame's backing store pointer into *VALP; always returns 0. */
+static inline int
+unw_get_bsp (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = info->bsp;
+	return 0;
+}
+
+/* Read the word that cfm_loc points at into *VALP; cfm_loc is not checked. */
+static inline int
+unw_get_cfm (struct unw_frame_info *info, unsigned long *valp)
+{
+	*valp = *info->cfm_loc;
+	return 0;
+}
+
+/* Overwrite the word that cfm_loc points at with VAL; cfm_loc is not checked. */
+static inline int
+unw_set_cfm (struct unw_frame_info *info, unsigned long val)
+{
+	*info->cfm_loc = val;
+	return 0;
+}
+
+/* Read the return pointer into *VAL; returns -1 if no save location is known. */
+static inline int
+unw_get_rp (struct unw_frame_info *info, unsigned long *val)
+{
+	unsigned long *rp = info->rp_loc;
+
+	if (rp) {
+		*val = *rp;
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * Register accessors.  The final int argument is 1 in the unw_set_*()
+ * wrappers below and 0 in the unw_get_*() macros.
+ */
+extern int unw_access_gr (struct unw_frame_info *, int, unsigned long *, char *, int);
+extern int unw_access_br (struct unw_frame_info *, int, unsigned long *, int);
+extern int unw_access_fr (struct unw_frame_info *, int, struct ia64_fpreg *, int);
+extern int unw_access_ar (struct unw_frame_info *, int, unsigned long *, int);
+extern int unw_access_pr (struct unw_frame_info *, unsigned long *, int);
+
+static inline int
+unw_set_gr (struct unw_frame_info *i, int n, unsigned long v, char nat)
+{
+	const int do_write = 1;
+
+	return unw_access_gr(i, n, &v, &nat, do_write);
+}
+
+static inline int
+unw_set_br (struct unw_frame_info *i, int n, unsigned long v)
+{
+	const int do_write = 1;
+
+	return unw_access_br(i, n, &v, do_write);
+}
+
+static inline int
+unw_set_fr (struct unw_frame_info *i, int n, struct ia64_fpreg v)
+{
+	const int do_write = 1;
+
+	return unw_access_fr(i, n, &v, do_write);
+}
+
+static inline int
+unw_set_ar (struct unw_frame_info *i, int n, unsigned long v)
+{
+	return unw_access_ar(i, n, &v,
1);
+}
+
+static inline int
+unw_set_pr (struct unw_frame_info *i, unsigned long v)
+{
+	return unw_access_pr(i, &v, 1);
+}
+
+#define unw_get_gr(i,n,v,nat)	unw_access_gr(i,n,v,nat,0)	/* same accessors as unw_set_*(), last argument 0 instead of 1 */
+#define unw_get_br(i,n,v)	unw_access_br(i,n,v,0)
+#define unw_get_fr(i,n,v)	unw_access_fr(i,n,v,0)
+#define unw_get_ar(i,n,v)	unw_access_ar(i,n,v,0)
+#define unw_get_pr(i,v)		unw_access_pr(i,v,0)
+
+#endif /* _ASM_IA64_UNWIND_H */
diff --git a/kernel/arch/ia64/include/asm/user.h b/kernel/arch/ia64/include/asm/user.h
new file mode 100644
index 000000000..8b9821110
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/user.h
@@ -0,0 +1,58 @@
+#ifndef _ASM_IA64_USER_H
+#define _ASM_IA64_USER_H
+
+/*
+ * Core file format: The core file is written in such a way that gdb
+ * can understand it and provide useful information to the user (under
+ * linux we use the `trad-core' bfd).  The file contents are as
+ * follows:
+ *
+ *  upage: 1 page consisting of a user struct that tells gdb
+ *	what is present in the file.  Directly after this is a
+ *	copy of the task_struct, which is currently not used by gdb,
+ *	but it may come in handy at some point.  All of the registers
+ *	are stored as part of the upage.  The upage should always be
+ *	only one page long.
+ *  data: The data segment follows next.  We use current->end_text to
+ *	current->brk to pick up all of the user variables, plus any memory
+ *	that may have been sbrk'ed.  No attempt is made to determine if a
+ *	page is demand-zero or if a page is totally unused, we just cover
+ *	the entire range.  All of the addresses are rounded in such a way
+ *	that an integral number of pages is written.
+ *  stack: We need the stack information in order to get a meaningful
+ *	backtrace.  We need to write the data from usp to
+ *	current->start_stack, so we round each of these in order to be able
+ *	to write an integer number of pages.
+ *
+ * Modified 1998, 1999, 2001
+ *	David Mosberger-Tang , Hewlett-Packard Co
+ */
+
+#include
+#include
+
+#include
+
+#define EF_SIZE		3072	/* XXX fix me */
+
+struct user {
+	unsigned long	regs[EF_SIZE/8+32];	/* integer and fp regs (EF_SIZE/8+32 = 416 entries) */
+	size_t		u_tsize;		/* text size (pages) */
+	size_t		u_dsize;		/* data size (pages) */
+	size_t		u_ssize;		/* stack size (pages) */
+	unsigned long	start_code;		/* text starting address */
+	unsigned long	start_data;		/* data starting address */
+	unsigned long	start_stack;		/* stack starting address */
+	long int	signal;			/* signal causing core dump */
+	unsigned long	u_ar0;			/* help gdb find registers */
+	unsigned long	magic;			/* identifies a core file */
+	char		u_comm[32];		/* user command name */
+};
+
+#define NBPG			PAGE_SIZE
+#define UPAGES			1
+#define HOST_TEXT_START_ADDR	(u.start_code)	/* the HOST_* macros expect a struct user named "u" in scope */
+#define HOST_DATA_START_ADDR	(u.start_data)
+#define HOST_STACK_END_ADDR	(u.start_stack + u.u_ssize * NBPG)
+
+#endif /* _ASM_IA64_USER_H */
diff --git a/kernel/arch/ia64/include/asm/ustack.h b/kernel/arch/ia64/include/asm/ustack.h
new file mode 100644
index 000000000..b275401b9
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/ustack.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_IA64_USTACK_H
+#define _ASM_IA64_USTACK_H
+
+#include
+#include
+
+/* The absolute hard limit for stack size is 1/2 of the mappable space in the region */
+#define MAX_USER_STACK_SIZE	(RGN_MAP_LIMIT/2)
+#define STACK_TOP		(0x6000000000000000UL + RGN_MAP_LIMIT)
+#define STACK_TOP_MAX		STACK_TOP
+#endif /* _ASM_IA64_USTACK_H */
diff --git a/kernel/arch/ia64/include/asm/uv/uv.h b/kernel/arch/ia64/include/asm/uv/uv.h
new file mode 100644
index 000000000..8f6cbaa74
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/uv/uv.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_IA64_UV_UV_H
+#define _ASM_IA64_UV_UV_H
+
+#include
+
+static inline int is_uv_system(void)
+{
+	/* temporary support for running on hardware simulator */
+	return IS_MEDUSA() || ia64_platform_is("uv");
+}
+
+#endif /* _ASM_IA64_UV_UV_H */
diff --git
a/kernel/arch/ia64/include/asm/uv/uv_hub.h b/kernel/arch/ia64/include/asm/uv/uv_hub.h
new file mode 100644
index 000000000..2a88c7204
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/uv/uv_hub.h
@@ -0,0 +1,315 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV architectural definitions
+ *
+ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef __ASM_IA64_UV_HUB_H__
+#define __ASM_IA64_UV_HUB_H__
+
+#include
+#include
+#include
+#include
+
+
+/*
+ * Addressing Terminology
+ *
+ *	M       - The low M bits of a physical address represent the offset
+ *		  into the blade local memory. RAM memory on a blade is physically
+ *		  contiguous (although various IO spaces may punch holes in
+ *		  it).
+ *
+ *	N	- Number of bits in the node portion of a socket physical
+ *		  address.
+ *
+ *	NASID   - network ID of a router, Mbrick or Cbrick. Nasid values of
+ *		  routers always have low bit of 1, C/MBricks have low bit
+ *		  equal to 0. Most addressing macros that target UV hub chips
+ *		  right shift the NASID by 1 to exclude the always-zero bit.
+ *		  NASIDs contain up to 15 bits.
+ *
+ *	GNODE   - NASID right shifted by 1 bit. Most mmrs contain gnodes instead
+ *		  of nasids.
+ *
+ *	PNODE   - the low N bits of the GNODE. The PNODE is the most useful variant
+ *		  of the nasid for socket usage.
+ *
+ *
+ *  NumaLink Global Physical Address Format:
+ *  +--------------------------------+---------------------+
+ *  |00..000|      GNODE             |      NodeOffset     |
+ *  +--------------------------------+---------------------+
+ *          |<-------53 - M bits --->|<--------M bits ----->
+ *
+ *	M - number of node offset bits (35 ..
40) + * + * + * Memory/UV-HUB Processor Socket Address Format: + * +----------------+---------------+---------------------+ + * |00..000000000000| PNODE | NodeOffset | + * +----------------+---------------+---------------------+ + * <--- N bits --->|<--------M bits -----> + * + * M - number of node offset bits (35 .. 40) + * N - number of PNODE bits (0 .. 10) + * + * Note: M + N cannot currently exceed 44 (x86_64) or 46 (IA64). + * The actual values are configuration dependent and are set at + * boot time. M & N values are set by the hardware/BIOS at boot. + */ + + +/* + * Maximum number of bricks in all partitions and in all coherency domains. + * This is the total number of bricks accessible in the numalink fabric. It + * includes all C & M bricks. Routers are NOT included. + * + * This value is also the value of the maximum number of non-router NASIDs + * in the numalink fabric. + * + * NOTE: a brick may contain 1 or 2 OS nodes. Don't get these confused. + */ +#define UV_MAX_NUMALINK_BLADES 16384 + +/* + * Maximum number of C/Mbricks within a software SSI (hardware may support + * more). + */ +#define UV_MAX_SSI_BLADES 1 + +/* + * The largest possible NASID of a C or M brick (+ 2) + */ +#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) + +/* + * The following defines attributes of the HUB chip. These attributes are + * frequently referenced and are kept in the per-cpu data areas of each cpu. + * They are kept together in a struct to minimize cache misses. 
+ */
+struct uv_hub_info_s {
+	unsigned long	global_mmr_base;	/* base of the MMR64 space, see UV_GLOBAL_MMR64_BASE */
+	unsigned long	gpa_mask;		/* applied to a global physical address in uv_va() */
+	unsigned long	gnode_upper;		/* OR-ed into addresses and nasids by the conversion helpers */
+	unsigned long	lowmem_remap_top;
+	unsigned long	lowmem_remap_base;
+	unsigned short	pnode;
+	unsigned short	pnode_mask;
+	unsigned short	coherency_domain_number;
+	unsigned short	numa_blade_id;
+	unsigned char	blade_processor_id;
+	unsigned char	m_val;			/* shift applied to a pnode in uv_pnode_offset_to_vaddr() */
+	unsigned char	n_val;
+};
+DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
+#define uv_hub_info 		this_cpu_ptr(&__uv_hub_info)	/* per-cpu entry via this_cpu_ptr() */
+#define uv_cpu_hub_info(cpu)	(&per_cpu(__uv_hub_info, cpu))	/* per-cpu entry of the given cpu */
+
+/*
+ * Local & Global MMR space macros.
+ * 	Note: macros are intended to be used ONLY by inline functions
+ * 	in this file - not by other kernel code.
+ *		n -  NASID (full 15-bit global nasid)
+ *		g -  GNODE (full 15-bit global nasid, right shifted 1)
+ *		p -  PNODE (local part of nasids, right shifted 1)
+ */
+#define UV_NASID_TO_PNODE(n)		(((n) >> 1) & uv_hub_info->pnode_mask)
+#define UV_PNODE_TO_NASID(p)		(((p) << 1) | uv_hub_info->gnode_upper)
+
+#define UV_LOCAL_MMR_BASE		0xf4000000UL
+#define UV_GLOBAL_MMR32_BASE		0xf8000000UL
+#define UV_GLOBAL_MMR64_BASE		(uv_hub_info->global_mmr_base)
+
+#define UV_GLOBAL_MMR32_PNODE_SHIFT	15
+#define UV_GLOBAL_MMR64_PNODE_SHIFT	26
+
+#define UV_GLOBAL_MMR32_PNODE_BITS(p)	((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT))
+
+#define UV_GLOBAL_MMR64_PNODE_BITS(p)					\
+	((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT)
+
+/*
+ * Macros for converting between kernel virtual addresses, socket local physical
+ * addresses, and UV global physical addresses.
+ * 	Note: use the standard __pa() & __va() macros for converting
+ * 	      between socket virtual and socket physical addresses.
+ */ + +/* socket phys RAM --> UV global physical address */ +static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) +{ + if (paddr < uv_hub_info->lowmem_remap_top) + paddr += uv_hub_info->lowmem_remap_base; + return paddr | uv_hub_info->gnode_upper; +} + + +/* socket virtual --> UV global physical address */ +static inline unsigned long uv_gpa(void *v) +{ + return __pa(v) | uv_hub_info->gnode_upper; +} + +/* socket virtual --> UV global physical address */ +static inline void *uv_vgpa(void *v) +{ + return (void *)uv_gpa(v); +} + +/* UV global physical address --> socket virtual */ +static inline void *uv_va(unsigned long gpa) +{ + return __va(gpa & uv_hub_info->gpa_mask); +} + +/* pnode, offset --> socket virtual */ +static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) +{ + return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset); +} + + +/* + * Access global MMRs using the low memory MMR32 space. This region supports + * faster MMR access but not all MMRs are accessible in this space. + */ +static inline unsigned long *uv_global_mmr32_address(int pnode, + unsigned long offset) +{ + return __va(UV_GLOBAL_MMR32_BASE | + UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); +} + +static inline void uv_write_global_mmr32(int pnode, unsigned long offset, + unsigned long val) +{ + *uv_global_mmr32_address(pnode, offset) = val; +} + +static inline unsigned long uv_read_global_mmr32(int pnode, + unsigned long offset) +{ + return *uv_global_mmr32_address(pnode, offset); +} + +/* + * Access Global MMR space using the MMR space located at the top of physical + * memory. 
+ */ +static inline unsigned long *uv_global_mmr64_address(int pnode, + unsigned long offset) +{ + return __va(UV_GLOBAL_MMR64_BASE | + UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); +} + +static inline void uv_write_global_mmr64(int pnode, unsigned long offset, + unsigned long val) +{ + *uv_global_mmr64_address(pnode, offset) = val; +} + +static inline unsigned long uv_read_global_mmr64(int pnode, + unsigned long offset) +{ + return *uv_global_mmr64_address(pnode, offset); +} + +/* + * Access hub local MMRs. Faster than using global space but only local MMRs + * are accessible. + */ +static inline unsigned long *uv_local_mmr_address(unsigned long offset) +{ + return __va(UV_LOCAL_MMR_BASE | offset); +} + +static inline unsigned long uv_read_local_mmr(unsigned long offset) +{ + return *uv_local_mmr_address(offset); +} + +static inline void uv_write_local_mmr(unsigned long offset, unsigned long val) +{ + *uv_local_mmr_address(offset) = val; +} + +/* + * Structures and definitions for converting between cpu, node, pnode, and blade + * numbers. + */ + +/* Blade-local cpu number of current cpu. Numbered 0 .. <# cpus on the blade> */ +static inline int uv_blade_processor_id(void) +{ + return smp_processor_id(); +} + +/* Blade number of current cpu. Numnbered 0 .. 
<#blades -1> */
+static inline int uv_numa_blade_id(void)
+{
+	return 0;	/* constant: this header sets UV_MAX_SSI_BLADES to 1 */
+}
+
+/* Convert a cpu number to the UV blade number */
+static inline int uv_cpu_to_blade_id(int cpu)
+{
+	return 0;
+}
+
+/* Convert linux node number to the UV blade number */
+static inline int uv_node_to_blade_id(int nid)
+{
+	return 0;
+}
+
+/* Convert a blade id to the PNODE of the blade */
+static inline int uv_blade_to_pnode(int bid)
+{
+	return 0;
+}
+
+/* Determine the number of possible cpus on a blade */
+static inline int uv_blade_nr_possible_cpus(int bid)
+{
+	return num_possible_cpus();	/* bid is ignored */
+}
+
+/* Determine the number of online cpus on a blade */
+static inline int uv_blade_nr_online_cpus(int bid)
+{
+	return num_online_cpus();	/* bid is ignored */
+}
+
+/* Convert a cpu id to the PNODE of the blade containing the cpu */
+static inline int uv_cpu_to_pnode(int cpu)
+{
+	return 0;
+}
+
+/* Convert a linux node number to the PNODE of the blade */
+static inline int uv_node_to_pnode(int nid)
+{
+	return 0;
+}
+
+/* Maximum possible number of blades */
+static inline int uv_num_possible_blades(void)
+{
+	return 1;
+}
+
+static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
+{
+	/* not currently needed on ia64 */
+}
+
+
+#endif /* __ASM_IA64_UV_HUB_H__ */
+
diff --git a/kernel/arch/ia64/include/asm/uv/uv_mmrs.h b/kernel/arch/ia64/include/asm/uv/uv_mmrs.h
new file mode 100644
index 000000000..fe0b8f05e
--- /dev/null
+++ b/kernel/arch/ia64/include/asm/uv/uv_mmrs.h
@@ -0,0 +1,825 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV MMR definitions
+ *
+ * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_UV_UV_MMRS_H
+#define _ASM_IA64_UV_UV_MMRS_H
+
+#define UV_MMR_ENABLE		(1UL << 63)
+
+/* ========================================================================= */
+/*                           UVH_BAU_DATA_CONFIG                             */
+/* ========================================================================= */
+#define UVH_BAU_DATA_CONFIG 0x61680UL
+#define UVH_BAU_DATA_CONFIG_32 0x0438
+
+#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0
+#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_BAU_DATA_CONFIG_DM_SHFT 8
+#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11
+#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12
+#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_BAU_DATA_CONFIG_P_SHFT 13
+#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_BAU_DATA_CONFIG_T_SHFT 15
+#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_BAU_DATA_CONFIG_M_SHFT 16
+#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32
+#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_bau_data_config_u {
+    unsigned long	v;	/* whole value, overlaying the bit-fields in s */
+    struct uvh_bau_data_config_s {
+	unsigned long	vector_  :  8;  /* RW */
+	unsigned long	dm       :  3;  /* RW */
+	unsigned long	destmode :  1;  /* RW */
+	unsigned long	status   :  1;  /* RO */
+	unsigned long	p        :  1;  /* RO */
+	unsigned long	rsvd_14  :  1;  /*    */
+	unsigned long	t        :  1;  /* RO */
+	unsigned long	m        :  1;  /* RW */
+	unsigned long	rsvd_17_31: 15;  /*    */
+	unsigned long	apic_id  : 32;  /* RW */
+    } s;	/* field widths add up to 64 bits; the _SHFT/_MASK pairs above describe the same fields */
+};
+
+/* ========================================================================= */
+/*                           UVH_EVENT_OCCURRED0                             */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED0 0x70000UL
+#define UVH_EVENT_OCCURRED0_32 0x005e8
+
+#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0
+#define
UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL +#define UVH_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 +#define UVH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL +#define UVH_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 +#define UVH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL +#define UVH_EVENT_OCCURRED0_LH_HCERR_SHFT 3 +#define UVH_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL +#define UVH_EVENT_OCCURRED0_RH_HCERR_SHFT 4 +#define UVH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL +#define UVH_EVENT_OCCURRED0_XN_HCERR_SHFT 5 +#define UVH_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL +#define UVH_EVENT_OCCURRED0_SI_HCERR_SHFT 6 +#define UVH_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL +#define UVH_EVENT_OCCURRED0_LB_AOERR0_SHFT 7 +#define UVH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL +#define UVH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 +#define UVH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL +#define UVH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 +#define UVH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL +#define UVH_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 +#define UVH_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL +#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 +#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL +#define UVH_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 +#define UVH_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL +#define UVH_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 +#define UVH_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL +#define UVH_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 +#define UVH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL +#define UVH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15 +#define UVH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL +#define UVH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16 +#define UVH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL +#define UVH_EVENT_OCCURRED0_LH_AOERR1_SHFT 17 +#define UVH_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL +#define UVH_EVENT_OCCURRED0_RH_AOERR1_SHFT 18 
+#define UVH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL +#define UVH_EVENT_OCCURRED0_XN_AOERR1_SHFT 19 +#define UVH_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL +#define UVH_EVENT_OCCURRED0_SI_AOERR1_SHFT 20 +#define UVH_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL +#define UVH_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21 +#define UVH_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL +#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22 +#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL +#define
UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL +#define UVH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39 +#define UVH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL +#define UVH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40 +#define UVH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL +#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41 +#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL +#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42 +#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL +#define UVH_EVENT_OCCURRED0_LTC_INT_SHFT 43 +#define UVH_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL +#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44 +#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL +#define UVH_EVENT_OCCURRED0_IPI_INT_SHFT 45 +#define UVH_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46 +#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47 +#define UVH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48 +#define UVH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49 +#define UVH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL +#define UVH_EVENT_OCCURRED0_PROFILE_INT_SHFT 50 +#define UVH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL +#define UVH_EVENT_OCCURRED0_RTC0_SHFT 51 +#define UVH_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL +#define UVH_EVENT_OCCURRED0_RTC1_SHFT 52 +#define
UVH_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL +#define UVH_EVENT_OCCURRED0_RTC2_SHFT 53 +#define UVH_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL +#define UVH_EVENT_OCCURRED0_RTC3_SHFT 54 +#define UVH_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL +#define UVH_EVENT_OCCURRED0_BAU_DATA_SHFT 55 +#define UVH_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL +#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 +#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL +union uvh_event_occurred0_u { + unsigned long v; /* whole register value */ + struct uvh_event_occurred0_s { + unsigned long lb_hcerr : 1; /* RW, W1C */ + unsigned long gr0_hcerr : 1; /* RW, W1C */ + unsigned long gr1_hcerr : 1; /* RW, W1C */ + unsigned long lh_hcerr : 1; /* RW, W1C */ + unsigned long rh_hcerr : 1; /* RW, W1C */ + unsigned long xn_hcerr : 1; /* RW, W1C */ + unsigned long si_hcerr : 1; /* RW, W1C */ + unsigned long lb_aoerr0 : 1; /* RW, W1C */ + unsigned long gr0_aoerr0 : 1; /* RW, W1C */ + unsigned long gr1_aoerr0 : 1; /* RW, W1C */ + unsigned long lh_aoerr0 : 1; /* RW, W1C */ + unsigned long rh_aoerr0 : 1; /* RW, W1C */ + unsigned long xn_aoerr0 : 1; /* RW, W1C */ + unsigned long si_aoerr0 : 1; /* RW, W1C */ + unsigned long lb_aoerr1 : 1; /* RW, W1C */ + unsigned long gr0_aoerr1 : 1; /* RW, W1C */ + unsigned long gr1_aoerr1 : 1; /* RW, W1C */ + unsigned long lh_aoerr1 : 1; /* RW, W1C */ + unsigned long rh_aoerr1 : 1; /* RW, W1C */ + unsigned long xn_aoerr1 : 1; /* RW, W1C */ + unsigned long si_aoerr1 : 1; /* RW, W1C */ + unsigned long rh_vpi_int : 1; /* RW, W1C */ + unsigned long system_shutdown_int : 1; /* RW, W1C */ + unsigned long lb_irq_int_0 : 1; /* RW, W1C */ + unsigned long lb_irq_int_1 : 1; /* RW, W1C */ + unsigned long lb_irq_int_2 : 1; /* RW, W1C */ + unsigned long lb_irq_int_3 : 1; /* RW, W1C */ + unsigned long lb_irq_int_4 : 1; /* RW, W1C */ + unsigned long lb_irq_int_5 : 1; /* RW, W1C */ + unsigned long lb_irq_int_6 : 1; /* RW, W1C */ + unsigned long lb_irq_int_7 : 1; /* 
RW, W1C */ + unsigned long lb_irq_int_8 : 1; /* RW, W1C */ + unsigned long lb_irq_int_9 : 1; /* RW, W1C */ + unsigned long lb_irq_int_10 : 1; /* RW, W1C */ + unsigned long lb_irq_int_11 : 1; /* RW, W1C */ + unsigned long lb_irq_int_12 : 1; /* RW, W1C */ + unsigned long lb_irq_int_13 : 1; /* RW, W1C */ + unsigned long lb_irq_int_14 : 1; /* RW, W1C */ + unsigned long lb_irq_int_15 : 1; /* RW, W1C */ + unsigned long l1_nmi_int : 1; /* RW, W1C */ + unsigned long stop_clock : 1; /* RW, W1C */ + unsigned long asic_to_l1 : 1; /* RW, W1C */ + unsigned long l1_to_asic : 1; /* RW, W1C */ + unsigned long ltc_int : 1; /* RW, W1C */ + unsigned long la_seq_trigger : 1; /* RW, W1C */ + unsigned long ipi_int : 1; /* RW, W1C */ + unsigned long extio_int0 : 1; /* RW, W1C */ + unsigned long extio_int1 : 1; /* RW, W1C */ + unsigned long extio_int2 : 1; /* RW, W1C */ + unsigned long extio_int3 : 1; /* RW, W1C */ + unsigned long profile_int : 1; /* RW, W1C */ + unsigned long rtc0 : 1; /* RW, W1C */ + unsigned long rtc1 : 1; /* RW, W1C */ + unsigned long rtc2 : 1; /* RW, W1C */ + unsigned long rtc3 : 1; /* RW, W1C */ + unsigned long bau_data : 1; /* RW, W1C */ + unsigned long power_management_req : 1; /* RW, W1C */ + unsigned long rsvd_57_63 : 7; /* */ + } s; /* one 1-bit member per event, declared in the same order as the _SHFT values; 57 + 7 reserved = 64 bits */ +}; + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED0_ALIAS: only the two offsets are defined; no field macros or union are given for it here */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL +#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0 + +/* ========================================================================= */ +/* UVH_GR0_TLB_INT0_CONFIG: field shifts and masks have the same values as the UVH_BAU_DATA_CONFIG ones */ +/* ========================================================================= */ +#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL + +#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8 +#define UVH_GR0_TLB_INT0_CONFIG_DM_MASK
0x0000000000000700UL +#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UVH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_GR0_TLB_INT0_CONFIG_P_SHFT 13 +#define UVH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_GR0_TLB_INT0_CONFIG_T_SHFT 15 +#define UVH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_GR0_TLB_INT0_CONFIG_M_SHFT 16 +#define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_gr0_tlb_int0_config_u { + unsigned long v; /* whole register value */ + struct uvh_gr0_tlb_int0_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_GR0_TLB_INT1_CONFIG: same field shifts and masks as UVH_GR0_TLB_INT0_CONFIG, different offset */ +/* ========================================================================= */ +#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL + +#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8 +#define UVH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UVH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_GR0_TLB_INT1_CONFIG_P_SHFT 13 +#define UVH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_GR0_TLB_INT1_CONFIG_T_SHFT 15 +#define
UVH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_GR0_TLB_INT1_CONFIG_M_SHFT 16 +#define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_gr0_tlb_int1_config_u { + unsigned long v; /* whole register value */ + struct uvh_gr0_tlb_int1_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_GR1_TLB_INT0_CONFIG: same field shifts and masks as the UVH_GR0_TLB_INT*_CONFIG registers, different offset */ +/* ========================================================================= */ +#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL + +#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 +#define UVH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UVH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_GR1_TLB_INT0_CONFIG_P_SHFT 13 +#define UVH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_GR1_TLB_INT0_CONFIG_T_SHFT 15 +#define UVH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_GR1_TLB_INT0_CONFIG_M_SHFT 16 +#define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_gr1_tlb_int0_config_u { + unsigned long v; /* whole register value */ + struct uvh_gr1_tlb_int0_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm
: 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_GR1_TLB_INT1_CONFIG: same field shifts and masks as UVH_GR1_TLB_INT0_CONFIG, different offset */ +/* ========================================================================= */ +#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL + +#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 +#define UVH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UVH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_GR1_TLB_INT1_CONFIG_P_SHFT 13 +#define UVH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_GR1_TLB_INT1_CONFIG_T_SHFT 15 +#define UVH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_GR1_TLB_INT1_CONFIG_M_SHFT 16 +#define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_gr1_tlb_int1_config_u { + unsigned long v; /* whole register value */ + struct uvh_gr1_tlb_int1_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_INT_CMPB */ +/*
========================================================================= */ +#define UVH_INT_CMPB 0x22080UL + +#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 +#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL + +union uvh_int_cmpb_u { + unsigned long v; /* whole register value */ + struct uvh_int_cmpb_s { + unsigned long real_time_cmpb : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; /* 56-bit field starting at bit 0, followed by 8 reserved bits */ +}; + +/* ========================================================================= */ +/* UVH_INT_CMPC: a single 56-bit field starting at bit 0; same mask value as UVH_INT_CMPB */ +/* ========================================================================= */ +#define UVH_INT_CMPC 0x22100UL + +#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0 +#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL + +union uvh_int_cmpc_u { + unsigned long v; /* whole register value */ + struct uvh_int_cmpc_s { + unsigned long real_time_cmpc : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_INT_CMPD: a single 56-bit field starting at bit 0; same mask value as UVH_INT_CMPB */ +/* ========================================================================= */ +#define UVH_INT_CMPD 0x22180UL + +#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0 +#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL + +union uvh_int_cmpd_u { + unsigned long v; /* whole register value */ + struct uvh_int_cmpd_s { + unsigned long real_time_cmpd : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_NODE_ID: register at offset 0; seven fields, each with a _SHFT/_MASK pair */ +/* ========================================================================= */ +#define UVH_NODE_ID 0x0UL + +#define UVH_NODE_ID_FORCE1_SHFT 0 +#define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL +#define UVH_NODE_ID_MANUFACTURER_SHFT 1 +#define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL +#define UVH_NODE_ID_PART_NUMBER_SHFT 12 +#define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL +#define UVH_NODE_ID_REVISION_SHFT 28 +#define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL +#define UVH_NODE_ID_NODE_ID_SHFT 32
+#define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL +#define UVH_NODE_ID_NODES_PER_BIT_SHFT 48 +#define UVH_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL +#define UVH_NODE_ID_NI_PORT_SHFT 56 +#define UVH_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL + +union uvh_node_id_u { + unsigned long v; /* whole register value */ + struct uvh_node_id_s { + unsigned long force1 : 1; /* RO */ + unsigned long manufacturer : 11; /* RO */ + unsigned long part_number : 16; /* RO */ + unsigned long revision : 4; /* RO */ + unsigned long node_id : 15; /* RW */ + unsigned long rsvd_47 : 1; /* */ + unsigned long nodes_per_bit : 7; /* RW */ + unsigned long rsvd_55 : 1; /* */ + unsigned long ni_port : 4; /* RO */ + unsigned long rsvd_60_63 : 4; /* */ + } s; /* per-field view; the member widths add up to 64 bits */ +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR: one 22-bit dest_base field covering bits 24-45 */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_0_mmr_u { + unsigned long v; /* whole register value */ + struct uvh_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR: same dest_base shift and mask as CONFIG_0, different offset */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_1_mmr_u { + unsigned long v; /* whole register value */ + struct uvh_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long
rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR: same dest_base shift and mask as CONFIG_0, different offset */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_2_mmr_u { + unsigned long v; /* whole register value */ + struct uvh_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR: base in bits 28-45, gr4 at bit 48, n_gru in bits 52-55, enable at bit 63 */ +/* ========================================================================= */ +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL + +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_gru_overlay_config_mmr_u { + unsigned long v; /* whole register value */ + struct uvh_rh_gam_gru_overlay_config_mmr_s { + unsigned long rsvd_0_27: 28; /* */ + unsigned long base : 18; /* RW */ + unsigned long rsvd_46_47: 2; /* */ + unsigned long gr4 : 1; /* RW */ + unsigned long rsvd_49_51: 3; /* */ + unsigned long n_gru : 4; /* RW */ + unsigned long rsvd_56_62: 7; /* */ + unsigned long enable : 1; /* RW */ + } s;
+}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR: base in bits 26-45, dual_hub at bit 46, enable at bit 63 */ +/* ========================================================================= */ +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL + +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46 +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_mmr_overlay_config_mmr_u { + unsigned long v; /* whole register value */ + struct uvh_rh_gam_mmr_overlay_config_mmr_s { + unsigned long rsvd_0_25: 26; /* */ + unsigned long base : 20; /* RW */ + unsigned long dual_hub : 1; /* RW */ + unsigned long rsvd_47_62: 16; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC: a single 56-bit real_time_clock field starting at bit 0 */ +/* ========================================================================= */ +#define UVH_RTC 0x340000UL + +#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 +#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL + +union uvh_rtc_u { + unsigned long v; /* whole register value */ + struct uvh_rtc_s { + unsigned long real_time_clock : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC1_INT_CONFIG: vector/dm/destmode/status/p/t/m/apic_id fields, each with a _SHFT/_MASK pair */ +/* ========================================================================= */ +#define UVH_RTC1_INT_CONFIG 0x615c0UL + +#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0 +#define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC1_INT_CONFIG_DM_SHFT 8 +#define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11 +#define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define
UVH_RTC1_INT_CONFIG_STATUS_SHFT 12 +#define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC1_INT_CONFIG_P_SHFT 13 +#define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC1_INT_CONFIG_T_SHFT 15 +#define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC1_INT_CONFIG_M_SHFT 16 +#define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32 +#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_rtc1_int_config_u { + unsigned long v; /* whole register value */ + struct uvh_rtc1_int_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC2_INT_CONFIG: same field shifts and masks as UVH_RTC1_INT_CONFIG, different offset */ +/* ========================================================================= */ +#define UVH_RTC2_INT_CONFIG 0x61600UL + +#define UVH_RTC2_INT_CONFIG_VECTOR_SHFT 0 +#define UVH_RTC2_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC2_INT_CONFIG_DM_SHFT 8 +#define UVH_RTC2_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC2_INT_CONFIG_DESTMODE_SHFT 11 +#define UVH_RTC2_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_RTC2_INT_CONFIG_STATUS_SHFT 12 +#define UVH_RTC2_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC2_INT_CONFIG_P_SHFT 13 +#define UVH_RTC2_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC2_INT_CONFIG_T_SHFT 15 +#define UVH_RTC2_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC2_INT_CONFIG_M_SHFT 16 +#define UVH_RTC2_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC2_INT_CONFIG_APIC_ID_SHFT 32 +#define UVH_RTC2_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union
uvh_rtc2_int_config_u { + unsigned long v; /* whole register value */ + struct uvh_rtc2_int_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC3_INT_CONFIG: same field shifts and masks as UVH_RTC2_INT_CONFIG, different offset */ +/* ========================================================================= */ +#define UVH_RTC3_INT_CONFIG 0x61640UL + +#define UVH_RTC3_INT_CONFIG_VECTOR_SHFT 0 +#define UVH_RTC3_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC3_INT_CONFIG_DM_SHFT 8 +#define UVH_RTC3_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC3_INT_CONFIG_DESTMODE_SHFT 11 +#define UVH_RTC3_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_RTC3_INT_CONFIG_STATUS_SHFT 12 +#define UVH_RTC3_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC3_INT_CONFIG_P_SHFT 13 +#define UVH_RTC3_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC3_INT_CONFIG_T_SHFT 15 +#define UVH_RTC3_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC3_INT_CONFIG_M_SHFT 16 +#define UVH_RTC3_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC3_INT_CONFIG_APIC_ID_SHFT 32 +#define UVH_RTC3_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_rtc3_int_config_u { + unsigned long v; /* whole register value */ + struct uvh_rtc3_int_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/*
========================================================================= */ +/* UVH_RTC_INC_RATIO: fraction in bits 0-19, ratio in bits 20-22 */ +/* ========================================================================= */ +#define UVH_RTC_INC_RATIO 0x350000UL + +#define UVH_RTC_INC_RATIO_FRACTION_SHFT 0 +#define UVH_RTC_INC_RATIO_FRACTION_MASK 0x00000000000fffffUL +#define UVH_RTC_INC_RATIO_RATIO_SHFT 20 +#define UVH_RTC_INC_RATIO_RATIO_MASK 0x0000000000700000UL + +union uvh_rtc_inc_ratio_u { + unsigned long v; /* whole register value */ + struct uvh_rtc_inc_ratio_s { + unsigned long fraction : 20; /* RW */ + unsigned long ratio : 3; /* RW */ + unsigned long rsvd_23_63: 41; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ADDR_MAP_CONFIG: m_skt in bits 0-5, n_skt in bits 8-11 */ +/* ========================================================================= */ +#define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL + +#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_SHFT 0 +#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL +#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_SHFT 8 +#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_MASK 0x0000000000000f00UL + +union uvh_si_addr_map_config_u { + unsigned long v; /* whole register value */ + struct uvh_si_addr_map_config_s { + unsigned long m_skt : 6; /* RW */ + unsigned long rsvd_6_7: 2; /* */ + unsigned long n_skt : 4; /* RW */ + unsigned long rsvd_12_63: 52; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS0_OVERLAY_CONFIG: base in bits 24-31, m_alias in bits 48-52, enable at bit 63 */ +/* ========================================================================= */ +#define UVH_SI_ALIAS0_OVERLAY_CONFIG 0xc80008UL + +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias0_overlay_config_u
{ + unsigned long v; /* whole register value */ + struct uvh_si_alias0_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS1_OVERLAY_CONFIG: same field shifts and masks as UVH_SI_ALIAS0_OVERLAY_CONFIG, different offset */ +/* ========================================================================= */ +#define UVH_SI_ALIAS1_OVERLAY_CONFIG 0xc80010UL + +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias1_overlay_config_u { + unsigned long v; /* whole register value */ + struct uvh_si_alias1_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS2_OVERLAY_CONFIG: same field shifts and masks as UVH_SI_ALIAS0_OVERLAY_CONFIG, different offset */ +/* ========================================================================= */ +#define UVH_SI_ALIAS2_OVERLAY_CONFIG 0xc80018UL + +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias2_overlay_config_u { + unsigned long v; /* whole register value */ + struct uvh_si_alias2_overlay_config_s { + unsigned long
rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + + +#endif /* _ASM_IA64_UV_UV_MMRS_H */ diff --git a/kernel/arch/ia64/include/asm/vga.h b/kernel/arch/ia64/include/asm/vga.h new file mode 100644 index 000000000..02184ecd8 --- /dev/null +++ b/kernel/arch/ia64/include/asm/vga.h @@ -0,0 +1,25 @@ +/* + * Access to VGA videoram + * + * (c) 1998 Martin Mares + * (c) 1999 Asit Mallick + * (c) 1999 Don Dugger + */ + +#ifndef __ASM_IA64_VGA_H_ +#define __ASM_IA64_VGA_H_ + +/* + * On the PC, we can just recalculate addresses and then access the + * videoram directly without any black magic. + */ + +extern unsigned long vga_console_iobase; +extern unsigned long vga_console_membase; + +#define VGA_MAP_MEM(x,s) ((unsigned long) ioremap_nocache(vga_console_membase + (x), s)) /* x is added to vga_console_membase before the ioremap_nocache() call; result is cast to unsigned long */ + +#define vga_readb(x) (*(x)) /* plain dereference of x */ +#define vga_writeb(x,y) (*(y) = (x)) /* value first, destination pointer second */ + +#endif /* __ASM_IA64_VGA_H_ */ diff --git a/kernel/arch/ia64/include/asm/xor.h b/kernel/arch/ia64/include/asm/xor.h new file mode 100644 index 000000000..a349e23de --- /dev/null +++ b/kernel/arch/ia64/include/asm/xor.h @@ -0,0 +1,31 @@ +/* + * Optimized RAID-5 checksumming functions for IA-64. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * You should have received a copy of the GNU General Public License + * (for example /usr/src/linux/COPYING); if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */ + + +extern void xor_ia64_2(unsigned long, unsigned long *, unsigned long *); +extern void xor_ia64_3(unsigned long, unsigned long *, unsigned long *, + unsigned long *); +extern void xor_ia64_4(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *); +extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *, + unsigned long *, unsigned long *, unsigned long *); + +static struct xor_block_template xor_block_ia64 = { + .name = "ia64", + .do_2 = xor_ia64_2, + .do_3 = xor_ia64_3, + .do_4 = xor_ia64_4, + .do_5 = xor_ia64_5, +}; + +#define XOR_TRY_TEMPLATES xor_speed(&xor_block_ia64) diff --git a/kernel/arch/ia64/include/uapi/asm/Kbuild b/kernel/arch/ia64/include/uapi/asm/Kbuild new file mode 100644 index 000000000..891002bbb --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/Kbuild @@ -0,0 +1,49 @@ +# UAPI Header export list +include include/uapi/asm-generic/Kbuild.asm + +generic-y += kvm_para.h + +header-y += auxvec.h +header-y += bitsperlong.h +header-y += break.h +header-y += byteorder.h +header-y += cmpxchg.h +header-y += errno.h +header-y += fcntl.h +header-y += fpu.h +header-y += gcc_intrin.h +header-y += ia64regs.h +header-y += intel_intrin.h +header-y += intrinsics.h +header-y += ioctl.h +header-y += ioctls.h +header-y += ipcbuf.h +header-y += kvm_para.h +header-y += mman.h +header-y += msgbuf.h +header-y += param.h +header-y += perfmon.h +header-y += perfmon_default_smpl.h +header-y += poll.h +header-y += posix_types.h +header-y += ptrace.h +header-y += ptrace_offsets.h +header-y += resource.h +header-y += rse.h +header-y += sembuf.h +header-y += setup.h +header-y += shmbuf.h +header-y += sigcontext.h +header-y += siginfo.h +header-y += signal.h +header-y += socket.h +header-y += sockios.h +header-y += stat.h +header-y += statfs.h +header-y += swab.h +header-y += termbits.h +header-y += termios.h +header-y += types.h +header-y += ucontext.h +header-y += unistd.h +header-y += ustack.h diff --git 
a/kernel/arch/ia64/include/uapi/asm/auxvec.h b/kernel/arch/ia64/include/uapi/asm/auxvec.h new file mode 100644 index 000000000..58277fc65 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/auxvec.h @@ -0,0 +1,13 @@ +#ifndef _ASM_IA64_AUXVEC_H +#define _ASM_IA64_AUXVEC_H + +/* + * Architecture-neutral AT_ values are in the range 0-17. Leave some room for more of + * them, start the architecture-specific ones at 32. + */ +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 + +#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */ + +#endif /* _ASM_IA64_AUXVEC_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/bitsperlong.h b/kernel/arch/ia64/include/uapi/asm/bitsperlong.h new file mode 100644 index 000000000..ec4db3c97 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/bitsperlong.h @@ -0,0 +1,8 @@ +#ifndef __ASM_IA64_BITSPERLONG_H +#define __ASM_IA64_BITSPERLONG_H + +#define __BITS_PER_LONG 64 + +#include <asm-generic/bitsperlong.h> + +#endif /* __ASM_IA64_BITSPERLONG_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/break.h b/kernel/arch/ia64/include/uapi/asm/break.h new file mode 100644 index 000000000..f03402039 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/break.h @@ -0,0 +1,23 @@ +#ifndef _ASM_IA64_BREAK_H +#define _ASM_IA64_BREAK_H + +/* + * IA-64 Linux break numbers. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang + */ + +/* + * OS-specific debug break numbers: + */ +#define __IA64_BREAK_KDB 0x80100 +#define __IA64_BREAK_KPROBE 0x81000 /* ..
0x81fff */ +#define __IA64_BREAK_JPROBE 0x82000 + +/* + * OS-specific break numbers: + */ +#define __IA64_BREAK_SYSCALL 0x100000 + +#endif /* _ASM_IA64_BREAK_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/byteorder.h b/kernel/arch/ia64/include/uapi/asm/byteorder.h new file mode 100644 index 000000000..a8dd73558 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/byteorder.h @@ -0,0 +1,6 @@ +#ifndef _ASM_IA64_BYTEORDER_H +#define _ASM_IA64_BYTEORDER_H + +#include <linux/byteorder/little_endian.h> + +#endif /* _ASM_IA64_BYTEORDER_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/cmpxchg.h b/kernel/arch/ia64/include/uapi/asm/cmpxchg.h new file mode 100644 index 000000000..f35109b1d --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/cmpxchg.h @@ -0,0 +1,156 @@ +#ifndef _ASM_IA64_CMPXCHG_H +#define _ASM_IA64_CMPXCHG_H + +/* + * Compare/Exchange, forked from asm/intrinsics.h + * which was: + * + * Copyright (C) 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> +/* include compiler specific intrinsics */ +#include <asm/ia64regs.h> +#ifdef __INTEL_COMPILER +# include <asm/intel_intrin.h> +#else +# include <asm/gcc_intrin.h> +#endif + +/* + * This function doesn't exist, so you'll get a linker error if + * something tries to do an invalid xchg(). + */ +extern void ia64_xchg_called_with_bad_pointer(void); + +#define __xchg(x, ptr, size) \ +({ \ + unsigned long __xchg_result; \ + \ + switch (size) { \ + case 1: \ + __xchg_result = ia64_xchg1((__u8 *)ptr, x); \ + break; \ + \ + case 2: \ + __xchg_result = ia64_xchg2((__u16 *)ptr, x); \ + break; \ + \ + case 4: \ + __xchg_result = ia64_xchg4((__u32 *)ptr, x); \ + break; \ + \ + case 8: \ + __xchg_result = ia64_xchg8((__u64 *)ptr, x); \ + break; \ + default: \ + ia64_xchg_called_with_bad_pointer(); \ + } \ + __xchg_result; \ +}) + +#define xchg(ptr, x) \ +((__typeof__(*(ptr))) __xchg((unsigned long) (x), (ptr), sizeof(*(ptr)))) + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM.
Success is + * indicated by comparing RETURN with OLD. + */ + +#define __HAVE_ARCH_CMPXCHG 1 + +/* + * This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid cmpxchg(). + */ +extern long ia64_cmpxchg_called_with_bad_pointer(void); + +#define ia64_cmpxchg(sem, ptr, old, new, size) \ +({ \ + __u64 _o_, _r_; \ + \ + switch (size) { \ + case 1: \ + _o_ = (__u8) (long) (old); \ + break; \ + case 2: \ + _o_ = (__u16) (long) (old); \ + break; \ + case 4: \ + _o_ = (__u32) (long) (old); \ + break; \ + case 8: \ + _o_ = (__u64) (long) (old); \ + break; \ + default: \ + break; \ + } \ + switch (size) { \ + case 1: \ + _r_ = ia64_cmpxchg1_##sem((__u8 *) ptr, new, _o_); \ + break; \ + \ + case 2: \ + _r_ = ia64_cmpxchg2_##sem((__u16 *) ptr, new, _o_); \ + break; \ + \ + case 4: \ + _r_ = ia64_cmpxchg4_##sem((__u32 *) ptr, new, _o_); \ + break; \ + \ + case 8: \ + _r_ = ia64_cmpxchg8_##sem((__u64 *) ptr, new, _o_); \ + break; \ + \ + default: \ + _r_ = ia64_cmpxchg_called_with_bad_pointer(); \ + break; \ + } \ + (__typeof__(old)) _r_; \ +}) + +#define cmpxchg_acq(ptr, o, n) \ + ia64_cmpxchg(acq, (ptr), (o), (n), sizeof(*(ptr))) +#define cmpxchg_rel(ptr, o, n) \ + ia64_cmpxchg(rel, (ptr), (o), (n), sizeof(*(ptr))) + +/* + * Worse still - early processor implementations actually just ignored + * the acquire/release and did a full fence all the time. Unfortunately + * this meant a lot of badly written code that used .acq when they really + * wanted .rel became legacy out in the wild - so when we made a cpu + * that strictly did the .acq or .rel ... 
all that code started breaking - so + * we had to back-pedal and keep the "legacy" behavior of a full fence :-( + */ + +/* for compatibility with other platforms: */ +#define cmpxchg(ptr, o, n) cmpxchg_acq((ptr), (o), (n)) +#define cmpxchg64(ptr, o, n) cmpxchg_acq((ptr), (o), (n)) + +#define cmpxchg_local cmpxchg +#define cmpxchg64_local cmpxchg64 + +#ifdef CONFIG_IA64_DEBUG_CMPXCHG +# define CMPXCHG_BUGCHECK_DECL int _cmpxchg_bugcheck_count = 128; +# define CMPXCHG_BUGCHECK(v) \ +do { \ + if (_cmpxchg_bugcheck_count-- <= 0) { \ + void *ip; \ + extern int printk(const char *fmt, ...); \ + ip = (void *) ia64_getreg(_IA64_REG_IP); \ + printk("CMPXCHG_BUGCHECK: stuck at %p on word %p\n", ip, (v));\ + break; \ + } \ +} while (0) +#else /* !CONFIG_IA64_DEBUG_CMPXCHG */ +# define CMPXCHG_BUGCHECK_DECL +# define CMPXCHG_BUGCHECK(v) +#endif /* !CONFIG_IA64_DEBUG_CMPXCHG */ + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_IA64_CMPXCHG_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/errno.h b/kernel/arch/ia64/include/uapi/asm/errno.h new file mode 100644 index 000000000..4c82b503d --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/errno.h @@ -0,0 +1 @@ +#include <asm-generic/errno.h> diff --git a/kernel/arch/ia64/include/uapi/asm/fcntl.h b/kernel/arch/ia64/include/uapi/asm/fcntl.h new file mode 100644 index 000000000..7b485876c --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/fcntl.h @@ -0,0 +1,14 @@ +#ifndef _ASM_IA64_FCNTL_H +#define _ASM_IA64_FCNTL_H +/* + * Modified 1998-2000 + * David Mosberger-Tang, Hewlett-Packard Co.
+ */ + +#define force_o_largefile() \ + (personality(current->personality) != PER_LINUX32) + +#include <linux/personality.h> +#include <asm-generic/fcntl.h> + +#endif /* _ASM_IA64_FCNTL_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/fpu.h b/kernel/arch/ia64/include/uapi/asm/fpu.h new file mode 100644 index 000000000..b6395ad15 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/fpu.h @@ -0,0 +1,66 @@ +#ifndef _ASM_IA64_FPU_H +#define _ASM_IA64_FPU_H + +/* + * Copyright (C) 1998, 1999, 2002, 2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include <linux/types.h> + +/* floating point status register: */ +#define FPSR_TRAP_VD (1 << 0) /* invalid op trap disabled */ +#define FPSR_TRAP_DD (1 << 1) /* denormal trap disabled */ +#define FPSR_TRAP_ZD (1 << 2) /* zero-divide trap disabled */ +#define FPSR_TRAP_OD (1 << 3) /* overflow trap disabled */ +#define FPSR_TRAP_UD (1 << 4) /* underflow trap disabled */ +#define FPSR_TRAP_ID (1 << 5) /* inexact trap disabled */ +#define FPSR_S0(x) ((x) << 6) +#define FPSR_S1(x) ((x) << 19) +#define FPSR_S2(x) (__IA64_UL(x) << 32) +#define FPSR_S3(x) (__IA64_UL(x) << 45) + +/* floating-point status field controls: */ +#define FPSF_FTZ (1 << 0) /* flush-to-zero */ +#define FPSF_WRE (1 << 1) /* widest-range exponent */ +#define FPSF_PC(x) (((x) & 0x3) << 2) /* precision control */ +#define FPSF_RC(x) (((x) & 0x3) << 4) /* rounding control */ +#define FPSF_TD (1 << 6) /* trap disabled */ + +/* floating-point status field flags: */ +#define FPSF_V (1 << 7) /* invalid operation flag */ +#define FPSF_D (1 << 8) /* denormal/unnormal operand flag */ +#define FPSF_Z (1 << 9) /* zero divide (IEEE) flag */ +#define FPSF_O (1 << 10) /* overflow (IEEE) flag */ +#define FPSF_U (1 << 11) /* underflow (IEEE) flag */ +#define FPSF_I (1 << 12) /* inexact (IEEE) flag */ + +/* floating-point rounding control: */ +#define FPRC_NEAREST 0x0 +#define FPRC_NEGINF 0x1 +#define FPRC_POSINF 0x2 +#define FPRC_TRUNC 0x3 + +#define FPSF_DEFAULT (FPSF_PC (0x3) | FPSF_RC (FPRC_NEAREST)) + +/* This default
value is the same as HP-UX uses. Don't change it + without a very good reason. */ +#define FPSR_DEFAULT (FPSR_TRAP_VD | FPSR_TRAP_DD | FPSR_TRAP_ZD \ + | FPSR_TRAP_OD | FPSR_TRAP_UD | FPSR_TRAP_ID \ + | FPSR_S0 (FPSF_DEFAULT) \ + | FPSR_S1 (FPSF_DEFAULT | FPSF_TD | FPSF_WRE) \ + | FPSR_S2 (FPSF_DEFAULT | FPSF_TD) \ + | FPSR_S3 (FPSF_DEFAULT | FPSF_TD)) + +# ifndef __ASSEMBLY__ + +struct ia64_fpreg { + union { + unsigned long bits[2]; + long double __dummy; /* force 16-byte alignment */ + } u; +}; + +# endif /* __ASSEMBLY__ */ + +#endif /* _ASM_IA64_FPU_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/gcc_intrin.h b/kernel/arch/ia64/include/uapi/asm/gcc_intrin.h new file mode 100644 index 000000000..61d0d0111 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/gcc_intrin.h @@ -0,0 +1,618 @@ +/* + * + * Copyright (C) 2002,2003 Jun Nakajima + * Copyright (C) 2002,2003 Suresh Siddha + */ +#ifndef _UAPI_ASM_IA64_GCC_INTRIN_H +#define _UAPI_ASM_IA64_GCC_INTRIN_H + +#include <linux/types.h> +#include <linux/compiler.h> + +/* define this macro to get some asm stmts included in 'c' files */ +#define ASM_SUPPORTED + +/* Optimization barrier */ +/* The "volatile" is due to gcc bugs */ +#define ia64_barrier() asm volatile ("":::"memory") + +#define ia64_stop() asm volatile (";;"::) + +#define ia64_invala_gr(regnum) asm volatile ("invala.e r%0" :: "i"(regnum)) + +#define ia64_invala_fr(regnum) asm volatile ("invala.e f%0" :: "i"(regnum)) + +#define ia64_flushrs() asm volatile ("flushrs;;":::"memory") + +#define ia64_loadrs() asm volatile ("loadrs;;":::"memory") + +extern void ia64_bad_param_for_setreg (void); +extern void ia64_bad_param_for_getreg (void); + + +#define ia64_native_setreg(regnum, val) \ +({ \ + switch (regnum) { \ + case _IA64_REG_PSR_L: \ + asm volatile ("mov psr.l=%0" :: "r"(val) : "memory"); \ + break; \ + case _IA64_REG_AR_KR0 ... _IA64_REG_AR_EC: \ + asm volatile ("mov ar%0=%1" :: \ + "i" (regnum - _IA64_REG_AR_KR0), \ + "r"(val): "memory"); \ + break; \ + case _IA64_REG_CR_DCR ...
_IA64_REG_CR_LRR1: \ + asm volatile ("mov cr%0=%1" :: \ + "i" (regnum - _IA64_REG_CR_DCR), \ + "r"(val): "memory" ); \ + break; \ + case _IA64_REG_SP: \ + asm volatile ("mov r12=%0" :: \ + "r"(val): "memory"); \ + break; \ + case _IA64_REG_GP: \ + asm volatile ("mov gp=%0" :: "r"(val) : "memory"); \ + break; \ + default: \ + ia64_bad_param_for_setreg(); \ + break; \ + } \ +}) + +#define ia64_native_getreg(regnum) \ +({ \ + __u64 ia64_intri_res; \ + \ + switch (regnum) { \ + case _IA64_REG_GP: \ + asm volatile ("mov %0=gp" : "=r"(ia64_intri_res)); \ + break; \ + case _IA64_REG_IP: \ + asm volatile ("mov %0=ip" : "=r"(ia64_intri_res)); \ + break; \ + case _IA64_REG_PSR: \ + asm volatile ("mov %0=psr" : "=r"(ia64_intri_res)); \ + break; \ + case _IA64_REG_TP: /* for current() */ \ + ia64_intri_res = ia64_r13; \ + break; \ + case _IA64_REG_AR_KR0 ... _IA64_REG_AR_EC: \ + asm volatile ("mov %0=ar%1" : "=r" (ia64_intri_res) \ + : "i"(regnum - _IA64_REG_AR_KR0)); \ + break; \ + case _IA64_REG_CR_DCR ... 
_IA64_REG_CR_LRR1: \ + asm volatile ("mov %0=cr%1" : "=r" (ia64_intri_res) \ + : "i" (regnum - _IA64_REG_CR_DCR)); \ + break; \ + case _IA64_REG_SP: \ + asm volatile ("mov %0=sp" : "=r" (ia64_intri_res)); \ + break; \ + default: \ + ia64_bad_param_for_getreg(); \ + break; \ + } \ + ia64_intri_res; \ +}) + +#define ia64_hint_pause 0 + +#define ia64_hint(mode) \ +({ \ + switch (mode) { \ + case ia64_hint_pause: \ + asm volatile ("hint @pause" ::: "memory"); \ + break; \ + } \ +}) + + +/* Integer values for mux1 instruction */ +#define ia64_mux1_brcst 0 +#define ia64_mux1_mix 8 +#define ia64_mux1_shuf 9 +#define ia64_mux1_alt 10 +#define ia64_mux1_rev 11 + +#define ia64_mux1(x, mode) \ +({ \ + __u64 ia64_intri_res; \ + \ + switch (mode) { \ + case ia64_mux1_brcst: \ + asm ("mux1 %0=%1,@brcst" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + case ia64_mux1_mix: \ + asm ("mux1 %0=%1,@mix" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + case ia64_mux1_shuf: \ + asm ("mux1 %0=%1,@shuf" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + case ia64_mux1_alt: \ + asm ("mux1 %0=%1,@alt" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + case ia64_mux1_rev: \ + asm ("mux1 %0=%1,@rev" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + } \ + ia64_intri_res; \ +}) + +#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +# define ia64_popcnt(x) __builtin_popcountl(x) +#else +# define ia64_popcnt(x) \ + ({ \ + __u64 ia64_intri_res; \ + asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \ + \ + ia64_intri_res; \ + }) +#endif + +#define ia64_getf_exp(x) \ +({ \ + long ia64_intri_res; \ + \ + asm ("getf.exp %0=%1" : "=r"(ia64_intri_res) : "f"(x)); \ + \ + ia64_intri_res; \ +}) + +#define ia64_shrp(a, b, count) \ +({ \ + __u64 ia64_intri_res; \ + asm ("shrp %0=%1,%2,%3" : "=r"(ia64_intri_res) : "r"(a), "r"(b), "i"(count)); \ + ia64_intri_res; \ +}) + +#define ia64_ldfs(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldfs %0=[%1]" 
:"=f"(__f__): "r"(x)); \ +}) + +#define ia64_ldfd(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldfd %0=[%1]" :"=f"(__f__): "r"(x)); \ +}) + +#define ia64_ldfe(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldfe %0=[%1]" :"=f"(__f__): "r"(x)); \ +}) + +#define ia64_ldf8(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldf8 %0=[%1]" :"=f"(__f__): "r"(x)); \ +}) + +#define ia64_ldf_fill(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldf.fill %0=[%1]" :"=f"(__f__): "r"(x)); \ +}) + +#define ia64_st4_rel_nta(m, val) \ +({ \ + asm volatile ("st4.rel.nta [%0] = %1\n\t" :: "r"(m), "r"(val)); \ +}) + +#define ia64_stfs(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stfs [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_stfd(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stfd [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_stfe(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stfe [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_stf8(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stf8 [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_stf_spill(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stf.spill [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_fetchadd4_acq(p, inc) \ +({ \ + \ + __u64 ia64_intri_res; \ + asm volatile ("fetchadd4.acq %0=[%1],%2" \ + : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ + : "memory"); \ + \ + ia64_intri_res; \ +}) + +#define ia64_fetchadd4_rel(p, inc) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("fetchadd4.rel %0=[%1],%2" \ + : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ + : "memory"); \ + \ + ia64_intri_res; \ +}) + +#define ia64_fetchadd8_acq(p, inc) \ +({ \ + \ + __u64 
ia64_intri_res; \ + asm volatile ("fetchadd8.acq %0=[%1],%2" \ + : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ + : "memory"); \ + \ + ia64_intri_res; \ +}) + +#define ia64_fetchadd8_rel(p, inc) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("fetchadd8.rel %0=[%1],%2" \ + : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ + : "memory"); \ + \ + ia64_intri_res; \ +}) + +#define ia64_xchg1(ptr,x) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("xchg1 %0=[%1],%2" \ + : "=r" (ia64_intri_res) : "r" (ptr), "r" (x) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_xchg2(ptr,x) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("xchg2 %0=[%1],%2" : "=r" (ia64_intri_res) \ + : "r" (ptr), "r" (x) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_xchg4(ptr,x) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("xchg4 %0=[%1],%2" : "=r" (ia64_intri_res) \ + : "r" (ptr), "r" (x) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_xchg8(ptr,x) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("xchg8 %0=[%1],%2" : "=r" (ia64_intri_res) \ + : "r" (ptr), "r" (x) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg1_acq(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg1.acq %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg1_rel(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg1.rel %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg2_acq(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg2.acq %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg2_rel(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov 
ar.ccv=%0;;" :: "rO"(old)); \ + \ + asm volatile ("cmpxchg2.rel %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg4_acq(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg4.acq %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg4_rel(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg4.rel %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg8_acq(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg8.acq %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg8_rel(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + \ + asm volatile ("cmpxchg8.rel %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_mf() asm volatile ("mf" ::: "memory") +#define ia64_mfa() asm volatile ("mf.a" ::: "memory") + +#define ia64_invala() asm volatile ("invala" ::: "memory") + +#define ia64_native_thash(addr) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("thash %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \ + ia64_intri_res; \ +}) + +#define ia64_srlz_i() asm volatile (";; srlz.i ;;" ::: "memory") +#define ia64_srlz_d() asm volatile (";; srlz.d" ::: "memory") /* no trailing ';': the caller terminates the statement, as with ia64_srlz_i() */ + +#ifdef HAVE_SERIALIZE_DIRECTIVE /* both variants expand without a ';' so "if (c) m(); else ..." stays valid */ +# define ia64_dv_serialize_data() asm volatile (".serialize.data") +# define ia64_dv_serialize_instruction() asm volatile (".serialize.instruction") +#else +# define ia64_dv_serialize_data() +# define ia64_dv_serialize_instruction() +#endif + +#define
ia64_nop(x) asm volatile ("nop %0"::"i"(x)) /* no trailing ';': the caller terminates the statement */ + +#define ia64_itci(addr) asm volatile ("itc.i %0;;" :: "r"(addr) : "memory") + +#define ia64_itcd(addr) asm volatile ("itc.d %0;;" :: "r"(addr) : "memory") + + +#define ia64_itri(trnum, addr) asm volatile ("itr.i itr[%0]=%1" \ + :: "r"(trnum), "r"(addr) : "memory") + +#define ia64_itrd(trnum, addr) asm volatile ("itr.d dtr[%0]=%1" \ + :: "r"(trnum), "r"(addr) : "memory") + +#define ia64_tpa(addr) \ +({ \ + unsigned long ia64_pa; \ + asm volatile ("tpa %0 = %1" : "=r"(ia64_pa) : "r"(addr) : "memory"); \ + ia64_pa; \ +}) + +#define __ia64_set_dbr(index, val) \ + asm volatile ("mov dbr[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_set_ibr(index, val) \ + asm volatile ("mov ibr[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_set_pkr(index, val) \ + asm volatile ("mov pkr[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_set_pmc(index, val) \ + asm volatile ("mov pmc[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_set_pmd(index, val) \ + asm volatile ("mov pmd[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_native_set_rr(index, val) \ + asm volatile ("mov rr[%0]=%1" :: "r"(index), "r"(val) : "memory") /* no trailing ';', matching the ia64_set_* macros above */ + +#define ia64_native_get_cpuid(index) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("mov %0=cpuid[%r1]" : "=r"(ia64_intri_res) : "rO"(index)); \ + ia64_intri_res; \ +}) + +#define __ia64_get_dbr(index) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("mov %0=dbr[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ + ia64_intri_res; \ +}) + +#define ia64_get_ibr(index) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("mov %0=ibr[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ + ia64_intri_res; \ +}) + +#define ia64_get_pkr(index) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("mov %0=pkr[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ + ia64_intri_res; \ +}) + +#define ia64_get_pmc(index) \ +({ \ + unsigned long ia64_intri_res; \ + asm
volatile ("mov %0=pmc[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ + ia64_intri_res; \ +}) + + +#define ia64_native_get_pmd(index) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("mov %0=pmd[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ + ia64_intri_res; \ +}) + +#define ia64_native_get_rr(index) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("mov %0=rr[%1]" : "=r"(ia64_intri_res) : "r" (index)); \ + ia64_intri_res; \ +}) + +#define ia64_native_fc(addr) asm volatile ("fc %0" :: "r"(addr) : "memory") + + +#define ia64_sync_i() asm volatile (";; sync.i" ::: "memory") + +#define ia64_native_ssm(mask) asm volatile ("ssm %0":: "i"((mask)) : "memory") +#define ia64_native_rsm(mask) asm volatile ("rsm %0":: "i"((mask)) : "memory") +#define ia64_sum(mask) asm volatile ("sum %0":: "i"((mask)) : "memory") +#define ia64_rum(mask) asm volatile ("rum %0":: "i"((mask)) : "memory") + +#define ia64_ptce(addr) asm volatile ("ptc.e %0" :: "r"(addr)) + +#define ia64_native_ptcga(addr, size) \ +do { \ + asm volatile ("ptc.ga %0,%1" :: "r"(addr), "r"(size) : "memory"); \ + ia64_dv_serialize_data(); \ +} while (0) + +#define ia64_ptcl(addr, size) \ +do { \ + asm volatile ("ptc.l %0,%1" :: "r"(addr), "r"(size) : "memory"); \ + ia64_dv_serialize_data(); \ +} while (0) + +#define ia64_ptri(addr, size) \ + asm volatile ("ptr.i %0,%1" :: "r"(addr), "r"(size) : "memory") + +#define ia64_ptrd(addr, size) \ + asm volatile ("ptr.d %0,%1" :: "r"(addr), "r"(size) : "memory") + +#define ia64_ttag(addr) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("ttag %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \ + ia64_intri_res; \ +}) + + +/* Values for lfhint in ia64_lfetch and ia64_lfetch_fault */ + +#define ia64_lfhint_none 0 +#define ia64_lfhint_nt1 1 +#define ia64_lfhint_nt2 2 +#define ia64_lfhint_nta 3 + +#define ia64_lfetch(lfhint, y) \ +({ \ + switch (lfhint) { \ + case ia64_lfhint_none: \ + asm volatile ("lfetch [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nt1: \ + asm 
volatile ("lfetch.nt1 [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nt2: \ + asm volatile ("lfetch.nt2 [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nta: \ + asm volatile ("lfetch.nta [%0]" : : "r"(y)); \ + break; \ + } \ +}) + +#define ia64_lfetch_excl(lfhint, y) \ +({ \ + switch (lfhint) { \ + case ia64_lfhint_none: \ + asm volatile ("lfetch.excl [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nt1: \ + asm volatile ("lfetch.excl.nt1 [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nt2: \ + asm volatile ("lfetch.excl.nt2 [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nta: \ + asm volatile ("lfetch.excl.nta [%0]" :: "r"(y)); \ + break; \ + } \ +}) + +#define ia64_lfetch_fault(lfhint, y) \ +({ \ + switch (lfhint) { \ + case ia64_lfhint_none: \ + asm volatile ("lfetch.fault [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nt1: \ + asm volatile ("lfetch.fault.nt1 [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nt2: \ + asm volatile ("lfetch.fault.nt2 [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nta: \ + asm volatile ("lfetch.fault.nta [%0]" : : "r"(y)); \ + break; \ + } \ +}) + +#define ia64_lfetch_fault_excl(lfhint, y) \ +({ \ + switch (lfhint) { \ + case ia64_lfhint_none: \ + asm volatile ("lfetch.fault.excl [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nt1: \ + asm volatile ("lfetch.fault.excl.nt1 [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nt2: \ + asm volatile ("lfetch.fault.excl.nt2 [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nta: \ + asm volatile ("lfetch.fault.excl.nta [%0]" :: "r"(y)); \ + break; \ + } \ +}) + +#define ia64_native_intrin_local_irq_restore(x) \ +do { \ + asm volatile (";; cmp.ne p6,p7=%0,r0;;" \ + "(p6) ssm psr.i;" \ + "(p7) rsm psr.i;;" \ + "(p6) srlz.d" \ + :: "r"((x)) : "p6", "p7", "memory"); \ +} while (0) + +#endif /* _UAPI_ASM_IA64_GCC_INTRIN_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/ia64regs.h b/kernel/arch/ia64/include/uapi/asm/ia64regs.h new file mode 100644 index 
000000000..1757f1c11 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/ia64regs.h @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2002,2003 Intel Corp. + * Jun Nakajima + * Suresh Siddha + */ + +#ifndef _ASM_IA64_IA64REGS_H +#define _ASM_IA64_IA64REGS_H + +/* + * Register Names for getreg() and setreg(). + * + * The "magic" numbers happen to match the values used by the Intel compiler's + * getreg()/setreg() intrinsics. + */ + +/* Special Registers */ + +#define _IA64_REG_IP 1016 /* getreg only */ +#define _IA64_REG_PSR 1019 +#define _IA64_REG_PSR_L 1019 + +/* General Integer Registers */ + +#define _IA64_REG_GP 1025 /* R1 */ +#define _IA64_REG_R8 1032 /* R8 */ +#define _IA64_REG_R9 1033 /* R9 */ +#define _IA64_REG_SP 1036 /* R12 */ +#define _IA64_REG_TP 1037 /* R13 */ + +/* Application Registers */ + +#define _IA64_REG_AR_KR0 3072 +#define _IA64_REG_AR_KR1 3073 +#define _IA64_REG_AR_KR2 3074 +#define _IA64_REG_AR_KR3 3075 +#define _IA64_REG_AR_KR4 3076 +#define _IA64_REG_AR_KR5 3077 +#define _IA64_REG_AR_KR6 3078 +#define _IA64_REG_AR_KR7 3079 +#define _IA64_REG_AR_RSC 3088 +#define _IA64_REG_AR_BSP 3089 +#define _IA64_REG_AR_BSPSTORE 3090 +#define _IA64_REG_AR_RNAT 3091 +#define _IA64_REG_AR_FCR 3093 +#define _IA64_REG_AR_EFLAG 3096 +#define _IA64_REG_AR_CSD 3097 +#define _IA64_REG_AR_SSD 3098 +#define _IA64_REG_AR_CFLAG 3099 +#define _IA64_REG_AR_FSR 3100 +#define _IA64_REG_AR_FIR 3101 +#define _IA64_REG_AR_FDR 3102 +#define _IA64_REG_AR_CCV 3104 +#define _IA64_REG_AR_UNAT 3108 +#define _IA64_REG_AR_FPSR 3112 +#define _IA64_REG_AR_ITC 3116 +#define _IA64_REG_AR_PFS 3136 +#define _IA64_REG_AR_LC 3137 +#define _IA64_REG_AR_EC 3138 + +/* Control Registers */ + +#define _IA64_REG_CR_DCR 4096 +#define _IA64_REG_CR_ITM 4097 +#define _IA64_REG_CR_IVA 4098 +#define _IA64_REG_CR_PTA 4104 +#define _IA64_REG_CR_IPSR 4112 +#define _IA64_REG_CR_ISR 4113 +#define _IA64_REG_CR_IIP 4115 +#define _IA64_REG_CR_IFA 4116 +#define _IA64_REG_CR_ITIR 4117 +#define _IA64_REG_CR_IIPA 4118 
+#define _IA64_REG_CR_IFS 4119 +#define _IA64_REG_CR_IIM 4120 +#define _IA64_REG_CR_IHA 4121 +#define _IA64_REG_CR_LID 4160 +#define _IA64_REG_CR_IVR 4161 /* getreg only */ +#define _IA64_REG_CR_TPR 4162 +#define _IA64_REG_CR_EOI 4163 +#define _IA64_REG_CR_IRR0 4164 /* getreg only */ +#define _IA64_REG_CR_IRR1 4165 /* getreg only */ +#define _IA64_REG_CR_IRR2 4166 /* getreg only */ +#define _IA64_REG_CR_IRR3 4167 /* getreg only */ +#define _IA64_REG_CR_ITV 4168 +#define _IA64_REG_CR_PMV 4169 +#define _IA64_REG_CR_CMCV 4170 +#define _IA64_REG_CR_LRR0 4176 +#define _IA64_REG_CR_LRR1 4177 + +/* Indirect Registers for getindreg() and setindreg() */ + +#define _IA64_REG_INDR_CPUID 9000 /* getindreg only */ +#define _IA64_REG_INDR_DBR 9001 +#define _IA64_REG_INDR_IBR 9002 +#define _IA64_REG_INDR_PKR 9003 +#define _IA64_REG_INDR_PMC 9004 +#define _IA64_REG_INDR_PMD 9005 +#define _IA64_REG_INDR_RR 9006 + +#endif /* _ASM_IA64_IA64REGS_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/intel_intrin.h b/kernel/arch/ia64/include/uapi/asm/intel_intrin.h new file mode 100644 index 000000000..53cec5775 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/intel_intrin.h @@ -0,0 +1,161 @@ +#ifndef _ASM_IA64_INTEL_INTRIN_H +#define _ASM_IA64_INTEL_INTRIN_H +/* + * Intel Compiler Intrinsics + * + * Copyright (C) 2002,2003 Jun Nakajima + * Copyright (C) 2002,2003 Suresh Siddha + * Copyright (C) 2005,2006 Hongjiu Lu + * + */ +#include <ia64intrin.h> + +#define ia64_barrier() __memory_barrier() + +#define ia64_stop() /* Nothing: As of now stop bit is generated for each + * intrinsic + */ + +#define ia64_native_getreg __getReg +#define ia64_native_setreg __setReg + +#define ia64_hint __hint +#define ia64_hint_pause __hint_pause + +#define ia64_mux1_brcst _m64_mux1_brcst +#define ia64_mux1_mix _m64_mux1_mix +#define ia64_mux1_shuf _m64_mux1_shuf +#define ia64_mux1_alt _m64_mux1_alt +#define ia64_mux1_rev _m64_mux1_rev + +#define ia64_mux1(x,v) _m_to_int64(_m64_mux1(_m_from_int64(x), (v))) +#define
ia64_popcnt _m64_popcnt +#define ia64_getf_exp __getf_exp +#define ia64_shrp _m64_shrp + +#define ia64_tpa __tpa +#define ia64_invala __invala +#define ia64_invala_gr __invala_gr +#define ia64_invala_fr __invala_fr +#define ia64_nop __nop +#define ia64_sum __sum +#define ia64_native_ssm __ssm +#define ia64_rum __rum +#define ia64_native_rsm __rsm +#define ia64_native_fc __fc + +#define ia64_ldfs __ldfs +#define ia64_ldfd __ldfd +#define ia64_ldfe __ldfe +#define ia64_ldf8 __ldf8 +#define ia64_ldf_fill __ldf_fill + +#define ia64_stfs __stfs +#define ia64_stfd __stfd +#define ia64_stfe __stfe +#define ia64_stf8 __stf8 +#define ia64_stf_spill __stf_spill + +#define ia64_mf __mf +#define ia64_mfa __mfa + +#define ia64_fetchadd4_acq __fetchadd4_acq +#define ia64_fetchadd4_rel __fetchadd4_rel +#define ia64_fetchadd8_acq __fetchadd8_acq +#define ia64_fetchadd8_rel __fetchadd8_rel + +#define ia64_xchg1 _InterlockedExchange8 +#define ia64_xchg2 _InterlockedExchange16 +#define ia64_xchg4 _InterlockedExchange +#define ia64_xchg8 _InterlockedExchange64 + +#define ia64_cmpxchg1_rel _InterlockedCompareExchange8_rel +#define ia64_cmpxchg1_acq _InterlockedCompareExchange8_acq +#define ia64_cmpxchg2_rel _InterlockedCompareExchange16_rel +#define ia64_cmpxchg2_acq _InterlockedCompareExchange16_acq +#define ia64_cmpxchg4_rel _InterlockedCompareExchange_rel +#define ia64_cmpxchg4_acq _InterlockedCompareExchange_acq +#define ia64_cmpxchg8_rel _InterlockedCompareExchange64_rel +#define ia64_cmpxchg8_acq _InterlockedCompareExchange64_acq + +#define __ia64_set_dbr(index, val) \ + __setIndReg(_IA64_REG_INDR_DBR, index, val) +#define ia64_set_ibr(index, val) \ + __setIndReg(_IA64_REG_INDR_IBR, index, val) +#define ia64_set_pkr(index, val) \ + __setIndReg(_IA64_REG_INDR_PKR, index, val) +#define ia64_set_pmc(index, val) \ + __setIndReg(_IA64_REG_INDR_PMC, index, val) +#define ia64_set_pmd(index, val) \ + __setIndReg(_IA64_REG_INDR_PMD, index, val) +#define ia64_native_set_rr(index, val) \ + 
__setIndReg(_IA64_REG_INDR_RR, index, val) + +#define ia64_native_get_cpuid(index) \ + __getIndReg(_IA64_REG_INDR_CPUID, index) +#define __ia64_get_dbr(index) __getIndReg(_IA64_REG_INDR_DBR, index) +#define ia64_get_ibr(index) __getIndReg(_IA64_REG_INDR_IBR, index) +#define ia64_get_pkr(index) __getIndReg(_IA64_REG_INDR_PKR, index) +#define ia64_get_pmc(index) __getIndReg(_IA64_REG_INDR_PMC, index) +#define ia64_native_get_pmd(index) __getIndReg(_IA64_REG_INDR_PMD, index) +#define ia64_native_get_rr(index) __getIndReg(_IA64_REG_INDR_RR, index) + +#define ia64_srlz_d __dsrlz +#define ia64_srlz_i __isrlz + +#define ia64_dv_serialize_data() +#define ia64_dv_serialize_instruction() + +#define ia64_st1_rel __st1_rel +#define ia64_st2_rel __st2_rel +#define ia64_st4_rel __st4_rel +#define ia64_st8_rel __st8_rel + +/* FIXME: need st4.rel.nta intrinsic */ +#define ia64_st4_rel_nta __st4_rel + +#define ia64_ld1_acq __ld1_acq +#define ia64_ld2_acq __ld2_acq +#define ia64_ld4_acq __ld4_acq +#define ia64_ld8_acq __ld8_acq + +#define ia64_sync_i __synci +#define ia64_native_thash __thash +#define ia64_native_ttag __ttag +#define ia64_itcd __itcd +#define ia64_itci __itci +#define ia64_itrd __itrd +#define ia64_itri __itri +#define ia64_ptce __ptce +#define ia64_ptcl __ptcl +#define ia64_native_ptcg __ptcg +#define ia64_native_ptcga __ptcga +#define ia64_ptri __ptri +#define ia64_ptrd __ptrd +#define ia64_dep_mi _m64_dep_mi + +/* Values for lfhint in __lfetch and __lfetch_fault */ + +#define ia64_lfhint_none __lfhint_none +#define ia64_lfhint_nt1 __lfhint_nt1 +#define ia64_lfhint_nt2 __lfhint_nt2 +#define ia64_lfhint_nta __lfhint_nta + +#define ia64_lfetch __lfetch +#define ia64_lfetch_excl __lfetch_excl +#define ia64_lfetch_fault __lfetch_fault +#define ia64_lfetch_fault_excl __lfetch_fault_excl + +#define ia64_native_intrin_local_irq_restore(x) \ +do { \ + if ((x) != 0) { \ + ia64_native_ssm(IA64_PSR_I); \ + ia64_srlz_d(); \ + } else { \ + ia64_native_rsm(IA64_PSR_I); \ + } \ 
+} while (0) + +#define __builtin_trap() __break(0); + +#endif /* _ASM_IA64_INTEL_INTRIN_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/intrinsics.h b/kernel/arch/ia64/include/uapi/asm/intrinsics.h new file mode 100644 index 000000000..5829978ff --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/intrinsics.h @@ -0,0 +1,124 @@ +/* + * Compiler-dependent intrinsics. + * + * Copyright (C) 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +#ifndef _UAPI_ASM_IA64_INTRINSICS_H +#define _UAPI_ASM_IA64_INTRINSICS_H + + +#ifndef __ASSEMBLY__ + +#include +/* include compiler specific intrinsics */ +#include +#ifdef __INTEL_COMPILER +# include +#else +# include +#endif +#include + +#define ia64_native_get_psr_i() (ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I) + +#define ia64_native_set_rr0_to_rr4(val0, val1, val2, val3, val4) \ +do { \ + ia64_native_set_rr(0x0000000000000000UL, (val0)); \ + ia64_native_set_rr(0x2000000000000000UL, (val1)); \ + ia64_native_set_rr(0x4000000000000000UL, (val2)); \ + ia64_native_set_rr(0x6000000000000000UL, (val3)); \ + ia64_native_set_rr(0x8000000000000000UL, (val4)); \ +} while (0) + +/* + * Force an unresolved reference if someone tries to use + * ia64_fetch_and_add() with a bad value. + */ +extern unsigned long __bad_size_for_ia64_fetch_and_add (void); +extern unsigned long __bad_increment_for_ia64_fetch_and_add (void); + +#define IA64_FETCHADD(tmp,v,n,sz,sem) \ +({ \ + switch (sz) { \ + case 4: \ + tmp = ia64_fetchadd4_##sem((unsigned int *) v, n); \ + break; \ + \ + case 8: \ + tmp = ia64_fetchadd8_##sem((unsigned long *) v, n); \ + break; \ + \ + default: \ + __bad_size_for_ia64_fetch_and_add(); \ + } \ +}) + +#define ia64_fetchadd(i,v,sem) \ +({ \ + __u64 _tmp; \ + volatile __typeof__(*(v)) *_v = (v); \ + /* Can't use a switch () here: gcc isn't always smart enough for that... 
*/ \ + if ((i) == -16) \ + IA64_FETCHADD(_tmp, _v, -16, sizeof(*(v)), sem); \ + else if ((i) == -8) \ + IA64_FETCHADD(_tmp, _v, -8, sizeof(*(v)), sem); \ + else if ((i) == -4) \ + IA64_FETCHADD(_tmp, _v, -4, sizeof(*(v)), sem); \ + else if ((i) == -1) \ + IA64_FETCHADD(_tmp, _v, -1, sizeof(*(v)), sem); \ + else if ((i) == 1) \ + IA64_FETCHADD(_tmp, _v, 1, sizeof(*(v)), sem); \ + else if ((i) == 4) \ + IA64_FETCHADD(_tmp, _v, 4, sizeof(*(v)), sem); \ + else if ((i) == 8) \ + IA64_FETCHADD(_tmp, _v, 8, sizeof(*(v)), sem); \ + else if ((i) == 16) \ + IA64_FETCHADD(_tmp, _v, 16, sizeof(*(v)), sem); \ + else \ + _tmp = __bad_increment_for_ia64_fetch_and_add(); \ + (__typeof__(*(v))) (_tmp); /* return old value */ \ +}) + +#define ia64_fetch_and_add(i,v) (ia64_fetchadd(i, v, rel) + (i)) /* return new value */ + +#endif + + +#ifndef __ASSEMBLY__ + +#define IA64_INTRINSIC_API(name) ia64_native_ ## name +#define IA64_INTRINSIC_MACRO(name) ia64_native_ ## name + + +/************************************************/ +/* Instructions paravirtualized for correctness */ +/************************************************/ +/* fc, thash, get_cpuid, get_pmd, get_eflags, set_eflags */ +/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag" + * is not currently used (though it may be in a long-format VHPT system!) 
+ */ +#define ia64_fc IA64_INTRINSIC_API(fc) +#define ia64_thash IA64_INTRINSIC_API(thash) +#define ia64_get_cpuid IA64_INTRINSIC_API(get_cpuid) +#define ia64_get_pmd IA64_INTRINSIC_API(get_pmd) + + +/************************************************/ +/* Instructions paravirtualized for performance */ +/************************************************/ +#define ia64_ssm IA64_INTRINSIC_MACRO(ssm) +#define ia64_rsm IA64_INTRINSIC_MACRO(rsm) +#define ia64_getreg IA64_INTRINSIC_MACRO(getreg) +#define ia64_setreg IA64_INTRINSIC_API(setreg) +#define ia64_set_rr IA64_INTRINSIC_API(set_rr) +#define ia64_get_rr IA64_INTRINSIC_API(get_rr) +#define ia64_ptcga IA64_INTRINSIC_API(ptcga) +#define ia64_get_psr_i IA64_INTRINSIC_API(get_psr_i) +#define ia64_intrin_local_irq_restore \ + IA64_INTRINSIC_API(intrin_local_irq_restore) +#define ia64_set_rr0_to_rr4 IA64_INTRINSIC_API(set_rr0_to_rr4) + +#endif /* !__ASSEMBLY__ */ + +#endif /* _UAPI_ASM_IA64_INTRINSICS_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/ioctl.h b/kernel/arch/ia64/include/uapi/asm/ioctl.h new file mode 100644 index 000000000..b279fe06d --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/ioctl.h @@ -0,0 +1 @@ +#include diff --git a/kernel/arch/ia64/include/uapi/asm/ioctls.h b/kernel/arch/ia64/include/uapi/asm/ioctls.h new file mode 100644 index 000000000..f3aab5512 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/ioctls.h @@ -0,0 +1,6 @@ +#ifndef _ASM_IA64_IOCTLS_H +#define _ASM_IA64_IOCTLS_H + +#include + +#endif /* _ASM_IA64_IOCTLS_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/ipcbuf.h b/kernel/arch/ia64/include/uapi/asm/ipcbuf.h new file mode 100644 index 000000000..84c7e51cb --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/ipcbuf.h @@ -0,0 +1 @@ +#include diff --git a/kernel/arch/ia64/include/uapi/asm/mman.h b/kernel/arch/ia64/include/uapi/asm/mman.h new file mode 100644 index 000000000..8740819ad --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/mman.h @@ -0,0 +1,16 @@ +/* + * Based 
on . + * + * Modified 1998-2000, 2002 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#ifndef _UAPI_ASM_IA64_MMAN_H +#define _UAPI_ASM_IA64_MMAN_H + + +#include + +#define MAP_GROWSUP 0x0200 /* register stack-like segment */ + + +#endif /* _UAPI_ASM_IA64_MMAN_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/msgbuf.h b/kernel/arch/ia64/include/uapi/asm/msgbuf.h new file mode 100644 index 000000000..6c64c0d2a --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/msgbuf.h @@ -0,0 +1,27 @@ +#ifndef _ASM_IA64_MSGBUF_H +#define _ASM_IA64_MSGBUF_H + +/* + * The msqid64_ds structure for IA-64 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 2 miscellaneous 64-bit values + */ + +struct msqid64_ds { + struct ipc64_perm msg_perm; + __kernel_time_t msg_stime; /* last msgsnd time */ + __kernel_time_t msg_rtime; /* last msgrcv time */ + __kernel_time_t msg_ctime; /* last change time */ + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* _ASM_IA64_MSGBUF_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/param.h b/kernel/arch/ia64/include/uapi/asm/param.h new file mode 100644 index 000000000..d7da41d94 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/param.h @@ -0,0 +1,29 @@ +/* + * Fundamental kernel parameters. + * + * Based on . 
+ * + * Modified 1998, 1999, 2002-2003 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#ifndef _UAPI_ASM_IA64_PARAM_H +#define _UAPI_ASM_IA64_PARAM_H + + +#define EXEC_PAGESIZE 65536 + +#ifndef NOGROUP +# define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 /* max length of hostname */ + +#ifndef __KERNEL__ + /* + * Technically, this is wrong, but some old apps still refer to it. The proper way to + * get the HZ value is via sysconf(_SC_CLK_TCK). + */ +# define HZ 1024 +#endif + +#endif /* _UAPI_ASM_IA64_PARAM_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/perfmon.h b/kernel/arch/ia64/include/uapi/asm/perfmon.h new file mode 100644 index 000000000..1a10a2dd5 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/perfmon.h @@ -0,0 +1,177 @@ +/* + * Copyright (C) 2001-2003 Hewlett-Packard Co + * Stephane Eranian + */ + +#ifndef _UAPI_ASM_IA64_PERFMON_H +#define _UAPI_ASM_IA64_PERFMON_H + +/* + * perfmon commands supported on all CPU models + */ +#define PFM_WRITE_PMCS 0x01 +#define PFM_WRITE_PMDS 0x02 +#define PFM_READ_PMDS 0x03 +#define PFM_STOP 0x04 +#define PFM_START 0x05 +#define PFM_ENABLE 0x06 /* obsolete */ +#define PFM_DISABLE 0x07 /* obsolete */ +#define PFM_CREATE_CONTEXT 0x08 +#define PFM_DESTROY_CONTEXT 0x09 /* obsolete use close() */ +#define PFM_RESTART 0x0a +#define PFM_PROTECT_CONTEXT 0x0b /* obsolete */ +#define PFM_GET_FEATURES 0x0c +#define PFM_DEBUG 0x0d +#define PFM_UNPROTECT_CONTEXT 0x0e /* obsolete */ +#define PFM_GET_PMC_RESET_VAL 0x0f +#define PFM_LOAD_CONTEXT 0x10 +#define PFM_UNLOAD_CONTEXT 0x11 + +/* + * PMU model specific commands (may not be supported on all PMU models) + */ +#define PFM_WRITE_IBRS 0x20 +#define PFM_WRITE_DBRS 0x21 + +/* + * context flags + */ +#define PFM_FL_NOTIFY_BLOCK 0x01 /* block task on user level notifications */ +#define PFM_FL_SYSTEM_WIDE 0x02 /* create a system wide context */ +#define PFM_FL_OVFL_NO_MSG 0x80 /* do not post overflow/end messages for notification */ + +/* + * event set flags + */ 
+#define PFM_SETFL_EXCL_IDLE 0x01 /* exclude idle task (syswide only) XXX: DO NOT USE YET */ + +/* + * PMC flags + */ +#define PFM_REGFL_OVFL_NOTIFY 0x1 /* send notification on overflow */ +#define PFM_REGFL_RANDOM 0x2 /* randomize sampling interval */ + +/* + * PMD/PMC/IBR/DBR return flags (ignored on input) + * + * Those flags are used on output and must be checked in case EAGAIN is returned + * by any of the calls using a pfarg_reg_t or pfarg_dbreg_t structure. + */ +#define PFM_REG_RETFL_NOTAVAIL (1UL<<31) /* set if register is implemented but not available */ +#define PFM_REG_RETFL_EINVAL (1UL<<30) /* set if register entry is invalid */ +#define PFM_REG_RETFL_MASK (PFM_REG_RETFL_NOTAVAIL|PFM_REG_RETFL_EINVAL) + +#define PFM_REG_HAS_ERROR(flag) (((flag) & PFM_REG_RETFL_MASK) != 0) + +typedef unsigned char pfm_uuid_t[16]; /* custom sampling buffer identifier type */ + +/* + * Request structure used to define a context + */ +typedef struct { + pfm_uuid_t ctx_smpl_buf_id; /* which buffer format to use (if needed) */ + unsigned long ctx_flags; /* noblock/block */ + unsigned short ctx_nextra_sets; /* number of extra event sets (you always get 1) */ + unsigned short ctx_reserved1; /* for future use */ + int ctx_fd; /* return arg: unique identification for context */ + void *ctx_smpl_vaddr; /* return arg: virtual address of sampling buffer, is used */ + unsigned long ctx_reserved2[11];/* for future use */ +} pfarg_context_t; + +/* + * Request structure used to write/read a PMC or PMD + */ +typedef struct { + unsigned int reg_num; /* which register */ + unsigned short reg_set; /* event set for this register */ + unsigned short reg_reserved1; /* for future use */ + + unsigned long reg_value; /* initial pmc/pmd value */ + unsigned long reg_flags; /* input: pmc/pmd flags, return: reg error */ + + unsigned long reg_long_reset; /* reset after buffer overflow notification */ + unsigned long reg_short_reset; /* reset after counter overflow */ + + unsigned long 
reg_reset_pmds[4]; /* which other counters to reset on overflow */ + unsigned long reg_random_seed; /* seed value when randomization is used */ + unsigned long reg_random_mask; /* bitmask used to limit random value */ + unsigned long reg_last_reset_val;/* return: PMD last reset value */ + + unsigned long reg_smpl_pmds[4]; /* which pmds are accessed when PMC overflows */ + unsigned long reg_smpl_eventid; /* opaque sampling event identifier */ + + unsigned long reg_reserved2[3]; /* for future use */ +} pfarg_reg_t; + +typedef struct { + unsigned int dbreg_num; /* which debug register */ + unsigned short dbreg_set; /* event set for this register */ + unsigned short dbreg_reserved1; /* for future use */ + unsigned long dbreg_value; /* value for debug register */ + unsigned long dbreg_flags; /* return: dbreg error */ + unsigned long dbreg_reserved2[1]; /* for future use */ +} pfarg_dbreg_t; + +typedef struct { + unsigned int ft_version; /* perfmon: major [16-31], minor [0-15] */ + unsigned int ft_reserved; /* reserved for future use */ + unsigned long reserved[4]; /* for future use */ +} pfarg_features_t; + +typedef struct { + pid_t load_pid; /* process to load the context into */ + unsigned short load_set; /* first event set to load */ + unsigned short load_reserved1; /* for future use */ + unsigned long load_reserved2[3]; /* for future use */ +} pfarg_load_t; + +typedef struct { + int msg_type; /* generic message header */ + int msg_ctx_fd; /* generic message header */ + unsigned long msg_ovfl_pmds[4]; /* which PMDs overflowed */ + unsigned short msg_active_set; /* active set at the time of overflow */ + unsigned short msg_reserved1; /* for future use */ + unsigned int msg_reserved2; /* for future use */ + unsigned long msg_tstamp; /* for perf tuning/debug */ +} pfm_ovfl_msg_t; + +typedef struct { + int msg_type; /* generic message header */ + int msg_ctx_fd; /* generic message header */ + unsigned long msg_tstamp; /* for perf tuning */ +} pfm_end_msg_t; + +typedef 
struct { + int msg_type; /* type of the message */ + int msg_ctx_fd; /* unique identifier for the context */ + unsigned long msg_tstamp; /* for perf tuning */ +} pfm_gen_msg_t; + +#define PFM_MSG_OVFL 1 /* an overflow happened */ +#define PFM_MSG_END 2 /* task to which context was attached ended */ + +typedef union { + pfm_ovfl_msg_t pfm_ovfl_msg; + pfm_end_msg_t pfm_end_msg; + pfm_gen_msg_t pfm_gen_msg; +} pfm_msg_t; + +/* + * Define the version numbers for both perfmon as a whole and the sampling buffer format. + */ +#define PFM_VERSION_MAJ 2U +#define PFM_VERSION_MIN 0U +#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff)) +#define PFM_VERSION_MAJOR(x) (((x)>>16) & 0xffff) +#define PFM_VERSION_MINOR(x) ((x) & 0xffff) + + +/* + * miscellaneous architected definitions + */ +#define PMU_FIRST_COUNTER 4 /* first counting monitor (PMC/PMD) */ +#define PMU_MAX_PMCS 256 /* maximum architected number of PMC registers */ +#define PMU_MAX_PMDS 256 /* maximum architected number of PMD registers */ + + +#endif /* _UAPI_ASM_IA64_PERFMON_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/perfmon_default_smpl.h b/kernel/arch/ia64/include/uapi/asm/perfmon_default_smpl.h new file mode 100644 index 000000000..a2d560c67 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/perfmon_default_smpl.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2002-2003 Hewlett-Packard Co + * Stephane Eranian + * + * This file implements the default sampling buffer format + * for Linux/ia64 perfmon subsystem. 
+ */ +#ifndef __PERFMON_DEFAULT_SMPL_H__ +#define __PERFMON_DEFAULT_SMPL_H__ 1 + +#define PFM_DEFAULT_SMPL_UUID { \ + 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82, 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97} + +/* + * format specific parameters (passed at context creation) + */ +typedef struct { + unsigned long buf_size; /* size of the buffer in bytes */ + unsigned int flags; /* buffer specific flags */ + unsigned int res1; /* for future use */ + unsigned long reserved[2]; /* for future use */ +} pfm_default_smpl_arg_t; + +/* + * combined context+format specific structure. Can be passed + * to PFM_CREATE_CONTEXT + */ +typedef struct { + pfarg_context_t ctx_arg; + pfm_default_smpl_arg_t buf_arg; +} pfm_default_smpl_ctx_arg_t; + +/* + * This header is at the beginning of the sampling buffer returned to the user. + * It is directly followed by the first record. + */ +typedef struct { + unsigned long hdr_count; /* how many valid entries */ + unsigned long hdr_cur_offs; /* current offset from top of buffer */ + unsigned long hdr_reserved2; /* reserved for future use */ + + unsigned long hdr_overflows; /* how many times the buffer overflowed */ + unsigned long hdr_buf_size; /* how many bytes in the buffer */ + + unsigned int hdr_version; /* contains perfmon version (smpl format diffs) */ + unsigned int hdr_reserved1; /* for future use */ + unsigned long hdr_reserved[10]; /* for future use */ +} pfm_default_smpl_hdr_t; + +/* + * Entry header in the sampling buffer. The header is directly followed + * by the values of the PMD registers of interest saved in increasing + * index order: PMD4, PMD5, and so on. How many PMDs are present depends + * on how the session was programmed. + * + * In the case where multiple counters overflow at the same time, multiple + * entries are written consecutively. + * + * The last_reset_val member indicates the initial value of the overflowed PMD.
+ */ +typedef struct { + int pid; /* thread id (for NPTL, this is gettid()) */ + unsigned char reserved1[3]; /* reserved for future use */ + unsigned char ovfl_pmd; /* index of overflowed PMD */ + + unsigned long last_reset_val; /* initial value of overflowed PMD */ + unsigned long ip; /* where the overflow interrupt happened */ + unsigned long tstamp; /* ar.itc when entering perfmon intr. handler */ + + unsigned short cpu; /* cpu on which the overflow occurred */ + unsigned short set; /* event set active when overflow occurred */ + int tgid; /* thread group id (for NPTL, this is getpid()) */ +} pfm_default_smpl_entry_t; + +#define PFM_DEFAULT_MAX_PMDS 64 /* how many pmds supported by data structures (presumably one bit per PMD in an unsigned long) */ +#define PFM_DEFAULT_MAX_ENTRY_SIZE (sizeof(pfm_default_smpl_entry_t)+(sizeof(unsigned long)*PFM_DEFAULT_MAX_PMDS)) +#define PFM_DEFAULT_SMPL_MIN_BUF_SIZE (sizeof(pfm_default_smpl_hdr_t)+PFM_DEFAULT_MAX_ENTRY_SIZE) + +#define PFM_DEFAULT_SMPL_VERSION_MAJ 2U +#define PFM_DEFAULT_SMPL_VERSION_MIN 0U +#define PFM_DEFAULT_SMPL_VERSION (((PFM_DEFAULT_SMPL_VERSION_MAJ&0xffff)<<16)|(PFM_DEFAULT_SMPL_VERSION_MIN & 0xffff)) + +#endif /* __PERFMON_DEFAULT_SMPL_H__ */ diff --git a/kernel/arch/ia64/include/uapi/asm/poll.h b/kernel/arch/ia64/include/uapi/asm/poll.h new file mode 100644 index 000000000..c98509d31 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/poll.h @@ -0,0 +1 @@ +#include diff --git a/kernel/arch/ia64/include/uapi/asm/posix_types.h b/kernel/arch/ia64/include/uapi/asm/posix_types.h new file mode 100644 index 000000000..99ee1d651 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/posix_types.h @@ -0,0 +1,8 @@ +#ifndef _ASM_IA64_POSIX_TYPES_H +#define _ASM_IA64_POSIX_TYPES_H + +typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ + +#include + +#endif /* _ASM_IA64_POSIX_TYPES_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/ptrace.h b/kernel/arch/ia64/include/uapi/asm/ptrace.h new file mode 100644 index
000000000..0a02f634e --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/ptrace.h @@ -0,0 +1,247 @@ +/* + * Copyright (C) 1998-2004 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 2003 Intel Co + * Suresh Siddha + * Fenghua Yu + * Arun Sharma + * + * 12/07/98 S. Eranian added pt_regs & switch_stack + * 12/21/98 D. Mosberger updated to match latest code + * 6/17/99 D. Mosberger added second unat member to "struct switch_stack" + * + */ +#ifndef _UAPI_ASM_IA64_PTRACE_H +#define _UAPI_ASM_IA64_PTRACE_H + +/* + * When a user process is blocked, its state looks as follows: + * + * +----------------------+ ------- IA64_STK_OFFSET + * | | ^ + * | struct pt_regs | | + * | | | + * +----------------------+ | + * | | | + * | memory stack | | + * | (growing downwards) | | + * //.....................// | + * | + * //.....................// | + * | | | + * +----------------------+ | + * | struct switch_stack | | + * | | | + * +----------------------+ | + * | | | + * //.....................// | + * | + * //.....................// | + * | | | + * | register stack | | + * | (growing upwards) | | + * | | | + * +----------------------+ | --- IA64_RBS_OFFSET + * | struct thread_info | | ^ + * +----------------------+ | | + * | | | | + * | struct task_struct | | | + * current -> | | | | + * +----------------------+ ------- + * + * Note that ar.ec is not saved explicitly in pt_regs or switch_stack. + * This is because ar.ec is saved as part of ar.pfs. + */ + + +#include + + +#ifndef __ASSEMBLY__ + +/* + * This struct defines the way the registers are saved on system + * calls. + * + * We don't save all floating point registers because the kernel + * is compiled to use only a very small subset, so the others are + * untouched.
+ * + * THIS STRUCTURE MUST BE A MULTIPLE OF 16 BYTES IN SIZE + * (because the memory stack pointer MUST ALWAYS be aligned this way) + * + */ +struct pt_regs { + /* The following registers are saved by SAVE_MIN: */ + unsigned long b6; /* scratch */ + unsigned long b7; /* scratch */ + + unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */ + unsigned long ar_ssd; /* reserved for future use (scratch) */ + + unsigned long r8; /* scratch (return value register 0) */ + unsigned long r9; /* scratch (return value register 1) */ + unsigned long r10; /* scratch (return value register 2) */ + unsigned long r11; /* scratch (return value register 3) */ + + unsigned long cr_ipsr; /* interrupted task's psr */ + unsigned long cr_iip; /* interrupted task's instruction pointer */ + /* + * interrupted task's function state; if bit 63 is cleared, it + * contains syscall's ar.pfs.pfm: + */ + unsigned long cr_ifs; + + unsigned long ar_unat; /* interrupted task's NaT register (preserved) */ + unsigned long ar_pfs; /* prev function state */ + unsigned long ar_rsc; /* RSE configuration */ + /* The following two are valid only if cr_ipsr.cpl > 0 || ti->flags & _TIF_MCA_INIT */ + unsigned long ar_rnat; /* RSE NaT */ + unsigned long ar_bspstore; /* RSE bspstore */ + + unsigned long pr; /* 64 predicate registers (1 bit each) */ + unsigned long b0; /* return pointer (bp) */ + unsigned long loadrs; /* size of dirty partition << 16 */ + + unsigned long r1; /* the gp pointer */ + unsigned long r12; /* interrupted task's memory stack pointer */ + unsigned long r13; /* thread pointer */ + + unsigned long ar_fpsr; /* floating point status (preserved) */ + unsigned long r15; /* scratch */ + + /* The remaining registers are NOT saved for system calls.
*/ + + unsigned long r14; /* scratch */ + unsigned long r2; /* scratch */ + unsigned long r3; /* scratch */ + + /* The following registers are saved by SAVE_REST: */ + unsigned long r16; /* scratch */ + unsigned long r17; /* scratch */ + unsigned long r18; /* scratch */ + unsigned long r19; /* scratch */ + unsigned long r20; /* scratch */ + unsigned long r21; /* scratch */ + unsigned long r22; /* scratch */ + unsigned long r23; /* scratch */ + unsigned long r24; /* scratch */ + unsigned long r25; /* scratch */ + unsigned long r26; /* scratch */ + unsigned long r27; /* scratch */ + unsigned long r28; /* scratch */ + unsigned long r29; /* scratch */ + unsigned long r30; /* scratch */ + unsigned long r31; /* scratch */ + + unsigned long ar_ccv; /* compare/exchange value (scratch) */ + + /* + * Floating point registers that the kernel considers scratch: + */ + struct ia64_fpreg f6; /* scratch */ + struct ia64_fpreg f7; /* scratch */ + struct ia64_fpreg f8; /* scratch */ + struct ia64_fpreg f9; /* scratch */ + struct ia64_fpreg f10; /* scratch */ + struct ia64_fpreg f11; /* scratch */ +}; + +/* + * This structure contains the additional registers that need to + * be preserved across a context switch. This generally consists of + * "preserved" registers.
+ */ +struct switch_stack { + unsigned long caller_unat; /* user NaT collection register (preserved) */ + unsigned long ar_fpsr; /* floating-point status register */ + + struct ia64_fpreg f2; /* preserved */ + struct ia64_fpreg f3; /* preserved */ + struct ia64_fpreg f4; /* preserved */ + struct ia64_fpreg f5; /* preserved */ + + struct ia64_fpreg f12; /* scratch, but untouched by kernel */ + struct ia64_fpreg f13; /* scratch, but untouched by kernel */ + struct ia64_fpreg f14; /* scratch, but untouched by kernel */ + struct ia64_fpreg f15; /* scratch, but untouched by kernel */ + struct ia64_fpreg f16; /* preserved */ + struct ia64_fpreg f17; /* preserved */ + struct ia64_fpreg f18; /* preserved */ + struct ia64_fpreg f19; /* preserved */ + struct ia64_fpreg f20; /* preserved */ + struct ia64_fpreg f21; /* preserved */ + struct ia64_fpreg f22; /* preserved */ + struct ia64_fpreg f23; /* preserved */ + struct ia64_fpreg f24; /* preserved */ + struct ia64_fpreg f25; /* preserved */ + struct ia64_fpreg f26; /* preserved */ + struct ia64_fpreg f27; /* preserved */ + struct ia64_fpreg f28; /* preserved */ + struct ia64_fpreg f29; /* preserved */ + struct ia64_fpreg f30; /* preserved */ + struct ia64_fpreg f31; /* preserved */ + + unsigned long r4; /* preserved */ + unsigned long r5; /* preserved */ + unsigned long r6; /* preserved */ + unsigned long r7; /* preserved */ + + unsigned long b0; /* so we can force a direct return in copy_thread */ + unsigned long b1; + unsigned long b2; + unsigned long b3; + unsigned long b4; + unsigned long b5; + + unsigned long ar_pfs; /* previous function state */ + unsigned long ar_lc; /* loop counter (preserved) */ + unsigned long ar_unat; /* NaT bits for r4-r7 */ + unsigned long ar_rnat; /* RSE NaT collection register */ + unsigned long ar_bspstore; /* RSE dirty base (preserved) */ + unsigned long pr; /* 64 predicate registers (1 bit each) */ +}; + + +/* pt_all_user_regs is used for PTRACE_GETREGS and PTRACE_SETREGS */ +struct
pt_all_user_regs { + unsigned long nat; + unsigned long cr_iip; + unsigned long cfm; + unsigned long cr_ipsr; + unsigned long pr; + + unsigned long gr[32]; + unsigned long br[8]; + unsigned long ar[128]; + struct ia64_fpreg fr[128]; +}; + +#endif /* !__ASSEMBLY__ */ + +/* indices to application-registers array in pt_all_user_regs */ +#define PT_AUR_RSC 16 +#define PT_AUR_BSP 17 +#define PT_AUR_BSPSTORE 18 +#define PT_AUR_RNAT 19 +#define PT_AUR_CCV 32 +#define PT_AUR_UNAT 36 +#define PT_AUR_FPSR 40 +#define PT_AUR_PFS 64 +#define PT_AUR_LC 65 +#define PT_AUR_EC 66 + +/* + * The numbers chosen here are somewhat arbitrary but absolutely MUST + * not overlap with any of the numbers assigned in <linux/ptrace.h>. + */ +#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */ +#define PTRACE_OLD_GETSIGINFO 13 /* (replaced by PTRACE_GETSIGINFO in <linux/ptrace.h>) */ +#define PTRACE_OLD_SETSIGINFO 14 /* (replaced by PTRACE_SETSIGINFO in <linux/ptrace.h>) */ +#define PTRACE_GETREGS 18 /* get all registers (pt_all_user_regs) in one shot */ +#define PTRACE_SETREGS 19 /* set all registers (pt_all_user_regs) in one shot */ + +#define PTRACE_OLDSETOPTIONS 21 + +#endif /* _UAPI_ASM_IA64_PTRACE_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/ptrace_offsets.h b/kernel/arch/ia64/include/uapi/asm/ptrace_offsets.h new file mode 100644 index 000000000..b712773c7 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/ptrace_offsets.h @@ -0,0 +1,268 @@ +#ifndef _ASM_IA64_PTRACE_OFFSETS_H +#define _ASM_IA64_PTRACE_OFFSETS_H + +/* + * Copyright (C) 1999, 2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +/* + * The "uarea" that can be accessed via PEEKUSER and POKEUSER is a + * virtual structure that would have the following definition: + * + * struct uarea { + * struct ia64_fpreg fph[96]; // f32-f127 + * unsigned long nat_bits; + * unsigned long empty1; + * struct ia64_fpreg f2; // f2-f5 + * : + * struct ia64_fpreg f5; + * struct ia64_fpreg f10; // f10-f31 + * : + * struct ia64_fpreg f31; + * unsigned long r4; //
r4-r7 + * : + * unsigned long r7; + * unsigned long b1; // b1-b5 + * : + * unsigned long b5; + * unsigned long ar_ec; + * unsigned long ar_lc; + * unsigned long empty2[5]; + * unsigned long cr_ipsr; + * unsigned long cr_iip; + * unsigned long cfm; + * unsigned long ar_unat; + * unsigned long ar_pfs; + * unsigned long ar_rsc; + * unsigned long ar_rnat; + * unsigned long ar_bspstore; + * unsigned long pr; + * unsigned long b6; + * unsigned long ar_bsp; + * unsigned long r1; + * unsigned long r2; + * unsigned long r3; + * unsigned long r12; + * unsigned long r13; + * unsigned long r14; + * unsigned long r15; + * unsigned long r8; + * unsigned long r9; + * unsigned long r10; + * unsigned long r11; + * unsigned long r16; + * : + * unsigned long r31; + * unsigned long ar_ccv; + * unsigned long ar_fpsr; + * unsigned long b0; + * unsigned long b7; + * struct ia64_fpreg f6; + * struct ia64_fpreg f7; + * struct ia64_fpreg f8; + * struct ia64_fpreg f9; + * unsigned long ar_csd; + * unsigned long ar_ssd; + * unsigned long rsvd1[710]; + * unsigned long dbr[8]; + * unsigned long rsvd2[504]; + * unsigned long ibr[8]; + * unsigned long rsvd3[504]; + * unsigned long pmd[4]; + * } + */ + +/* fph: */ +#define PT_F32 0x0000 +#define PT_F33 0x0010 +#define PT_F34 0x0020 +#define PT_F35 0x0030 +#define PT_F36 0x0040 +#define PT_F37 0x0050 +#define PT_F38 0x0060 +#define PT_F39 0x0070 +#define PT_F40 0x0080 +#define PT_F41 0x0090 +#define PT_F42 0x00a0 +#define PT_F43 0x00b0 +#define PT_F44 0x00c0 +#define PT_F45 0x00d0 +#define PT_F46 0x00e0 +#define PT_F47 0x00f0 +#define PT_F48 0x0100 +#define PT_F49 0x0110 +#define PT_F50 0x0120 +#define PT_F51 0x0130 +#define PT_F52 0x0140 +#define PT_F53 0x0150 +#define PT_F54 0x0160 +#define PT_F55 0x0170 +#define PT_F56 0x0180 +#define PT_F57 0x0190 +#define PT_F58 0x01a0 +#define PT_F59 0x01b0 +#define PT_F60 0x01c0 +#define PT_F61 0x01d0 +#define PT_F62 0x01e0 +#define PT_F63 0x01f0 +#define PT_F64 0x0200 +#define PT_F65 0x0210 +#define PT_F66 0x0220 +#define
PT_F67 0x0230 +#define PT_F68 0x0240 +#define PT_F69 0x0250 +#define PT_F70 0x0260 +#define PT_F71 0x0270 +#define PT_F72 0x0280 +#define PT_F73 0x0290 +#define PT_F74 0x02a0 +#define PT_F75 0x02b0 +#define PT_F76 0x02c0 +#define PT_F77 0x02d0 +#define PT_F78 0x02e0 +#define PT_F79 0x02f0 +#define PT_F80 0x0300 +#define PT_F81 0x0310 +#define PT_F82 0x0320 +#define PT_F83 0x0330 +#define PT_F84 0x0340 +#define PT_F85 0x0350 +#define PT_F86 0x0360 +#define PT_F87 0x0370 +#define PT_F88 0x0380 +#define PT_F89 0x0390 +#define PT_F90 0x03a0 +#define PT_F91 0x03b0 +#define PT_F92 0x03c0 +#define PT_F93 0x03d0 +#define PT_F94 0x03e0 +#define PT_F95 0x03f0 +#define PT_F96 0x0400 +#define PT_F97 0x0410 +#define PT_F98 0x0420 +#define PT_F99 0x0430 +#define PT_F100 0x0440 +#define PT_F101 0x0450 +#define PT_F102 0x0460 +#define PT_F103 0x0470 +#define PT_F104 0x0480 +#define PT_F105 0x0490 +#define PT_F106 0x04a0 +#define PT_F107 0x04b0 +#define PT_F108 0x04c0 +#define PT_F109 0x04d0 +#define PT_F110 0x04e0 +#define PT_F111 0x04f0 +#define PT_F112 0x0500 +#define PT_F113 0x0510 +#define PT_F114 0x0520 +#define PT_F115 0x0530 +#define PT_F116 0x0540 +#define PT_F117 0x0550 +#define PT_F118 0x0560 +#define PT_F119 0x0570 +#define PT_F120 0x0580 +#define PT_F121 0x0590 +#define PT_F122 0x05a0 +#define PT_F123 0x05b0 +#define PT_F124 0x05c0 +#define PT_F125 0x05d0 +#define PT_F126 0x05e0 +#define PT_F127 0x05f0 + +#define PT_NAT_BITS 0x0600 + +#define PT_F2 0x0610 +#define PT_F3 0x0620 +#define PT_F4 0x0630 +#define PT_F5 0x0640 +#define PT_F10 0x0650 +#define PT_F11 0x0660 +#define PT_F12 0x0670 +#define PT_F13 0x0680 +#define PT_F14 0x0690 +#define PT_F15 0x06a0 +#define PT_F16 0x06b0 +#define PT_F17 0x06c0 +#define PT_F18 0x06d0 +#define PT_F19 0x06e0 +#define PT_F20 0x06f0 +#define PT_F21 0x0700 +#define PT_F22 0x0710 +#define PT_F23 0x0720 +#define PT_F24 0x0730 +#define PT_F25 0x0740 +#define PT_F26 0x0750 +#define PT_F27 0x0760 +#define PT_F28 0x0770 +#define PT_F29 
0x0780 +#define PT_F30 0x0790 +#define PT_F31 0x07a0 +#define PT_R4 0x07b0 +#define PT_R5 0x07b8 +#define PT_R6 0x07c0 +#define PT_R7 0x07c8 + +#define PT_B1 0x07d8 +#define PT_B2 0x07e0 +#define PT_B3 0x07e8 +#define PT_B4 0x07f0 +#define PT_B5 0x07f8 + +#define PT_AR_EC 0x0800 +#define PT_AR_LC 0x0808 + +#define PT_CR_IPSR 0x0830 +#define PT_CR_IIP 0x0838 +#define PT_CFM 0x0840 +#define PT_AR_UNAT 0x0848 +#define PT_AR_PFS 0x0850 +#define PT_AR_RSC 0x0858 +#define PT_AR_RNAT 0x0860 +#define PT_AR_BSPSTORE 0x0868 +#define PT_PR 0x0870 +#define PT_B6 0x0878 +#define PT_AR_BSP 0x0880 /* note: this points to the *end* of the backing store! */ +#define PT_R1 0x0888 +#define PT_R2 0x0890 +#define PT_R3 0x0898 +#define PT_R12 0x08a0 +#define PT_R13 0x08a8 +#define PT_R14 0x08b0 +#define PT_R15 0x08b8 +#define PT_R8 0x08c0 +#define PT_R9 0x08c8 +#define PT_R10 0x08d0 +#define PT_R11 0x08d8 +#define PT_R16 0x08e0 +#define PT_R17 0x08e8 +#define PT_R18 0x08f0 +#define PT_R19 0x08f8 +#define PT_R20 0x0900 +#define PT_R21 0x0908 +#define PT_R22 0x0910 +#define PT_R23 0x0918 +#define PT_R24 0x0920 +#define PT_R25 0x0928 +#define PT_R26 0x0930 +#define PT_R27 0x0938 +#define PT_R28 0x0940 +#define PT_R29 0x0948 +#define PT_R30 0x0950 +#define PT_R31 0x0958 +#define PT_AR_CCV 0x0960 +#define PT_AR_FPSR 0x0968 +#define PT_B0 0x0970 +#define PT_B7 0x0978 +#define PT_F6 0x0980 +#define PT_F7 0x0990 +#define PT_F8 0x09a0 +#define PT_F9 0x09b0 +#define PT_AR_CSD 0x09c0 +#define PT_AR_SSD 0x09c8 + +#define PT_DBR 0x2000 /* data breakpoint registers */ +#define PT_IBR 0x3000 /* instruction breakpoint registers */ +#define PT_PMD 0x4000 /* performance monitoring counters */ + +#endif /* _ASM_IA64_PTRACE_OFFSETS_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/resource.h b/kernel/arch/ia64/include/uapi/asm/resource.h new file mode 100644 index 000000000..ba2272a87 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/resource.h @@ -0,0 +1,7 @@ +#ifndef _ASM_IA64_RESOURCE_H +#define 
_ASM_IA64_RESOURCE_H + +#include +#include + +#endif /* _ASM_IA64_RESOURCE_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/rse.h b/kernel/arch/ia64/include/uapi/asm/rse.h new file mode 100644 index 000000000..02830a3b0 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/rse.h @@ -0,0 +1,66 @@ +#ifndef _ASM_IA64_RSE_H +#define _ASM_IA64_RSE_H + +/* + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang + * + * Register stack engine related helper functions. This file may be + * used in applications, so be careful about the name-space and give + * some consideration to non-GNU C compilers (though __inline__ is + * fine). + */ + +static __inline__ unsigned long +ia64_rse_slot_num (unsigned long *addr) +{ + return (((unsigned long) addr) >> 3) & 0x3f; +} + +/* + * Return TRUE if ADDR is the address of an RNAT slot. + */ +static __inline__ unsigned long +ia64_rse_is_rnat_slot (unsigned long *addr) +{ + return ia64_rse_slot_num(addr) == 0x3f; +} + +/* + * Returns the address of the RNAT slot that covers the slot at + * address SLOT_ADDR. + */ +static __inline__ unsigned long * +ia64_rse_rnat_addr (unsigned long *slot_addr) +{ + return (unsigned long *) ((unsigned long) slot_addr | (0x3f << 3)); +} + +/* + * Calculate the number of registers in the dirty partition starting at BSPSTORE and + * ending at BSP. This isn't simply (BSP-BSPSTORE)/8 because every 64th slot stores + * ar.rnat. + */ +static __inline__ unsigned long +ia64_rse_num_regs (unsigned long *bspstore, unsigned long *bsp) +{ + unsigned long slots = (bsp - bspstore); + + return slots - (ia64_rse_slot_num(bspstore) + slots)/0x40; +} + +/* + * The inverse of the above: given bspstore and the number of + * registers, calculate ar.bsp. 
+ */ +static __inline__ unsigned long * +ia64_rse_skip_regs (unsigned long *addr, long num_regs) +{ + long delta = ia64_rse_slot_num(addr) + num_regs; + + if (num_regs < 0) + delta -= 0x3e; + return addr + num_regs + delta/0x3f; +} + +#endif /* _ASM_IA64_RSE_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/sembuf.h b/kernel/arch/ia64/include/uapi/asm/sembuf.h new file mode 100644 index 000000000..1340fbc04 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/sembuf.h @@ -0,0 +1,22 @@ +#ifndef _ASM_IA64_SEMBUF_H +#define _ASM_IA64_SEMBUF_H + +/* + * The semid64_ds structure for IA-64 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 2 miscellaneous 64-bit values + */ + +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ + __kernel_time_t sem_otime; /* last semop time */ + __kernel_time_t sem_ctime; /* last change time */ + unsigned long sem_nsems; /* no. 
of semaphores in array */ + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* _ASM_IA64_SEMBUF_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/setup.h b/kernel/arch/ia64/include/uapi/asm/setup.h new file mode 100644 index 000000000..8d5645831 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/setup.h @@ -0,0 +1,24 @@ +#ifndef __IA64_SETUP_H +#define __IA64_SETUP_H + +#define COMMAND_LINE_SIZE 2048 + +extern struct ia64_boot_param { + __u64 command_line; /* physical address of command line arguments */ + __u64 efi_systab; /* physical address of EFI system table */ + __u64 efi_memmap; /* physical address of EFI memory map */ + __u64 efi_memmap_size; /* size of EFI memory map */ + __u64 efi_memdesc_size; /* size of an EFI memory map descriptor */ + __u32 efi_memdesc_version; /* memory descriptor version */ + struct { + __u16 num_cols; /* number of columns on console output device */ + __u16 num_rows; /* number of rows on console output device */ + __u16 orig_x; /* cursor's x position */ + __u16 orig_y; /* cursor's y position */ + } console_info; + __u64 fpswa; /* physical address of the fpswa interface */ + __u64 initrd_start; + __u64 initrd_size; +} *ia64_boot_param; + +#endif diff --git a/kernel/arch/ia64/include/uapi/asm/shmbuf.h b/kernel/arch/ia64/include/uapi/asm/shmbuf.h new file mode 100644 index 000000000..585002a77 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/shmbuf.h @@ -0,0 +1,38 @@ +#ifndef _ASM_IA64_SHMBUF_H +#define _ASM_IA64_SHMBUF_H + +/* + * The shmid64_ds structure for IA-64 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. 
+ * + * Pad space is left for: + * - 2 miscellaneous 64-bit values + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_time_t shm_atime; /* last attach time */ + __kernel_time_t shm_dtime; /* last detach time */ + __kernel_time_t shm_ctime; /* last change time */ + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned long shm_nattch; /* no. of current attaches */ + unsigned long __unused1; + unsigned long __unused2; +}; + +struct shminfo64 { + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* _ASM_IA64_SHMBUF_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/sigcontext.h b/kernel/arch/ia64/include/uapi/asm/sigcontext.h new file mode 100644 index 000000000..57ff777bc --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/sigcontext.h @@ -0,0 +1,70 @@ +#ifndef _ASM_IA64_SIGCONTEXT_H +#define _ASM_IA64_SIGCONTEXT_H + +/* + * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co + * Copyright (C) 1998, 1999, 2001 David Mosberger-Tang + */ + +#include + +#define IA64_SC_FLAG_ONSTACK_BIT 0 /* is handler running on signal stack? */ +#define IA64_SC_FLAG_IN_SYSCALL_BIT 1 /* did signal interrupt a syscall? */ +#define IA64_SC_FLAG_FPH_VALID_BIT 2 /* is state in f[32]-f[127] valid? */ + +#define IA64_SC_FLAG_ONSTACK (1 << IA64_SC_FLAG_ONSTACK_BIT) +#define IA64_SC_FLAG_IN_SYSCALL (1 << IA64_SC_FLAG_IN_SYSCALL_BIT) +#define IA64_SC_FLAG_FPH_VALID (1 << IA64_SC_FLAG_FPH_VALID_BIT) + +# ifndef __ASSEMBLY__ + +/* + * Note on handling of register backing store: sc_ar_bsp contains the address that would + * be found in ar.bsp after executing a "cover" instruction the context in which the + * signal was raised. 
If signal delivery required switching to an alternate signal stack + * (sc_rbs_base is not NULL), the "dirty" partition (as it would exist after executing the + * imaginary "cover" instruction) is backed by the *alternate* signal stack, not the + * original one. In this case, sc_rbs_base contains the base address of the new register + * backing store. The number of registers in the dirty partition can be calculated as: + * + * ndirty = ia64_rse_num_regs(sc_rbs_base, sc_rbs_base + (sc_loadrs >> 16)) + * + */ + +struct sigcontext { + unsigned long sc_flags; /* see manifest constants above */ + unsigned long sc_nat; /* bit i == 1 iff scratch reg gr[i] is a NaT */ + stack_t sc_stack; /* previously active stack */ + + unsigned long sc_ip; /* instruction pointer */ + unsigned long sc_cfm; /* current frame marker */ + unsigned long sc_um; /* user mask bits */ + unsigned long sc_ar_rsc; /* register stack configuration register */ + unsigned long sc_ar_bsp; /* backing store pointer */ + unsigned long sc_ar_rnat; /* RSE NaT collection register */ + unsigned long sc_ar_ccv; /* compare and exchange compare value register */ + unsigned long sc_ar_unat; /* ar.unat of interrupted context */ + unsigned long sc_ar_fpsr; /* floating-point status register */ + unsigned long sc_ar_pfs; /* previous function state */ + unsigned long sc_ar_lc; /* loop count register */ + unsigned long sc_pr; /* predicate registers */ + unsigned long sc_br[8]; /* branch registers */ + /* Note: sc_gr[0] is used as the "uc_link" member of ucontext_t */ + unsigned long sc_gr[32]; /* general registers (static partition) */ + struct ia64_fpreg sc_fr[128]; /* floating-point registers */ + + unsigned long sc_rbs_base; /* NULL or new base of sighandler's rbs */ + unsigned long sc_loadrs; /* see description above */ + + unsigned long sc_ar25; /* cmp8xchg16 uses this */ + unsigned long sc_ar26; /* rsvd for scratch use */ + unsigned long sc_rsvd[12]; /* reserved for future use */ + /* + * The mask must come last so 
we can increase _NSIG_WORDS + * without breaking binary compatibility. + */ + sigset_t sc_mask; /* signal mask to restore after handler returns */ +}; + +# endif /* __ASSEMBLY__ */ +#endif /* _ASM_IA64_SIGCONTEXT_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/siginfo.h b/kernel/arch/ia64/include/uapi/asm/siginfo.h new file mode 100644 index 000000000..bce9bc1a6 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/siginfo.h @@ -0,0 +1,125 @@ +/* + * Based on . + * + * Modified 1998-2002 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#ifndef _UAPI_ASM_IA64_SIGINFO_H +#define _UAPI_ASM_IA64_SIGINFO_H + + +#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) + +#define HAVE_ARCH_SIGINFO_T +#define HAVE_ARCH_COPY_SIGINFO +#define HAVE_ARCH_COPY_SIGINFO_TO_USER + +#include + +typedef struct siginfo { + int si_signo; + int si_errno; + int si_code; + int __pad0; + + union { + int _pad[SI_PAD_SIZE]; + + /* kill() */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + char _pad[sizeof(__ARCH_SI_UID_T) - sizeof(int)]; + sigval_t _sigval; /* must overlay ._rt._sigval! */ + int _sys_private; /* not to be passed to user */ + } _timer; + + /* POSIX.1b signals */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + pid_t _pid; /* which child */ + uid_t _uid; /* sender's uid */ + int _status; /* exit code */ + clock_t _utime; + clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + void __user *_addr; /* faulting insn/memory ref. 
*/ + int _imm; /* immediate value for "break" */ + unsigned int _flags; /* see below */ + unsigned long _isr; /* isr */ + short _addr_lsb; /* lsb of faulting address */ + struct { + void __user *_lower; + void __user *_upper; + } _addr_bnd; + } _sigfault; + + /* SIGPOLL */ + struct { + long _band; /* POLL_IN, POLL_OUT, POLL_MSG (XPG requires a "long") */ + int _fd; + } _sigpoll; + } _sifields; +} siginfo_t; + +#define si_imm _sifields._sigfault._imm /* as per UNIX SysV ABI spec */ +#define si_flags _sifields._sigfault._flags +/* + * si_isr is valid for SIGILL, SIGFPE, SIGSEGV, SIGBUS, and SIGTRAP provided that + * si_code is non-zero and __ISR_VALID is set in si_flags. + */ +#define si_isr _sifields._sigfault._isr + +/* + * Flag values for si_flags: + */ +#define __ISR_VALID_BIT 0 +#define __ISR_VALID (1 << __ISR_VALID_BIT) + +/* + * SIGILL si_codes + */ +#define ILL_BADIADDR (__SI_FAULT|9) /* unimplemented instruction address */ +#define __ILL_BREAK (__SI_FAULT|10) /* illegal break */ +#define __ILL_BNDMOD (__SI_FAULT|11) /* bundle-update (modification) in progress */ +#undef NSIGILL +#define NSIGILL 11 + +/* + * SIGFPE si_codes + */ +#define __FPE_DECOVF (__SI_FAULT|9) /* decimal overflow */ +#define __FPE_DECDIV (__SI_FAULT|10) /* decimal division by zero */ +#define __FPE_DECERR (__SI_FAULT|11) /* packed decimal error */ +#define __FPE_INVASC (__SI_FAULT|12) /* invalid ASCII digit */ +#define __FPE_INVDEC (__SI_FAULT|13) /* invalid decimal digit */ +#undef NSIGFPE +#define NSIGFPE 13 + +/* + * SIGSEGV si_codes + */ +#define __SEGV_PSTKOVF (__SI_FAULT|4) /* paragraph stack overflow */ +#undef NSIGSEGV +#define NSIGSEGV 4 + +#undef NSIGTRAP +#define NSIGTRAP 4 + + +#endif /* _UAPI_ASM_IA64_SIGINFO_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/signal.h b/kernel/arch/ia64/include/uapi/asm/signal.h new file mode 100644 index 000000000..c0ea2855e --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/signal.h @@ -0,0 +1,121 @@ +/* + * Modified 1998-2001, 2003 + 
* David Mosberger-Tang , Hewlett-Packard Co + * + * Unfortunately, this file is being included by bits/signal.h in + * glibc-2.x. Hence the #ifdef __KERNEL__ ugliness. + */ +#ifndef _UAPI_ASM_IA64_SIGNAL_H +#define _UAPI_ASM_IA64_SIGNAL_H + + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +/* signal 31 is no longer "unused", but the SIGUNUSED macro remains for backwards compatibility */ +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX _NSIG + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. 
+ */ +#define SA_NOCLDSTOP 0x00000001 +#define SA_NOCLDWAIT 0x00000002 +#define SA_SIGINFO 0x00000004 +#define SA_ONSTACK 0x08000000 +#define SA_RESTART 0x10000000 +#define SA_NODEFER 0x40000000 +#define SA_RESETHAND 0x80000000 + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +#define SA_RESTORER 0x04000000 + +/* + * The minimum stack size needs to be fairly large because we want to + * be sure that an app compiled for today's CPUs will continue to run + * on all future CPU models. The CPU model matters because the signal + * frame needs to have space for the complete machine state, including + * all physical stacked registers. The number of physical stacked + * registers is CPU model dependent, but given that the width of + * ar.rsc.loadrs is 14 bits, we can assume that they'll never take up + * more than 16KB of space. + */ +#if 1 + /* + * This is a stupid typo: the value was _meant_ to be 131072 (0x20000), but I typed it + * in wrong. ;-( To preserve backwards compatibility, we leave the kernel at the + * incorrect value and fix libc only. + */ +# define MINSIGSTKSZ 131027 /* min. stack size for sigaltstack() */ +#else +# define MINSIGSTKSZ 131072 /* min. stack size for sigaltstack() */ +#endif +#define SIGSTKSZ 262144 /* default stack size for sigaltstack() */ + + +#include + +# ifndef __ASSEMBLY__ + +# include + +/* Avoid too many header ordering problems. */ +struct siginfo; + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + + +# endif /* !__ASSEMBLY__ */ +#endif /* _UAPI_ASM_IA64_SIGNAL_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/socket.h b/kernel/arch/ia64/include/uapi/asm/socket.h new file mode 100644 index 000000000..59be3d87f --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/socket.h @@ -0,0 +1,97 @@ +#ifndef _ASM_IA64_SOCKET_H +#define _ASM_IA64_SOCKET_H + +/* + * Socket related defines. + * + * Based on . 
+ * + * Modified 1998-2000 + * David Mosberger-Tang , Hewlett-Packard Co + */ + +#include + +/* For setsockopt(2) */ +#define SOL_SOCKET 1 + +#define SO_DEBUG 1 +#define SO_REUSEADDR 2 +#define SO_TYPE 3 +#define SO_ERROR 4 +#define SO_DONTROUTE 5 +#define SO_BROADCAST 6 +#define SO_SNDBUF 7 +#define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 +#define SO_KEEPALIVE 9 +#define SO_OOBINLINE 10 +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 +#define SO_LINGER 13 +#define SO_BSDCOMPAT 14 +#define SO_REUSEPORT 15 +#define SO_PASSCRED 16 +#define SO_PEERCRED 17 +#define SO_RCVLOWAT 18 +#define SO_SNDLOWAT 19 +#define SO_RCVTIMEO 20 +#define SO_SNDTIMEO 21 + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 22 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 23 +#define SO_SECURITY_ENCRYPTION_NETWORK 24 + +#define SO_BINDTODEVICE 25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER + +#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP + +#define SO_ACCEPTCONN 30 + +#define SO_PEERSEC 31 +#define SO_PASSSEC 34 +#define SO_TIMESTAMPNS 35 +#define SCM_TIMESTAMPNS SO_TIMESTAMPNS + +#define SO_MARK 36 + +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + +#define SO_PROTOCOL 38 +#define SO_DOMAIN 39 + +#define SO_RXQ_OVFL 40 + +#define SO_WIFI_STATUS 41 +#define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 + +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + +#define SO_LOCK_FILTER 44 + +#define SO_SELECT_ERR_QUEUE 45 + +#define SO_BUSY_POLL 46 + +#define SO_MAX_PACING_RATE 47 + +#define SO_BPF_EXTENSIONS 48 + +#define SO_INCOMING_CPU 49 + +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + +#endif /* _ASM_IA64_SOCKET_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/sockios.h 
b/kernel/arch/ia64/include/uapi/asm/sockios.h new file mode 100644 index 000000000..15c92468a --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/sockios.h @@ -0,0 +1,20 @@ +#ifndef _ASM_IA64_SOCKIOS_H +#define _ASM_IA64_SOCKIOS_H + +/* + * Socket-level I/O control calls. + * + * Based on . + * + * Modified 1998, 1999 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#define FIOSETOWN 0x8901 +#define SIOCSPGRP 0x8902 +#define FIOGETOWN 0x8903 +#define SIOCGPGRP 0x8904 +#define SIOCATMARK 0x8905 +#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ + +#endif /* _ASM_IA64_SOCKIOS_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/stat.h b/kernel/arch/ia64/include/uapi/asm/stat.h new file mode 100644 index 000000000..367bb90cd --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/stat.h @@ -0,0 +1,51 @@ +#ifndef _ASM_IA64_STAT_H +#define _ASM_IA64_STAT_H + +/* + * Modified 1998, 1999 + * David Mosberger-Tang , Hewlett-Packard Co + */ + +struct stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned int st_mode; + unsigned int st_uid; + unsigned int st_gid; + unsigned int __pad0; + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long st_blksize; + long st_blocks; + unsigned long __unused[3]; +}; + +#define STAT_HAVE_NSEC 1 + +struct ia64_oldstat { + unsigned int st_dev; + unsigned int st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned int st_rdev; + unsigned int __pad1; + unsigned long st_size; + unsigned long st_atime; + unsigned long st_mtime; + unsigned long st_ctime; + unsigned int st_blksize; + int st_blocks; + unsigned int __unused1; + unsigned int __unused2; +}; + +#endif /* _ASM_IA64_STAT_H */ diff --git 
a/kernel/arch/ia64/include/uapi/asm/statfs.h b/kernel/arch/ia64/include/uapi/asm/statfs.h new file mode 100644 index 000000000..1e589669d --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/statfs.h @@ -0,0 +1,20 @@ +#ifndef _ASM_IA64_STATFS_H +#define _ASM_IA64_STATFS_H + +/* + * Based on . + * + * Modified 1998, 1999, 2003 + * David Mosberger-Tang , Hewlett-Packard Co + */ + +/* + * We need compat_statfs64 to be packed, because the i386 ABI won't + * add padding at the end to bring it to a multiple of 8 bytes, but + * the IA64 ABI will. + */ +#define ARCH_PACK_COMPAT_STATFS64 __attribute__((packed,aligned(4))) + +#include + +#endif /* _ASM_IA64_STATFS_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/swab.h b/kernel/arch/ia64/include/uapi/asm/swab.h new file mode 100644 index 000000000..c89a8cb5d --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/swab.h @@ -0,0 +1,34 @@ +#ifndef _ASM_IA64_SWAB_H +#define _ASM_IA64_SWAB_H + +/* + * Modified 1998, 1999 + * David Mosberger-Tang , Hewlett-Packard Co. + */ + +#include +#include +#include + +static __inline__ __attribute_const__ __u64 __arch_swab64(__u64 x) +{ + __u64 result; + + result = ia64_mux1(x, ia64_mux1_rev); + return result; +} +#define __arch_swab64 __arch_swab64 + +static __inline__ __attribute_const__ __u32 __arch_swab32(__u32 x) +{ + return __arch_swab64(x) >> 32; +} +#define __arch_swab32 __arch_swab32 + +static __inline__ __attribute_const__ __u16 __arch_swab16(__u16 x) +{ + return __arch_swab64(x) >> 48; +} +#define __arch_swab16 __arch_swab16 + +#endif /* _ASM_IA64_SWAB_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/termbits.h b/kernel/arch/ia64/include/uapi/asm/termbits.h new file mode 100644 index 000000000..c009b94e5 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/termbits.h @@ -0,0 +1,208 @@ +#ifndef _ASM_IA64_TERMBITS_H +#define _ASM_IA64_TERMBITS_H + +/* + * Based on . 
+ * + * Modified 1999 + * David Mosberger-Tang , Hewlett-Packard Co + * + * 99/01/28 Added new baudrates + */ + +#include + +typedef unsigned char cc_t; +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; + +#define NCCS 19 +struct termios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ +}; + +struct termios2 { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +struct ktermios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +/* c_cc characters */ +#define VINTR 0 +#define VQUIT 1 +#define VERASE 2 +#define VKILL 3 +#define VEOF 4 +#define VTIME 5 +#define VMIN 6 +#define VSWTC 7 +#define VSTART 8 +#define VSTOP 9 +#define VSUSP 10 +#define VEOL 11 +#define VREPRINT 12 +#define VDISCARD 13 +#define VWERASE 14 +#define VLNEXT 15 +#define VEOL2 16 + +/* c_iflag bits */ +#define IGNBRK 0000001 +#define BRKINT 0000002 +#define IGNPAR 0000004 +#define PARMRK 0000010 +#define INPCK 0000020 +#define ISTRIP 0000040 +#define INLCR 0000100 +#define IGNCR 0000200 +#define ICRNL 0000400 +#define IUCLC 0001000 +#define IXON 0002000 +#define IXANY 0004000 +#define IXOFF 0010000 +#define IMAXBEL 0020000 +#define IUTF8 0040000 + +/* c_oflag bits */ +#define OPOST 0000001 +#define OLCUC 0000002 
+#define ONLCR 0000004 +#define OCRNL 0000010 +#define ONOCR 0000020 +#define ONLRET 0000040 +#define OFILL 0000100 +#define OFDEL 0000200 +#define NLDLY 0000400 +#define NL0 0000000 +#define NL1 0000400 +#define CRDLY 0003000 +#define CR0 0000000 +#define CR1 0001000 +#define CR2 0002000 +#define CR3 0003000 +#define TABDLY 0014000 +#define TAB0 0000000 +#define TAB1 0004000 +#define TAB2 0010000 +#define TAB3 0014000 +#define XTABS 0014000 +#define BSDLY 0020000 +#define BS0 0000000 +#define BS1 0020000 +#define VTDLY 0040000 +#define VT0 0000000 +#define VT1 0040000 +#define FFDLY 0100000 +#define FF0 0000000 +#define FF1 0100000 + +/* c_cflag bit meaning */ +#define CBAUD 0010017 +#define B0 0000000 /* hang up */ +#define B50 0000001 +#define B75 0000002 +#define B110 0000003 +#define B134 0000004 +#define B150 0000005 +#define B200 0000006 +#define B300 0000007 +#define B600 0000010 +#define B1200 0000011 +#define B1800 0000012 +#define B2400 0000013 +#define B4800 0000014 +#define B9600 0000015 +#define B19200 0000016 +#define B38400 0000017 +#define EXTA B19200 +#define EXTB B38400 +#define CSIZE 0000060 +#define CS5 0000000 +#define CS6 0000020 +#define CS7 0000040 +#define CS8 0000060 +#define CSTOPB 0000100 +#define CREAD 0000200 +#define PARENB 0000400 +#define PARODD 0001000 +#define HUPCL 0002000 +#define CLOCAL 0004000 +#define CBAUDEX 0010000 +#define BOTHER 0010000 +#define B57600 0010001 +#define B115200 0010002 +#define B230400 0010003 +#define B460800 0010004 +#define B500000 0010005 +#define B576000 0010006 +#define B921600 0010007 +#define B1000000 0010010 +#define B1152000 0010011 +#define B1500000 0010012 +#define B2000000 0010013 +#define B2500000 0010014 +#define B3000000 0010015 +#define B3500000 0010016 +#define B4000000 0010017 +#define CIBAUD 002003600000 /* input baud rate */ +#define CMSPAR 010000000000 /* mark or space (stick) parity */ +#define CRTSCTS 020000000000 /* flow control */ + +#define IBSHIFT 16 /* Shift from CBAUD to 
CIBAUD */ + +/* c_lflag bits */ +#define ISIG 0000001 +#define ICANON 0000002 +#define XCASE 0000004 +#define ECHO 0000010 +#define ECHOE 0000020 +#define ECHOK 0000040 +#define ECHONL 0000100 +#define NOFLSH 0000200 +#define TOSTOP 0000400 +#define ECHOCTL 0001000 +#define ECHOPRT 0002000 +#define ECHOKE 0004000 +#define FLUSHO 0010000 +#define PENDIN 0040000 +#define IEXTEN 0100000 +#define EXTPROC 0200000 + +/* tcflow() and TCXONC use these */ +#define TCOOFF 0 +#define TCOON 1 +#define TCIOFF 2 +#define TCION 3 + +/* tcflush() and TCFLSH use these */ +#define TCIFLUSH 0 +#define TCOFLUSH 1 +#define TCIOFLUSH 2 + +/* tcsetattr uses these */ +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +#endif /* _ASM_IA64_TERMBITS_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/termios.h b/kernel/arch/ia64/include/uapi/asm/termios.h new file mode 100644 index 000000000..d59b48c30 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/termios.h @@ -0,0 +1,50 @@ +/* + * Modified 1999 + * David Mosberger-Tang , Hewlett-Packard Co + * + * 99/01/28 Added N_IRDA and N_SMSBLOCK + */ +#ifndef _UAPI_ASM_IA64_TERMIOS_H +#define _UAPI_ASM_IA64_TERMIOS_H + + +#include +#include + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define 
TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + + +#endif /* _UAPI_ASM_IA64_TERMIOS_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/types.h b/kernel/arch/ia64/include/uapi/asm/types.h new file mode 100644 index 000000000..321193b05 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/types.h @@ -0,0 +1,31 @@ +/* + * This file is never included by application software unless explicitly + * requested (e.g., via linux/types.h) in which case the application is + * Linux specific so (user-) name space pollution is not a major issue. + * However, for interoperability, libraries still need to be careful to + * avoid naming clashes. + * + * Based on . + * + * Modified 1998-2000, 2002 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#ifndef _UAPI_ASM_IA64_TYPES_H +#define _UAPI_ASM_IA64_TYPES_H + + +#ifndef __KERNEL__ +#include +#endif + +#ifdef __ASSEMBLY__ +# define __IA64_UL(x) (x) +# define __IA64_UL_CONST(x) x + +#else +# define __IA64_UL(x) ((unsigned long)(x)) +# define __IA64_UL_CONST(x) x##UL + +#endif /* !__ASSEMBLY__ */ + +#endif /* _UAPI_ASM_IA64_TYPES_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/ucontext.h b/kernel/arch/ia64/include/uapi/asm/ucontext.h new file mode 100644 index 000000000..bf573dc8c --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/ucontext.h @@ -0,0 +1,12 @@ +#ifndef _ASM_IA64_UCONTEXT_H +#define _ASM_IA64_UCONTEXT_H + +struct ucontext { + struct sigcontext uc_mcontext; +}; + +#define uc_link uc_mcontext.sc_gr[0] /* wrong type; nobody cares */ +#define uc_sigmask uc_mcontext.sc_mask /* struct sigcontext names its saved signal mask sc_mask */ +#define uc_stack uc_mcontext.sc_stack + +#endif /* _ASM_IA64_UCONTEXT_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/unistd.h b/kernel/arch/ia64/include/uapi/asm/unistd.h new file mode 100644 index 000000000..461079560 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/unistd.h @@ -0,0 +1,336 @@ +/* + * IA-64 Linux syscall numbers and inline-functions. 
+ * + * Copyright (C) 1998-2005 Hewlett-Packard Co + * David Mosberger-Tang + */ +#ifndef _UAPI_ASM_IA64_UNISTD_H +#define _UAPI_ASM_IA64_UNISTD_H + + +#include + +#define __BREAK_SYSCALL __IA64_BREAK_SYSCALL + +#define __NR_ni_syscall 1024 +#define __NR_exit 1025 +#define __NR_read 1026 +#define __NR_write 1027 +#define __NR_open 1028 +#define __NR_close 1029 +#define __NR_creat 1030 +#define __NR_link 1031 +#define __NR_unlink 1032 +#define __NR_execve 1033 +#define __NR_chdir 1034 +#define __NR_fchdir 1035 +#define __NR_utimes 1036 +#define __NR_mknod 1037 +#define __NR_chmod 1038 +#define __NR_chown 1039 +#define __NR_lseek 1040 +#define __NR_getpid 1041 +#define __NR_getppid 1042 +#define __NR_mount 1043 +#define __NR_umount 1044 +#define __NR_setuid 1045 +#define __NR_getuid 1046 +#define __NR_geteuid 1047 +#define __NR_ptrace 1048 +#define __NR_access 1049 +#define __NR_sync 1050 +#define __NR_fsync 1051 +#define __NR_fdatasync 1052 +#define __NR_kill 1053 +#define __NR_rename 1054 +#define __NR_mkdir 1055 +#define __NR_rmdir 1056 +#define __NR_dup 1057 +#define __NR_pipe 1058 +#define __NR_times 1059 +#define __NR_brk 1060 +#define __NR_setgid 1061 +#define __NR_getgid 1062 +#define __NR_getegid 1063 +#define __NR_acct 1064 +#define __NR_ioctl 1065 +#define __NR_fcntl 1066 +#define __NR_umask 1067 +#define __NR_chroot 1068 +#define __NR_ustat 1069 +#define __NR_dup2 1070 +#define __NR_setreuid 1071 +#define __NR_setregid 1072 +#define __NR_getresuid 1073 +#define __NR_setresuid 1074 +#define __NR_getresgid 1075 +#define __NR_setresgid 1076 +#define __NR_getgroups 1077 +#define __NR_setgroups 1078 +#define __NR_getpgid 1079 +#define __NR_setpgid 1080 +#define __NR_setsid 1081 +#define __NR_getsid 1082 +#define __NR_sethostname 1083 +#define __NR_setrlimit 1084 +#define __NR_getrlimit 1085 +#define __NR_getrusage 1086 +#define __NR_gettimeofday 1087 +#define __NR_settimeofday 1088 +#define __NR_select 1089 +#define __NR_poll 1090 +#define __NR_symlink 1091 
+#define __NR_readlink 1092 +#define __NR_uselib 1093 +#define __NR_swapon 1094 +#define __NR_swapoff 1095 +#define __NR_reboot 1096 +#define __NR_truncate 1097 +#define __NR_ftruncate 1098 +#define __NR_fchmod 1099 +#define __NR_fchown 1100 +#define __NR_getpriority 1101 +#define __NR_setpriority 1102 +#define __NR_statfs 1103 +#define __NR_fstatfs 1104 +#define __NR_gettid 1105 +#define __NR_semget 1106 +#define __NR_semop 1107 +#define __NR_semctl 1108 +#define __NR_msgget 1109 +#define __NR_msgsnd 1110 +#define __NR_msgrcv 1111 +#define __NR_msgctl 1112 +#define __NR_shmget 1113 +#define __NR_shmat 1114 +#define __NR_shmdt 1115 +#define __NR_shmctl 1116 +/* also known as klogctl() in GNU libc: */ +#define __NR_syslog 1117 +#define __NR_setitimer 1118 +#define __NR_getitimer 1119 +/* 1120 was __NR_old_stat */ +/* 1121 was __NR_old_lstat */ +/* 1122 was __NR_old_fstat */ +#define __NR_vhangup 1123 +#define __NR_lchown 1124 +#define __NR_remap_file_pages 1125 +#define __NR_wait4 1126 +#define __NR_sysinfo 1127 +#define __NR_clone 1128 +#define __NR_setdomainname 1129 +#define __NR_uname 1130 +#define __NR_adjtimex 1131 +/* 1132 was __NR_create_module */ +#define __NR_init_module 1133 +#define __NR_delete_module 1134 +/* 1135 was __NR_get_kernel_syms */ +/* 1136 was __NR_query_module */ +#define __NR_quotactl 1137 +#define __NR_bdflush 1138 +#define __NR_sysfs 1139 +#define __NR_personality 1140 +#define __NR_afs_syscall 1141 +#define __NR_setfsuid 1142 +#define __NR_setfsgid 1143 +#define __NR_getdents 1144 +#define __NR_flock 1145 +#define __NR_readv 1146 +#define __NR_writev 1147 +#define __NR_pread64 1148 +#define __NR_pwrite64 1149 +#define __NR__sysctl 1150 +#define __NR_mmap 1151 +#define __NR_munmap 1152 +#define __NR_mlock 1153 +#define __NR_mlockall 1154 +#define __NR_mprotect 1155 +#define __NR_mremap 1156 +#define __NR_msync 1157 +#define __NR_munlock 1158 +#define __NR_munlockall 1159 +#define __NR_sched_getparam 1160 +#define __NR_sched_setparam 1161 
+#define __NR_sched_getscheduler 1162 +#define __NR_sched_setscheduler 1163 +#define __NR_sched_yield 1164 +#define __NR_sched_get_priority_max 1165 +#define __NR_sched_get_priority_min 1166 +#define __NR_sched_rr_get_interval 1167 +#define __NR_nanosleep 1168 +#define __NR_nfsservctl 1169 +#define __NR_prctl 1170 +/* 1171 is reserved for backwards compatibility with old __NR_getpagesize */ +#define __NR_mmap2 1172 +#define __NR_pciconfig_read 1173 +#define __NR_pciconfig_write 1174 +#define __NR_perfmonctl 1175 +#define __NR_sigaltstack 1176 +#define __NR_rt_sigaction 1177 +#define __NR_rt_sigpending 1178 +#define __NR_rt_sigprocmask 1179 +#define __NR_rt_sigqueueinfo 1180 +#define __NR_rt_sigreturn 1181 +#define __NR_rt_sigsuspend 1182 +#define __NR_rt_sigtimedwait 1183 +#define __NR_getcwd 1184 +#define __NR_capget 1185 +#define __NR_capset 1186 +#define __NR_sendfile 1187 +#define __NR_getpmsg 1188 +#define __NR_putpmsg 1189 +#define __NR_socket 1190 +#define __NR_bind 1191 +#define __NR_connect 1192 +#define __NR_listen 1193 +#define __NR_accept 1194 +#define __NR_getsockname 1195 +#define __NR_getpeername 1196 +#define __NR_socketpair 1197 +#define __NR_send 1198 +#define __NR_sendto 1199 +#define __NR_recv 1200 +#define __NR_recvfrom 1201 +#define __NR_shutdown 1202 +#define __NR_setsockopt 1203 +#define __NR_getsockopt 1204 +#define __NR_sendmsg 1205 +#define __NR_recvmsg 1206 +#define __NR_pivot_root 1207 +#define __NR_mincore 1208 +#define __NR_madvise 1209 +#define __NR_stat 1210 +#define __NR_lstat 1211 +#define __NR_fstat 1212 +#define __NR_clone2 1213 +#define __NR_getdents64 1214 +#define __NR_getunwind 1215 +#define __NR_readahead 1216 +#define __NR_setxattr 1217 +#define __NR_lsetxattr 1218 +#define __NR_fsetxattr 1219 +#define __NR_getxattr 1220 +#define __NR_lgetxattr 1221 +#define __NR_fgetxattr 1222 +#define __NR_listxattr 1223 +#define __NR_llistxattr 1224 +#define __NR_flistxattr 1225 +#define __NR_removexattr 1226 +#define __NR_lremovexattr 
1227 +#define __NR_fremovexattr 1228 +#define __NR_tkill 1229 +#define __NR_futex 1230 +#define __NR_sched_setaffinity 1231 +#define __NR_sched_getaffinity 1232 +#define __NR_set_tid_address 1233 +#define __NR_fadvise64 1234 +#define __NR_tgkill 1235 +#define __NR_exit_group 1236 +#define __NR_lookup_dcookie 1237 +#define __NR_io_setup 1238 +#define __NR_io_destroy 1239 +#define __NR_io_getevents 1240 +#define __NR_io_submit 1241 +#define __NR_io_cancel 1242 +#define __NR_epoll_create 1243 +#define __NR_epoll_ctl 1244 +#define __NR_epoll_wait 1245 +#define __NR_restart_syscall 1246 +#define __NR_semtimedop 1247 +#define __NR_timer_create 1248 +#define __NR_timer_settime 1249 +#define __NR_timer_gettime 1250 +#define __NR_timer_getoverrun 1251 +#define __NR_timer_delete 1252 +#define __NR_clock_settime 1253 +#define __NR_clock_gettime 1254 +#define __NR_clock_getres 1255 +#define __NR_clock_nanosleep 1256 +#define __NR_fstatfs64 1257 +#define __NR_statfs64 1258 +#define __NR_mbind 1259 +#define __NR_get_mempolicy 1260 +#define __NR_set_mempolicy 1261 +#define __NR_mq_open 1262 +#define __NR_mq_unlink 1263 +#define __NR_mq_timedsend 1264 +#define __NR_mq_timedreceive 1265 +#define __NR_mq_notify 1266 +#define __NR_mq_getsetattr 1267 +#define __NR_kexec_load 1268 +#define __NR_vserver 1269 +#define __NR_waitid 1270 +#define __NR_add_key 1271 +#define __NR_request_key 1272 +#define __NR_keyctl 1273 +#define __NR_ioprio_set 1274 +#define __NR_ioprio_get 1275 +#define __NR_move_pages 1276 +#define __NR_inotify_init 1277 +#define __NR_inotify_add_watch 1278 +#define __NR_inotify_rm_watch 1279 +#define __NR_migrate_pages 1280 +#define __NR_openat 1281 +#define __NR_mkdirat 1282 +#define __NR_mknodat 1283 +#define __NR_fchownat 1284 +#define __NR_futimesat 1285 +#define __NR_newfstatat 1286 +#define __NR_unlinkat 1287 +#define __NR_renameat 1288 +#define __NR_linkat 1289 +#define __NR_symlinkat 1290 +#define __NR_readlinkat 1291 +#define __NR_fchmodat 1292 +#define 
__NR_faccessat 1293 +#define __NR_pselect6 1294 +#define __NR_ppoll 1295 +#define __NR_unshare 1296 +#define __NR_splice 1297 +#define __NR_set_robust_list 1298 +#define __NR_get_robust_list 1299 +#define __NR_sync_file_range 1300 +#define __NR_tee 1301 +#define __NR_vmsplice 1302 +#define __NR_fallocate 1303 +#define __NR_getcpu 1304 +#define __NR_epoll_pwait 1305 +#define __NR_utimensat 1306 +#define __NR_signalfd 1307 +#define __NR_timerfd 1308 +#define __NR_eventfd 1309 +#define __NR_timerfd_create 1310 +#define __NR_timerfd_settime 1311 +#define __NR_timerfd_gettime 1312 +#define __NR_signalfd4 1313 +#define __NR_eventfd2 1314 +#define __NR_epoll_create1 1315 +#define __NR_dup3 1316 +#define __NR_pipe2 1317 +#define __NR_inotify_init1 1318 +#define __NR_preadv 1319 +#define __NR_pwritev 1320 +#define __NR_rt_tgsigqueueinfo 1321 +#define __NR_recvmmsg 1322 +#define __NR_fanotify_init 1323 +#define __NR_fanotify_mark 1324 +#define __NR_prlimit64 1325 +#define __NR_name_to_handle_at 1326 +#define __NR_open_by_handle_at 1327 +#define __NR_clock_adjtime 1328 +#define __NR_syncfs 1329 +#define __NR_setns 1330 +#define __NR_sendmmsg 1331 +#define __NR_process_vm_readv 1332 +#define __NR_process_vm_writev 1333 +#define __NR_accept4 1334 +#define __NR_finit_module 1335 +#define __NR_sched_setattr 1336 +#define __NR_sched_getattr 1337 +#define __NR_renameat2 1338 +#define __NR_getrandom 1339 +#define __NR_memfd_create 1340 +#define __NR_bpf 1341 +#define __NR_execveat 1342 + +#endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/kernel/arch/ia64/include/uapi/asm/ustack.h b/kernel/arch/ia64/include/uapi/asm/ustack.h new file mode 100644 index 000000000..1dfebc622 --- /dev/null +++ b/kernel/arch/ia64/include/uapi/asm/ustack.h @@ -0,0 +1,12 @@ +#ifndef _UAPI_ASM_IA64_USTACK_H +#define _UAPI_ASM_IA64_USTACK_H + +/* + * Constants for the user stack size + */ + + +/* Make a default stack size of 2GiB */ +#define DEFAULT_USER_STACK_SIZE (1UL << 31) + +#endif /* 
_UAPI_ASM_IA64_USTACK_H */ diff --git a/kernel/arch/ia64/install.sh b/kernel/arch/ia64/install.sh new file mode 100644 index 000000000..0e932f5dc --- /dev/null +++ b/kernel/arch/ia64/install.sh @@ -0,0 +1,40 @@ +#!/bin/sh +# +# arch/ia64/install.sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995 by Linus Torvalds +# +# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin +# +# "make install" script for ia64 architecture +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) +# + +# User may have a custom install script + +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi + +# Default install - same as make zlilo + +if [ -f $4/vmlinuz ]; then + mv $4/vmlinuz $4/vmlinuz.old +fi + +if [ -f $4/System.map ]; then + mv $4/System.map $4/System.old +fi + +cat $2 > $4/vmlinuz +cp $3 $4/System.map + +test -x /usr/sbin/elilo && /usr/sbin/elilo diff --git a/kernel/arch/ia64/kernel/.gitignore b/kernel/arch/ia64/kernel/.gitignore new file mode 100644 index 000000000..21cb0da5d --- /dev/null +++ b/kernel/arch/ia64/kernel/.gitignore @@ -0,0 +1,2 @@ +gate.lds +vmlinux.lds diff --git a/kernel/arch/ia64/kernel/Makefile b/kernel/arch/ia64/kernel/Makefile new file mode 100644 index 000000000..d68b5cf81 --- /dev/null +++ b/kernel/arch/ia64/kernel/Makefile @@ -0,0 +1,113 @@ +# +# Makefile for the linux kernel. 
+# + +ifdef CONFIG_DYNAMIC_FTRACE +CFLAGS_REMOVE_ftrace.o = -pg +endif + +extra-y := head.o init_task.o vmlinux.lds + +obj-y := entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ + irq_lsapic.o ivt.o machvec.o pal.o paravirt_patchlist.o patch.o process.o perfmon.o ptrace.o sal.o \ + salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ + unwind.o mca.o mca_asm.o topology.o dma-mapping.o + +obj-$(CONFIG_ACPI) += acpi.o acpi-ext.o +obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o + +obj-$(CONFIG_IA64_PALINFO) += palinfo.o +obj-$(CONFIG_IOSAPIC) += iosapic.o +obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_SMP) += smp.o smpboot.o +obj-$(CONFIG_NUMA) += numa.o +obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o +obj-$(CONFIG_IA64_CYCLONE) += cyclone.o +obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o +obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o +obj-$(CONFIG_AUDIT) += audit.o +obj-$(CONFIG_PCI_MSI) += msi_ia64.o +mca_recovery-y += mca_drv.o mca_drv_asm.o +obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o + +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirtentry.o \ + paravirt_patch.o + +obj-$(CONFIG_IA64_ESI) += esi.o +ifneq ($(CONFIG_IA64_ESI),) +obj-y += esi_stub.o # must be in kernel proper +endif +obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o +obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o + +obj-$(CONFIG_BINFMT_ELF) += elfcore.o + +# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. +CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 + +# The gate DSO image is built using a special linker script. +include $(src)/Makefile.gate +# tell compiled for native +CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_NATIVE + +# Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) 
based on config +define sed-y + "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}" +endef +quiet_cmd_nr_irqs = GEN $@ +define cmd_nr_irqs + (set -e; \ + echo "#ifndef __ASM_NR_IRQS_H__"; \ + echo "#define __ASM_NR_IRQS_H__"; \ + echo "/*"; \ + echo " * DO NOT MODIFY."; \ + echo " *"; \ + echo " * This file was generated by Kbuild"; \ + echo " *"; \ + echo " */"; \ + echo ""; \ + sed -ne $(sed-y) $<; \ + echo ""; \ + echo "#endif" ) > $@ +endef + +# We use internal kbuild rules to avoid the "is up to date" message from make +arch/$(SRCARCH)/kernel/nr-irqs.s: arch/$(SRCARCH)/kernel/nr-irqs.c + $(Q)mkdir -p $(dir $@) + $(call if_changed_dep,cc_s_c) + +include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s + $(Q)mkdir -p $(dir $@) + $(call cmd,nr_irqs) + +# +# native ivt.S, entry.S and fsys.S +# +ASM_PARAVIRT_OBJS = ivt.o entry.o fsys.o +define paravirtualized_native +AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE +AFLAGS_pvchk-sed-$(1) += -D__IA64_ASM_PARAVIRTUALIZED_PVCHECK +extra-y += pvchk-$(1) +endef +$(foreach obj,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_native,$(obj)))) + +# +# Checker for paravirtualizations of privileged operations. +# +quiet_cmd_pv_check_sed = PVCHK $@ +define cmd_pv_check_sed + sed -f $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed $< > $@ +endef + +$(obj)/pvchk-sed-%.s: $(src)/%.S $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed FORCE + $(call if_changed_dep,as_s_S) +$(obj)/pvchk-%.s: $(obj)/pvchk-sed-%.s FORCE + $(call if_changed,pv_check_sed) +$(obj)/pvchk-%.o: $(obj)/pvchk-%.s FORCE + $(call if_changed,as_o_S) +.PRECIOUS: $(obj)/pvchk-sed-%.s $(obj)/pvchk-%.s $(obj)/pvchk-%.o diff --git a/kernel/arch/ia64/kernel/Makefile.gate b/kernel/arch/ia64/kernel/Makefile.gate new file mode 100644 index 000000000..ceeffc509 --- /dev/null +++ b/kernel/arch/ia64/kernel/Makefile.gate @@ -0,0 +1,27 @@ +# The gate DSO image is built using a special linker script. 
+ +targets += gate.so gate-syms.o + +extra-y += gate.so gate-syms.o gate.lds gate.o + +CPPFLAGS_gate.lds := -P -C -U$(ARCH) + +quiet_cmd_gate = GATE $@ + cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@ + +GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE + $(call if_changed,gate) + +$(obj)/built-in.o: $(obj)/gate-syms.o +$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o + +GATECFLAGS_gate-syms.o = -r +$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE + $(call if_changed,gate) + +# gate-data.o contains the gate DSO image as data in section .data..gate. +# We must build gate.so before we can assemble it. +# Note: kbuild does not track this dependency due to usage of .incbin +$(obj)/gate-data.o: $(obj)/gate.so diff --git a/kernel/arch/ia64/kernel/acpi-ext.c b/kernel/arch/ia64/kernel/acpi-ext.c new file mode 100644 index 000000000..bd09bf74f --- /dev/null +++ b/kernel/arch/ia64/kernel/acpi-ext.c @@ -0,0 +1,104 @@ +/* + * (c) Copyright 2003, 2006 Hewlett-Packard Development Company, L.P. + * Alex Williamson + * Bjorn Helgaas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include + +/* + * Device CSRs that do not appear in PCI config space should be described + * via ACPI. This would normally be done with Address Space Descriptors + * marked as "consumer-only," but old versions of Windows and Linux ignore + * the producer/consumer flag, so HP invented a vendor-defined resource to + * describe the location and size of CSR space. 
+ */ + +struct acpi_vendor_uuid hp_ccsr_uuid = { + .subtype = 2, + .data = { 0xf9, 0xad, 0xe9, 0x69, 0x4f, 0x92, 0x5f, 0xab, 0xf6, 0x4a, + 0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad }, +}; + +static acpi_status hp_ccsr_locate(acpi_handle obj, u64 *base, u64 *length) +{ + acpi_status status; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_resource *resource; + struct acpi_resource_vendor_typed *vendor; + + status = acpi_get_vendor_resource(obj, METHOD_NAME__CRS, &hp_ccsr_uuid, + &buffer); + + resource = buffer.pointer; + vendor = &resource->data.vendor_typed; + + if (ACPI_FAILURE(status) || vendor->byte_length < 16) { + status = AE_NOT_FOUND; + goto exit; + } + + memcpy(base, vendor->byte_data, sizeof(*base)); + memcpy(length, vendor->byte_data + 8, sizeof(*length)); + + exit: + kfree(buffer.pointer); + return status; +} + +struct csr_space { + u64 base; + u64 length; +}; + +static acpi_status find_csr_space(struct acpi_resource *resource, void *data) +{ + struct csr_space *space = data; + struct acpi_resource_address64 addr; + acpi_status status; + + status = acpi_resource_to_address64(resource, &addr); + if (ACPI_SUCCESS(status) && + addr.resource_type == ACPI_MEMORY_RANGE && + addr.address.address_length && + addr.producer_consumer == ACPI_CONSUMER) { + space->base = addr.address.minimum; + space->length = addr.address.address_length; + return AE_CTRL_TERMINATE; + } + return AE_OK; /* keep looking */ +} + +static acpi_status hp_crs_locate(acpi_handle obj, u64 *base, u64 *length) +{ + struct csr_space space = { 0, 0 }; + + acpi_walk_resources(obj, METHOD_NAME__CRS, find_csr_space, &space); + if (!space.length) + return AE_NOT_FOUND; + + *base = space.base; + *length = space.length; + return AE_OK; +} + +acpi_status hp_acpi_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length) +{ + acpi_status status; + + status = hp_ccsr_locate(obj, csr_base, csr_length); + if (ACPI_SUCCESS(status)) + return status; + + return hp_crs_locate(obj, csr_base, 
csr_length); +} +EXPORT_SYMBOL(hp_acpi_csr_space); diff --git a/kernel/arch/ia64/kernel/acpi.c b/kernel/arch/ia64/kernel/acpi.c new file mode 100644 index 000000000..b1698bc04 --- /dev/null +++ b/kernel/arch/ia64/kernel/acpi.c @@ -0,0 +1,994 @@ +/* + * acpi.c - Architecture-Specific Low-Level ACPI Support + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999,2000 Walt Drummond + * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co. + * David Mosberger-Tang + * Copyright (C) 2000 Intel Corp. + * Copyright (C) 2000,2001 J.I. Lee + * Copyright (C) 2001 Paul Diefenbaugh + * Copyright (C) 2001 Jenna Hall + * Copyright (C) 2001 Takayoshi Kochi + * Copyright (C) 2002 Erich Focht + * Copyright (C) 2004 Ashok Raj + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PREFIX "ACPI: " + +int acpi_lapic; +unsigned int acpi_cpei_override; +unsigned int acpi_cpei_phys_cpuid; + +unsigned long acpi_wakeup_address = 0; + +#ifdef CONFIG_IA64_GENERIC +static unsigned long __init acpi_find_rsdp(void) +{ + unsigned long rsdp_phys = 0; + + if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) + rsdp_phys = efi.acpi20; + else if (efi.acpi != EFI_INVALID_TABLE_ADDR) + printk(KERN_WARNING PREFIX + "v1.0/r0.71 tables no longer supported\n"); + return rsdp_phys; +} + +const char __init * +acpi_get_sysname(void) +{ + unsigned long rsdp_phys; + struct acpi_table_rsdp *rsdp; + struct acpi_table_xsdt *xsdt; + struct acpi_table_header *hdr; +#ifdef CONFIG_INTEL_IOMMU + u64 i, nentries; +#endif + + rsdp_phys = acpi_find_rsdp(); + if (!rsdp_phys) { + printk(KERN_ERR + "ACPI 2.0 RSDP not found, default to \"dig\"\n"); + return "dig"; + } + + rsdp = (struct acpi_table_rsdp *)__va(rsdp_phys); + if (strncmp(rsdp->signature, ACPI_SIG_RSDP, sizeof(ACPI_SIG_RSDP) - 1)) { + printk(KERN_ERR + "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n"); + return "dig"; + } + + xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_physical_address); + hdr = &xsdt->header; + if (strncmp(hdr->signature, ACPI_SIG_XSDT, sizeof(ACPI_SIG_XSDT) - 1)) { + printk(KERN_ERR + "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n"); + return "dig"; + } + + if (!strcmp(hdr->oem_id, "HP")) { + return "hpzx1"; + } else if (!strcmp(hdr->oem_id, "SGI")) { + if 
(!strcmp(hdr->oem_table_id + 4, "UV")) + return "uv"; + else + return "sn2"; + } + +#ifdef CONFIG_INTEL_IOMMU + /* Look for Intel IOMMU */ + nentries = (hdr->length - sizeof(*hdr)) / + sizeof(xsdt->table_offset_entry[0]); + for (i = 0; i < nentries; i++) { + hdr = __va(xsdt->table_offset_entry[i]); + if (strncmp(hdr->signature, ACPI_SIG_DMAR, + sizeof(ACPI_SIG_DMAR) - 1) == 0) + return "dig_vtd"; + } +#endif + + return "dig"; +} +#endif /* CONFIG_IA64_GENERIC */ + +#define ACPI_MAX_PLATFORM_INTERRUPTS 256 + +/* Array to record platform interrupt vectors for generic interrupt routing. */ +int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = { + [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1 +}; + +enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC; + +/* + * Interrupt routing API for device drivers. Provides interrupt vector for + * a generic platform event. Currently only CPEI is implemented. + */ +int acpi_request_vector(u32 int_type) +{ + int vector = -1; + + if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) { + /* corrected platform error interrupt */ + vector = platform_intr_list[int_type]; + } else + printk(KERN_ERR + "acpi_request_vector(): invalid interrupt type\n"); + return vector; +} + +char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size) +{ + return __va(phys_addr); +} + +void __init __acpi_unmap_table(char *map, unsigned long size) +{ +} + +/* -------------------------------------------------------------------------- + Boot-time Table Parsing + -------------------------------------------------------------------------- */ + +static int available_cpus __initdata; +struct acpi_table_madt *acpi_madt __initdata; +static u8 has_8259; + +static int __init +acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, + const unsigned long end) +{ + struct acpi_madt_local_apic_override *lapic; + + lapic = (struct acpi_madt_local_apic_override *)header; + + if (BAD_MADT_ENTRY(lapic, end)) + return -EINVAL; + + if (lapic->address) 
{ + iounmap(ipi_base_addr); + ipi_base_addr = ioremap(lapic->address, 0); + } + return 0; +} + +static int __init +acpi_parse_lsapic(struct acpi_subtable_header * header, const unsigned long end) +{ + struct acpi_madt_local_sapic *lsapic; + + lsapic = (struct acpi_madt_local_sapic *)header; + + /*Skip BAD_MADT_ENTRY check, as lsapic size could vary */ + + if (lsapic->lapic_flags & ACPI_MADT_ENABLED) { +#ifdef CONFIG_SMP + smp_boot_data.cpu_phys_id[available_cpus] = + (lsapic->id << 8) | lsapic->eid; +#endif + ++available_cpus; + } + + total_cpus++; + return 0; +} + +static int __init +acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end) +{ + struct acpi_madt_local_apic_nmi *lacpi_nmi; + + lacpi_nmi = (struct acpi_madt_local_apic_nmi *)header; + + if (BAD_MADT_ENTRY(lacpi_nmi, end)) + return -EINVAL; + + /* TBD: Support lapic_nmi entries */ + return 0; +} + +static int __init +acpi_parse_iosapic(struct acpi_subtable_header * header, const unsigned long end) +{ + struct acpi_madt_io_sapic *iosapic; + + iosapic = (struct acpi_madt_io_sapic *)header; + + if (BAD_MADT_ENTRY(iosapic, end)) + return -EINVAL; + + return iosapic_init(iosapic->address, iosapic->global_irq_base); +} + +static unsigned int __initdata acpi_madt_rev; + +static int __init +acpi_parse_plat_int_src(struct acpi_subtable_header * header, + const unsigned long end) +{ + struct acpi_madt_interrupt_source *plintsrc; + int vector; + + plintsrc = (struct acpi_madt_interrupt_source *)header; + + if (BAD_MADT_ENTRY(plintsrc, end)) + return -EINVAL; + + /* + * Get vector assignment for this interrupt, set attributes, + * and program the IOSAPIC routing table. + */ + vector = iosapic_register_platform_intr(plintsrc->type, + plintsrc->global_irq, + plintsrc->io_sapic_vector, + plintsrc->eid, + plintsrc->id, + ((plintsrc->inti_flags & ACPI_MADT_POLARITY_MASK) == + ACPI_MADT_POLARITY_ACTIVE_HIGH) ? 
+ IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, + ((plintsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) == + ACPI_MADT_TRIGGER_EDGE) ? + IOSAPIC_EDGE : IOSAPIC_LEVEL); + + platform_intr_list[plintsrc->type] = vector; + if (acpi_madt_rev > 1) { + acpi_cpei_override = plintsrc->flags & ACPI_MADT_CPEI_OVERRIDE; + } + + /* + * Save the physical id, so we can check when its being removed + */ + acpi_cpei_phys_cpuid = ((plintsrc->id << 8) | (plintsrc->eid)) & 0xffff; + + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +unsigned int can_cpei_retarget(void) +{ + extern int cpe_vector; + extern unsigned int force_cpei_retarget; + + /* + * Only if CPEI is supported and the override flag + * is present, otherwise return that its re-targettable + * if we are in polling mode. + */ + if (cpe_vector > 0) { + if (acpi_cpei_override || force_cpei_retarget) + return 1; + else + return 0; + } + return 1; +} + +unsigned int is_cpu_cpei_target(unsigned int cpu) +{ + unsigned int logical_id; + + logical_id = cpu_logical_id(acpi_cpei_phys_cpuid); + + if (logical_id == cpu) + return 1; + else + return 0; +} + +void set_cpei_target_cpu(unsigned int cpu) +{ + acpi_cpei_phys_cpuid = cpu_physical_id(cpu); +} +#endif + +unsigned int get_cpei_target_cpu(void) +{ + return acpi_cpei_phys_cpuid; +} + +static int __init +acpi_parse_int_src_ovr(struct acpi_subtable_header * header, + const unsigned long end) +{ + struct acpi_madt_interrupt_override *p; + + p = (struct acpi_madt_interrupt_override *)header; + + if (BAD_MADT_ENTRY(p, end)) + return -EINVAL; + + iosapic_override_isa_irq(p->source_irq, p->global_irq, + ((p->inti_flags & ACPI_MADT_POLARITY_MASK) == + ACPI_MADT_POLARITY_ACTIVE_LOW) ? + IOSAPIC_POL_LOW : IOSAPIC_POL_HIGH, + ((p->inti_flags & ACPI_MADT_TRIGGER_MASK) == + ACPI_MADT_TRIGGER_LEVEL) ? 
+ IOSAPIC_LEVEL : IOSAPIC_EDGE); + return 0; +} + +static int __init +acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end) +{ + struct acpi_madt_nmi_source *nmi_src; + + nmi_src = (struct acpi_madt_nmi_source *)header; + + if (BAD_MADT_ENTRY(nmi_src, end)) + return -EINVAL; + + /* TBD: Support nimsrc entries */ + return 0; +} + +static void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERMOW", 6))) { + + /* + * Unfortunately ITC_DRIFT is not yet part of the + * official SAL spec, so the ITC_DRIFT bit is not + * set by the BIOS on this hardware. + */ + sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT; + + cyclone_setup(); + } +} + +static int __init acpi_parse_madt(struct acpi_table_header *table) +{ + acpi_madt = (struct acpi_table_madt *)table; + + acpi_madt_rev = acpi_madt->header.revision; + + /* remember the value for reference after free_initmem() */ +#ifdef CONFIG_ITANIUM + has_8259 = 1; /* Firmware on old Itanium systems is broken */ +#else + has_8259 = acpi_madt->flags & ACPI_MADT_PCAT_COMPAT; +#endif + iosapic_system_init(has_8259); + + /* Get base address of IPI Message Block */ + + if (acpi_madt->address) + ipi_base_addr = ioremap(acpi_madt->address, 0); + + printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr); + + acpi_madt_oem_check(acpi_madt->header.oem_id, + acpi_madt->header.oem_table_id); + + return 0; +} + +#ifdef CONFIG_ACPI_NUMA + +#undef SLIT_DEBUG + +#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32) + +static int __initdata srat_num_cpus; /* number of cpus */ +static u32 pxm_flag[PXM_FLAG_LEN]; +#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) +#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) +static struct acpi_table_slit __initdata *slit_table; +cpumask_t early_cpu_possible_map = CPU_MASK_NONE; + +static int __init +get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa) +{ + int pxm; 
+ + pxm = pa->proximity_domain_lo; + if (ia64_platform_is("sn2") || acpi_srat_revision >= 2) + pxm += pa->proximity_domain_hi[0] << 8; + return pxm; +} + +static int __init +get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma) +{ + int pxm; + + pxm = ma->proximity_domain; + if (!ia64_platform_is("sn2") && acpi_srat_revision <= 1) + pxm &= 0xff; + + return pxm; +} + +/* + * ACPI 2.0 SLIT (System Locality Information Table) + * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf + */ +void __init acpi_numa_slit_init(struct acpi_table_slit *slit) +{ + u32 len; + + len = sizeof(struct acpi_table_header) + 8 + + slit->locality_count * slit->locality_count; + if (slit->header.length != len) { + printk(KERN_ERR + "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n", + len, slit->header.length); + return; + } + slit_table = slit; +} + +void __init +acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) +{ + int pxm; + + if (!(pa->flags & ACPI_SRAT_CPU_ENABLED)) + return; + + if (srat_num_cpus >= ARRAY_SIZE(node_cpuid)) { + printk_once(KERN_WARNING + "node_cpuid[%ld] is too small, may not be able to use all cpus\n", + ARRAY_SIZE(node_cpuid)); + return; + } + pxm = get_processor_proximity_domain(pa); + + /* record this node in proximity bitmap */ + pxm_bit_set(pxm); + + node_cpuid[srat_num_cpus].phys_id = + (pa->apic_id << 8) | (pa->local_sapic_eid); + /* nid should be overridden as logical node id later */ + node_cpuid[srat_num_cpus].nid = pxm; + cpumask_set_cpu(srat_num_cpus, &early_cpu_possible_map); + srat_num_cpus++; +} + +int __init +acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) +{ + unsigned long paddr, size; + int pxm; + struct node_memblk_s *p, *q, *pend; + + pxm = get_memory_proximity_domain(ma); + + /* fill node memory chunk structure */ + paddr = ma->base_address; + size = ma->length; + + /* Ignore disabled entries */ + if (!(ma->flags & ACPI_SRAT_MEM_ENABLED)) + return -1; + + /* record this node in 
proximity bitmap */ + pxm_bit_set(pxm); + + /* Insertion sort based on base address */ + pend = &node_memblk[num_node_memblks]; + for (p = &node_memblk[0]; p < pend; p++) { + if (paddr < p->start_paddr) + break; + } + if (p < pend) { + for (q = pend - 1; q >= p; q--) + *(q + 1) = *q; + } + p->start_paddr = paddr; + p->size = size; + p->nid = pxm; + num_node_memblks++; + return 0; +} + +void __init acpi_numa_arch_fixup(void) +{ + int i, j, node_from, node_to; + + /* If there's no SRAT, fix the phys_id and mark node 0 online */ + if (srat_num_cpus == 0) { + node_set_online(0); + node_cpuid[0].phys_id = hard_smp_processor_id(); + return; + } + + /* + * MCD - This can probably be dropped now. No need for pxm ID to node ID + * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES. + */ + nodes_clear(node_online_map); + for (i = 0; i < MAX_PXM_DOMAINS; i++) { + if (pxm_bit_test(i)) { + int nid = acpi_map_pxm_to_node(i); + node_set_online(nid); + } + } + + /* set logical node id in memory chunk structure */ + for (i = 0; i < num_node_memblks; i++) + node_memblk[i].nid = pxm_to_node(node_memblk[i].nid); + + /* assign memory bank numbers for each chunk on each node */ + for_each_online_node(i) { + int bank; + + bank = 0; + for (j = 0; j < num_node_memblks; j++) + if (node_memblk[j].nid == i) + node_memblk[j].bank = bank++; + } + + /* set logical node id in cpu structure */ + for_each_possible_early_cpu(i) + node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid); + + printk(KERN_INFO "Number of logical nodes in system = %d\n", + num_online_nodes()); + printk(KERN_INFO "Number of memory chunks in system = %d\n", + num_node_memblks); + + if (!slit_table) { + for (i = 0; i < MAX_NUMNODES; i++) + for (j = 0; j < MAX_NUMNODES; j++) + node_distance(i, j) = i == j ? 
LOCAL_DISTANCE : + REMOTE_DISTANCE; + return; + } + + memset(numa_slit, -1, sizeof(numa_slit)); + for (i = 0; i < slit_table->locality_count; i++) { + if (!pxm_bit_test(i)) + continue; + node_from = pxm_to_node(i); + for (j = 0; j < slit_table->locality_count; j++) { + if (!pxm_bit_test(j)) + continue; + node_to = pxm_to_node(j); + node_distance(node_from, node_to) = + slit_table->entry[i * slit_table->locality_count + j]; + } + } + +#ifdef SLIT_DEBUG + printk("ACPI 2.0 SLIT locality table:\n"); + for_each_online_node(i) { + for_each_online_node(j) + printk("%03d ", node_distance(i, j)); + printk("\n"); + } +#endif +} +#endif /* CONFIG_ACPI_NUMA */ + +/* + * success: return IRQ number (>=0) + * failure: return < 0 + */ +int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity) +{ + if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM) + return gsi; + + if (has_8259 && gsi < 16) + return isa_irq_to_vector(gsi); + + return iosapic_register_intr(gsi, + (polarity == + ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : + IOSAPIC_POL_LOW, + (triggering == + ACPI_EDGE_SENSITIVE) ? 
IOSAPIC_EDGE : + IOSAPIC_LEVEL); +} +EXPORT_SYMBOL_GPL(acpi_register_gsi); + +void acpi_unregister_gsi(u32 gsi) +{ + if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM) + return; + + if (has_8259 && gsi < 16) + return; + + iosapic_unregister_intr(gsi); +} +EXPORT_SYMBOL_GPL(acpi_unregister_gsi); + +static int __init acpi_parse_fadt(struct acpi_table_header *table) +{ + struct acpi_table_header *fadt_header; + struct acpi_table_fadt *fadt; + + fadt_header = (struct acpi_table_header *)table; + if (fadt_header->revision != 3) + return -ENODEV; /* Only deal with ACPI 2.0 FADT */ + + fadt = (struct acpi_table_fadt *)fadt_header; + + acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, + ACPI_ACTIVE_LOW); + return 0; +} + +int __init early_acpi_boot_init(void) +{ + int ret; + + /* + * do a partial walk of MADT to determine how many CPUs + * we have including offline CPUs + */ + if (acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { + printk(KERN_ERR PREFIX "Can't find MADT\n"); + return 0; + } + + ret = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, + acpi_parse_lsapic, NR_CPUS); + if (ret < 1) + printk(KERN_ERR PREFIX + "Error parsing MADT - no LAPIC entries\n"); + else + acpi_lapic = 1; + +#ifdef CONFIG_SMP + if (available_cpus == 0) { + printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n"); + printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id()); + smp_boot_data.cpu_phys_id[available_cpus] = + hard_smp_processor_id(); + available_cpus = 1; /* We've got at least one of these, no? */ + } + smp_boot_data.cpu_count = available_cpus; +#endif + /* Make boot-up look pretty */ + printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, + total_cpus); + + return 0; +} + +int __init acpi_boot_init(void) +{ + + /* + * MADT + * ---- + * Parse the Multiple APIC Description Table (MADT), if exists. + * Note that this table provides platform SMP configuration + * information -- the successor to MPS tables. 
+ */ + + if (acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { + printk(KERN_ERR PREFIX "Can't find MADT\n"); + goto skip_madt; + } + + /* Local APIC */ + + if (acpi_table_parse_madt + (ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, acpi_parse_lapic_addr_ovr, 0) < 0) + printk(KERN_ERR PREFIX + "Error parsing LAPIC address override entry\n"); + + if (acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0) + < 0) + printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); + + /* I/O APIC */ + + if (acpi_table_parse_madt + (ACPI_MADT_TYPE_IO_SAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1) { + if (!ia64_platform_is("sn2")) + printk(KERN_ERR PREFIX + "Error parsing MADT - no IOSAPIC entries\n"); + } + + /* System-Level Interrupt Routing */ + + if (acpi_table_parse_madt + (ACPI_MADT_TYPE_INTERRUPT_SOURCE, acpi_parse_plat_int_src, + ACPI_MAX_PLATFORM_INTERRUPTS) < 0) + printk(KERN_ERR PREFIX + "Error parsing platform interrupt source entry\n"); + + if (acpi_table_parse_madt + (ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, 0) < 0) + printk(KERN_ERR PREFIX + "Error parsing interrupt source overrides entry\n"); + + if (acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, 0) < 0) + printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); + skip_madt: + + /* + * FADT says whether a legacy keyboard controller is present. + * The FADT also contains an SCI_INT line, by which the system + * gets interrupts such as power and sleep buttons. If it's not + * on a Legacy interrupt, it needs to be setup. 
+ */ + if (acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt)) + printk(KERN_ERR PREFIX "Can't find FADT\n"); + +#ifdef CONFIG_ACPI_NUMA +#ifdef CONFIG_SMP + if (srat_num_cpus == 0) { + int cpu, i = 1; + for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++) + if (smp_boot_data.cpu_phys_id[cpu] != + hard_smp_processor_id()) + node_cpuid[i++].phys_id = + smp_boot_data.cpu_phys_id[cpu]; + } +#endif + build_cpu_to_node_map(); +#endif + return 0; +} + +int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) +{ + int tmp; + + if (has_8259 && gsi < 16) + *irq = isa_irq_to_vector(gsi); + else { + tmp = gsi_to_irq(gsi); + if (tmp == -1) + return -1; + *irq = tmp; + } + return 0; +} + +int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) +{ + if (isa_irq >= 16) + return -1; + *gsi = isa_irq; + return 0; +} + +/* + * ACPI based hotplug CPU support + */ +#ifdef CONFIG_ACPI_HOTPLUG_CPU +static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +{ +#ifdef CONFIG_ACPI_NUMA + /* + * We don't have cpu-only-node hotadd. But if the system equips + * SRAT table, pxm is already found and node is ready. + * So, just pxm_to_nid(pxm) is OK. + * This code here is for the system which doesn't have full SRAT + * table for possible cpus. + */ + node_cpuid[cpu].phys_id = physid; + node_cpuid[cpu].nid = acpi_get_node(handle); +#endif + return 0; +} + +int additional_cpus __initdata = -1; + +static __init int setup_additional_cpus(char *s) +{ + if (s) + additional_cpus = simple_strtol(s, NULL, 0); + + return 0; +} + +early_param("additional_cpus", setup_additional_cpus); + +/* + * cpu_possible_mask should be static, it cannot change as CPUs + * are onlined, or offlined. The reason is per-cpu data-structures + * are allocated by some modules at init time, and dont expect to + * do this dynamically on cpu arrival/departure. + * cpu_present_mask on the other hand can change dynamically. 
+ * In case when cpu_hotplug is not compiled, then we resort to current + * behaviour, which is cpu_possible == cpu_present. + * - Ashok Raj + * + * Three ways to find out the number of additional hotplug CPUs: + * - If the BIOS specified disabled CPUs in ACPI/mptables use that. + * - The user can overwrite it with additional_cpus=NUM + * - Otherwise don't reserve additional CPUs. + */ +__init void prefill_possible_map(void) +{ + int i; + int possible, disabled_cpus; + + disabled_cpus = total_cpus - available_cpus; + + if (additional_cpus == -1) { + if (disabled_cpus > 0) + additional_cpus = disabled_cpus; + else + additional_cpus = 0; + } + + possible = available_cpus + additional_cpus; + + if (possible > nr_cpu_ids) + possible = nr_cpu_ids; + + printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", + possible, max((possible - available_cpus), 0)); + + for (i = 0; i < possible; i++) + set_cpu_possible(i, true); +} + +static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) +{ + cpumask_t tmp_map; + int cpu; + + cpumask_complement(&tmp_map, cpu_present_mask); + cpu = cpumask_first(&tmp_map); + if (cpu >= nr_cpu_ids) + return -EINVAL; + + acpi_map_cpu2node(handle, cpu, physid); + + set_cpu_present(cpu, true); + ia64_cpu_to_sapicid[cpu] = physid; + + acpi_processor_set_pdc(handle); + + *pcpu = cpu; + return (0); +} + +/* wrapper to silence section mismatch warning */ +int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) +{ + return _acpi_map_lsapic(handle, physid, pcpu); +} +EXPORT_SYMBOL(acpi_map_cpu); + +int acpi_unmap_cpu(int cpu) +{ + ia64_cpu_to_sapicid[cpu] = -1; + set_cpu_present(cpu, false); + +#ifdef CONFIG_ACPI_NUMA + /* NUMA specific cleanup's */ +#endif + + return (0); +} +EXPORT_SYMBOL(acpi_unmap_cpu); +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + +#ifdef CONFIG_ACPI_NUMA +static acpi_status acpi_map_iosapic(acpi_handle handle, u32 depth, + void *context, void **ret) +{ + struct acpi_buffer buffer = { 
ACPI_ALLOCATE_BUFFER, NULL }; + union acpi_object *obj; + struct acpi_madt_io_sapic *iosapic; + unsigned int gsi_base; + int node; + + /* Only care about objects w/ a method that returns the MADT */ + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) + return AE_OK; + + if (!buffer.length || !buffer.pointer) + return AE_OK; + + obj = buffer.pointer; + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length < sizeof(*iosapic)) { + kfree(buffer.pointer); + return AE_OK; + } + + iosapic = (struct acpi_madt_io_sapic *)obj->buffer.pointer; + + if (iosapic->header.type != ACPI_MADT_TYPE_IO_SAPIC) { + kfree(buffer.pointer); + return AE_OK; + } + + gsi_base = iosapic->global_irq_base; + + kfree(buffer.pointer); + + /* OK, it's an IOSAPIC MADT entry; associate it with a node */ + node = acpi_get_node(handle); + if (node == NUMA_NO_NODE || !node_online(node) || + cpumask_empty(cpumask_of_node(node))) + return AE_OK; + + /* We know a gsi to node mapping! */ + map_iosapic_to_node(gsi_base, node); + return AE_OK; +} + +static int __init +acpi_map_iosapics (void) +{ + acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL); + return 0; +} + +fs_initcall(acpi_map_iosapics); +#endif /* CONFIG_ACPI_NUMA */ + +int __ref acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) +{ + int err; + + if ((err = iosapic_init(phys_addr, gsi_base))) + return err; + +#ifdef CONFIG_ACPI_NUMA + acpi_map_iosapic(handle, 0, NULL, NULL); +#endif /* CONFIG_ACPI_NUMA */ + + return 0; +} + +EXPORT_SYMBOL(acpi_register_ioapic); + +int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) +{ + return iosapic_remove(gsi_base); +} + +EXPORT_SYMBOL(acpi_unregister_ioapic); + +/* + * acpi_suspend_lowlevel() - save kernel state and suspend. + * + * TBD when when IA64 starts to support suspend... 
/*
 * acpi_suspend_lowlevel() - save kernel state and suspend.
 *
 * Placeholder: nothing is saved and no suspend is attempted, the function
 * only reports success (0).  TBD when IA64 starts to support suspend...
 */
int acpi_suspend_lowlevel(void)
{
	return 0;
}
offsetof (struct task_struct, pids[PIDTYPE_PID].pid)); + DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level)); + DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0])); + DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending)); + DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid)); + DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent)); + DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand)); + DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal)); + DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid)); + DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp)); + DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack)); + + BLANK(); + + DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock)); + + BLANK(); + + DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct, + group_stop_count)); + DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending)); + + BLANK(); + + DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6)); + DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7)); + DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd)); + DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd)); + DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8)); + DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9)); + DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10)); + DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11)); + DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr)); + DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip)); + DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs)); + DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat)); + DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, 
offsetof (struct pt_regs, ar_pfs)); + DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc)); + DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat)); + + DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore)); + DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr)); + DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0)); + DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs)); + DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1)); + DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12)); + DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13)); + DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr)); + DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15)); + DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14)); + DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2)); + DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3)); + DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16)); + DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17)); + DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18)); + DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19)); + DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20)); + DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21)); + DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22)); + DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23)); + DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24)); + DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25)); + DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26)); + DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27)); + DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28)); + DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29)); + DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct 
pt_regs, r30)); + DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31)); + DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv)); + DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6)); + DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7)); + DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8)); + DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9)); + DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10)); + DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11)); + + BLANK(); + + DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat)); + DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr)); + DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2)); + DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3)); + DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4)); + DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5)); + DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12)); + DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13)); + DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14)); + DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15)); + DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16)); + DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17)); + DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18)); + DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19)); + DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20)); + DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21)); + DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22)); + DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23)); + 
DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24)); + DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25)); + DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26)); + DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27)); + DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28)); + DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29)); + DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30)); + DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31)); + DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4)); + DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5)); + DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6)); + DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7)); + DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0)); + DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1)); + DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2)); + DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3)); + DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4)); + DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5)); + DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs)); + DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc)); + DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat)); + DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat)); + DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore)); + DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr)); + + BLANK(); + + DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip)); + DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, 
offsetof (struct sigcontext, sc_ar_bsp)); + DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr)); + DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat)); + DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat)); + DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0])); + DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm)); + DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags)); + DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6])); + DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr)); + DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12])); + DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base)); + DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs)); + + BLANK(); + + DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal)); + + BLANK(); + + DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0)); + DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1)); + DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2)); + DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler)); + DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc)); + BLANK(); + /* for assembly files which can't include sched.h: */ + DEFINE(IA64_CLONE_VFORK, CLONE_VFORK); + DEFINE(IA64_CLONE_VM, CLONE_VM); + + BLANK(); + DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET, + offsetof (struct cpuinfo_ia64, nsec_per_cyc)); + DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET, + offsetof (struct cpuinfo_ia64, ptce_base)); + DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET, + offsetof (struct cpuinfo_ia64, ptce_count)); + DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET, + offsetof (struct cpuinfo_ia64, ptce_stride)); + BLANK(); + DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, + offsetof (struct timespec, 
tv_nsec)); + + DEFINE(CLONE_SETTLS_BIT, 19); +#if CLONE_SETTLS != (1<<19) +# error "CLONE_SETTLS_BIT incorrect, please fix" +#endif + + BLANK(); + DEFINE(IA64_MCA_CPU_MCA_STACK_OFFSET, + offsetof (struct ia64_mca_cpu, mca_stack)); + DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET, + offsetof (struct ia64_mca_cpu, init_stack)); + BLANK(); + DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET, + offsetof (struct ia64_sal_os_state, os_gp)); + DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET, + offsetof (struct ia64_sal_os_state, proc_state_param)); + DEFINE(IA64_SAL_OS_STATE_SAL_RA_OFFSET, + offsetof (struct ia64_sal_os_state, sal_ra)); + DEFINE(IA64_SAL_OS_STATE_SAL_GP_OFFSET, + offsetof (struct ia64_sal_os_state, sal_gp)); + DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET, + offsetof (struct ia64_sal_os_state, pal_min_state)); + DEFINE(IA64_SAL_OS_STATE_OS_STATUS_OFFSET, + offsetof (struct ia64_sal_os_state, os_status)); + DEFINE(IA64_SAL_OS_STATE_CONTEXT_OFFSET, + offsetof (struct ia64_sal_os_state, context)); + DEFINE(IA64_SAL_OS_STATE_SIZE, + sizeof (struct ia64_sal_os_state)); + BLANK(); + + DEFINE(IA64_PMSA_GR_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_gr)); + DEFINE(IA64_PMSA_BANK1_GR_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_bank1_gr)); + DEFINE(IA64_PMSA_PR_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_pr)); + DEFINE(IA64_PMSA_BR0_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_br0)); + DEFINE(IA64_PMSA_RSC_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_rsc)); + DEFINE(IA64_PMSA_IIP_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_iip)); + DEFINE(IA64_PMSA_IPSR_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_ipsr)); + DEFINE(IA64_PMSA_IFS_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_ifs)); + DEFINE(IA64_PMSA_XIP_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_xip)); + BLANK(); + + /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */ + DEFINE(IA64_GTOD_SEQ_OFFSET, + offsetof (struct fsyscall_gtod_data_t, 
/*
 * audit_classify_syscall - map a syscall number to a numeric class code.
 * @abi:     not used by this implementation
 * @syscall: syscall number to classify
 *
 * Returns 2 for open, 3 for openat, 5 for execve and 0 for everything
 * else.  What the individual codes mean to the caller is not visible
 * from this file.
 */
int audit_classify_syscall(int abi, unsigned syscall)
{
	if (syscall == __NR_open)
		return 2;
	if (syscall == __NR_openat)
		return 3;
	if (syscall == __NR_execve)
		return 5;

	return 0;
}
/*
 * The unimplemented bits of a virtual address must be set
 * to the value of the most significant implemented bit.
 * unimpl_va_mask includes all unimplemented bits and
 * the most significant implemented bit, so the result
 * of an and operation with the mask must be all 0's
 * or all 1's for the address to be valid.
 *
 * Evaluates to 1 when "va" is not valid in that sense, 0 otherwise.
 * Written as a statement expression so that "va" and the per-CPU mask are
 * each evaluated exactly once, which makes arguments with side effects safe.
 */
#define unimplemented_virtual_address(va) ({				\
	const unsigned long mask_ = local_cpu_data->unimpl_va_mask;	\
	const unsigned long bits_ = (va) & mask_;			\
	bits_ != 0 && bits_ != mask_;					\
})
+ */ + +struct illegal_op_return +ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec) +{ + unsigned long bundle[2]; + unsigned long opcode, btype, qp, offset, cpl; + unsigned long next_ip; + struct siginfo siginfo; + struct illegal_op_return rv; + long tmp_taken, unimplemented_address; + + rv.fkt = (unsigned long) -1; + + /* + * Decode the instruction bundle. + */ + + if (copy_from_user(bundle, (void *) (regs->cr_iip), sizeof(bundle))) + return rv; + + next_ip = (unsigned long) regs->cr_iip + 16; + + /* "brl" must be in slot 2. */ + if (ia64_psr(regs)->ri != 1) return rv; + + /* Must be "mlx" template */ + if ((bundle[0] & 0x1e) != 0x4) return rv; + + opcode = (bundle[1] >> 60); + btype = ((bundle[1] >> 29) & 0x7); + qp = ((bundle[1] >> 23) & 0x3f); + offset = ((bundle[1] & 0x0800000000000000L) << 4) + | ((bundle[1] & 0x00fffff000000000L) >> 32) + | ((bundle[1] & 0x00000000007fffffL) << 40) + | ((bundle[0] & 0xffff000000000000L) >> 24); + + tmp_taken = regs->pr & (1L << qp); + + switch(opcode) { + + case 0xC: + /* + * Long Branch. + */ + if (btype != 0) return rv; + rv.fkt = 0; + if (!(tmp_taken)) { + /* + * Qualifying predicate is 0. + * Skip instruction. + */ + regs->cr_iip = next_ip; + ia64_psr(regs)->ri = 0; + return rv; + } + break; + + case 0xD: + /* + * Long Call. + */ + rv.fkt = 0; + if (!(tmp_taken)) { + /* + * Qualifying predicate is 0. + * Skip instruction. 
+ */ + regs->cr_iip = next_ip; + ia64_psr(regs)->ri = 0; + return rv; + } + + /* + * BR[btype] = IP+16 + */ + switch(btype) { + case 0: + regs->b0 = next_ip; + break; + case 1: + rv.fkt = (unsigned long) &ia64_set_b1; + break; + case 2: + rv.fkt = (unsigned long) &ia64_set_b2; + break; + case 3: + rv.fkt = (unsigned long) &ia64_set_b3; + break; + case 4: + rv.fkt = (unsigned long) &ia64_set_b4; + break; + case 5: + rv.fkt = (unsigned long) &ia64_set_b5; + break; + case 6: + regs->b6 = next_ip; + break; + case 7: + regs->b7 = next_ip; + break; + } + rv.arg1 = next_ip; + + /* + * AR[PFS].pfm = CFM + * AR[PFS].pec = AR[EC] + * AR[PFS].ppl = PSR.cpl + */ + cpl = ia64_psr(regs)->cpl; + regs->ar_pfs = ((regs->cr_ifs & 0x3fffffffff) + | (ar_ec << 52) | (cpl << 62)); + + /* + * CFM.sof -= CFM.sol + * CFM.sol = 0 + * CFM.sor = 0 + * CFM.rrb.gr = 0 + * CFM.rrb.fr = 0 + * CFM.rrb.pr = 0 + */ + regs->cr_ifs = ((regs->cr_ifs & 0xffffffc00000007f) + - ((regs->cr_ifs >> 7) & 0x7f)); + + break; + + default: + /* + * Unknown opcode. + */ + return rv; + + } + + regs->cr_iip += offset; + ia64_psr(regs)->ri = 0; + + if (ia64_psr(regs)->it == 0) + unimplemented_address = unimplemented_physical_address(regs->cr_iip); + else + unimplemented_address = unimplemented_virtual_address(regs->cr_iip); + + if (unimplemented_address) { + /* + * The target address contains unimplemented bits. + */ + printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n"); + siginfo.si_signo = SIGILL; + siginfo.si_errno = 0; + siginfo.si_flags = 0; + siginfo.si_isr = 0; + siginfo.si_imm = 0; + siginfo.si_code = ILL_BADIADDR; + force_sig_info(SIGILL, &siginfo, current); + } else if (ia64_psr(regs)->tb) { + /* + * Branch Tracing is enabled. + * Force a taken branch signal. 
+ */ + siginfo.si_signo = SIGTRAP; + siginfo.si_errno = 0; + siginfo.si_code = TRAP_BRANCH; + siginfo.si_flags = 0; + siginfo.si_isr = 0; + siginfo.si_addr = 0; + siginfo.si_imm = 0; + force_sig_info(SIGTRAP, &siginfo, current); + } else if (ia64_psr(regs)->ss) { + /* + * Single Step is enabled. + * Force a trace signal. + */ + siginfo.si_signo = SIGTRAP; + siginfo.si_errno = 0; + siginfo.si_code = TRAP_TRACE; + siginfo.si_flags = 0; + siginfo.si_isr = 0; + siginfo.si_addr = 0; + siginfo.si_imm = 0; + force_sig_info(SIGTRAP, &siginfo, current); + } + return rv; +} diff --git a/kernel/arch/ia64/kernel/crash.c b/kernel/arch/ia64/kernel/crash.c new file mode 100644 index 000000000..2955f359e --- /dev/null +++ b/kernel/arch/ia64/kernel/crash.c @@ -0,0 +1,286 @@ +/* + * arch/ia64/kernel/crash.c + * + * Architecture specific (ia64) functions for kexec based crash dumps. + * + * Created by: Khalid Aziz + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2005 Intel Corp Zou Nan hai + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +int kdump_status[NR_CPUS]; +static atomic_t kdump_cpu_frozen; +atomic_t kdump_in_progress; +static int kdump_freeze_monarch; +static int kdump_on_init = 1; +static int kdump_on_fatal_mca = 1; + +static inline Elf64_Word +*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data, + size_t data_len) +{ + struct elf_note *note = (struct elf_note *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = data_len; + note->n_type = type; + buf += (sizeof(*note) + 3)/4; + memcpy(buf, name, note->n_namesz); + buf += (note->n_namesz + 3)/4; + memcpy(buf, data, data_len); + buf += (data_len + 3)/4; + return buf; +} + +static void +final_note(void *buf) +{ + memset(buf, 0, sizeof(struct elf_note)); +} + +extern void ia64_dump_cpu_regs(void *); + +static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus); + +void +crash_save_this_cpu(void) +{ + 
void *buf; + unsigned long cfm, sof, sol; + + int cpu = smp_processor_id(); + struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu); + + elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg); + memset(prstatus, 0, sizeof(*prstatus)); + prstatus->pr_pid = current->pid; + + ia64_dump_cpu_regs(dst); + cfm = dst[43]; + sol = (cfm >> 7) & 0x7f; + sof = cfm & 0x7f; + dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46], + sof - sol); + + buf = (u64 *) per_cpu_ptr(crash_notes, cpu); + if (!buf) + return; + buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, prstatus, + sizeof(*prstatus)); + final_note(buf); +} + +#ifdef CONFIG_SMP +static int +kdump_wait_cpu_freeze(void) +{ + int cpu_num = num_online_cpus() - 1; + int timeout = 1000; + while(timeout-- > 0) { + if (atomic_read(&kdump_cpu_frozen) == cpu_num) + return 0; + udelay(1000); + } + return 1; +} +#endif + +void +machine_crash_shutdown(struct pt_regs *pt) +{ + /* This function is only called after the system + * has paniced or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means shooting down the other cpus in + * an SMP system. + */ + kexec_disable_iosapic(); +#ifdef CONFIG_SMP + /* + * If kdump_on_init is set and an INIT is asserted here, kdump will + * be started again via INIT monarch. + */ + local_irq_disable(); + ia64_set_psr_mc(); /* mask MCA/INIT */ + if (atomic_inc_return(&kdump_in_progress) != 1) + unw_init_running(kdump_cpu_freeze, NULL); + + /* + * Now this cpu is ready for kdump. + * Stop all others by IPI or INIT. They could receive INIT from + * outside and might be INIT monarch, but only thing they have to + * do is falling into kdump_cpu_freeze(). + * + * If an INIT is asserted here: + * - All receivers might be slaves, since some of cpus could already + * be frozen and INIT might be masked on monarch. 
In this case, + * all slaves will be frozen soon since kdump_in_progress will let + * them into DIE_INIT_SLAVE_LEAVE. + * - One might be a monarch, but INIT rendezvous will fail since + * at least this cpu already have INIT masked so it never join + * to the rendezvous. In this case, all slaves and monarch will + * be frozen soon with no wait since the INIT rendezvous is skipped + * by kdump_in_progress. + */ + kdump_smp_send_stop(); + /* not all cpu response to IPI, send INIT to freeze them */ + if (kdump_wait_cpu_freeze()) { + kdump_smp_send_init(); + /* wait again, don't go ahead if possible */ + kdump_wait_cpu_freeze(); + } +#endif +} + +static void +machine_kdump_on_init(void) +{ + crash_save_vmcoreinfo(); + local_irq_disable(); + kexec_disable_iosapic(); + machine_kexec(ia64_kimage); +} + +void +kdump_cpu_freeze(struct unw_frame_info *info, void *arg) +{ + int cpuid; + + local_irq_disable(); + cpuid = smp_processor_id(); + crash_save_this_cpu(); + current->thread.ksp = (__u64)info->sw - 16; + + ia64_set_psr_mc(); /* mask MCA/INIT and stop reentrance */ + + atomic_inc(&kdump_cpu_frozen); + kdump_status[cpuid] = 1; + mb(); + for (;;) + cpu_relax(); +} + +static int +kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) +{ + struct ia64_mca_notify_die *nd; + struct die_args *args = data; + + if (atomic_read(&kdump_in_progress)) { + switch (val) { + case DIE_INIT_MONARCH_LEAVE: + if (!kdump_freeze_monarch) + break; + /* fall through */ + case DIE_INIT_SLAVE_LEAVE: + case DIE_INIT_MONARCH_ENTER: + case DIE_MCA_RENDZVOUS_LEAVE: + unw_init_running(kdump_cpu_freeze, NULL); + break; + } + } + + if (!kdump_on_init && !kdump_on_fatal_mca) + return NOTIFY_DONE; + + if (!ia64_kimage) { + if (val == DIE_INIT_MONARCH_LEAVE) + ia64_mca_printk(KERN_NOTICE + "%s: kdump not configured\n", + __func__); + return NOTIFY_DONE; + } + + if (val != DIE_INIT_MONARCH_LEAVE && + val != DIE_INIT_MONARCH_PROCESS && + val != DIE_MCA_MONARCH_LEAVE) + return 
NOTIFY_DONE; + + nd = (struct ia64_mca_notify_die *)args->err; + + switch (val) { + case DIE_INIT_MONARCH_PROCESS: + /* Reason code 1 means machine check rendezvous*/ + if (kdump_on_init && (nd->sos->rv_rc != 1)) { + if (atomic_inc_return(&kdump_in_progress) != 1) + kdump_freeze_monarch = 1; + } + break; + case DIE_INIT_MONARCH_LEAVE: + /* Reason code 1 means machine check rendezvous*/ + if (kdump_on_init && (nd->sos->rv_rc != 1)) + machine_kdump_on_init(); + break; + case DIE_MCA_MONARCH_LEAVE: + /* *(nd->data) indicate if MCA is recoverable */ + if (kdump_on_fatal_mca && !(*(nd->data))) { + if (atomic_inc_return(&kdump_in_progress) == 1) + machine_kdump_on_init(); + /* We got fatal MCA while kdump!? No way!! */ + } + break; + } + return NOTIFY_DONE; +} + +#ifdef CONFIG_SYSCTL +static struct ctl_table kdump_ctl_table[] = { + { + .procname = "kdump_on_init", + .data = &kdump_on_init, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "kdump_on_fatal_mca", + .data = &kdump_on_fatal_mca, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + +static struct ctl_table sys_table[] = { + { + .procname = "kernel", + .mode = 0555, + .child = kdump_ctl_table, + }, + { } +}; +#endif + +static int +machine_crash_setup(void) +{ + /* be notified before default_monarch_init_process */ + static struct notifier_block kdump_init_notifier_nb = { + .notifier_call = kdump_init_notifier, + .priority = 1, + }; + int ret; + if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0) + return ret; +#ifdef CONFIG_SYSCTL + register_sysctl_table(sys_table); +#endif + return 0; +} + +__initcall(machine_crash_setup); + diff --git a/kernel/arch/ia64/kernel/crash_dump.c b/kernel/arch/ia64/kernel/crash_dump.c new file mode 100644 index 000000000..c8c929866 --- /dev/null +++ b/kernel/arch/ia64/kernel/crash_dump.c @@ -0,0 +1,50 @@ +/* + * kernel/crash_dump.c - Memory preserving reboot related code. 
+ * + * Created by: Simon Horman + * Original code moved from kernel/crash.c + * Original code comment copied from the i386 version of this file + */ + +#include +#include +#include + +#include +#include + +/** + * copy_oldmem_page - copy one page from "oldmem" + * @pfn: page frame number to be copied + * @buf: target memory address for the copy; this can be in kernel address + * space or user address space (see @userbuf) + * @csize: number of bytes to copy + * @offset: offset in bytes into the page (based on pfn) to begin the copy + * @userbuf: if set, @buf is in user address space, use copy_to_user(), + * otherwise @buf is in kernel address space, use memcpy(). + * + * Copy a page from "oldmem". For this page, there is no pte mapped + * in the current kernel. We stitch up a pte, similar to kmap_atomic. + * + * Calling copy_to_user() in atomic context is not desirable. Hence first + * copying the data to a pre-allocated kernel page and then copying to user + * space in non-atomic context. 
+ */ +ssize_t +copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + vaddr = __va(pfn< +#include +#include +#include +#include +#include +#include + +/* IBM Summit (EXA) Cyclone counter code*/ +#define CYCLONE_CBAR_ADDR 0xFEB00CD0 +#define CYCLONE_PMCC_OFFSET 0x51A0 +#define CYCLONE_MPMC_OFFSET 0x51D0 +#define CYCLONE_MPCS_OFFSET 0x51A8 +#define CYCLONE_TIMER_FREQ 100000000 + +int use_cyclone; +void __init cyclone_setup(void) +{ + use_cyclone = 1; +} + +static void __iomem *cyclone_mc; + +static cycle_t read_cyclone(struct clocksource *cs) +{ + return (cycle_t)readq((void __iomem *)cyclone_mc); +} + +static struct clocksource clocksource_cyclone = { + .name = "cyclone", + .rating = 300, + .read = read_cyclone, + .mask = (1LL << 40) - 1, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +int __init init_cyclone_clock(void) +{ + u64 __iomem *reg; + u64 base; /* saved cyclone base address */ + u64 offset; /* offset from pageaddr to cyclone_timer register */ + int i; + u32 __iomem *cyclone_timer; /* Cyclone MPMC0 register */ + + if (!use_cyclone) + return 0; + + printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); + + /* find base address */ + offset = (CYCLONE_CBAR_ADDR); + reg = ioremap_nocache(offset, sizeof(u64)); + if(!reg){ + printk(KERN_ERR "Summit chipset: Could not find valid CBAR" + " register.\n"); + use_cyclone = 0; + return -ENODEV; + } + base = readq(reg); + iounmap(reg); + if(!base){ + printk(KERN_ERR "Summit chipset: Could not find valid CBAR" + " value.\n"); + use_cyclone = 0; + return -ENODEV; + } + + /* setup PMCC */ + offset = (base + CYCLONE_PMCC_OFFSET); + reg = ioremap_nocache(offset, sizeof(u64)); + if(!reg){ + printk(KERN_ERR "Summit chipset: Could not find valid PMCC" + " register.\n"); + use_cyclone = 0; + return -ENODEV; + } + writel(0x00000001,reg); + iounmap(reg); + + /* setup MPCS */ + offset = (base + CYCLONE_MPCS_OFFSET); + reg = 
ioremap_nocache(offset, sizeof(u64)); + if(!reg){ + printk(KERN_ERR "Summit chipset: Could not find valid MPCS" + " register.\n"); + use_cyclone = 0; + return -ENODEV; + } + writel(0x00000001,reg); + iounmap(reg); + + /* map in cyclone_timer */ + offset = (base + CYCLONE_MPMC_OFFSET); + cyclone_timer = ioremap_nocache(offset, sizeof(u32)); + if(!cyclone_timer){ + printk(KERN_ERR "Summit chipset: Could not find valid MPMC" + " register.\n"); + use_cyclone = 0; + return -ENODEV; + } + + /*quick test to make sure its ticking*/ + for(i=0; i<3; i++){ + u32 old = readl(cyclone_timer); + int stall = 100; + while(stall--) barrier(); + if(readl(cyclone_timer) == old){ + printk(KERN_ERR "Summit chipset: Counter not counting!" + " DISABLED\n"); + iounmap(cyclone_timer); + cyclone_timer = NULL; + use_cyclone = 0; + return -ENODEV; + } + } + /* initialize last tick */ + cyclone_mc = cyclone_timer; + clocksource_cyclone.archdata.fsys_mmio = cyclone_timer; + clocksource_register_hz(&clocksource_cyclone, CYCLONE_TIMER_FREQ); + + return 0; +} + +__initcall(init_cyclone_clock); diff --git a/kernel/arch/ia64/kernel/dma-mapping.c b/kernel/arch/ia64/kernel/dma-mapping.c new file mode 100644 index 000000000..7f7916238 --- /dev/null +++ b/kernel/arch/ia64/kernel/dma-mapping.c @@ -0,0 +1,24 @@ +#include +#include + +/* Set this to 1 if there is a HW IOMMU in the system */ +int iommu_detected __read_mostly; + +struct dma_map_ops *dma_ops; +EXPORT_SYMBOL(dma_ops); + +#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) + +static int __init dma_init(void) +{ + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + + return 0; +} +fs_initcall(dma_init); + +struct dma_map_ops *dma_get_ops(struct device *dev) +{ + return dma_ops; +} +EXPORT_SYMBOL(dma_get_ops); diff --git a/kernel/arch/ia64/kernel/efi.c b/kernel/arch/ia64/kernel/efi.c new file mode 100644 index 000000000..c52d7540d --- /dev/null +++ b/kernel/arch/ia64/kernel/efi.c @@ -0,0 +1,1342 @@ +/* + * Extensible Firmware Interface + * + * Based on 
Extensible Firmware Interface Specification version 0.9 + * April 30, 1999 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999-2003 Hewlett-Packard Co. + * David Mosberger-Tang + * Stephane Eranian + * (c) Copyright 2006 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas + * + * All EFI Runtime Services are not implemented yet as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. --drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. --davidm + * + * Goutham Rao: + * Skip non-WB memory and ignore empty memory ranges. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define EFI_DEBUG 0 + +static __initdata unsigned long palo_phys; + +static __initdata efi_config_table_type_t arch_tables[] = { + {PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID, "PALO", &palo_phys}, + {NULL_GUID, NULL, 0}, +}; + +extern efi_status_t efi_call_phys (void *, ...); + +static efi_runtime_services_t *runtime; +static u64 mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL; + +#define efi_call_virt(f, args...) 
(*(f))(args) + +#define STUB_GET_TIME(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_time_cap_t *atc = NULL; \ + efi_status_t ret; \ + \ + if (tc) \ + atc = adjust_arg(tc); \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), \ + adjust_arg(tm), atc); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_SET_TIME(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_set_time (efi_time_t *tm) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time), \ + adjust_arg(tm)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, \ + efi_time_t *tm) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix( \ + (efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time), \ + adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_time_t *atm = NULL; \ + efi_status_t ret; \ + \ + if (tm) \ + atm = adjust_arg(tm); \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix( \ + (efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time), \ + enabled, atm); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_GET_VARIABLE(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, \ + unsigned long *data_size, void *data) \ +{ \ + struct ia64_fpreg fr[6]; \ + u32 *aattr = NULL; \ + 
efi_status_t ret; \ + \ + if (attr) \ + aattr = adjust_arg(attr); \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix( \ + (efi_get_variable_t *) __va(runtime->get_variable), \ + adjust_arg(name), adjust_arg(vendor), aattr, \ + adjust_arg(data_size), adjust_arg(data)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, \ + efi_guid_t *vendor) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix( \ + (efi_get_next_variable_t *) __va(runtime->get_next_variable), \ + adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_SET_VARIABLE(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, \ + u32 attr, unsigned long data_size, \ + void *data) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix( \ + (efi_set_variable_t *) __va(runtime->set_variable), \ + adjust_arg(name), adjust_arg(vendor), attr, data_size, \ + adjust_arg(data)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_next_high_mono_count (u32 *count) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_get_next_high_mono_count_t *) \ + __va(runtime->get_next_high_mono_count), \ + adjust_arg(count)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_RESET_SYSTEM(prefix, adjust_arg) \ +static void \ +prefix##_reset_system (int reset_type, efi_status_t status, \ + unsigned long data_size, efi_char16_t *data) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_char16_t *adata = NULL; \ + \ + if 
(data) \ + adata = adjust_arg(data); \ + \ + ia64_save_scratch_fpregs(fr); \ + efi_call_##prefix( \ + (efi_reset_system_t *) __va(runtime->reset_system), \ + reset_type, status, data_size, adata); \ + /* should not return, but just in case... */ \ + ia64_load_scratch_fpregs(fr); \ +} + +#define phys_ptr(arg) ((__typeof__(arg)) ia64_tpa(arg)) + +STUB_GET_TIME(phys, phys_ptr) +STUB_SET_TIME(phys, phys_ptr) +STUB_GET_WAKEUP_TIME(phys, phys_ptr) +STUB_SET_WAKEUP_TIME(phys, phys_ptr) +STUB_GET_VARIABLE(phys, phys_ptr) +STUB_GET_NEXT_VARIABLE(phys, phys_ptr) +STUB_SET_VARIABLE(phys, phys_ptr) +STUB_GET_NEXT_HIGH_MONO_COUNT(phys, phys_ptr) +STUB_RESET_SYSTEM(phys, phys_ptr) + +#define id(arg) arg + +STUB_GET_TIME(virt, id) +STUB_SET_TIME(virt, id) +STUB_GET_WAKEUP_TIME(virt, id) +STUB_SET_WAKEUP_TIME(virt, id) +STUB_GET_VARIABLE(virt, id) +STUB_GET_NEXT_VARIABLE(virt, id) +STUB_SET_VARIABLE(virt, id) +STUB_GET_NEXT_HIGH_MONO_COUNT(virt, id) +STUB_RESET_SYSTEM(virt, id) + +void +efi_gettimeofday (struct timespec *ts) +{ + efi_time_t tm; + + if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS) { + memset(ts, 0, sizeof(*ts)); + return; + } + + ts->tv_sec = mktime(tm.year, tm.month, tm.day, + tm.hour, tm.minute, tm.second); + ts->tv_nsec = tm.nanosecond; +} + +static int +is_memory_available (efi_memory_desc_t *md) +{ + if (!(md->attribute & EFI_MEMORY_WB)) + return 0; + + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + return 1; + } + return 0; +} + +typedef struct kern_memdesc { + u64 attribute; + u64 start; + u64 num_pages; +} kern_memdesc_t; + +static kern_memdesc_t *kern_memmap; + +#define efi_md_size(md) (md->num_pages << EFI_PAGE_SHIFT) + +static inline u64 +kmd_end(kern_memdesc_t *kmd) +{ + return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT)); +} + +static inline u64 +efi_md_end(efi_memory_desc_t *md) +{ + return (md->phys_addr + efi_md_size(md)); +} + 
+static inline int +efi_wb(efi_memory_desc_t *md) +{ + return (md->attribute & EFI_MEMORY_WB); +} + +static inline int +efi_uc(efi_memory_desc_t *md) +{ + return (md->attribute & EFI_MEMORY_UC); +} + +static void +walk (efi_freemem_callback_t callback, void *arg, u64 attr) +{ + kern_memdesc_t *k; + u64 start, end, voff; + + voff = (attr == EFI_MEMORY_WB) ? PAGE_OFFSET : __IA64_UNCACHED_OFFSET; + for (k = kern_memmap; k->start != ~0UL; k++) { + if (k->attribute != attr) + continue; + start = PAGE_ALIGN(k->start); + end = (k->start + (k->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK; + if (start < end) + if ((*callback)(start + voff, end + voff, arg) < 0) + return; + } +} + +/* + * Walk the EFI memory map and call CALLBACK once for each EFI memory + * descriptor that has memory that is available for OS use. + */ +void +efi_memmap_walk (efi_freemem_callback_t callback, void *arg) +{ + walk(callback, arg, EFI_MEMORY_WB); +} + +/* + * Walk the EFI memory map and call CALLBACK once for each EFI memory + * descriptor that has memory that is available for uncached allocator. + */ +void +efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg) +{ + walk(callback, arg, EFI_MEMORY_UC); +} + +/* + * Look for the PAL_CODE region reported by EFI and map it using an + * ITR to enable safe PAL calls in virtual mode. 
See IA-64 Processor + * Abstraction Layer chapter 11 in ADAG + */ +void * +efi_get_pal_addr (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + int pal_code_count = 0; + u64 vaddr, mask; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->type != EFI_PAL_CODE) + continue; + + if (++pal_code_count > 1) { + printk(KERN_ERR "Too many EFI Pal Code memory ranges, " + "dropped @ %llx\n", md->phys_addr); + continue; + } + /* + * The only ITLB entry in region 7 that is used is the one + * installed by __start(). That entry covers a 64MB range. + */ + mask = ~((1 << KERNEL_TR_PAGE_SHIFT) - 1); + vaddr = PAGE_OFFSET + md->phys_addr; + + /* + * We must check that the PAL mapping won't overlap with the + * kernel mapping. + * + * PAL code is guaranteed to be aligned on a power of 2 between + * 4k and 256KB and that only one ITR is needed to map it. This + * implies that the PAL code is always aligned on its size, + * i.e., the closest matching page size supported by the TLB. + * Therefore PAL code is guaranteed never to cross a 64MB unless + * it is bigger than 64MB (very unlikely!). So for now the + * following test is enough to determine whether or not we need + * a dedicated ITR for the PAL code. + */ + if ((vaddr & mask) == (KERNEL_START & mask)) { + printk(KERN_INFO "%s: no need to install ITR for PAL code\n", + __func__); + continue; + } + + if (efi_md_size(md) > IA64_GRANULE_SIZE) + panic("Whoa! 
PAL code size bigger than a granule!"); + +#if EFI_DEBUG + mask = ~((1 << IA64_GRANULE_SHIFT) - 1); + + printk(KERN_INFO "CPU %d: mapping PAL code " + "[0x%lx-0x%lx) into [0x%lx-0x%lx)\n", + smp_processor_id(), md->phys_addr, + md->phys_addr + efi_md_size(md), + vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE); +#endif + return __va(md->phys_addr); + } + printk(KERN_WARNING "%s: no PAL-code memory-descriptor found\n", + __func__); + return NULL; +} + + +static u8 __init palo_checksum(u8 *buffer, u32 length) +{ + u8 sum = 0; + u8 *end = buffer + length; + + while (buffer < end) + sum = (u8) (sum + *(buffer++)); + + return sum; +} + +/* + * Parse and handle PALO table which is published at: + * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf + */ +static void __init handle_palo(unsigned long phys_addr) +{ + struct palo_table *palo = __va(phys_addr); + u8 checksum; + + if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) { + printk(KERN_INFO "PALO signature incorrect.\n"); + return; + } + + checksum = palo_checksum((u8 *)palo, palo->length); + if (checksum) { + printk(KERN_INFO "PALO checksum incorrect.\n"); + return; + } + + setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO); +} + +void +efi_map_pal_code (void) +{ + void *pal_vaddr = efi_get_pal_addr (); + u64 psr; + + if (!pal_vaddr) + return; + + /* + * Cannot write to CRx with PSR.ic=1 + */ + psr = ia64_clear_ic(); + ia64_itr(0x1, IA64_TR_PALCODE, + GRANULEROUNDDOWN((unsigned long) pal_vaddr), + pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)), + IA64_GRANULE_SHIFT); + paravirt_dv_serialize_data(); + ia64_set_psr(psr); /* restore psr */ +} + +void __init +efi_init (void) +{ + void *efi_map_start, *efi_map_end; + efi_char16_t *c16; + u64 efi_desc_size; + char *cp, vendor[100] = "unknown"; + int i; + + set_bit(EFI_BOOT, &efi.flags); + set_bit(EFI_64BIT, &efi.flags); + + /* + * It's too early to be able to use the standard kernel command line + * support... 
+ */ + for (cp = boot_command_line; *cp; ) { + if (memcmp(cp, "mem=", 4) == 0) { + mem_limit = memparse(cp + 4, &cp); + } else if (memcmp(cp, "max_addr=", 9) == 0) { + max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); + } else if (memcmp(cp, "min_addr=", 9) == 0) { + min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); + } else { + while (*cp != ' ' && *cp) + ++cp; + while (*cp == ' ') + ++cp; + } + } + if (min_addr != 0UL) + printk(KERN_INFO "Ignoring memory below %lluMB\n", + min_addr >> 20); + if (max_addr != ~0UL) + printk(KERN_INFO "Ignoring memory above %lluMB\n", + max_addr >> 20); + + efi.systab = __va(ia64_boot_param->efi_systab); + + /* + * Verify the EFI Table + */ + if (efi.systab == NULL) + panic("Whoa! Can't find EFI system table.\n"); + if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + panic("Whoa! EFI system table signature incorrect\n"); + if ((efi.systab->hdr.revision >> 16) == 0) + printk(KERN_WARNING "Warning: EFI system table version " + "%d.%02d, expected 1.00 or greater\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff); + + /* Show what we know for posterity */ + c16 = __va(efi.systab->fw_vendor); + if (c16) { + for (i = 0;i < (int) sizeof(vendor) - 1 && *c16; ++i) + vendor[i] = *c16++; + vendor[i] = '\0'; + } + + printk(KERN_INFO "EFI v%u.%.02u by %s:", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff, vendor); + + set_bit(EFI_SYSTEM_TABLES, &efi.flags); + + palo_phys = EFI_INVALID_TABLE_ADDR; + + if (efi_config_init(arch_tables) != 0) + return; + + if (palo_phys != EFI_INVALID_TABLE_ADDR) + handle_palo(palo_phys); + + runtime = __va(efi.systab->runtime); + efi.get_time = phys_get_time; + efi.set_time = phys_set_time; + efi.get_wakeup_time = phys_get_wakeup_time; + efi.set_wakeup_time = phys_set_wakeup_time; + efi.get_variable = phys_get_variable; + efi.get_next_variable = phys_get_next_variable; + efi.set_variable = phys_set_variable; + efi.get_next_high_mono_count = 
phys_get_next_high_mono_count; + efi.reset_system = phys_reset_system; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + +#if EFI_DEBUG + /* print EFI memory map: */ + { + efi_memory_desc_t *md; + void *p; + + for (i = 0, p = efi_map_start; p < efi_map_end; + ++i, p += efi_desc_size) + { + const char *unit; + unsigned long size; + char buf[64]; + + md = p; + size = md->num_pages << EFI_PAGE_SHIFT; + + if ((size >> 40) > 0) { + size >>= 40; + unit = "TB"; + } else if ((size >> 30) > 0) { + size >>= 30; + unit = "GB"; + } else if ((size >> 20) > 0) { + size >>= 20; + unit = "MB"; + } else { + size >>= 10; + unit = "KB"; + } + + printk("mem%02d: %s " + "range=[0x%016lx-0x%016lx) (%4lu%s)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, + md->phys_addr + efi_md_size(md), size, unit); + } + } +#endif + + efi_map_pal_code(); + efi_enter_virtual_mode(); +} + +void +efi_enter_virtual_mode (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + efi_status_t status; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->attribute & EFI_MEMORY_RUNTIME) { + /* + * Some descriptors have multiple bits set, so the + * order of the tests is relevant. 
+ */ + if (md->attribute & EFI_MEMORY_WB) { + md->virt_addr = (u64) __va(md->phys_addr); + } else if (md->attribute & EFI_MEMORY_UC) { + md->virt_addr = (u64) ioremap(md->phys_addr, 0); + } else if (md->attribute & EFI_MEMORY_WC) { +#if 0 + md->virt_addr = ia64_remap(md->phys_addr, + (_PAGE_A | + _PAGE_P | + _PAGE_D | + _PAGE_MA_WC | + _PAGE_PL_0 | + _PAGE_AR_RW)); +#else + printk(KERN_INFO "EFI_MEMORY_WC mapping\n"); + md->virt_addr = (u64) ioremap(md->phys_addr, 0); +#endif + } else if (md->attribute & EFI_MEMORY_WT) { +#if 0 + md->virt_addr = ia64_remap(md->phys_addr, + (_PAGE_A | + _PAGE_P | + _PAGE_D | + _PAGE_MA_WT | + _PAGE_PL_0 | + _PAGE_AR_RW)); +#else + printk(KERN_INFO "EFI_MEMORY_WT mapping\n"); + md->virt_addr = (u64) ioremap(md->phys_addr, 0); +#endif + } + } + } + + status = efi_call_phys(__va(runtime->set_virtual_address_map), + ia64_boot_param->efi_memmap_size, + efi_desc_size, + ia64_boot_param->efi_memdesc_version, + ia64_boot_param->efi_memmap); + if (status != EFI_SUCCESS) { + printk(KERN_WARNING "warning: unable to switch EFI into " + "virtual mode (status=%lu)\n", status); + return; + } + + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + + /* + * Now that EFI is in virtual mode, we call the EFI functions more + * efficiently: + */ + efi.get_time = virt_get_time; + efi.set_time = virt_set_time; + efi.get_wakeup_time = virt_get_wakeup_time; + efi.set_wakeup_time = virt_set_wakeup_time; + efi.get_variable = virt_get_variable; + efi.get_next_variable = virt_get_next_variable; + efi.set_variable = virt_set_variable; + efi.get_next_high_mono_count = virt_get_next_high_mono_count; + efi.reset_system = virt_reset_system; +} + +/* + * Walk the EFI memory map looking for the I/O port range. There can only be + * one entry of this type, other I/O port ranges should be described via ACPI. 
+ */ +u64 +efi_get_iobase (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) { + if (md->attribute & EFI_MEMORY_UC) + return md->phys_addr; + } + } + return 0; +} + +static struct kern_memdesc * +kern_memory_descriptor (unsigned long phys_addr) +{ + struct kern_memdesc *md; + + for (md = kern_memmap; md->start != ~0UL; md++) { + if (phys_addr - md->start < (md->num_pages << EFI_PAGE_SHIFT)) + return md; + } + return NULL; +} + +static efi_memory_desc_t * +efi_memory_descriptor (unsigned long phys_addr) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + + if (phys_addr - md->phys_addr < efi_md_size(md)) + return md; + } + return NULL; +} + +static int +efi_memmap_intersects (unsigned long phys_addr, unsigned long size) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + unsigned long end; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + end = phys_addr + size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->phys_addr < end && efi_md_end(md) > phys_addr) + return 1; + } + return 0; +} + +u32 +efi_mem_type (unsigned long phys_addr) +{ + efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); + + if (md) + return md->type; + return 0; 
+} + +u64 +efi_mem_attributes (unsigned long phys_addr) +{ + efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); + + if (md) + return md->attribute; + return 0; +} +EXPORT_SYMBOL(efi_mem_attributes); + +u64 +efi_mem_attribute (unsigned long phys_addr, unsigned long size) +{ + unsigned long end = phys_addr + size; + efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); + u64 attr; + + if (!md) + return 0; + + /* + * EFI_MEMORY_RUNTIME is not a memory attribute; it just tells + * the kernel that firmware needs this region mapped. + */ + attr = md->attribute & ~EFI_MEMORY_RUNTIME; + do { + unsigned long md_end = efi_md_end(md); + + if (end <= md_end) + return attr; + + md = efi_memory_descriptor(md_end); + if (!md || (md->attribute & ~EFI_MEMORY_RUNTIME) != attr) + return 0; + } while (md); + return 0; /* never reached */ +} + +u64 +kern_mem_attribute (unsigned long phys_addr, unsigned long size) +{ + unsigned long end = phys_addr + size; + struct kern_memdesc *md; + u64 attr; + + /* + * This is a hack for ioremap calls before we set up kern_memmap. + * Maybe we should do efi_memmap_init() earlier instead. + */ + if (!kern_memmap) { + attr = efi_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB) + return EFI_MEMORY_WB; + return 0; + } + + md = kern_memory_descriptor(phys_addr); + if (!md) + return 0; + + attr = md->attribute; + do { + unsigned long md_end = kmd_end(md); + + if (end <= md_end) + return attr; + + md = kern_memory_descriptor(md_end); + if (!md || md->attribute != attr) + return 0; + } while (md); + return 0; /* never reached */ +} +EXPORT_SYMBOL(kern_mem_attribute); + +int +valid_phys_addr_range (phys_addr_t phys_addr, unsigned long size) +{ + u64 attr; + + /* + * /dev/mem reads and writes use copy_to_user(), which implicitly + * uses a granule-sized kernel identity mapping. It's really + * only safe to do this for regions in kern_memmap. For more + * details, see Documentation/ia64/aliasing.txt. 
+ */ + attr = kern_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC) + return 1; + return 0; +} + +int +valid_mmap_phys_addr_range (unsigned long pfn, unsigned long size) +{ + unsigned long phys_addr = pfn << PAGE_SHIFT; + u64 attr; + + attr = efi_mem_attribute(phys_addr, size); + + /* + * /dev/mem mmap uses normal user pages, so we don't need the entire + * granule, but the entire region we're mapping must support the same + * attribute. + */ + if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC) + return 1; + + /* + * Intel firmware doesn't tell us about all the MMIO regions, so + * in general we have to allow mmap requests. But if EFI *does* + * tell us about anything inside this region, we should deny it. + * The user can always map a smaller region to avoid the overlap. + */ + if (efi_memmap_intersects(phys_addr, size)) + return 0; + + return 1; +} + +pgprot_t +phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, + pgprot_t vma_prot) +{ + unsigned long phys_addr = pfn << PAGE_SHIFT; + u64 attr; + + /* + * For /dev/mem mmap, we use user mappings, but if the region is + * in kern_memmap (and hence may be covered by a kernel mapping), + * we must use the same attribute as the kernel mapping. + */ + attr = kern_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB) + return pgprot_cacheable(vma_prot); + else if (attr & EFI_MEMORY_UC) + return pgprot_noncached(vma_prot); + + /* + * Some chipsets don't support UC access to memory. If + * WB is supported, we prefer that. 
+ */ + if (efi_mem_attribute(phys_addr, size) & EFI_MEMORY_WB) + return pgprot_cacheable(vma_prot); + + return pgprot_noncached(vma_prot); +} + +int __init +efi_uart_console_only(void) +{ + efi_status_t status; + char *s, name[] = "ConOut"; + efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID; + efi_char16_t *utf16, name_utf16[32]; + unsigned char data[1024]; + unsigned long size = sizeof(data); + struct efi_generic_dev_path *hdr, *end_addr; + int uart = 0; + + /* Convert to UTF-16 */ + utf16 = name_utf16; + s = name; + while (*s) + *utf16++ = *s++ & 0x7f; + *utf16 = 0; + + status = efi.get_variable(name_utf16, &guid, NULL, &size, data); + if (status != EFI_SUCCESS) { + printk(KERN_ERR "No EFI %s variable?\n", name); + return 0; + } + + hdr = (struct efi_generic_dev_path *) data; + end_addr = (struct efi_generic_dev_path *) ((u8 *) data + size); + while (hdr < end_addr) { + if (hdr->type == EFI_DEV_MSG && + hdr->sub_type == EFI_DEV_MSG_UART) + uart = 1; + else if (hdr->type == EFI_DEV_END_PATH || + hdr->type == EFI_DEV_END_PATH2) { + if (!uart) + return 0; + if (hdr->sub_type == EFI_DEV_END_ENTIRE) + return 1; + uart = 0; + } + hdr = (struct efi_generic_dev_path *)((u8 *) hdr + hdr->length); + } + printk(KERN_ERR "Malformed %s value\n", name); + return 0; +} + +/* + * Look for the first granule aligned memory descriptor + * that is big enough to hold the kernel memory map (kern_memmap). 
Make sure this + * descriptor is at least granule sized so it does not get trimmed + */ +struct kern_memdesc * +find_memmap_space (void) +{ + u64 contig_low=0, contig_high=0; + u64 as = 0, ae; + void *efi_map_start, *efi_map_end, *p, *q; + efi_memory_desc_t *md, *pmd = NULL, *check_md; + u64 space_needed, efi_desc_size; + unsigned long total_mem = 0; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + /* + * Worst case: we need 3 kernel descriptors for each efi descriptor + * (if every entry has a WB part in the middle, and UC head and tail), + * plus one for the end marker. + */ + space_needed = sizeof(kern_memdesc_t) * + (3 * (ia64_boot_param->efi_memmap_size/efi_desc_size) + 1); + + for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) { + md = p; + if (!efi_wb(md)) { + continue; + } + if (pmd == NULL || !efi_wb(pmd) || + efi_md_end(pmd) != md->phys_addr) { + contig_low = GRANULEROUNDUP(md->phys_addr); + contig_high = efi_md_end(md); + for (q = p + efi_desc_size; q < efi_map_end; + q += efi_desc_size) { + check_md = q; + if (!efi_wb(check_md)) + break; + if (contig_high != check_md->phys_addr) + break; + contig_high = efi_md_end(check_md); + } + contig_high = GRANULEROUNDDOWN(contig_high); + } + if (!is_memory_available(md) || md->type == EFI_LOADER_DATA) + continue; + + /* Round ends inward to granule boundaries */ + as = max(contig_low, md->phys_addr); + ae = min(contig_high, efi_md_end(md)); + + /* keep within max_addr= and min_addr= command line arg */ + as = max(as, min_addr); + ae = min(ae, max_addr); + if (ae <= as) + continue; + + /* avoid going over mem= command line arg (total_mem is never updated in this function, so this caps each region on its own) */ + if (total_mem + (ae - as) > mem_limit) + ae -= total_mem + (ae - as) - mem_limit; + + if (ae <= as) + continue; + + if (ae - as > space_needed) + break; + } + if (p >= efi_map_end) + panic("Can't allocate space for kernel memory 
descriptors"); + + return __va(as); +} + +/* + * Walk the EFI memory map and gather all memory available for kernel + * to use. We can allocate partial granules only if the unavailable + * parts exist, and are WB. + */ +unsigned long +efi_memmap_init(u64 *s, u64 *e) +{ + struct kern_memdesc *k, *prev = NULL; + u64 contig_low=0, contig_high=0; + u64 as, ae, lim; + void *efi_map_start, *efi_map_end, *p, *q; + efi_memory_desc_t *md, *pmd = NULL, *check_md; + u64 efi_desc_size; + unsigned long total_mem = 0; + + k = kern_memmap = find_memmap_space(); + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) { + md = p; + if (!efi_wb(md)) { + if (efi_uc(md) && + (md->type == EFI_CONVENTIONAL_MEMORY || + md->type == EFI_BOOT_SERVICES_DATA)) { + k->attribute = EFI_MEMORY_UC; + k->start = md->phys_addr; + k->num_pages = md->num_pages; + k++; + } + continue; + } + if (pmd == NULL || !efi_wb(pmd) || + efi_md_end(pmd) != md->phys_addr) { + contig_low = GRANULEROUNDUP(md->phys_addr); + contig_high = efi_md_end(md); + for (q = p + efi_desc_size; q < efi_map_end; + q += efi_desc_size) { + check_md = q; + if (!efi_wb(check_md)) + break; + if (contig_high != check_md->phys_addr) + break; + contig_high = efi_md_end(check_md); + } + contig_high = GRANULEROUNDDOWN(contig_high); + } + if (!is_memory_available(md)) + continue; + + /* + * Round ends inward to granule boundaries + * Give trimmings to uncached allocator + */ + if (md->phys_addr < contig_low) { + lim = min(efi_md_end(md), contig_low); + if (efi_uc(md)) { + if (k > kern_memmap && + (k-1)->attribute == EFI_MEMORY_UC && + kmd_end(k-1) == md->phys_addr) { + (k-1)->num_pages += + (lim - md->phys_addr) + >> EFI_PAGE_SHIFT; + } else { + k->attribute = EFI_MEMORY_UC; + k->start = md->phys_addr; + k->num_pages = (lim - md->phys_addr) + >> 
EFI_PAGE_SHIFT; + k++; + } + } + as = contig_low; + } else + as = md->phys_addr; + + if (efi_md_end(md) > contig_high) { + lim = max(md->phys_addr, contig_high); + if (efi_uc(md)) { + if (lim == md->phys_addr && k > kern_memmap && + (k-1)->attribute == EFI_MEMORY_UC && + kmd_end(k-1) == md->phys_addr) { + (k-1)->num_pages += md->num_pages; + } else { + k->attribute = EFI_MEMORY_UC; + k->start = lim; + k->num_pages = (efi_md_end(md) - lim) + >> EFI_PAGE_SHIFT; + k++; + } + } + ae = contig_high; + } else + ae = efi_md_end(md); + + /* keep within max_addr= and min_addr= command line arg */ + as = max(as, min_addr); + ae = min(ae, max_addr); + if (ae <= as) + continue; + + /* avoid going over mem= command line arg */ + if (total_mem + (ae - as) > mem_limit) + ae -= total_mem + (ae - as) - mem_limit; + + if (ae <= as) + continue; + if (prev && kmd_end(prev) == md->phys_addr) { + prev->num_pages += (ae - as) >> EFI_PAGE_SHIFT; + total_mem += ae - as; + continue; + } + k->attribute = EFI_MEMORY_WB; + k->start = as; + k->num_pages = (ae - as) >> EFI_PAGE_SHIFT; + total_mem += ae - as; + prev = k++; + } + k->start = ~0L; /* end-marker */ + + /* reserve the memory we are using for kern_memmap */ + *s = (u64)kern_memmap; + *e = (u64)++k; + + return total_mem; +} + +void +efi_initialize_iomem_resources(struct resource *code_resource, + struct resource *data_resource, + struct resource *bss_resource) +{ + struct resource *res; + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + char *name; + unsigned long flags; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + res = NULL; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + + if (md->num_pages == 0) /* should not happen */ + continue; + + flags = IORESOURCE_MEM | IORESOURCE_BUSY; + switch (md->type) { + + case EFI_MEMORY_MAPPED_IO: + case 
EFI_MEMORY_MAPPED_IO_PORT_SPACE: + continue; + + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_CONVENTIONAL_MEMORY: + if (md->attribute & EFI_MEMORY_WP) { + name = "System ROM"; + flags |= IORESOURCE_READONLY; + } else if (md->attribute == EFI_MEMORY_UC) + name = "Uncached RAM"; + else + name = "System RAM"; + break; + + case EFI_ACPI_MEMORY_NVS: + name = "ACPI Non-volatile Storage"; + break; + + case EFI_UNUSABLE_MEMORY: + name = "reserved"; + flags |= IORESOURCE_DISABLED; + break; + + case EFI_RESERVED_TYPE: + case EFI_RUNTIME_SERVICES_CODE: + case EFI_RUNTIME_SERVICES_DATA: + case EFI_ACPI_RECLAIM_MEMORY: + default: + name = "reserved"; + break; + } + + if ((res = kzalloc(sizeof(struct resource), + GFP_KERNEL)) == NULL) { + printk(KERN_ERR + "failed to allocate resource for iomem\n"); + return; + } + + res->name = name; + res->start = md->phys_addr; + res->end = md->phys_addr + efi_md_size(md) - 1; + res->flags = flags; + + if (insert_resource(&iomem_resource, res) < 0) + kfree(res); + else { + /* + * We don't know which region contains + * kernel data so we try it repeatedly and + * let the resource manager test it. 
+ */ + insert_resource(res, code_resource); + insert_resource(res, data_resource); + insert_resource(res, bss_resource); +#ifdef CONFIG_KEXEC + insert_resource(res, &efi_memmap_res); + insert_resource(res, &boot_param_res); + if (crashk_res.end > crashk_res.start) + insert_resource(res, &crashk_res); +#endif + } + } +} + +#ifdef CONFIG_KEXEC +/* find a block of WB memory, aligned to 64M, that avoids the reserved regions; + the rsvd_region array r[0..n-1] must be sorted; returns ~0UL on failure + */ +unsigned long __init +kdump_find_rsvd_region (unsigned long size, struct rsvd_region *r, int n) +{ + int i; + u64 start, end; + u64 alignment = 1UL << _PAGE_SIZE_64M; + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (!efi_wb(md)) + continue; + start = ALIGN(md->phys_addr, alignment); + end = efi_md_end(md); + for (i = 0; i < n; i++) { + if (__pa(r[i].start) >= start && __pa(r[i].end) < end) { + if (__pa(r[i].start) > start + size) + return start; + start = ALIGN(__pa(r[i].end), alignment); + if (i < n-1 && + __pa(r[i+1].start) < start + size) + continue; + else + break; + } + } + if (end > start + size) + return start; + } + + printk(KERN_WARNING + "Cannot reserve 0x%lx byte of memory for crashdump\n", size); + return ~0UL; +} +#endif + +#ifdef CONFIG_CRASH_DUMP +/* return the size of the WB EFI_LOADER_DATA descriptor that starts at a certain address (0 if none) */ +unsigned long __init +vmcore_find_descriptor_size (unsigned long address) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + unsigned long ret = 0; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += 
efi_desc_size) { + md = p; + if (efi_wb(md) && md->type == EFI_LOADER_DATA + && md->phys_addr == address) { + ret = efi_md_size(md); + break; + } + } + + if (ret == 0) + printk(KERN_WARNING "Cannot locate EFI vmcore descriptor\n"); + + return ret; +} +#endif diff --git a/kernel/arch/ia64/kernel/efi_stub.S b/kernel/arch/ia64/kernel/efi_stub.S new file mode 100644 index 000000000..a56e161d7 --- /dev/null +++ b/kernel/arch/ia64/kernel/efi_stub.S @@ -0,0 +1,86 @@ +/* + * EFI call stub. + * + * Copyright (C) 1999-2001 Hewlett-Packard Co + * David Mosberger + * + * This stub allows us to make EFI calls in physical mode with interrupts + * turned off. We need this because we can't call SetVirtualMap() until + * the kernel has booted far enough to allow allocation of struct vma_struct + * entries (which we would need to map stuff with memory attributes other + * than uncached or writeback...). Since the GetTime() service gets called + * earlier than that, we need to be able to make physical mode EFI calls from + * the kernel. + */ + +/* + * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System + * Abstraction Layer Specification", revision 2.6e). Note that + * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. + * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call + * (the br.ia instruction fails unless psr.dfl and psr.dfh are + * cleared). Fortunately, SAL promises not to touch the floating + * point regs, so at least we don't have to save f2-f127. 
+ */ +#define PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) + +#define PSR_BITS_TO_SET \ + (IA64_PSR_BN) + +#include +#include + +/* + * Inputs: + * in0 = address of function descriptor of EFI routine to call + * in1..in7 = arguments to routine + * + * Outputs: + * r8 = EFI_STATUS returned by called function + */ + +GLOBAL_ENTRY(efi_call_phys) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,7,7,0 + ld8 r2=[in0],8 // load EFI function's entry point + mov loc0=rp + .body + ;; + mov loc2=gp // save global pointer + mov loc4=ar.rsc // save RSE configuration + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + ;; + ld8 gp=[in0] // load EFI function's global pointer + movl r16=PSR_BITS_TO_CLEAR + mov loc3=psr // save processor status word + movl r17=PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 + mov b6=r2 + ;; + andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared + br.call.sptk.many rp=ia64_switch_mode_phys +.ret0: mov out4=in5 + mov out0=in1 + mov out1=in2 + mov out2=in3 + mov out3=in4 + mov out5=in6 + mov out6=in7 + mov loc5=r19 + mov loc6=r20 + br.call.sptk.many rp=b6 // call the EFI function +.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov r16=loc3 + mov r19=loc5 + mov r20=loc6 + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode +.ret2: mov ar.rsc=loc4 // restore RSE configuration + mov ar.pfs=loc1 + mov rp=loc0 + mov gp=loc2 + br.ret.sptk.many rp +END(efi_call_phys) diff --git a/kernel/arch/ia64/kernel/elfcore.c b/kernel/arch/ia64/kernel/elfcore.c new file mode 100644 index 000000000..04bc8fd5f --- /dev/null +++ b/kernel/arch/ia64/kernel/elfcore.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include + +#include + + +Elf64_Half elf_core_extra_phdrs(void) +{ + return GATE_EHDR->e_phnum; +} + +int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t 
offset) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + Elf64_Off ofs = 0; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + struct elf_phdr phdr = gate_phdrs[i]; + + if (phdr.p_type == PT_LOAD) { + phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); + phdr.p_filesz = phdr.p_memsz; + if (ofs == 0) { + ofs = phdr.p_offset = offset; + offset += phdr.p_filesz; + } else { + phdr.p_offset = ofs; + } + } else { + phdr.p_offset += ofs; + } + phdr.p_paddr = 0; /* match other core phdrs */ + if (!dump_emit(cprm, &phdr, sizeof(phdr))) + return 0; + } + return 1; +} + +int elf_core_write_extra_data(struct coredump_params *cprm) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + if (gate_phdrs[i].p_type == PT_LOAD) { + void *addr = (void *)gate_phdrs[i].p_vaddr; + size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz); + + if (!dump_emit(cprm, addr, memsz)) + return 0; + break; + } + } + return 1; +} + +size_t elf_core_extra_data_size(void) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + size_t size = 0; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + if (gate_phdrs[i].p_type == PT_LOAD) { + size += PAGE_ALIGN(gate_phdrs[i].p_memsz); + break; + } + } + return size; +} diff --git a/kernel/arch/ia64/kernel/entry.S b/kernel/arch/ia64/kernel/entry.S new file mode 100644 index 000000000..fcf8b8cbc --- /dev/null +++ b/kernel/arch/ia64/kernel/entry.S @@ -0,0 +1,1785 @@ +/* + * arch/ia64/kernel/entry.S + * + * Kernel entry points. 
+ * + * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999, 2002-2003 + * Asit Mallick + * Don Dugger + * Suresh Siddha + * Fenghua Yu + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + */ +/* + * ia64_switch_to now places the correct virtual mapping in TR2 for + * kernel stack. This allows us to handle interrupts without changing + * to physical mode. + * + * Jonathan Nicklin + * Patrick O'Rourke + * 11/07/2000 + */ +/* + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * pv_ops. + */ +/* + * Global (preserved) predicate usage on syscall entry/exit path: + * + * pKStk: See entry.h. + * pUStk: See entry.h. + * pSys: See entry.h. + * pNonSys: !pSys + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "minstate.h" + +#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE + /* + * execve() is special because in case of success, we need to + * setup a null register window frame. + */ +ENTRY(ia64_execve) + /* + * Allocate 8 input registers since ptrace() may clobber them + */ + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,2,3,0 + mov loc0=rp + .body + mov out0=in0 // filename + ;; // stop bit between alloc and call + mov out1=in1 // argv + mov out2=in2 // envp + br.call.sptk.many rp=sys_execve +.ret0: + cmp4.ge p6,p7=r8,r0 + mov ar.pfs=loc1 // restore ar.pfs + sxt4 r8=r8 // return 64-bit result + ;; + stf.spill [sp]=f0 + mov rp=loc0 +(p6) mov ar.pfs=r0 // clear ar.pfs on success +(p7) br.ret.sptk.many rp + + /* + * In theory, we'd have to zap this state only to prevent leaking of + * security sensitive state (e.g., if current->mm->dumpable is zero). However, + * this executes in less than 20 cycles even on Itanium, so it's not worth + * optimizing for...). 
+ */ + mov ar.unat=0; mov ar.lc=0 + mov r4=0; mov f2=f0; mov b1=r0 + mov r5=0; mov f3=f0; mov b2=r0 + mov r6=0; mov f4=f0; mov b3=r0 + mov r7=0; mov f5=f0; mov b4=r0 + ldf.fill f12=[sp]; mov f13=f0; mov b5=r0 + ldf.fill f14=[sp]; ldf.fill f15=[sp]; mov f16=f0 + ldf.fill f17=[sp]; ldf.fill f18=[sp]; mov f19=f0 + ldf.fill f20=[sp]; ldf.fill f21=[sp]; mov f22=f0 + ldf.fill f23=[sp]; ldf.fill f24=[sp]; mov f25=f0 + ldf.fill f26=[sp]; ldf.fill f27=[sp]; mov f28=f0 + ldf.fill f29=[sp]; ldf.fill f30=[sp]; mov f31=f0 + br.ret.sptk.many rp +END(ia64_execve) + +/* + * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr, + * u64 tls) + */ +GLOBAL_ENTRY(sys_clone2) + /* + * Allocate 8 input registers since ptrace() may clobber them + */ + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc r16=ar.pfs,8,2,6,0 + DO_SAVE_SWITCH_STACK + adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp + mov loc0=rp + mov loc1=r16 // save ar.pfs across do_fork + .body + mov out1=in1 + mov out2=in2 + tbit.nz p6,p0=in0,CLONE_SETTLS_BIT + mov out3=in3 // parent_tidptr: valid only w/CLONE_PARENT_SETTID + ;; +(p6) st8 [r2]=in5 // store TLS in r16 for copy_thread() + mov out4=in4 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID + mov out0=in0 // out0 = clone_flags + br.call.sptk.many rp=do_fork +.ret1: .restore sp + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack + mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(sys_clone2) + +/* + * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls) + * Deprecated. Use sys_clone2() instead. 
+ */ +GLOBAL_ENTRY(sys_clone) + /* + * Allocate 8 input registers since ptrace() may clobber them + */ + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc r16=ar.pfs,8,2,6,0 + DO_SAVE_SWITCH_STACK + adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp + mov loc0=rp + mov loc1=r16 // save ar.pfs across do_fork + .body + mov out1=in1 + mov out2=16 // stacksize (compensates for 16-byte scratch area) + tbit.nz p6,p0=in0,CLONE_SETTLS_BIT + mov out3=in2 // parent_tidptr: valid only w/CLONE_PARENT_SETTID + ;; +(p6) st8 [r2]=in4 // stash TLS in the pt_regs r16 slot; copy_thread() presumably moves it to r13 (tp) + mov out4=in3 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID + mov out0=in0 // out0 = clone_flags + br.call.sptk.many rp=do_fork +.ret2: .restore sp + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack + mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(sys_clone) +#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ + +/* + * prev_task <- ia64_switch_to(struct task_struct *next) + * With Ingo's new scheduler, interrupts are disabled when this routine gets + * called. The code starting at .map relies on this. The rest of the code + * doesn't care about the interrupt masking status. + */ +GLOBAL_ENTRY(__paravirt_switch_to) + .prologue + alloc r16=ar.pfs,1,0,0,0 + DO_SAVE_SWITCH_STACK + .body + + adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 + movl r25=init_task + mov r27=IA64_KR(CURRENT_STACK) + adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 + dep r20=0,in0,61,3 // physical address of "next" + ;; + st8 [r22]=sp // save kernel stack pointer of old task + shr.u r26=r20,IA64_GRANULE_SHIFT + cmp.eq p7,p6=r25,in0 + ;; + /* + * If we've already mapped this task's page, we can skip doing it again. 
+ */ +(p6) cmp.eq p7,p6=r26,r27 +(p6) br.cond.dpnt .map + ;; +.done: + ld8 sp=[r21] // load kernel stack pointer of new task + MOV_TO_KR(CURRENT, in0, r8, r9) // update "current" application register + mov r8=r13 // return pointer to previously running task + mov r13=in0 // set "current" pointer + ;; + DO_LOAD_SWITCH_STACK + +#ifdef CONFIG_SMP + sync.i // ensure "fc"s done by this CPU are visible on other CPUs +#endif + br.ret.sptk.many rp // boogie on out in new context + +.map: + RSM_PSR_IC(r25) // interrupts (psr.i) are already disabled here + movl r25=PAGE_KERNEL + ;; + srlz.d + or r23=r25,r20 // construct PA | page properties + mov r25=IA64_GRANULE_SHIFT<<2 + ;; + MOV_TO_ITIR(p0, r25, r8) + MOV_TO_IFA(in0, r8) // VA of next task... + ;; + mov r25=IA64_TR_CURRENT_STACK + MOV_TO_KR(CURRENT_STACK, r26, r8, r9) // remember last page we mapped... + ;; + itr.d dtr[r25]=r23 // wire in new mapping... + SSM_PSR_IC_AND_SRLZ_D(r8, r9) // reenable the psr.ic bit + br.cond.sptk .done +END(__paravirt_switch_to) + +#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE +/* + * Note that interrupts are enabled during save_switch_stack and load_switch_stack. This + * means that we may get an interrupt with "sp" pointing to the new kernel stack while + * ar.bspstore is still pointing to the old kernel backing store area. Since ar.rsc, + * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a + * problem. Also, we don't need to specify unwind information for preserved registers + * that are not modified in save_switch_stack as the right unwind information is already + * specified at the call-site of save_switch_stack. 
+ */ + +/* + * save_switch_stack: + * - r16 holds ar.pfs + * - b7 holds address to return to + * - rp (b0) holds return address to save + */ +GLOBAL_ENTRY(save_switch_stack) + .prologue + .altrp b7 + flushrs // flush dirty regs to backing store (must be first in insn group) + .save @priunat,r17 + mov r17=ar.unat // preserve caller's + .body +#ifdef CONFIG_ITANIUM + adds r2=16+128,sp + adds r3=16+64,sp + adds r14=SW(R4)+16,sp + ;; + st8.spill [r14]=r4,16 // spill r4 + lfetch.fault.excl.nt1 [r3],128 + ;; + lfetch.fault.excl.nt1 [r2],128 + lfetch.fault.excl.nt1 [r3],128 + ;; + lfetch.fault.excl [r2] + lfetch.fault.excl [r3] + adds r15=SW(R5)+16,sp +#else + add r2=16+3*128,sp + add r3=16,sp + add r14=SW(R4)+16,sp + ;; + st8.spill [r14]=r4,SW(R6)-SW(R4) // spill r4 and prefetch offset 0x1c0 + lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x010 + ;; + lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x090 + lfetch.fault.excl.nt1 [r2],128 // prefetch offset 0x190 + ;; + lfetch.fault.excl.nt1 [r3] // prefetch offset 0x110 + lfetch.fault.excl.nt1 [r2] // prefetch offset 0x210 + adds r15=SW(R5)+16,sp +#endif + ;; + st8.spill [r15]=r5,SW(R7)-SW(R5) // spill r5 + mov.m ar.rsc=0 // put RSE in mode: enforced lazy, little endian, pl 0 + add r2=SW(F2)+16,sp // r2 = &sw->f2 + ;; + st8.spill [r14]=r6,SW(B0)-SW(R6) // spill r6 + mov.m r18=ar.fpsr // preserve fpsr + add r3=SW(F3)+16,sp // r3 = &sw->f3 + ;; + stf.spill [r2]=f2,32 + mov.m r19=ar.rnat + mov r21=b0 + + stf.spill [r3]=f3,32 + st8.spill [r15]=r7,SW(B2)-SW(R7) // spill r7 + mov r22=b1 + ;; + // since we're done with the spills, read and save ar.unat: + mov.m r29=ar.unat + mov.m r20=ar.bspstore + mov r23=b2 + stf.spill [r2]=f4,32 + stf.spill [r3]=f5,32 + mov r24=b3 + ;; + st8 [r14]=r21,SW(B1)-SW(B0) // save b0 + st8 [r15]=r23,SW(B3)-SW(B2) // save b2 + mov r25=b4 + mov r26=b5 + ;; + st8 [r14]=r22,SW(B4)-SW(B1) // save b1 + st8 [r15]=r24,SW(AR_PFS)-SW(B3) // save b3 + mov r21=ar.lc // I-unit + stf.spill [r2]=f12,32 + 
stf.spill [r3]=f13,32 + ;; + st8 [r14]=r25,SW(B5)-SW(B4) // save b4 + st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS) // save ar.pfs + stf.spill [r2]=f14,32 + stf.spill [r3]=f15,32 + ;; + st8 [r14]=r26 // save b5 + st8 [r15]=r21 // save ar.lc + stf.spill [r2]=f16,32 + stf.spill [r3]=f17,32 + ;; + stf.spill [r2]=f18,32 + stf.spill [r3]=f19,32 + ;; + stf.spill [r2]=f20,32 + stf.spill [r3]=f21,32 + ;; + stf.spill [r2]=f22,32 + stf.spill [r3]=f23,32 + ;; + stf.spill [r2]=f24,32 + stf.spill [r3]=f25,32 + ;; + stf.spill [r2]=f26,32 + stf.spill [r3]=f27,32 + ;; + stf.spill [r2]=f28,32 + stf.spill [r3]=f29,32 + ;; + stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30) + stf.spill [r3]=f31,SW(PR)-SW(F31) + add r14=SW(CALLER_UNAT)+16,sp + ;; + st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT) // save ar.unat + st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat + mov r21=pr + ;; + st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat + st8 [r3]=r21 // save predicate registers + ;; + st8 [r2]=r20 // save ar.bspstore + st8 [r14]=r18 // save fpsr + mov ar.rsc=3 // put RSE back into eager mode, pl 0 + br.cond.sptk.many b7 +END(save_switch_stack) + +/* + * load_switch_stack: + * - "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK) + * - b7 holds address to return to + * - must not touch r8-r11 + */ +GLOBAL_ENTRY(load_switch_stack) + .prologue + .altrp b7 + + .body + lfetch.fault.nt1 [sp] + adds r2=SW(AR_BSPSTORE)+16,sp + adds r3=SW(AR_UNAT)+16,sp + mov ar.rsc=0 // put RSE into enforced lazy mode + adds r14=SW(CALLER_UNAT)+16,sp + adds r15=SW(AR_FPSR)+16,sp + ;; + ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE)) // bspstore + ld8 r29=[r3],(SW(B1)-SW(AR_UNAT)) // unat + ;; + ld8 r21=[r2],16 // restore b0 + ld8 r22=[r3],16 // restore b1 + ;; + ld8 r23=[r2],16 // restore b2 + ld8 r24=[r3],16 // restore b3 + ;; + ld8 r25=[r2],16 // restore b4 + ld8 r26=[r3],16 // restore b5 + ;; + ld8 r16=[r2],(SW(PR)-SW(AR_PFS)) // ar.pfs + ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC)) // ar.lc + ;; + ld8 r28=[r2] // restore pr 
+ ld8 r30=[r3] // restore rnat + ;; + ld8 r18=[r14],16 // restore caller's unat + ld8 r19=[r15],24 // restore fpsr + ;; + ldf.fill f2=[r14],32 + ldf.fill f3=[r15],32 + ;; + ldf.fill f4=[r14],32 + ldf.fill f5=[r15],32 + ;; + ldf.fill f12=[r14],32 + ldf.fill f13=[r15],32 + ;; + ldf.fill f14=[r14],32 + ldf.fill f15=[r15],32 + ;; + ldf.fill f16=[r14],32 + ldf.fill f17=[r15],32 + ;; + ldf.fill f18=[r14],32 + ldf.fill f19=[r15],32 + mov b0=r21 + ;; + ldf.fill f20=[r14],32 + ldf.fill f21=[r15],32 + mov b1=r22 + ;; + ldf.fill f22=[r14],32 + ldf.fill f23=[r15],32 + mov b2=r23 + ;; + mov ar.bspstore=r27 + mov ar.unat=r29 // establish unat holding the NaT bits for r4-r7 + mov b3=r24 + ;; + ldf.fill f24=[r14],32 + ldf.fill f25=[r15],32 + mov b4=r25 + ;; + ldf.fill f26=[r14],32 + ldf.fill f27=[r15],32 + mov b5=r26 + ;; + ldf.fill f28=[r14],32 + ldf.fill f29=[r15],32 + mov ar.pfs=r16 + ;; + ldf.fill f30=[r14],32 + ldf.fill f31=[r15],24 + mov ar.lc=r17 + ;; + ld8.fill r4=[r14],16 + ld8.fill r5=[r15],16 + mov pr=r28,-1 + ;; + ld8.fill r6=[r14],16 + ld8.fill r7=[r15],16 + + mov ar.unat=r18 // restore caller's unat + mov ar.rnat=r30 // must restore after bspstore but before rsc! + mov ar.fpsr=r19 // restore fpsr + mov ar.rsc=3 // put RSE back into eager mode, pl 0 + br.cond.sptk.many b7 +END(load_switch_stack) + +GLOBAL_ENTRY(prefetch_stack) + add r14 = -IA64_SWITCH_STACK_SIZE, sp + add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0 + ;; + ld8 r16 = [r15] // load next's stack pointer + lfetch.fault.excl [r14], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault [r16], 128 + br.ret.sptk.many rp +END(prefetch_stack) + + /* + * Invoke a system call, but do some tracing before and after the call. 
+ * We MUST preserve the current register frame throughout this routine + * because some system calls (such as ia64_execve) directly + * manipulate ar.pfs. + */ +GLOBAL_ENTRY(ia64_trace_syscall) + PT_REGS_UNWIND_INFO(0) + /* + * We need to preserve the scratch registers f6-f11 in case the system + * call is sigreturn. + */ + adds r16=PT(F6)+16,sp + adds r17=PT(F7)+16,sp + ;; + stf.spill [r16]=f6,32 + stf.spill [r17]=f7,32 + ;; + stf.spill [r16]=f8,32 + stf.spill [r17]=f9,32 + ;; + stf.spill [r16]=f10 + stf.spill [r17]=f11 + br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args + cmp.lt p6,p0=r8,r0 // check tracehook + adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 + adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 + mov r10=0 +(p6) br.cond.sptk strace_error // syscall failed -> + adds r16=PT(F6)+16,sp + adds r17=PT(F7)+16,sp + ;; + ldf.fill f6=[r16],32 + ldf.fill f7=[r17],32 + ;; + ldf.fill f8=[r16],32 + ldf.fill f9=[r17],32 + ;; + ldf.fill f10=[r16] + ldf.fill f11=[r17] + // the syscall number may have changed, so re-load it and re-calculate the + // syscall entry-point: + adds r15=PT(R15)+16,sp // r15 = &pt_regs.r15 (syscall #) + ;; + ld8 r15=[r15] + mov r3=NR_syscalls - 1 + ;; + adds r15=-1024,r15 + movl r16=sys_call_table + ;; + shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) + cmp.leu p6,p7=r15,r3 + ;; +(p6) ld8 r20=[r20] // load address of syscall entry point +(p7) movl r20=sys_ni_syscall + ;; + mov b6=r20 + br.call.sptk.many rp=b6 // do the syscall +.strace_check_retval: + cmp.lt p6,p0=r8,r0 // syscall failed? 
+ adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 + adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 + mov r10=0 +(p6) br.cond.sptk strace_error // syscall failed -> + ;; // avoid RAW on r10 +.strace_save_retval: +.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8 +.mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10 + br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value +.ret3: +(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk +(pUStk) rsm psr.i // disable interrupts + br.cond.sptk ia64_work_pending_syscall_end + +strace_error: + ld8 r3=[r2] // load pt_regs.r8 + sub r9=0,r8 // negate return value to get errno value + ;; + cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0? + adds r3=16,r2 // r3=&pt_regs.r10 + ;; +(p6) mov r10=-1 +(p6) mov r8=r9 + br.cond.sptk .strace_save_retval +END(ia64_trace_syscall) + + /* + * When traced and returning from sigreturn, we invoke syscall_trace but then + * go straight to ia64_leave_kernel rather than ia64_leave_syscall. + */ +GLOBAL_ENTRY(ia64_strace_leave_kernel) + PT_REGS_UNWIND_INFO(0) +{ /* + * Some versions of gas generate bad unwind info if the first instruction of a + * procedure doesn't go into the first slot of a bundle. This is a workaround. + */ + nop.m 0 + nop.i 0 + br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value +} +.ret4: br.cond.sptk ia64_leave_kernel +END(ia64_strace_leave_kernel) + +ENTRY(call_payload) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(0) + /* call the kernel_thread payload; fn is in r4, arg - in r5 */ + alloc loc1=ar.pfs,0,3,1,0 + mov loc0=rp + mov loc2=gp + mov out0=r5 // arg + ld8 r14 = [r4], 8 // fn.address + ;; + mov b6 = r14 + ld8 gp = [r4] // fn.gp + ;; + br.call.sptk.many rp=b6 // fn(arg) +.ret12: mov gp=loc2 + mov rp=loc0 + mov ar.pfs=loc1 + /* ... 
and if it has returned, we are going to userland */ + cmp.ne pKStk,pUStk=r0,r0 + br.ret.sptk.many rp +END(call_payload) + +GLOBAL_ENTRY(ia64_ret_from_clone) + PT_REGS_UNWIND_INFO(0) +{ /* + * Some versions of gas generate bad unwind info if the first instruction of a + * procedure doesn't go into the first slot of a bundle. This is a workaround. + */ + nop.m 0 + nop.i 0 + /* + * We need to call schedule_tail() to complete the scheduling process. + * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the + * address of the previously executing task. + */ + br.call.sptk.many rp=ia64_invoke_schedule_tail +} +.ret8: +(pKStk) br.call.sptk.many rp=call_payload + adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; + ld4 r2=[r2] + ;; + mov r8=0 + and r2=_TIF_SYSCALL_TRACEAUDIT,r2 + ;; + cmp.ne p6,p0=r2,r0 +(p6) br.cond.spnt .strace_check_retval + ;; // added stop bits to prevent r8 dependency +END(ia64_ret_from_clone) + // fall through +GLOBAL_ENTRY(ia64_ret_from_syscall) + PT_REGS_UNWIND_INFO(0) + cmp.ge p6,p7=r8,r0 // syscall executed successfully? + adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 + mov r10=r0 // clear error indication in r10 +(p7) br.cond.spnt handle_syscall_error // handle potential syscall failure +#ifdef CONFIG_PARAVIRT + ;; + br.cond.sptk.few ia64_leave_syscall + ;; +#endif /* CONFIG_PARAVIRT */ +END(ia64_ret_from_syscall) +#ifndef CONFIG_PARAVIRT + // fall through +#endif +#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ + +/* + * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't + * need to switch to bank 0 and doesn't restore the scratch registers. 
+ * To avoid leaking kernel bits, the scratch registers are set to + * the following known-to-be-safe values: + * + * r1: restored (global pointer) + * r2: cleared + * r3: 1 (when returning to user-level) + * r8-r11: restored (syscall return value(s)) + * r12: restored (user-level stack pointer) + * r13: restored (user-level thread pointer) + * r14: set to __kernel_syscall_via_epc + * r15: restored (syscall #) + * r16-r17: cleared + * r18: user-level b6 + * r19: cleared + * r20: user-level ar.fpsr + * r21: user-level b0 + * r22: cleared + * r23: user-level ar.bspstore + * r24: user-level ar.rnat + * r25: user-level ar.unat + * r26: user-level ar.pfs + * r27: user-level ar.rsc + * r28: user-level ip + * r29: user-level psr + * r30: user-level cfm + * r31: user-level pr + * f6-f11: cleared + * pr: restored (user-level pr) + * b0: restored (user-level rp) + * b6: restored + * b7: set to __kernel_syscall_via_epc + * ar.unat: restored (user-level ar.unat) + * ar.pfs: restored (user-level ar.pfs) + * ar.rsc: restored (user-level ar.rsc) + * ar.rnat: restored (user-level ar.rnat) + * ar.bspstore: restored (user-level ar.bspstore) + * ar.fpsr: restored (user-level ar.fpsr) + * ar.ccv: cleared + * ar.csd: cleared + * ar.ssd: cleared + */ +GLOBAL_ENTRY(__paravirt_leave_syscall) + PT_REGS_UNWIND_INFO(0) + /* + * work.need_resched etc. mustn't get changed by this CPU before it returns to + * user- or fsys-mode, hence we disable interrupts early on. + * + * p6 controls whether current_thread_info()->flags needs to be checked for + * extra work. We always check for extra work when returning to user-level. + * With CONFIG_PREEMPT, we also check for extra work when the preempt_count + * is 0. After extra work processing has been completed, execution + * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check + * needs to be redone. 
+ */ +#ifdef CONFIG_PREEMPT + RSM_PSR_I(p0, r2, r18) // disable interrupts + cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall +(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 + ;; + .pred.rel.mutex pUStk,pKStk +(pKStk) ld4 r21=[r20] // r21 <- preempt_count +(pUStk) mov r21=0 // r21 <- 0 + ;; + cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) +#else /* !CONFIG_PREEMPT */ + RSM_PSR_I(pUStk, r2, r18) + cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall +(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk +#endif +.global __paravirt_work_processed_syscall; +__paravirt_work_processed_syscall: +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + adds r2=PT(LOADRS)+16,r12 + MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave + adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; +(p6) ld4 r31=[r18] // load current_thread_info()->flags + ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" + adds r3=PT(AR_BSPSTORE)+16,r12 // deferred + ;; +#else + adds r2=PT(LOADRS)+16,r12 + adds r3=PT(AR_BSPSTORE)+16,r12 + adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; +(p6) ld4 r31=[r18] // load current_thread_info()->flags + ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" + nop.i 0 + ;; +#endif + mov r16=ar.bsp // M2 get existing backing store pointer + ld8 r18=[r2],PT(R9)-PT(B6) // load b6 +(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? + ;; + ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) +(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending? +(p6) br.cond.spnt .work_pending_syscall + ;; + // start restoring the state saved on the kernel stack (struct pt_regs): + ld8 r9=[r2],PT(CR_IPSR)-PT(R9) + ld8 r11=[r3],PT(CR_IIP)-PT(R11) +(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE! 
+ ;; + invala // M0|1 invalidate ALAT + RSM_PSR_I_IC(r28, r29, r30) // M2 turn off interrupts and interruption collection + cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs + + ld8 r29=[r2],16 // M0|1 load cr.ipsr + ld8 r28=[r3],16 // M0|1 load cr.iip +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13 + ;; + ld8 r30=[r2],16 // M0|1 load cr.ifs + ld8 r25=[r3],16 // M0|1 load ar.unat +(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 + ;; +#else + mov r22=r0 // A clear r22 + ;; + ld8 r30=[r2],16 // M0|1 load cr.ifs + ld8 r25=[r3],16 // M0|1 load ar.unat +(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 + ;; +#endif + ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs + MOV_FROM_PSR(pKStk, r22, r21) // M2 read PSR now that interrupts are disabled + nop 0 + ;; + ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0 + ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc + mov f6=f0 // F clear f6 + ;; + ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage) + ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates + mov f7=f0 // F clear f7 + ;; + ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr + ld8.fill r1=[r3],16 // M0|1 load r1 +(pUStk) mov r17=1 // A + ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +(pUStk) st1 [r15]=r17 // M2|3 +#else +(pUStk) st1 [r14]=r17 // M2|3 +#endif + ld8.fill r13=[r3],16 // M0|1 + mov f8=f0 // F clear f8 + ;; + ld8.fill r12=[r2] // M0|1 restore r12 (sp) + ld8.fill r15=[r3] // M0|1 restore r15 + mov b6=r18 // I0 restore b6 + + LOAD_PHYS_STACK_REG_SIZE(r17) + mov f9=f0 // F clear f9 +(pKStk) br.cond.dpnt.many skip_rbs_switch // B + + srlz.d // M0 ensure interruption collection is off (for cover) + shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition + COVER // B add current frame into dirty partition & set cr.ifs + ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + mov r19=ar.bsp // M2 get new backing store pointer + st8 [r14]=r22 // M save time at 
leave + mov f10=f0 // F clear f10 + + mov r22=r0 // A clear r22 + movl r14=__kernel_syscall_via_epc // X + ;; +#else + mov r19=ar.bsp // M2 get new backing store pointer + mov f10=f0 // F clear f10 + + nop.m 0 + movl r14=__kernel_syscall_via_epc // X + ;; +#endif + mov.m ar.csd=r0 // M2 clear ar.csd + mov.m ar.ccv=r0 // M2 clear ar.ccv + mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc) + + mov.m ar.ssd=r0 // M2 clear ar.ssd + mov f11=f0 // F clear f11 + br.cond.sptk.many rbs_switch // B +END(__paravirt_leave_syscall) + +GLOBAL_ENTRY(__paravirt_leave_kernel) + PT_REGS_UNWIND_INFO(0) + /* + * work.need_resched etc. mustn't get changed by this CPU before it returns to + * user- or fsys-mode, hence we disable interrupts early on. + * + * p6 controls whether current_thread_info()->flags needs to be checked for + * extra work. We always check for extra work when returning to user-level. + * With CONFIG_PREEMPT, we also check for extra work when the preempt_count + * is 0. After extra work processing has been completed, execution + * resumes at .work_processed_kernel with p6 set to 1 if the extra-work-check + * needs to be redone. 
+ */ +#ifdef CONFIG_PREEMPT + RSM_PSR_I(p0, r17, r31) // disable interrupts + cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel +(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 + ;; + .pred.rel.mutex pUStk,pKStk +(pKStk) ld4 r21=[r20] // r21 <- preempt_count +(pUStk) mov r21=0 // r21 <- 0 + ;; + cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) +#else + RSM_PSR_I(pUStk, r17, r31) + cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel +(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk +#endif +.work_processed_kernel: + adds r17=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; +(p6) ld4 r31=[r17] // load current_thread_info()->flags + adds r21=PT(PR)+16,r12 + ;; + + lfetch [r21],PT(CR_IPSR)-PT(PR) + adds r2=PT(B6)+16,r12 + adds r3=PT(R16)+16,r12 + ;; + lfetch [r21] + ld8 r28=[r2],8 // load b6 + adds r29=PT(R24)+16,r12 + + ld8.fill r16=[r3],PT(AR_CSD)-PT(R16) + adds r30=PT(AR_CCV)+16,r12 +(p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? + ;; + ld8.fill r24=[r29] + ld8 r15=[r30] // load ar.ccv +(p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending? 
+ ;; + ld8 r29=[r2],16 // load b7 + ld8 r30=[r3],16 // load ar.csd +(p6) br.cond.spnt .work_pending + ;; + ld8 r31=[r2],16 // load ar.ssd + ld8.fill r8=[r3],16 + ;; + ld8.fill r9=[r2],16 + ld8.fill r10=[r3],PT(R17)-PT(R10) + ;; + ld8.fill r11=[r2],PT(R18)-PT(R11) + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + mov ar.csd=r30 + mov ar.ssd=r31 + ;; + RSM_PSR_I_IC(r23, r22, r25) // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + ld8.fill r22=[r2],24 + ld8.fill r23=[r3],24 + mov b6=r28 + ;; + ld8.fill r25=[r2],16 + ld8.fill r26=[r3],16 + mov b7=r29 + ;; + ld8.fill r27=[r2],16 + ld8.fill r28=[r3],16 + ;; + ld8.fill r29=[r2],16 + ld8.fill r30=[r3],24 + ;; + ld8.fill r31=[r2],PT(F9)-PT(R31) + adds r3=PT(F10)-PT(F6),r3 + ;; + ldf.fill f9=[r2],PT(F6)-PT(F9) + ldf.fill f10=[r3],PT(F8)-PT(F10) + ;; + ldf.fill f6=[r2],PT(F7)-PT(F6) + ;; + ldf.fill f7=[r2],PT(F11)-PT(F7) + ldf.fill f8=[r3],32 + ;; + srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned) + mov ar.ccv=r15 + ;; + ldf.fill f11=[r2] + BSW_0(r2, r3, r15) // switch back to bank 0 (no stop bit required beforehand...) 
+ ;; +(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency) + adds r16=PT(CR_IPSR)+16,r12 + adds r17=PT(CR_IIP)+16,r12 + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + .pred.rel.mutex pUStk,pKStk + MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled + MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave + nop.i 0 + ;; +#else + MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled + nop.i 0 + nop.i 0 + ;; +#endif + ld8 r29=[r16],16 // load cr.ipsr + ld8 r28=[r17],16 // load cr.iip + ;; + ld8 r30=[r16],16 // load cr.ifs + ld8 r25=[r17],16 // load ar.unat + ;; + ld8 r26=[r16],16 // load ar.pfs + ld8 r27=[r17],16 // load ar.rsc + cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs + ;; + ld8 r24=[r16],16 // load ar.rnat (may be garbage) + ld8 r23=[r17],16 // load ar.bspstore (may be garbage) + ;; + ld8 r31=[r16],16 // load predicates + ld8 r21=[r17],16 // load b0 + ;; + ld8 r19=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 // load r1 + ;; + ld8.fill r12=[r16],16 + ld8.fill r13=[r17],16 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18 +#else +(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 +#endif + ;; + ld8 r20=[r16],16 // ar.fpsr + ld8.fill r15=[r17],16 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred +#endif + ;; + ld8.fill r14=[r16],16 + ld8.fill r2=[r17] +(pUStk) mov r17=1 + ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;; + // mib : mov add br -> mib : ld8 add br + // bbb_ : br nop cover;; mbb_ : mov br cover;; + // + // no one require bsp in r16 if (pKStk) branch is selected. 
+(pUStk) st8 [r3]=r22 // save time at leave +(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack + shr.u r18=r19,16 // get byte size of existing "dirty" partition + ;; + ld8.fill r3=[r16] // deferred + LOAD_PHYS_STACK_REG_SIZE(r17) +(pKStk) br.cond.dpnt skip_rbs_switch + mov r16=ar.bsp // get existing backing store pointer +#else + ld8.fill r3=[r16] +(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack + shr.u r18=r19,16 // get byte size of existing "dirty" partition + ;; + mov r16=ar.bsp // get existing backing store pointer + LOAD_PHYS_STACK_REG_SIZE(r17) +(pKStk) br.cond.dpnt skip_rbs_switch +#endif + + /* + * Restore user backing store. + * + * NOTE: alloc, loadrs, and cover can't be predicated. + */ +(pNonSys) br.cond.dpnt dont_preserve_current_frame + COVER // add current frame into dirty partition and set cr.ifs + ;; + mov r19=ar.bsp // get new backing store pointer +rbs_switch: + sub r16=r16,r18 // krbs = old bsp - size of dirty partition + cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs + ;; + sub r19=r19,r16 // calculate total byte size of dirty partition + add r18=64,r18 // don't force in0-in7 into memory... + ;; + shl r19=r19,16 // shift size of dirty partition into loadrs position + ;; +dont_preserve_current_frame: + /* + * To prevent leaking bits between the kernel and user-space, + * we must clear the stacked registers in the "invalid" partition here. + * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium, + * 5 registers/cycle on McKinley). 
+ */ +# define pRecurse p6 +# define pReturn p7 +#ifdef CONFIG_ITANIUM +# define Nregs 10 +#else +# define Nregs 14 +#endif + alloc loc0=ar.pfs,2,Nregs-2,2,0 + shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) + sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize + ;; + mov ar.rsc=r19 // load ar.rsc to be used for "loadrs" + shladd in0=loc1,3,r17 + mov in1=0 + ;; + TEXT_ALIGN(32) +rse_clear_invalid: +#ifdef CONFIG_ITANIUM + // cycle 0 + { .mii + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 +}{ .mfb + add out1=1,in1 // increment recursion count + nop.f 0 + nop.b 0 // can't do br.call here because of alloc (WAW on CFM) + ;; +}{ .mfi // cycle 1 + mov loc1=0 + nop.f 0 + mov loc2=0 +}{ .mib + mov loc3=0 + mov loc4=0 +(pRecurse) br.call.sptk.many b0=rse_clear_invalid + +}{ .mfi // cycle 2 + mov loc5=0 + nop.f 0 + cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret +}{ .mib + mov loc6=0 + mov loc7=0 +(pReturn) br.ret.sptk.many b0 +} +#else /* !CONFIG_ITANIUM */ + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 + add out1=1,in1 // increment recursion count + mov loc1=0 + mov loc2=0 + ;; + mov loc3=0 + mov loc4=0 + mov loc5=0 + mov loc6=0 + mov loc7=0 +(pRecurse) br.call.dptk.few b0=rse_clear_invalid + ;; + mov loc8=0 + mov loc9=0 + cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret + mov loc10=0 + mov loc11=0 +(pReturn) br.ret.dptk.many b0 +#endif /* !CONFIG_ITANIUM */ +# undef pRecurse +# undef pReturn + ;; + alloc r17=ar.pfs,0,0,0,0 // drop current register frame + ;; + loadrs + ;; +skip_rbs_switch: + mov ar.unat=r25 // M2 +(pKStk) extr.u r22=r22,21,1 // I0 extract current value of psr.pp from r22 +(pLvSys)mov r19=r0 // A clear r19 for leave_syscall, no-op otherwise + ;; +(pUStk) mov ar.bspstore=r23 // M2 +(pKStk) dep 
r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp +(pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise + ;; + MOV_TO_IPSR(p0, r29, r25) // M2 + mov ar.pfs=r26 // I0 +(pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise + + MOV_TO_IFS(p9, r30, r25)// M2 + mov b0=r21 // I0 +(pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise + + mov ar.fpsr=r20 // M2 + MOV_TO_IIP(r28, r25) // M2 + nop 0 + ;; +(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode + nop 0 +(pLvSys)mov r2=r0 + + mov ar.rsc=r27 // M2 + mov pr=r31,-1 // I0 + RFI // B + + /* + * On entry: + * r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT) + * r31 = current->thread_info->flags + * On exit: + * p6 = TRUE if work-pending-check needs to be redone + * + * Interrupts are disabled on entry, reenabled depending on work, and + * disabled on exit. + */ +.work_pending_syscall: + add r2=-8,r2 + add r3=-8,r3 + ;; + st8 [r2]=r8 + st8 [r3]=r10 +.work_pending: + tbit.z p6,p0=r31,TIF_NEED_RESCHED // is resched not needed? +(p6) br.cond.sptk.few .notify + br.call.spnt.many rp=preempt_schedule_irq +.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 (re-check) +(pLvSys)br.cond.sptk.few __paravirt_pending_syscall_end + br.cond.sptk.many .work_processed_kernel + +.notify: +(pUStk) br.call.spnt.many rp=notify_resume_user +.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 (don't re-check) +(pLvSys)br.cond.sptk.few __paravirt_pending_syscall_end + br.cond.sptk.many .work_processed_kernel + +.global __paravirt_pending_syscall_end; +__paravirt_pending_syscall_end: + adds r2=PT(R8)+16,r12 + adds r3=PT(R10)+16,r12 + ;; + ld8 r8=[r2] + ld8 r10=[r3] + br.cond.sptk.many __paravirt_work_processed_syscall_target +END(__paravirt_leave_kernel) + +#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE +ENTRY(handle_syscall_error) + /* + * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could + * lead us to mistake a negative return value as a failed syscall. 
Those syscalls + * must deposit a non-zero value in pt_regs.r8 to indicate an error. If + * pt_regs.r8 is zero, we assume that the call completed successfully. + */ + PT_REGS_UNWIND_INFO(0) + ld8 r3=[r2] // load pt_regs.r8 + ;; + cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0? + ;; +(p7) mov r10=-1 +(p7) sub r8=0,r8 // negate return value to get errno + br.cond.sptk ia64_leave_syscall +END(handle_syscall_error) + + /* + * Invoke schedule_tail(task) while preserving in0-in7, which may be needed + * in case a system call gets restarted. + */ +GLOBAL_ENTRY(ia64_invoke_schedule_tail) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,2,1,0 + mov loc0=rp + mov out0=r8 // Address of previous task + ;; + br.call.sptk.many rp=schedule_tail +.ret11: mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(ia64_invoke_schedule_tail) + + /* + * Setup stack and call do_notify_resume_user(), keeping interrupts + * disabled. + * + * Note that pSys and pNonSys need to be set up by the caller. + * We declare 8 input registers so the system call args get preserved, + * in case we need to restart a system call. + */ +GLOBAL_ENTRY(notify_resume_user) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! 
+ mov r9=ar.unat + mov loc0=rp // save return address + mov out0=0 // there is no "oldset" + adds out1=8,sp // out1=&sigscratch->ar_pfs +(pSys) mov out2=1 // out2==1 => we're in a syscall + ;; +(pNonSys) mov out2=0 // out2==0 => not a syscall + .fframe 16 + .spillsp ar.unat, 16 + st8 [sp]=r9,-16 // allocate space for ar.unat and save it + st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch + .body + br.call.sptk.many rp=do_notify_resume_user +.ret15: .restore sp + adds sp=16,sp // pop scratch stack space + ;; + ld8 r9=[sp] // load new unat from sigscratch->scratch_unat + mov rp=loc0 + ;; + mov ar.unat=r9 + mov ar.pfs=loc1 + br.ret.sptk.many rp +END(notify_resume_user) + +ENTRY(sys_rt_sigreturn) + PT_REGS_UNWIND_INFO(0) + /* + * Allocate 8 input registers since ptrace() may clobber them + */ + alloc r2=ar.pfs,8,0,1,0 + .prologue + PT_REGS_SAVES(16) + adds sp=-16,sp + .body + cmp.eq pNonSys,pSys=r0,r0 // sigreturn isn't a normal syscall... + ;; + /* + * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined + * syscall-entry path does not save them we save them here instead. Note: we + * don't need to save any other registers that are not saved by the stream-lined + * syscall path, because restore_sigcontext() restores them. 
+ */ + adds r16=PT(F6)+32,sp + adds r17=PT(F7)+32,sp + ;; + stf.spill [r16]=f6,32 + stf.spill [r17]=f7,32 + ;; + stf.spill [r16]=f8,32 + stf.spill [r17]=f9,32 + ;; + stf.spill [r16]=f10 + stf.spill [r17]=f11 + adds out0=16,sp // out0 = &sigscratch + br.call.sptk.many rp=ia64_rt_sigreturn +.ret19: .restore sp,0 + adds sp=16,sp + ;; + ld8 r9=[sp] // load new ar.unat + mov.sptk b7=r8,ia64_native_leave_kernel + ;; + mov ar.unat=r9 + br.many b7 +END(sys_rt_sigreturn) + +GLOBAL_ENTRY(ia64_prepare_handle_unaligned) + .prologue + /* + * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 + */ + mov r16=r0 + DO_SAVE_SWITCH_STACK + br.call.sptk.many rp=ia64_handle_unaligned // stack frame setup in ivt +.ret21: .body + DO_LOAD_SWITCH_STACK + br.cond.sptk.many rp // goes to ia64_leave_kernel +END(ia64_prepare_handle_unaligned) + + // + // unw_init_running(void (*callback)(info, arg), void *arg) + // +# define EXTRA_FRAME_SIZE ((UNW_FRAME_INFO_SIZE+15)&~15) + +GLOBAL_ENTRY(unw_init_running) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) + alloc loc1=ar.pfs,2,3,3,0 + ;; + ld8 loc2=[in0],8 + mov loc0=rp + mov r16=loc1 + DO_SAVE_SWITCH_STACK + .body + + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) + .fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE + SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE) + adds sp=-EXTRA_FRAME_SIZE,sp + .body + ;; + adds out0=16,sp // &info + mov out1=r13 // current + adds out2=16+EXTRA_FRAME_SIZE,sp // &switch_stack + br.call.sptk.many rp=unw_init_frame_info +1: adds out0=16,sp // &info + mov b6=loc2 + mov loc2=gp // save gp across indirect function call + ;; + ld8 gp=[in0] + mov out1=in1 // arg + br.call.sptk.many rp=b6 // invoke the callback function +1: mov gp=loc2 // restore gp + + // For now, we don't allow changing registers from within + // unw_init_running; if we ever want to allow that, we'd + // have to do a load_switch_stack here: + .restore sp + adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp 
+ + mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(unw_init_running) + +#ifdef CONFIG_FUNCTION_TRACER +#ifdef CONFIG_DYNAMIC_FTRACE +GLOBAL_ENTRY(_mcount) + br ftrace_stub +END(_mcount) + +.here: + br.ret.sptk.many b0 + +GLOBAL_ENTRY(ftrace_caller) + alloc out0 = ar.pfs, 8, 0, 4, 0 + mov out3 = r0 + ;; + mov out2 = b0 + add r3 = 0x20, r3 + mov out1 = r1; + br.call.sptk.many b0 = ftrace_patch_gp + //this might be called from module, so we must patch gp +ftrace_patch_gp: + movl gp=__gp + mov b0 = r3 + ;; +.global ftrace_call; +ftrace_call: +{ + .mlx + nop.m 0x0 + movl r3 = .here;; +} + alloc loc0 = ar.pfs, 4, 4, 2, 0 + ;; + mov loc1 = b0 + mov out0 = b0 + mov loc2 = r8 + mov loc3 = r15 + ;; + adds out0 = -MCOUNT_INSN_SIZE, out0 + mov out1 = in2 + mov b6 = r3 + + br.call.sptk.many b0 = b6 + ;; + mov ar.pfs = loc0 + mov b0 = loc1 + mov r8 = loc2 + mov r15 = loc3 + br ftrace_stub + ;; +END(ftrace_caller) + +#else +GLOBAL_ENTRY(_mcount) + movl r2 = ftrace_stub + movl r3 = ftrace_trace_function;; + ld8 r3 = [r3];; + ld8 r3 = [r3];; + cmp.eq p7,p0 = r2, r3 +(p7) br.sptk.many ftrace_stub + ;; + + alloc loc0 = ar.pfs, 4, 4, 2, 0 + ;; + mov loc1 = b0 + mov out0 = b0 + mov loc2 = r8 + mov loc3 = r15 + ;; + adds out0 = -MCOUNT_INSN_SIZE, out0 + mov out1 = in2 + mov b6 = r3 + + br.call.sptk.many b0 = b6 + ;; + mov ar.pfs = loc0 + mov b0 = loc1 + mov r8 = loc2 + mov r15 = loc3 + br ftrace_stub + ;; +END(_mcount) +#endif + +GLOBAL_ENTRY(ftrace_stub) + mov r3 = b0 + movl r2 = _mcount_ret_helper + ;; + mov b6 = r2 + mov b7 = r3 + br.ret.sptk.many b6 + +_mcount_ret_helper: + mov b0 = r42 + mov r1 = r41 + mov ar.pfs = r40 + br b7 +END(ftrace_stub) + +#endif /* CONFIG_FUNCTION_TRACER */ + + .rodata + .align 8 + .globl sys_call_table +sys_call_table: + data8 sys_ni_syscall // This must be sys_ni_syscall! See ivt.S. 
+ data8 sys_exit // 1025 + data8 sys_read + data8 sys_write + data8 sys_open + data8 sys_close + data8 sys_creat // 1030 + data8 sys_link + data8 sys_unlink + data8 ia64_execve + data8 sys_chdir + data8 sys_fchdir // 1035 + data8 sys_utimes + data8 sys_mknod + data8 sys_chmod + data8 sys_chown + data8 sys_lseek // 1040 + data8 sys_getpid + data8 sys_getppid + data8 sys_mount + data8 sys_umount + data8 sys_setuid // 1045 + data8 sys_getuid + data8 sys_geteuid + data8 sys_ptrace + data8 sys_access + data8 sys_sync // 1050 + data8 sys_fsync + data8 sys_fdatasync + data8 sys_kill + data8 sys_rename + data8 sys_mkdir // 1055 + data8 sys_rmdir + data8 sys_dup + data8 sys_ia64_pipe + data8 sys_times + data8 ia64_brk // 1060 + data8 sys_setgid + data8 sys_getgid + data8 sys_getegid + data8 sys_acct + data8 sys_ioctl // 1065 + data8 sys_fcntl + data8 sys_umask + data8 sys_chroot + data8 sys_ustat + data8 sys_dup2 // 1070 + data8 sys_setreuid + data8 sys_setregid + data8 sys_getresuid + data8 sys_setresuid + data8 sys_getresgid // 1075 + data8 sys_setresgid + data8 sys_getgroups + data8 sys_setgroups + data8 sys_getpgid + data8 sys_setpgid // 1080 + data8 sys_setsid + data8 sys_getsid + data8 sys_sethostname + data8 sys_setrlimit + data8 sys_getrlimit // 1085 + data8 sys_getrusage + data8 sys_gettimeofday + data8 sys_settimeofday + data8 sys_select + data8 sys_poll // 1090 + data8 sys_symlink + data8 sys_readlink + data8 sys_uselib + data8 sys_swapon + data8 sys_swapoff // 1095 + data8 sys_reboot + data8 sys_truncate + data8 sys_ftruncate + data8 sys_fchmod + data8 sys_fchown // 1100 + data8 ia64_getpriority + data8 sys_setpriority + data8 sys_statfs + data8 sys_fstatfs + data8 sys_gettid // 1105 + data8 sys_semget + data8 sys_semop + data8 sys_semctl + data8 sys_msgget + data8 sys_msgsnd // 1110 + data8 sys_msgrcv + data8 sys_msgctl + data8 sys_shmget + data8 sys_shmat + data8 sys_shmdt // 1115 + data8 sys_shmctl + data8 sys_syslog + data8 sys_setitimer + data8 
sys_getitimer + data8 sys_ni_syscall // 1120 /* was: ia64_oldstat */ + data8 sys_ni_syscall /* was: ia64_oldlstat */ + data8 sys_ni_syscall /* was: ia64_oldfstat */ + data8 sys_vhangup + data8 sys_lchown + data8 sys_remap_file_pages // 1125 + data8 sys_wait4 + data8 sys_sysinfo + data8 sys_clone + data8 sys_setdomainname + data8 sys_newuname // 1130 + data8 sys_adjtimex + data8 sys_ni_syscall /* was: ia64_create_module */ + data8 sys_init_module + data8 sys_delete_module + data8 sys_ni_syscall // 1135 /* was: sys_get_kernel_syms */ + data8 sys_ni_syscall /* was: sys_query_module */ + data8 sys_quotactl + data8 sys_bdflush + data8 sys_sysfs + data8 sys_personality // 1140 + data8 sys_ni_syscall // sys_afs_syscall + data8 sys_setfsuid + data8 sys_setfsgid + data8 sys_getdents + data8 sys_flock // 1145 + data8 sys_readv + data8 sys_writev + data8 sys_pread64 + data8 sys_pwrite64 + data8 sys_sysctl // 1150 + data8 sys_mmap + data8 sys_munmap + data8 sys_mlock + data8 sys_mlockall + data8 sys_mprotect // 1155 + data8 ia64_mremap + data8 sys_msync + data8 sys_munlock + data8 sys_munlockall + data8 sys_sched_getparam // 1160 + data8 sys_sched_setparam + data8 sys_sched_getscheduler + data8 sys_sched_setscheduler + data8 sys_sched_yield + data8 sys_sched_get_priority_max // 1165 + data8 sys_sched_get_priority_min + data8 sys_sched_rr_get_interval + data8 sys_nanosleep + data8 sys_ni_syscall // old nfsservctl + data8 sys_prctl // 1170 + data8 sys_getpagesize + data8 sys_mmap2 + data8 sys_pciconfig_read + data8 sys_pciconfig_write + data8 sys_perfmonctl // 1175 + data8 sys_sigaltstack + data8 sys_rt_sigaction + data8 sys_rt_sigpending + data8 sys_rt_sigprocmask + data8 sys_rt_sigqueueinfo // 1180 + data8 sys_rt_sigreturn + data8 sys_rt_sigsuspend + data8 sys_rt_sigtimedwait + data8 sys_getcwd + data8 sys_capget // 1185 + data8 sys_capset + data8 sys_sendfile64 + data8 sys_ni_syscall // sys_getpmsg (STREAMS) + data8 sys_ni_syscall // sys_putpmsg (STREAMS) + data8 sys_socket 
// 1190 + data8 sys_bind + data8 sys_connect + data8 sys_listen + data8 sys_accept + data8 sys_getsockname // 1195 + data8 sys_getpeername + data8 sys_socketpair + data8 sys_send + data8 sys_sendto + data8 sys_recv // 1200 + data8 sys_recvfrom + data8 sys_shutdown + data8 sys_setsockopt + data8 sys_getsockopt + data8 sys_sendmsg // 1205 + data8 sys_recvmsg + data8 sys_pivot_root + data8 sys_mincore + data8 sys_madvise + data8 sys_newstat // 1210 + data8 sys_newlstat + data8 sys_newfstat + data8 sys_clone2 + data8 sys_getdents64 + data8 sys_getunwind // 1215 + data8 sys_readahead + data8 sys_setxattr + data8 sys_lsetxattr + data8 sys_fsetxattr + data8 sys_getxattr // 1220 + data8 sys_lgetxattr + data8 sys_fgetxattr + data8 sys_listxattr + data8 sys_llistxattr + data8 sys_flistxattr // 1225 + data8 sys_removexattr + data8 sys_lremovexattr + data8 sys_fremovexattr + data8 sys_tkill + data8 sys_futex // 1230 + data8 sys_sched_setaffinity + data8 sys_sched_getaffinity + data8 sys_set_tid_address + data8 sys_fadvise64_64 + data8 sys_tgkill // 1235 + data8 sys_exit_group + data8 sys_lookup_dcookie + data8 sys_io_setup + data8 sys_io_destroy + data8 sys_io_getevents // 1240 + data8 sys_io_submit + data8 sys_io_cancel + data8 sys_epoll_create + data8 sys_epoll_ctl + data8 sys_epoll_wait // 1245 + data8 sys_restart_syscall + data8 sys_semtimedop + data8 sys_timer_create + data8 sys_timer_settime + data8 sys_timer_gettime // 1250 + data8 sys_timer_getoverrun + data8 sys_timer_delete + data8 sys_clock_settime + data8 sys_clock_gettime + data8 sys_clock_getres // 1255 + data8 sys_clock_nanosleep + data8 sys_fstatfs64 + data8 sys_statfs64 + data8 sys_mbind + data8 sys_get_mempolicy // 1260 + data8 sys_set_mempolicy + data8 sys_mq_open + data8 sys_mq_unlink + data8 sys_mq_timedsend + data8 sys_mq_timedreceive // 1265 + data8 sys_mq_notify + data8 sys_mq_getsetattr + data8 sys_kexec_load + data8 sys_ni_syscall // reserved for vserver + data8 sys_waitid // 1270 + data8 sys_add_key 
+ data8 sys_request_key + data8 sys_keyctl + data8 sys_ioprio_set + data8 sys_ioprio_get // 1275 + data8 sys_move_pages + data8 sys_inotify_init + data8 sys_inotify_add_watch + data8 sys_inotify_rm_watch + data8 sys_migrate_pages // 1280 + data8 sys_openat + data8 sys_mkdirat + data8 sys_mknodat + data8 sys_fchownat + data8 sys_futimesat // 1285 + data8 sys_newfstatat + data8 sys_unlinkat + data8 sys_renameat + data8 sys_linkat + data8 sys_symlinkat // 1290 + data8 sys_readlinkat + data8 sys_fchmodat + data8 sys_faccessat + data8 sys_pselect6 + data8 sys_ppoll // 1295 + data8 sys_unshare + data8 sys_splice + data8 sys_set_robust_list + data8 sys_get_robust_list + data8 sys_sync_file_range // 1300 + data8 sys_tee + data8 sys_vmsplice + data8 sys_fallocate + data8 sys_getcpu + data8 sys_epoll_pwait // 1305 + data8 sys_utimensat + data8 sys_signalfd + data8 sys_ni_syscall + data8 sys_eventfd + data8 sys_timerfd_create // 1310 + data8 sys_timerfd_settime + data8 sys_timerfd_gettime + data8 sys_signalfd4 + data8 sys_eventfd2 + data8 sys_epoll_create1 // 1315 + data8 sys_dup3 + data8 sys_pipe2 + data8 sys_inotify_init1 + data8 sys_preadv + data8 sys_pwritev // 1320 + data8 sys_rt_tgsigqueueinfo + data8 sys_recvmmsg + data8 sys_fanotify_init + data8 sys_fanotify_mark + data8 sys_prlimit64 // 1325 + data8 sys_name_to_handle_at + data8 sys_open_by_handle_at + data8 sys_clock_adjtime + data8 sys_syncfs + data8 sys_setns // 1330 + data8 sys_sendmmsg + data8 sys_process_vm_readv + data8 sys_process_vm_writev + data8 sys_accept4 + data8 sys_finit_module // 1335 + data8 sys_sched_setattr + data8 sys_sched_getattr + data8 sys_renameat2 + data8 sys_getrandom + data8 sys_memfd_create // 1340 + data8 sys_bpf + data8 sys_execveat + + .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls +#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/kernel/arch/ia64/kernel/entry.h b/kernel/arch/ia64/kernel/entry.h new file mode 100644 index 
000000000..b83edac02 --- /dev/null +++ b/kernel/arch/ia64/kernel/entry.h @@ -0,0 +1,82 @@ + +/* + * Preserved registers that are shared between code in ivt.S and + * entry.S. Be careful not to step on these! + */ +#define PRED_LEAVE_SYSCALL 1 /* TRUE iff leave from syscall */ +#define PRED_KERNEL_STACK 2 /* returning to kernel-stacks? */ +#define PRED_USER_STACK 3 /* returning to user-stacks? */ +#define PRED_SYSCALL 4 /* inside a system call? */ +#define PRED_NON_SYSCALL 5 /* complement of PRED_SYSCALL */ + +#ifdef __ASSEMBLY__ +# define PASTE2(x,y) x##y +# define PASTE(x,y) PASTE2(x,y) + +# define pLvSys PASTE(p,PRED_LEAVE_SYSCALL) +# define pKStk PASTE(p,PRED_KERNEL_STACK) +# define pUStk PASTE(p,PRED_USER_STACK) +# define pSys PASTE(p,PRED_SYSCALL) +# define pNonSys PASTE(p,PRED_NON_SYSCALL) +#endif + +#define PT(f) (IA64_PT_REGS_##f##_OFFSET) +#define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET) +#define SOS(f) (IA64_SAL_OS_STATE_##f##_OFFSET) + +#define PT_REGS_SAVES(off) \ + .unwabi 3, 'i'; \ + .fframe IA64_PT_REGS_SIZE+16+(off); \ + .spillsp rp, PT(CR_IIP)+16+(off); \ + .spillsp ar.pfs, PT(CR_IFS)+16+(off); \ + .spillsp ar.unat, PT(AR_UNAT)+16+(off); \ + .spillsp ar.fpsr, PT(AR_FPSR)+16+(off); \ + .spillsp pr, PT(PR)+16+(off); + +#define PT_REGS_UNWIND_INFO(off) \ + .prologue; \ + PT_REGS_SAVES(off); \ + .body + +#define SWITCH_STACK_SAVES(off) \ + .savesp ar.unat,SW(CALLER_UNAT)+16+(off); \ + .savesp ar.fpsr,SW(AR_FPSR)+16+(off); \ + .spillsp f2,SW(F2)+16+(off); .spillsp f3,SW(F3)+16+(off); \ + .spillsp f4,SW(F4)+16+(off); .spillsp f5,SW(F5)+16+(off); \ + .spillsp f16,SW(F16)+16+(off); .spillsp f17,SW(F17)+16+(off); \ + .spillsp f18,SW(F18)+16+(off); .spillsp f19,SW(F19)+16+(off); \ + .spillsp f20,SW(F20)+16+(off); .spillsp f21,SW(F21)+16+(off); \ + .spillsp f22,SW(F22)+16+(off); .spillsp f23,SW(F23)+16+(off); \ + .spillsp f24,SW(F24)+16+(off); .spillsp f25,SW(F25)+16+(off); \ + .spillsp f26,SW(F26)+16+(off); .spillsp f27,SW(F27)+16+(off); \ + .spillsp 
f28,SW(F28)+16+(off); .spillsp f29,SW(F29)+16+(off); \ + .spillsp f30,SW(F30)+16+(off); .spillsp f31,SW(F31)+16+(off); \ + .spillsp r4,SW(R4)+16+(off); .spillsp r5,SW(R5)+16+(off); \ + .spillsp r6,SW(R6)+16+(off); .spillsp r7,SW(R7)+16+(off); \ + .spillsp b0,SW(B0)+16+(off); .spillsp b1,SW(B1)+16+(off); \ + .spillsp b2,SW(B2)+16+(off); .spillsp b3,SW(B3)+16+(off); \ + .spillsp b4,SW(B4)+16+(off); .spillsp b5,SW(B5)+16+(off); \ + .spillsp ar.pfs,SW(AR_PFS)+16+(off); .spillsp ar.lc,SW(AR_LC)+16+(off); \ + .spillsp @priunat,SW(AR_UNAT)+16+(off); \ + .spillsp ar.rnat,SW(AR_RNAT)+16+(off); \ + .spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off); \ + .spillsp pr,SW(PR)+16+(off) + +#define DO_SAVE_SWITCH_STACK \ + movl r28=1f; \ + ;; \ + .fframe IA64_SWITCH_STACK_SIZE; \ + adds sp=-IA64_SWITCH_STACK_SIZE,sp; \ + mov.ret.sptk b7=r28,1f; \ + SWITCH_STACK_SAVES(0); \ + br.cond.sptk.many save_switch_stack; \ +1: + +#define DO_LOAD_SWITCH_STACK \ + movl r28=1f; \ + ;; \ + invala; \ + mov.ret.sptk b7=r28,1f; \ + br.cond.sptk.many load_switch_stack; \ +1: .restore sp; \ + adds sp=IA64_SWITCH_STACK_SIZE,sp diff --git a/kernel/arch/ia64/kernel/err_inject.c b/kernel/arch/ia64/kernel/err_inject.c new file mode 100644 index 000000000..0c161ed6d --- /dev/null +++ b/kernel/arch/ia64/kernel/err_inject.c @@ -0,0 +1,314 @@ +/* + * err_inject.c - + * 1.) Inject errors to a processor. + * 2.) Query error injection capabilities. + * This driver along with user space code can be acting as an error + * injection tool. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Written by: Fenghua Yu , Intel Corporation + * Copyright (C) 2006, Intel Corp. All rights reserved. + * + */ +#include +#include +#include +#include +#include + +#define ERR_INJ_DEBUG + +#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte; + +#define define_one_ro(name) \ +static DEVICE_ATTR(name, 0444, show_##name, NULL) + +#define define_one_rw(name) \ +static DEVICE_ATTR(name, 0644, show_##name, store_##name) + +static u64 call_start[NR_CPUS]; +static u64 phys_addr[NR_CPUS]; +static u64 err_type_info[NR_CPUS]; +static u64 err_struct_info[NR_CPUS]; +static struct { + u64 data1; + u64 data2; + u64 data3; +} __attribute__((__aligned__(16))) err_data_buffer[NR_CPUS]; +static s64 status[NR_CPUS]; +static u64 capabilities[NR_CPUS]; +static u64 resources[NR_CPUS]; + +#define show(name) \ +static ssize_t \ +show_##name(struct device *dev, struct device_attribute *attr, \ + char *buf) \ +{ \ + u32 cpu=dev->id; \ + return sprintf(buf, "%lx\n", name[cpu]); \ +} + +#define store(name) \ +static ssize_t \ +store_##name(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t size) \ +{ \ + unsigned int cpu=dev->id; \ + name[cpu] = simple_strtoull(buf, NULL, 16); \ + return size; \ +} + +show(call_start) + +/* It's user's responsibility to call the PAL procedure on a specific + * processor. The cpu number in driver is only used for storing data. 
+ */
+/*
+ * Write handler for the "call_start" attribute.
+ *
+ * Parses a hexadecimal mode from @buf:
+ *   0 - do nothing
+ *   1 - call ia64_pal_mc_error_inject_phys()
+ *   2 - call ia64_pal_mc_error_inject_virt()
+ * Any other value stores -EINVAL in status[cpu].  The PAL call is fed the
+ * values previously staged in err_type_info[], err_struct_info[] and
+ * err_data_buffer[] for this device's id; its return value lands in
+ * status[cpu], and capabilities[cpu]/resources[cpu] are passed by address.
+ *
+ * Always returns @size, even for an unknown mode; the outcome has to be
+ * read back through the "status" attribute.
+ *
+ * NOTE(review): the local 'call_start' shadows the file-scope call_start[]
+ * array, and nothing visible in this file ever writes that array, so reading
+ * the attribute appears to always report its initial value -- confirm
+ * whether that is intended.
+ */
+static ssize_t
+store_call_start(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	unsigned long call_start = simple_strtoull(buf, NULL, 16);
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu);
+	printk(KERN_DEBUG "err_type_info=%lx,\n", err_type_info[cpu]);
+	printk(KERN_DEBUG "err_struct_info=%lx,\n", err_struct_info[cpu]);
+	printk(KERN_DEBUG "err_data_buffer=%lx, %lx, %lx.\n",
+			  err_data_buffer[cpu].data1,
+			  err_data_buffer[cpu].data2,
+			  err_data_buffer[cpu].data3);
+#endif
+	switch (call_start) {
+	    case 0:	/* Do nothing. */
+		break;
+	    case 1:	/* Call pal_mc_error_inject in physical mode. */
+		status[cpu]=ia64_pal_mc_error_inject_phys(err_type_info[cpu],
+					err_struct_info[cpu],
+					ia64_tpa(&err_data_buffer[cpu]),
+					&capabilities[cpu],
+					&resources[cpu]);
+		break;
+	    case 2:	/* Call pal_mc_error_inject in virtual mode. */
+		status[cpu]=ia64_pal_mc_error_inject_virt(err_type_info[cpu],
+					err_struct_info[cpu],
+					ia64_tpa(&err_data_buffer[cpu]),
+					&capabilities[cpu],
+					&resources[cpu]);
+		break;
+	    default:
+		status[cpu] = -EINVAL;
+		break;
+	}
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]);
+	printk(KERN_DEBUG "capabilities=%lx,\n", capabilities[cpu]);
+	printk(KERN_DEBUG "resources=%lx\n", resources[cpu]);
+#endif
+	return size;
+}
+
+show(err_type_info)
+store(err_type_info)
+
+/* Read handler for "virtual_to_phys": print phys_addr[cpu] in hex. */
+static ssize_t
+show_virtual_to_phys(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	unsigned int cpu=dev->id;
+	return sprintf(buf, "%lx\n", phys_addr[cpu]);
+}
+
+/*
+ * Write handler for "virtual_to_phys".
+ *
+ * Parses a hexadecimal virtual address of the writing process from @buf,
+ * makes sure one page at that address can be faulted in, and records
+ * ia64_tpa() of the address in phys_addr[cpu] for show_virtual_to_phys().
+ *
+ * Returns @size on success, -EINVAL if the page cannot be obtained.
+ */
+static ssize_t
+store_virtual_to_phys(struct device *dev, struct device_attribute *attr,
+			const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	u64 virt_addr=simple_strtoull(buf, NULL, 16);
+	int ret;
+
+	/*
+	 * A sysfs store runs without mmap_sem held, which plain
+	 * get_user_pages() requires.  get_user_pages_fast() does its own
+	 * locking.  write=1 and pages=NULL mirror the old call, which passed
+	 * VM_READ in the 'write' slot and no page array, so still no page
+	 * reference is taken here.
+	 */
+	ret = get_user_pages_fast(virt_addr, 1, 1, NULL);
+	if (ret<=0) {
+#ifdef ERR_INJ_DEBUG
+		printk("Virtual address %lx is not existing.\n",virt_addr);
+#endif
+		return -EINVAL;
+	}
+
+	phys_addr[cpu] = ia64_tpa(virt_addr);
+	return size;
+}
+
+show(err_struct_info)
+store(err_struct_info)
+
+/* Read handler for "err_data_buffer": print the three words in hex. */
+static ssize_t
+show_err_data_buffer(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	unsigned int cpu=dev->id;
+
+	return sprintf(buf, "%lx, %lx, %lx\n",
+			err_data_buffer[cpu].data1,
+			err_data_buffer[cpu].data2,
+			err_data_buffer[cpu].data3);
+}
+
+/*
+ * Write handler for "err_data_buffer".
+ *
+ * Expects three comma-separated hexadecimal words ("%lx, %lx, %lx").
+ * Returns @size on success, -EINVAL if fewer than ERR_DATA_BUFFER_SIZE
+ * words were parsed.  The words are scanned into locals first so that a
+ * malformed write leaves err_data_buffer[cpu] untouched.
+ */
+static ssize_t
+store_err_data_buffer(struct device *dev,
+			struct device_attribute *attr,
+			const char *buf, size_t size)
+{
+	unsigned int cpu=dev->id;
+	u64 data1, data2, data3;
+	int ret;
+
+#ifdef ERR_INJ_DEBUG
+	/* Logged before parsing, so this shows the buffer's previous contents. */
+	printk("write err_data_buffer=[%lx,%lx,%lx] on cpu%d\n",
+		 err_data_buffer[cpu].data1,
+		 err_data_buffer[cpu].data2,
+		 err_data_buffer[cpu].data3,
+		 cpu);
+#endif
+	ret=sscanf(buf, "%lx, %lx, %lx", &data1, &data2, &data3);
+	if (ret!=ERR_DATA_BUFFER_SIZE)
+		return -EINVAL;
+
+	err_data_buffer[cpu].data1 = data1;
+	err_data_buffer[cpu].data2 = data2;
+	err_data_buffer[cpu].data3 = data3;
+
+	return size;
+}
+
+show(status)
+show(capabilities)
+show(resources)
+
+define_one_rw(call_start);
+define_one_rw(err_type_info);
+define_one_rw(err_struct_info);
+define_one_rw(err_data_buffer);
+define_one_rw(virtual_to_phys);
+define_one_ro(status);
+define_one_ro(capabilities);
+define_one_ro(resources);
+
+/* Attributes exposed in the "err_inject" group of each CPU device. */
+static struct attribute *default_attrs[] = {
+	&dev_attr_call_start.attr,
+	&dev_attr_virtual_to_phys.attr,
+	&dev_attr_err_type_info.attr,
+	&dev_attr_err_struct_info.attr,
+	&dev_attr_err_data_buffer.attr,
+	&dev_attr_status.attr,
+	&dev_attr_capabilities.attr,
+	&dev_attr_resources.attr,
+	NULL
+};
+
+static struct attribute_group err_inject_attr_group = {
+	.attrs = default_attrs,
+	.name = "err_inject"
+};
+/* Add/Remove err_inject interface for CPU device */
+static int err_inject_add_dev(struct device *sys_dev)
+{
+	return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group);
+}
+
+static int err_inject_remove_dev(struct device *sys_dev)
+{
+	sysfs_remove_group(&sys_dev->kobj,
&err_inject_attr_group);
+	return 0;
+}
+/*
+ * CPU notifier callback: create the err_inject sysfs group when a CPU comes
+ * online and remove it when a CPU is dead.  Other actions are ignored, and
+ * the result of creating the group is not checked.  Always returns
+ * NOTIFY_OK.
+ */
+static int err_inject_cpu_callback(struct notifier_block *nfb,
+		unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct device *sys_dev = get_cpu_device(cpu);
+
+	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
+		err_inject_add_dev(sys_dev);
+	else if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
+		err_inject_remove_dev(sys_dev);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block err_inject_cpu_notifier =
+{
+	.notifier_call = err_inject_cpu_callback,
+};
+
+/*
+ * Module init: run the CPU_ONLINE path by hand for every CPU that is already
+ * online, then register the notifier.  Both steps sit between
+ * cpu_notifier_register_begin() and cpu_notifier_register_done().
+ */
+static int __init
+err_inject_init(void)
+{
+	int cpu;
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_INFO "Enter error injection driver.\n");
+#endif
+
+	cpu_notifier_register_begin();
+
+	for_each_online_cpu(cpu)
+		err_inject_cpu_callback(&err_inject_cpu_notifier, CPU_ONLINE,
+				(void *)(long)cpu);
+
+	__register_hotcpu_notifier(&err_inject_cpu_notifier);
+
+	cpu_notifier_register_done();
+
+	return 0;
+}
+
+/*
+ * Module exit: remove the err_inject group from every online CPU's device
+ * and unregister the notifier, again inside the begin/done pair.
+ */
+static void __exit
+err_inject_exit(void)
+{
+	int cpu;
+
+#ifdef ERR_INJ_DEBUG
+	printk(KERN_INFO "Exit error injection driver.\n");
+#endif
+
+	cpu_notifier_register_begin();
+
+	for_each_online_cpu(cpu)
+		sysfs_remove_group(&get_cpu_device(cpu)->kobj,
+				&err_inject_attr_group);
+
+	__unregister_hotcpu_notifier(&err_inject_cpu_notifier);
+
+	cpu_notifier_register_done();
+}
+
+module_init(err_inject_init);
+module_exit(err_inject_exit);
+
+MODULE_AUTHOR("Fenghua Yu ");
+MODULE_DESCRIPTION("MC error injection kernel sysfs interface");
+MODULE_LICENSE("GPL");
diff --git a/kernel/arch/ia64/kernel/esi.c b/kernel/arch/ia64/kernel/esi.c
new file mode 100644
index 000000000..b09111127
--- /dev/null
+++ b/kernel/arch/ia64/kernel/esi.c
@@ -0,0 +1,205 @@
+/*
+ * Extensible SAL Interface (ESI) support routines.
+ *
+ * Copyright (C) 2006 Hewlett-Packard Co
+ *	Alex Williamson
+ */
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+MODULE_AUTHOR("Alex Williamson ");
+MODULE_DESCRIPTION("Extensible SAL Interface (ESI) support");
+MODULE_LICENSE("GPL");
+
+#define MODULE_NAME	"esi"
+
+#define ESI_TABLE_GUID					\
+    EFI_GUID(0x43EA58DC, 0xCF28, 0x4b06, 0xB3,		\
+	     0x91, 0xB7, 0x50, 0x59, 0x34, 0x2B, 0xD4)
+
+enum esi_systab_entry_type {
+	ESI_DESC_ENTRY_POINT = 0
+};
+
+/*
+ * Entry type:	Size:
+ *	0	48
+ *
+ * The size table is a string literal indexed by entry type; "\060" is
+ * octal for 48.
+ */
+#define ESI_DESC_SIZE(type)	"\060"[(unsigned) (type)]
+
+typedef struct ia64_esi_desc_entry_point {
+	u8 type;
+	u8 reserved1[15];
+	u64 esi_proc;
+	u64 gp;
+	efi_guid_t guid;
+} ia64_esi_desc_entry_point_t;
+
+/* Entry address plus gp pair that ia64_esi_call() casts to ia64_sal_handler. */
+struct pdesc {
+	void *addr;
+	void *gp;
+};
+
+/* Set by esi_init() once the table has been validated; NULL until then. */
+static struct ia64_sal_systab *esi_systab;
+
+/*
+ * Locate the ESI table among the EFI configuration tables by GUID, check its
+ * "ESIT" signature and walk its entries to make sure each has a known type.
+ * Only then is the table published in esi_systab.
+ *
+ * Returns 0 on success and -ENODEV if the table is missing, has a bad
+ * signature, or contains an entry of unknown type.
+ */
+static int __init esi_init (void)
+{
+	efi_config_table_t *config_tables;
+	struct ia64_sal_systab *systab;
+	unsigned long esi = 0;
+	char *p;
+	int i;
+
+	config_tables = __va(efi.systab->tables);
+
+	for (i = 0; i < (int) efi.systab->nr_tables; ++i) {
+		if (efi_guidcmp(config_tables[i].guid, ESI_TABLE_GUID) == 0) {
+			esi = config_tables[i].table;
+			break;
+		}
+	}
+
+	if (!esi)
+		return -ENODEV;
+
+	systab = __va(esi);
+
+	if (strncmp(systab->signature, "ESIT", 4) != 0) {
+		/* Newline-terminated so the record is not left open. */
+		printk(KERN_ERR "bad signature in ESI system table!\n");
+		return -ENODEV;
+	}
+
+	p = (char *) (systab + 1);
+	for (i = 0; i < systab->entry_count; i++) {
+		/*
+		 * The first byte of each entry type contains the type
+		 * descriptor.
+		 */
+		switch (*p) {
+		      case ESI_DESC_ENTRY_POINT:
+			break;
+		      default:
+			printk(KERN_WARNING "Unknown table type %d found in "
+			       "ESI table, ignoring rest of table\n", *p);
+			return -ENODEV;
+		}
+
+		p += ESI_DESC_SIZE(*p);
+	}
+
+	esi_systab = systab;
+	return 0;
+}
+
+
+/*
+ * Call the ESI procedure whose entry-point descriptor matches @guid, using
+ * __va()-converted entry and gp addresses.
+ *
+ * @proc_type selects what surrounds the call: ESI_PROC_SERIALIZED takes
+ * sal_lock with interrupts saved and disabled, ESI_PROC_MP_SAFE only saves
+ * and disables local interrupts, and anything else disables preemption.
+ * The scratch FP registers are saved before the call and reloaded after it.
+ *
+ * The procedure's return value is stored in *@isrvp.  Returns 0 if a
+ * matching entry was called, -1 if esi_init() did not publish a table or no
+ * entry matches.
+ */
+int ia64_esi_call (efi_guid_t guid, struct ia64_sal_retval *isrvp,
+		   enum esi_proc_type proc_type, u64 func,
+		   u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
+		   u64 arg7)
+{
+	struct ia64_fpreg fr[6];
+	unsigned long flags = 0;
+	int i;
+	char *p;
+
+	if (!esi_systab)
+		return -1;
+
+	p = (char *) (esi_systab + 1);
+	for (i = 0; i < esi_systab->entry_count; i++) {
+		if (*p == ESI_DESC_ENTRY_POINT) {
+			ia64_esi_desc_entry_point_t *esi = (void *)p;
+			if (!efi_guidcmp(guid, esi->guid)) {
+				ia64_sal_handler esi_proc;
+				struct pdesc pdesc;
+
+				pdesc.addr = __va(esi->esi_proc);
+				pdesc.gp = __va(esi->gp);
+
+				esi_proc = (ia64_sal_handler) &pdesc;
+
+				ia64_save_scratch_fpregs(fr);
+				if (proc_type == ESI_PROC_SERIALIZED)
+					spin_lock_irqsave(&sal_lock, flags);
+				else if (proc_type == ESI_PROC_MP_SAFE)
+					local_irq_save(flags);
+				else
+					preempt_disable();
+				*isrvp = (*esi_proc)(func, arg1, arg2, arg3,
+						     arg4, arg5, arg6, arg7);
+				if (proc_type == ESI_PROC_SERIALIZED)
+					spin_unlock_irqrestore(&sal_lock,
+							       flags);
+				else if (proc_type == ESI_PROC_MP_SAFE)
+					local_irq_restore(flags);
+				else
+					preempt_enable();
+				ia64_load_scratch_fpregs(fr);
+				return 0;
+			}
+		}
+		p += ESI_DESC_SIZE(*p);
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(ia64_esi_call);
+
+int ia64_esi_call_phys (efi_guid_t guid, struct ia64_sal_retval *isrvp,
+			u64 func, u64 arg1, u64 arg2, u64 arg3, u64 arg4,
+			u64 arg5, u64 arg6, u64 arg7)
+{
+	struct ia64_fpreg fr[6];
+	unsigned long flags;
+	u64 esi_params[8];
+	char *p;
+	int i;
+
+	if (!esi_systab)
+		return -1;
+
+	p = (char *) (esi_systab + 1);
+	for (i = 0; i < esi_systab->entry_count; i++) {
+		if (*p == ESI_DESC_ENTRY_POINT) {
+			ia64_esi_desc_entry_point_t *esi = (void *)p;
+			if
(!efi_guidcmp(guid, esi->guid)) { + ia64_sal_handler esi_proc; + struct pdesc pdesc; + + pdesc.addr = (void *)esi->esi_proc; + pdesc.gp = (void *)esi->gp; + + esi_proc = (ia64_sal_handler) &pdesc; + + esi_params[0] = func; + esi_params[1] = arg1; + esi_params[2] = arg2; + esi_params[3] = arg3; + esi_params[4] = arg4; + esi_params[5] = arg5; + esi_params[6] = arg6; + esi_params[7] = arg7; + ia64_save_scratch_fpregs(fr); + spin_lock_irqsave(&sal_lock, flags); + *isrvp = esi_call_phys(esi_proc, esi_params); + spin_unlock_irqrestore(&sal_lock, flags); + ia64_load_scratch_fpregs(fr); + return 0; + } + } + p += ESI_DESC_SIZE(*p); + } + return -1; +} +EXPORT_SYMBOL_GPL(ia64_esi_call_phys); + +static void __exit esi_exit (void) +{ +} + +module_init(esi_init); +module_exit(esi_exit); /* makes module removable... */ diff --git a/kernel/arch/ia64/kernel/esi_stub.S b/kernel/arch/ia64/kernel/esi_stub.S new file mode 100644 index 000000000..6b3d6c1f9 --- /dev/null +++ b/kernel/arch/ia64/kernel/esi_stub.S @@ -0,0 +1,96 @@ +/* + * ESI call stub. + * + * Copyright (C) 2005 Hewlett-Packard Co + * Alex Williamson + * + * Based on EFI call stub by David Mosberger. The stub is virtually + * identical to the one for EFI phys-mode calls, except that ESI + * calls may have up to 8 arguments, so they get passed to this routine + * through memory. + * + * This stub allows us to make ESI calls in physical mode with interrupts + * turned off. ESI calls may not support calling from virtual mode. + * + * Google for "Extensible SAL specification" for a document describing the + * ESI standard. + */ + +/* + * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System + * Abstraction Layer Specification", revision 2.6e). Note that + * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. + * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call + * (the br.ia instruction fails unless psr.dfl and psr.dfh are + * cleared). 
Fortunately, SAL promises not to touch the floating + * point regs, so at least we don't have to save f2-f127. + */ +#define PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) + +#define PSR_BITS_TO_SET \ + (IA64_PSR_BN) + +#include +#include + +/* + * Inputs: + * in0 = address of function descriptor of ESI routine to call + * in1 = address of array of ESI parameters + * + * Outputs: + * r8 = result returned by called function + */ +GLOBAL_ENTRY(esi_call_phys) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) + alloc loc1=ar.pfs,2,7,8,0 + ld8 r2=[in0],8 // load ESI function's entry point + mov loc0=rp + .body + ;; + ld8 out0=[in1],8 // ESI params loaded from array + ;; // passing all as inputs doesn't work + ld8 out1=[in1],8 + ;; + ld8 out2=[in1],8 + ;; + ld8 out3=[in1],8 + ;; + ld8 out4=[in1],8 + ;; + ld8 out5=[in1],8 + ;; + ld8 out6=[in1],8 + ;; + ld8 out7=[in1] + mov loc2=gp // save global pointer + mov loc4=ar.rsc // save RSE configuration + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + ;; + ld8 gp=[in0] // load ESI function's global pointer + movl r16=PSR_BITS_TO_CLEAR + mov loc3=psr // save processor status word + movl r17=PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 + mov b6=r2 + ;; + andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared + br.call.sptk.many rp=ia64_switch_mode_phys +.ret0: mov loc5=r19 // old ar.bsp + mov loc6=r20 // old sp + br.call.sptk.many rp=b6 // call the ESI function +.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov r16=loc3 // save virtual mode psr + mov r19=loc5 // save virtual mode bspstore + mov r20=loc6 // save virtual mode sp + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode +.ret2: mov ar.rsc=loc4 // restore RSE configuration + mov ar.pfs=loc1 + mov rp=loc0 + mov gp=loc2 + br.ret.sptk.many rp +END(esi_call_phys) diff --git 
a/kernel/arch/ia64/kernel/fsys.S b/kernel/arch/ia64/kernel/fsys.S new file mode 100644 index 000000000..abc6dee37 --- /dev/null +++ b/kernel/arch/ia64/kernel/fsys.S @@ -0,0 +1,836 @@ +/* + * This file contains the light-weight system call handlers (fsyscall-handlers). + * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * 25-Sep-03 davidm Implement fsys_rt_sigprocmask(). + * 18-Feb-03 louisk Implement fsys_gettimeofday(). + * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more, + * probably broke it along the way... ;-) + * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make + * it capable of using memory based clocks without falling back to C code. + * 08-Feb-07 Fenghua Yu Implement fsys_getcpu. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "entry.h" +#include "paravirt_inst.h" + +/* + * See Documentation/ia64/fsys.txt for details on fsyscalls. + * + * On entry to an fsyscall handler: + * r10 = 0 (i.e., defaults to "successful syscall return") + * r11 = saved ar.pfs (a user-level value) + * r15 = system call number + * r16 = "current" task pointer (in normal kernel-mode, this is in r13) + * r32-r39 = system call arguments + * b6 = return address (a user-level value) + * ar.pfs = previous frame-state (a user-level value) + * PSR.be = cleared to zero (i.e., little-endian byte order is in effect) + * all other registers may contain values passed in from user-mode + * + * On return from an fsyscall handler: + * r11 = saved ar.pfs (as passed into the fsyscall handler) + * r15 = system call number (as passed into the fsyscall handler) + * r32-r39 = system call arguments (as passed into the fsyscall handler) + * b6 = return address (as passed into the fsyscall handler) + * ar.pfs = previous frame-state (as passed into the fsyscall handler) + */ + +ENTRY(fsys_ni_syscall) + .prologue + .altrp b6 + .body + mov r8=ENOSYS + mov r10=-1 + 
FSYS_RETURN +END(fsys_ni_syscall) + +ENTRY(fsys_getpid) + .prologue + .altrp b6 + .body + add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 + ;; + ld8 r17=[r17] // r17 = current->group_leader + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + ;; + ld4 r9=[r9] + add r17=IA64_TASK_TGIDLINK_OFFSET,r17 + ;; + and r9=TIF_ALLWORK_MASK,r9 + ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid + ;; + add r8=IA64_PID_LEVEL_OFFSET,r17 + ;; + ld4 r8=[r8] // r8 = pid->level + add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] + ;; + shl r8=r8,IA64_UPID_SHIFT + ;; + add r17=r17,r8 // r17 = &pid->numbers[pid->level] + ;; + ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr + ;; + mov r17=0 + ;; + cmp.ne p8,p0=0,r9 +(p8) br.spnt.many fsys_fallback_syscall + FSYS_RETURN +END(fsys_getpid) + +ENTRY(fsys_set_tid_address) + .prologue + .altrp b6 + .body + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + add r17=IA64_TASK_TGIDLINK_OFFSET,r16 + ;; + ld4 r9=[r9] + tnat.z p6,p7=r32 // check argument register for being NaT + ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid + ;; + and r9=TIF_ALLWORK_MASK,r9 + add r8=IA64_PID_LEVEL_OFFSET,r17 + add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 + ;; + ld4 r8=[r8] // r8 = pid->level + add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] + ;; + shl r8=r8,IA64_UPID_SHIFT + ;; + add r17=r17,r8 // r17 = &pid->numbers[pid->level] + ;; + ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr + ;; + cmp.ne p8,p0=0,r9 + mov r17=-1 + ;; +(p6) st8 [r18]=r32 +(p7) st8 [r18]=r17 +(p8) br.spnt.many fsys_fallback_syscall + ;; + mov r17=0 // i must not leak kernel bits... + mov r18=0 // i must not leak kernel bits... 
+ FSYS_RETURN +END(fsys_set_tid_address) + +#if IA64_GTOD_SEQ_OFFSET !=0 +#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t +#endif +#if IA64_ITC_JITTER_OFFSET !=0 +#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t +#endif +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_DIVIDE_BY_1000 0x4000 +#define CLOCK_ADD_MONOTONIC 0x8000 + +ENTRY(fsys_gettimeofday) + .prologue + .altrp b6 + .body + mov r31 = r32 + tnat.nz p6,p0 = r33 // guard against NaT argument +(p6) br.cond.spnt.few .fail_einval + mov r30 = CLOCK_DIVIDE_BY_1000 + ;; +.gettime: + // Register map + // Incoming r31 = pointer to address where to place result + // r30 = flags determining how time is processed + // r2,r3 = temp r4-r7 preserved + // r8 = result nanoseconds + // r9 = result seconds + // r10 = temporary storage for clock difference + // r11 = preserved: saved ar.pfs + // r12 = preserved: memory stack + // r13 = preserved: thread pointer + // r14 = address of mask / mask value + // r15 = preserved: system call number + // r16 = preserved: current task pointer + // r17 = (not used) + // r18 = (not used) + // r19 = address of itc_lastcycle + // r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence) + // r21 = address of mmio_ptr + // r22 = address of wall_time or monotonic_time + // r23 = address of shift / value + // r24 = address mult factor / cycle_last value + // r25 = itc_lastcycle value + // r26 = address clocksource cycle_last + // r27 = (not used) + // r28 = sequence number at the beginning of critcal section + // r29 = address of itc_jitter + // r30 = time processing flags / memory address + // r31 = pointer to result + // Predicates + // p6,p7 short term use + // p8 = timesource ar.itc + // p9 = timesource mmio64 + // p10 = timesource mmio32 - not used + // p11 = timesource not to be handled by asm code + // p12 = memory time source ( = p9 | p10) - not used + // p13 = do cmpxchg with itc_lastcycle + 
// p14 = Divide by 1000 + // p15 = Add monotonic + // + // Note that instructions are optimized for McKinley. McKinley can + // process two bundles simultaneously and therefore we continuously + // try to feed the CPU two bundles and then a stop. + + add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 + tnat.nz p6,p0 = r31 // guard against Nat argument +(p6) br.cond.spnt.few .fail_einval + movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address + ;; + ld4 r2 = [r2] // process work pending flags + movl r29 = itc_jitter_data // itc_jitter + add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time + add r21 = IA64_CLKSRC_MMIO_OFFSET,r20 + mov pr = r30,0xc000 // Set predicates according to function + ;; + and r2 = TIF_ALLWORK_MASK,r2 + add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 +(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time + ;; + add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last + cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled +(p6) br.cond.spnt.many fsys_fallback_syscall + ;; + // Begin critical section +.time_redo: + ld4.acq r28 = [r20] // gtod_lock.sequence, Must take first + ;; + and r28 = ~1,r28 // And make sequence even to force retry if odd + ;; + ld8 r30 = [r21] // clocksource->mmio_ptr + add r24 = IA64_CLKSRC_MULT_OFFSET,r20 + ld4 r2 = [r29] // itc_jitter value + add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20 + add r14 = IA64_CLKSRC_MASK_OFFSET,r20 + ;; + ld4 r3 = [r24] // clocksource mult value + ld8 r14 = [r14] // clocksource mask value + cmp.eq p8,p9 = 0,r30 // use cpu timer if no mmio_ptr + ;; + setf.sig f7 = r3 // Setup for mult scaling of counter +(p8) cmp.ne p13,p0 = r2,r0 // need itc_jitter compensation, set p13 + ld4 r23 = [r23] // clocksource shift value + ld8 r24 = [r26] // get clksrc_cycle_last value +(p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control + ;; + .pred.rel.mutex p8,p9 + MOV_FROM_ITC(p8, p6, r2, r10) // CPU_TIMER. 36 clocks latency!!! +(p9) ld8 r2 = [r30] // MMIO_TIMER. 
Could also have latency issues.. +(p13) ld8 r25 = [r19] // get itc_lastcycle value + ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec + ;; + ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec +(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm) + ;; +(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared + sub r10 = r2,r24 // current_cycle - last_cycle + ;; +(p6) sub r10 = r25,r24 // time we got was less than last_cycle +(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg + ;; +(p7) cmpxchg8.rel r3 = [r19],r2,ar.ccv + ;; +(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful + ;; +(p7) sub r10 = r3,r24 // then use new last_cycle instead + ;; + and r10 = r10,r14 // Apply mask + ;; + setf.sig f8 = r10 + nop.i 123 + ;; + // fault check takes 5 cycles and we have spare time +EX(.fail_efault, probe.w.fault r31, 3) + xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) + ;; + getf.sig r2 = f8 + mf + ;; + ld4 r10 = [r20] // gtod_lock.sequence + shr.u r2 = r2,r23 // shift by factor + ;; + add r8 = r8,r2 // Add xtime.nsecs + cmp4.ne p7,p0 = r28,r10 +(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo + // End critical section. + // Now r8=tv->tv_nsec and r9=tv->tv_sec + mov r10 = r0 + movl r2 = 1000000000 + add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31 +(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack + ;; +.time_normalize: + mov r21 = r8 + cmp.ge p6,p0 = r8,r2 +(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time + ;; +(p14) setf.sig f8 = r20 +(p6) sub r8 = r8,r2 +(p6) add r9 = 1,r9 // two nops before the branch. +(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod +(p6) br.cond.dpnt.few .time_normalize + ;; + // Divided by 8 though shift. 
Now divide by 125 + // The compiler was able to do that with a multiply + // and a shift and we do the same +EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles +(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it + ;; +(p14) getf.sig r2 = f8 + ;; + mov r8 = r0 +(p14) shr.u r21 = r2, 4 + ;; +EX(.fail_efault, st8 [r31] = r9) +EX(.fail_efault, st8 [r23] = r21) + FSYS_RETURN +.fail_einval: + mov r8 = EINVAL + mov r10 = -1 + FSYS_RETURN +.fail_efault: + mov r8 = EFAULT + mov r10 = -1 + FSYS_RETURN +END(fsys_gettimeofday) + +ENTRY(fsys_clock_gettime) + .prologue + .altrp b6 + .body + cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32 + // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC +(p6) br.spnt.few fsys_fallback_syscall + mov r31 = r33 + shl r30 = r32,15 + br.many .gettime +END(fsys_clock_gettime) + +/* + * fsys_getcpu doesn't use the third parameter in this implementation. It reads + * current_thread_info()->cpu and corresponding node in cpu_to_node_map. + */ +ENTRY(fsys_getcpu) + .prologue + .altrp b6 + .body + ;; + add r2=TI_FLAGS+IA64_TASK_SIZE,r16 + tnat.nz p6,p0 = r32 // guard against NaT argument + add r3=TI_CPU+IA64_TASK_SIZE,r16 + ;; + ld4 r3=[r3] // M r3 = thread_info->cpu + ld4 r2=[r2] // M r2 = thread_info->flags +(p6) br.cond.spnt.few .fail_einval // B + ;; + tnat.nz p7,p0 = r33 // I guard against NaT argument +(p7) br.cond.spnt.few .fail_einval // B + ;; + cmp.ne p6,p0=r32,r0 + cmp.ne p7,p0=r33,r0 + ;; +#ifdef CONFIG_NUMA + movl r17=cpu_to_node_map + ;; +EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles +EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles + shladd r18=r3,1,r17 + ;; + ld2 r20=[r18] // r20 = cpu_to_node_map[cpu] + and r2 = TIF_ALLWORK_MASK,r2 + ;; + cmp.ne p8,p0=0,r2 +(p8) br.spnt.many fsys_fallback_syscall + ;; + ;; +EX(.fail_efault, (p6) st4 [r32] = r3) +EX(.fail_efault, (p7) st2 [r33] = r20) + mov r8=0 + ;; +#else +EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This 
takes 5 cycles +EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles + and r2 = TIF_ALLWORK_MASK,r2 + ;; + cmp.ne p8,p0=0,r2 +(p8) br.spnt.many fsys_fallback_syscall + ;; +EX(.fail_efault, (p6) st4 [r32] = r3) +EX(.fail_efault, (p7) st2 [r33] = r0) + mov r8=0 + ;; +#endif + FSYS_RETURN +END(fsys_getcpu) + +ENTRY(fsys_fallback_syscall) + .prologue + .altrp b6 + .body + /* + * We only get here from light-weight syscall handlers. Thus, we already + * know that r15 contains a valid syscall number. No need to re-check. + */ + adds r17=-1024,r15 + movl r14=sys_call_table + ;; + RSM_PSR_I(p0, r26, r27) + shladd r18=r17,3,r14 + ;; + ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point + MOV_FROM_PSR(p0, r29, r26) // read psr (12 cyc load latency) + mov r27=ar.rsc + mov r21=ar.fpsr + mov r26=ar.pfs +END(fsys_fallback_syscall) + /* FALL THROUGH */ +GLOBAL_ENTRY(paravirt_fsys_bubble_down) + .prologue + .altrp b6 + .body + /* + * We get here for syscalls that don't have a lightweight + * handler. For those, we need to bubble down into the kernel + * and that requires setting up a minimal pt_regs structure, + * and initializing the CPU state more or less as if an + * interruption had occurred. To make syscall-restarts work, + * we setup pt_regs such that cr_iip points to the second + * instruction in syscall_via_break. Decrementing the IP + * hence will restart the syscall via break and not + * decrementing IP will return us to the caller, as usual. + * Note that we preserve the value of psr.pp rather than + * initializing it from dcr.pp. This makes it possible to + * distinguish fsyscall execution from other privileged + * execution. + * + * On entry: + * - normal fsyscall handler register usage, except + * that we also have: + * - r18: address of syscall entry point + * - r21: ar.fpsr + * - r26: ar.pfs + * - r27: ar.rsc + * - r29: psr + * + * We used to clear some PSR bits here but that requires slow + * serialization. 
Fortunately, that isn't really necessary. + * The rationale is as follows: we used to clear bits + * ~PSR_PRESERVED_BITS in PSR.L. Since + * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we + * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}. + * However, + * + * PSR.BE : already is turned off in __kernel_syscall_via_epc() + * PSR.AC : don't care (kernel normally turns PSR.AC on) + * PSR.I : already turned off by the time paravirt_fsys_bubble_down gets + * invoked + * PSR.DFL: always 0 (kernel never turns it on) + * PSR.DFH: don't care --- kernel never touches f32-f127 on its own + * initiative + * PSR.DI : always 0 (kernel never turns it on) + * PSR.SI : always 0 (kernel never turns it on) + * PSR.DB : don't care --- kernel never enables kernel-level + * breakpoints + * PSR.TB : must be 0 already; if it wasn't zero on entry to + * __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down + * will trigger a taken-branch trap; the taken-trap-handler then + * converts the syscall into a break-based system-call. + */ + /* + * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. + * The rest we have to synthesize. 
+ */ +# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \ + | (0x1 << IA64_PSR_RI_BIT) \ + | IA64_PSR_BN | IA64_PSR_I) + + invala // M0|1 + movl r14=ia64_ret_from_syscall // X + + nop.m 0 + movl r28=__kernel_syscall_via_break // X create cr.iip + ;; + + mov r2=r16 // A get task addr to addl-addressable register + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A + mov r31=pr // I0 save pr (2 cyc) + ;; + st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag + addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS + add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A + ;; + ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags + lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store + nop.i 0 + ;; + mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting +#else + nop.m 0 +#endif + nop.i 0 + ;; + mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore + mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!) 
+ nop.i 0 + ;; + mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS + movl r8=PSR_ONE_BITS // X + ;; + mov r25=ar.unat // M2 (5 cyc) save ar.unat + mov r19=b6 // I0 save b6 (2 cyc) + mov r20=r1 // A save caller's gp in r20 + ;; + or r29=r8,r29 // A construct cr.ipsr value to save + mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc) + addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack + + mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc) + cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 + br.call.sptk.many b7=ia64_syscall_setup // B + ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + // mov.m r30=ar.itc is called in advance + add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 + add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2 + ;; + ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel + ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel + ;; + ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime + ld8 r21=[r17] // cumulated utime + sub r22=r19,r18 // stime before leave kernel + ;; + st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp + sub r18=r30,r19 // elapsed time in user mode + ;; + add r20=r20,r22 // sum stime + add r21=r21,r18 // sum utime + ;; + st8 [r16]=r20 // update stime + st8 [r17]=r21 // update utime + ;; +#endif + mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 + mov rp=r14 // I0 set the real return addr + and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A + ;; + SSM_PSR_I(p0, p6, r22) // M2 we're on kernel stacks now, reenable irqs + cmp.eq p8,p0=r3,r0 // A +(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT + + nop.m 0 +(p8) br.call.sptk.many b6=b6 // B (ignore return address) + br.cond.spnt ia64_trace_syscall // B +END(paravirt_fsys_bubble_down) + + .rodata + .align 8 + .globl paravirt_fsyscall_table + + data8 paravirt_fsys_bubble_down +paravirt_fsyscall_table: + data8 fsys_ni_syscall + data8 0 // exit // 1025 + data8 0 // read + data8 0 // write + data8 0 // 
open + data8 0 // close + data8 0 // creat // 1030 + data8 0 // link + data8 0 // unlink + data8 0 // execve + data8 0 // chdir + data8 0 // fchdir // 1035 + data8 0 // utimes + data8 0 // mknod + data8 0 // chmod + data8 0 // chown + data8 0 // lseek // 1040 + data8 fsys_getpid // getpid + data8 0 // getppid + data8 0 // mount + data8 0 // umount + data8 0 // setuid // 1045 + data8 0 // getuid + data8 0 // geteuid + data8 0 // ptrace + data8 0 // access + data8 0 // sync // 1050 + data8 0 // fsync + data8 0 // fdatasync + data8 0 // kill + data8 0 // rename + data8 0 // mkdir // 1055 + data8 0 // rmdir + data8 0 // dup + data8 0 // pipe + data8 0 // times + data8 0 // brk // 1060 + data8 0 // setgid + data8 0 // getgid + data8 0 // getegid + data8 0 // acct + data8 0 // ioctl // 1065 + data8 0 // fcntl + data8 0 // umask + data8 0 // chroot + data8 0 // ustat + data8 0 // dup2 // 1070 + data8 0 // setreuid + data8 0 // setregid + data8 0 // getresuid + data8 0 // setresuid + data8 0 // getresgid // 1075 + data8 0 // setresgid + data8 0 // getgroups + data8 0 // setgroups + data8 0 // getpgid + data8 0 // setpgid // 1080 + data8 0 // setsid + data8 0 // getsid + data8 0 // sethostname + data8 0 // setrlimit + data8 0 // getrlimit // 1085 + data8 0 // getrusage + data8 fsys_gettimeofday // gettimeofday + data8 0 // settimeofday + data8 0 // select + data8 0 // poll // 1090 + data8 0 // symlink + data8 0 // readlink + data8 0 // uselib + data8 0 // swapon + data8 0 // swapoff // 1095 + data8 0 // reboot + data8 0 // truncate + data8 0 // ftruncate + data8 0 // fchmod + data8 0 // fchown // 1100 + data8 0 // getpriority + data8 0 // setpriority + data8 0 // statfs + data8 0 // fstatfs + data8 0 // gettid // 1105 + data8 0 // semget + data8 0 // semop + data8 0 // semctl + data8 0 // msgget + data8 0 // msgsnd // 1110 + data8 0 // msgrcv + data8 0 // msgctl + data8 0 // shmget + data8 0 // shmat + data8 0 // shmdt // 1115 + data8 0 // shmctl + data8 0 // syslog + data8 
0 // setitimer + data8 0 // getitimer + data8 0 // 1120 + data8 0 + data8 0 + data8 0 // vhangup + data8 0 // lchown + data8 0 // remap_file_pages // 1125 + data8 0 // wait4 + data8 0 // sysinfo + data8 0 // clone + data8 0 // setdomainname + data8 0 // newuname // 1130 + data8 0 // adjtimex + data8 0 + data8 0 // init_module + data8 0 // delete_module + data8 0 // 1135 + data8 0 + data8 0 // quotactl + data8 0 // bdflush + data8 0 // sysfs + data8 0 // personality // 1140 + data8 0 // afs_syscall + data8 0 // setfsuid + data8 0 // setfsgid + data8 0 // getdents + data8 0 // flock // 1145 + data8 0 // readv + data8 0 // writev + data8 0 // pread64 + data8 0 // pwrite64 + data8 0 // sysctl // 1150 + data8 0 // mmap + data8 0 // munmap + data8 0 // mlock + data8 0 // mlockall + data8 0 // mprotect // 1155 + data8 0 // mremap + data8 0 // msync + data8 0 // munlock + data8 0 // munlockall + data8 0 // sched_getparam // 1160 + data8 0 // sched_setparam + data8 0 // sched_getscheduler + data8 0 // sched_setscheduler + data8 0 // sched_yield + data8 0 // sched_get_priority_max // 1165 + data8 0 // sched_get_priority_min + data8 0 // sched_rr_get_interval + data8 0 // nanosleep + data8 0 // nfsservctl + data8 0 // prctl // 1170 + data8 0 // getpagesize + data8 0 // mmap2 + data8 0 // pciconfig_read + data8 0 // pciconfig_write + data8 0 // perfmonctl // 1175 + data8 0 // sigaltstack + data8 0 // rt_sigaction + data8 0 // rt_sigpending + data8 0 // rt_sigprocmask + data8 0 // rt_sigqueueinfo // 1180 + data8 0 // rt_sigreturn + data8 0 // rt_sigsuspend + data8 0 // rt_sigtimedwait + data8 0 // getcwd + data8 0 // capget // 1185 + data8 0 // capset + data8 0 // sendfile + data8 0 + data8 0 + data8 0 // socket // 1190 + data8 0 // bind + data8 0 // connect + data8 0 // listen + data8 0 // accept + data8 0 // getsockname // 1195 + data8 0 // getpeername + data8 0 // socketpair + data8 0 // send + data8 0 // sendto + data8 0 // recv // 1200 + data8 0 // recvfrom + data8 0 // 
shutdown + data8 0 // setsockopt + data8 0 // getsockopt + data8 0 // sendmsg // 1205 + data8 0 // recvmsg + data8 0 // pivot_root + data8 0 // mincore + data8 0 // madvise + data8 0 // newstat // 1210 + data8 0 // newlstat + data8 0 // newfstat + data8 0 // clone2 + data8 0 // getdents64 + data8 0 // getunwind // 1215 + data8 0 // readahead + data8 0 // setxattr + data8 0 // lsetxattr + data8 0 // fsetxattr + data8 0 // getxattr // 1220 + data8 0 // lgetxattr + data8 0 // fgetxattr + data8 0 // listxattr + data8 0 // llistxattr + data8 0 // flistxattr // 1225 + data8 0 // removexattr + data8 0 // lremovexattr + data8 0 // fremovexattr + data8 0 // tkill + data8 0 // futex // 1230 + data8 0 // sched_setaffinity + data8 0 // sched_getaffinity + data8 fsys_set_tid_address // set_tid_address + data8 0 // fadvise64_64 + data8 0 // tgkill // 1235 + data8 0 // exit_group + data8 0 // lookup_dcookie + data8 0 // io_setup + data8 0 // io_destroy + data8 0 // io_getevents // 1240 + data8 0 // io_submit + data8 0 // io_cancel + data8 0 // epoll_create + data8 0 // epoll_ctl + data8 0 // epoll_wait // 1245 + data8 0 // restart_syscall + data8 0 // semtimedop + data8 0 // timer_create + data8 0 // timer_settime + data8 0 // timer_gettime // 1250 + data8 0 // timer_getoverrun + data8 0 // timer_delete + data8 0 // clock_settime + data8 fsys_clock_gettime // clock_gettime + data8 0 // clock_getres // 1255 + data8 0 // clock_nanosleep + data8 0 // fstatfs64 + data8 0 // statfs64 + data8 0 // mbind + data8 0 // get_mempolicy // 1260 + data8 0 // set_mempolicy + data8 0 // mq_open + data8 0 // mq_unlink + data8 0 // mq_timedsend + data8 0 // mq_timedreceive // 1265 + data8 0 // mq_notify + data8 0 // mq_getsetattr + data8 0 // kexec_load + data8 0 // vserver + data8 0 // waitid // 1270 + data8 0 // add_key + data8 0 // request_key + data8 0 // keyctl + data8 0 // ioprio_set + data8 0 // ioprio_get // 1275 + data8 0 // move_pages + data8 0 // inotify_init + data8 0 // 
inotify_add_watch + data8 0 // inotify_rm_watch + data8 0 // migrate_pages // 1280 + data8 0 // openat + data8 0 // mkdirat + data8 0 // mknodat + data8 0 // fchownat + data8 0 // futimesat // 1285 + data8 0 // newfstatat + data8 0 // unlinkat + data8 0 // renameat + data8 0 // linkat + data8 0 // symlinkat // 1290 + data8 0 // readlinkat + data8 0 // fchmodat + data8 0 // faccessat + data8 0 + data8 0 // 1295 + data8 0 // unshare + data8 0 // splice + data8 0 // set_robust_list + data8 0 // get_robust_list + data8 0 // sync_file_range // 1300 + data8 0 // tee + data8 0 // vmsplice + data8 0 + data8 fsys_getcpu // getcpu // 1304 + + // fill in zeros for the remaining entries + .zero: + .space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0 diff --git a/kernel/arch/ia64/kernel/fsyscall_gtod_data.h b/kernel/arch/ia64/kernel/fsyscall_gtod_data.h new file mode 100644 index 000000000..146b15b5f --- /dev/null +++ b/kernel/arch/ia64/kernel/fsyscall_gtod_data.h @@ -0,0 +1,23 @@ +/* + * (c) Copyright 2007 Hewlett-Packard Development Company, L.P. + * Contributed by Peter Keilty + * + * fsyscall gettimeofday data + */ + +struct fsyscall_gtod_data_t { + seqcount_t seq; + struct timespec wall_time; + struct timespec monotonic_time; + cycle_t clk_mask; + u32 clk_mult; + u32 clk_shift; + void *clk_fsys_mmio; + cycle_t clk_cycle_last; +} ____cacheline_aligned; + +struct itc_jitter_data_t { + int itc_jitter; + cycle_t itc_lastcycle; +} ____cacheline_aligned; + diff --git a/kernel/arch/ia64/kernel/ftrace.c b/kernel/arch/ia64/kernel/ftrace.c new file mode 100644 index 000000000..3b0c2aa07 --- /dev/null +++ b/kernel/arch/ia64/kernel/ftrace.c @@ -0,0 +1,204 @@ +/* + * Dynamic function tracing support. + * + * Copyright (C) 2008 Shaohua Li + * + * For licencing details, see COPYING. + * + * Defines low-level handling of mcount calls when the kernel + * is compiled with the -pg flag. 
When using dynamic ftrace, the + * mcount call-sites get patched lazily with NOP till they are + * enabled. All code mutation routines here take effect atomically. + */ + +#include +#include + +#include +#include + +/* In IA64, each function will be added below two bundles with -pg option */ +static unsigned char __attribute__((aligned(8))) +ftrace_orig_code[MCOUNT_INSN_SIZE] = { + 0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */ + 0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */ + 0x05, 0x00, 0xc4, 0x00, /* mov r42=b0 */ + 0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */ + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */ + 0x08, 0x00, 0x00, 0x50 /* br.call.sptk.many b0 = _mcount;; */ +}; + +struct ftrace_orig_insn { + u64 dummy1, dummy2, dummy3; + u64 dummy4:64-41+13; + u64 imm20:20; + u64 dummy5:3; + u64 sign:1; + u64 dummy6:4; +}; + +/* mcount stub will be converted below for nop */ +static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */ + 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */ + 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */ + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */ + 0x00, 0x00, 0x04, 0x00 +}; + +static unsigned char *ftrace_nop_replace(void) +{ + return ftrace_nop_code; +} + +/* + * mcount stub will be converted below for call + * Note: Just the last instruction is changed against nop + * */ +static unsigned char __attribute__((aligned(8))) +ftrace_call_code[MCOUNT_INSN_SIZE] = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */ + 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */ + 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */ + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */ + 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/ + 0xf8, 0xff, 0xff, 0xc8 +}; + +struct ftrace_call_insn { + u64 dummy1, dummy2; + u64 dummy3:48; + u64 imm39_l:16; + u64 imm39_h:23; + u64 
dummy4:13; + u64 imm20:20; + u64 dummy5:3; + u64 i:1; + u64 dummy6:4; +}; + +static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +{ + struct ftrace_call_insn *code = (void *)ftrace_call_code; + unsigned long offset = addr - (ip + 0x10); + + code->imm39_l = offset >> 24; + code->imm39_h = offset >> 40; + code->imm20 = offset >> 4; + code->i = offset >> 63; + return ftrace_call_code; +} + +static int +ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code, int do_check) +{ + unsigned char replaced[MCOUNT_INSN_SIZE]; + + /* + * Note: Due to modules and __init, code can + * disappear and change, we need to protect against faulting + * as well as code changing. We do this by using the + * probe_kernel_* functions. + * + * No real locking needed, this code is run through + * kstop_machine, or before SMP starts. + */ + + if (!do_check) + goto skip_check; + + /* read the text we want to modify */ + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* Make sure it is what we expect it to be */ + if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + +skip_check: + /* replace the text with the new text */ + if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE)) + return -EPERM; + flush_icache_range(ip, ip + MCOUNT_INSN_SIZE); + + return 0; +} + +static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE]; + unsigned long ip = rec->ip; + + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + if (rec->flags & FTRACE_FL_CONVERTED) { + struct ftrace_call_insn *call_insn, *tmp_call; + + call_insn = (void *)ftrace_call_code; + tmp_call = (void *)replaced; + call_insn->imm39_l = tmp_call->imm39_l; + call_insn->imm39_h = tmp_call->imm39_h; + call_insn->imm20 = tmp_call->imm20; + call_insn->i = tmp_call->i; + if (memcmp(replaced, 
ftrace_call_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + return 0; + } else { + struct ftrace_orig_insn *call_insn, *tmp_call; + + call_insn = (void *)ftrace_orig_code; + tmp_call = (void *)replaced; + call_insn->sign = tmp_call->sign; + call_insn->imm20 = tmp_call->imm20; + if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + return 0; + } +} + +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + int ret; + char *new; + + ret = ftrace_make_nop_check(rec, addr); + if (ret) + return ret; + new = ftrace_nop_replace(); + return ftrace_modify_code(rec->ip, NULL, new, 0); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip = rec->ip; + unsigned char *old, *new; + + old= ftrace_nop_replace(); + new = ftrace_call_replace(ip, addr); + return ftrace_modify_code(ip, old, new, 1); +} + +/* in IA64, _mcount can't directly call ftrace_stub. Only jump is ok */ +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip; + unsigned long addr = ((struct fnptr *)ftrace_call)->ip; + + if (func == ftrace_stub) + return 0; + ip = ((struct fnptr *)func)->ip; + + ia64_patch_imm64(addr + 2, ip); + + flush_icache_range(addr, addr + 16); + return 0; +} + +/* run from kstop_machine */ +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} diff --git a/kernel/arch/ia64/kernel/gate-data.S b/kernel/arch/ia64/kernel/gate-data.S new file mode 100644 index 000000000..b3ef1c72e --- /dev/null +++ b/kernel/arch/ia64/kernel/gate-data.S @@ -0,0 +1,3 @@ + .section .data..gate, "aw" + + .incbin "arch/ia64/kernel/gate.so" diff --git a/kernel/arch/ia64/kernel/gate.S b/kernel/arch/ia64/kernel/gate.S new file mode 100644 index 000000000..b5f8bdd86 --- /dev/null +++ b/kernel/arch/ia64/kernel/gate.S @@ -0,0 +1,386 @@ +/* + * This file contains the code that gets mapped at the upper end of each task's text + * region. For now, it contains the signal trampoline code only. 
+ * + * Copyright (C) 1999-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + + +#include +#include +#include +#include +#include +#include +#include +#include "paravirt_inst.h" + +/* + * We can't easily refer to symbols inside the kernel. To avoid full runtime relocation, + * complications with the linker (which likes to create PLT stubs for branches + * to targets outside the shared object) and to avoid multi-phase kernel builds, we + * simply create minimalistic "patch lists" in special ELF sections. + */ + .section ".data..patch.fsyscall_table", "a" + .previous +#define LOAD_FSYSCALL_TABLE(reg) \ +[1:] movl reg=0; \ + .xdata4 ".data..patch.fsyscall_table", 1b-. + + .section ".data..patch.brl_fsys_bubble_down", "a" + .previous +#define BRL_COND_FSYS_BUBBLE_DOWN(pr) \ +[1:](pr)brl.cond.sptk 0; \ + ;; \ + .xdata4 ".data..patch.brl_fsys_bubble_down", 1b-. + +GLOBAL_ENTRY(__kernel_syscall_via_break) + .prologue + .altrp b6 + .body + /* + * Note: for (fast) syscall restart to work, the break instruction must be + * the first one in the bundle addressed by syscall_via_break. 
+ */ +{ .mib + break 0x100000 + nop.i 0 + br.ret.sptk.many b6 +} +END(__kernel_syscall_via_break) + +# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) +# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET) +# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET) +# define SIGHANDLER_OFF (16 + IA64_SIGFRAME_HANDLER_OFFSET) +# define SIGCONTEXT_OFF (16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET) + +# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET +# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET +# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET +# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET +# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET +# define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET +# define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET +# define PR_OFF IA64_SIGCONTEXT_PR_OFFSET +# define RP_OFF IA64_SIGCONTEXT_IP_OFFSET +# define SP_OFF IA64_SIGCONTEXT_R12_OFFSET +# define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET +# define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET +# define base0 r2 +# define base1 r3 + /* + * When we get here, the memory stack looks like this: + * + * +===============================+ + * | | + * // struct sigframe // + * | | + * +-------------------------------+ <-- sp+16 + * | 16 byte of scratch | + * | space | + * +-------------------------------+ <-- sp + * + * The register stack looks _exactly_ the way it looked at the time the signal + * occurred. In other words, we're treading on a potential mine-field: each + * incoming general register may be a NaT value (including sp, in which case the + * process ends up dying with a SIGSEGV). + * + * The first thing need to do is a cover to get the registers onto the backing + * store. Once that is done, we invoke the signal handler which may modify some + * of the machine state. After returning from the signal handler, we return + * control to the previous context by executing a sigreturn system call. A signal + * handler may call the rt_sigreturn() function to directly return to a given + * sigcontext. 
However, the user-level sigreturn() needs to do much more than + * calling the rt_sigreturn() system call as it needs to unwind the stack to + * restore preserved registers that may have been saved on the signal handler's + * call stack. + */ + +#define SIGTRAMP_SAVES \ + .unwabi 3, 's'; /* mark this as a sigtramp handler (saves scratch regs) */ \ + .unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */ \ + .savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF; \ + .savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF; \ + .savesp pr, PR_OFF+SIGCONTEXT_OFF; \ + .savesp rp, RP_OFF+SIGCONTEXT_OFF; \ + .savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF; \ + .vframesp SP_OFF+SIGCONTEXT_OFF + +GLOBAL_ENTRY(__kernel_sigtramp) + // describe the state that is active when we get here: + .prologue + SIGTRAMP_SAVES + .body + + .label_state 1 + + adds base0=SIGHANDLER_OFF,sp + adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp + br.call.sptk.many rp=1f +1: + ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF) // get pointer to signal handler's plabel + ld8 r15=[base1] // get address of new RBS base (or NULL) + cover // push args in interrupted frame onto backing store + ;; + cmp.ne p1,p0=r15,r0 // do we need to switch rbs? 
(note: pr is saved by kernel) + mov.m r9=ar.bsp // fetch ar.bsp + .spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF +(p1) br.cond.spnt setup_rbs // yup -> (clobbers p8, r14-r16, and r18-r20) +back_from_setup_rbs: + alloc r8=ar.pfs,0,0,3,0 + ld8 out0=[base0],16 // load arg0 (signum) + adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1 + ;; + ld8 out1=[base1] // load arg1 (siginfop) + ld8 r10=[r17],8 // get signal handler entry point + ;; + ld8 out2=[base0] // load arg2 (sigcontextp) + ld8 gp=[r17] // get signal handler's global pointer + adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp + ;; + .spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF + st8 [base0]=r9 // save sc_ar_bsp + adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp + adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp + ;; + stf.spill [base0]=f6,32 + stf.spill [base1]=f7,32 + ;; + stf.spill [base0]=f8,32 + stf.spill [base1]=f9,32 + mov b6=r10 + ;; + stf.spill [base0]=f10,32 + stf.spill [base1]=f11,32 + ;; + stf.spill [base0]=f12,32 + stf.spill [base1]=f13,32 + ;; + stf.spill [base0]=f14,32 + stf.spill [base1]=f15,32 + br.call.sptk.many rp=b6 // call the signal handler +.ret0: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp + ;; + ld8 r15=[base0] // fetch sc_ar_bsp + mov r14=ar.bsp + ;; + cmp.ne p1,p0=r14,r15 // do we need to restore the rbs? 
+(p1) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7) + ;; +back_from_restore_rbs: + adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp + adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp + ;; + ldf.fill f6=[base0],32 + ldf.fill f7=[base1],32 + ;; + ldf.fill f8=[base0],32 + ldf.fill f9=[base1],32 + ;; + ldf.fill f10=[base0],32 + ldf.fill f11=[base1],32 + ;; + ldf.fill f12=[base0],32 + ldf.fill f13=[base1],32 + ;; + ldf.fill f14=[base0],32 + ldf.fill f15=[base1],32 + mov r15=__NR_rt_sigreturn + .restore sp // pop .prologue + break __BREAK_SYSCALL + + .prologue + SIGTRAMP_SAVES +setup_rbs: + mov ar.rsc=0 // put RSE into enforced lazy mode + ;; + .save ar.rnat, r19 + mov r19=ar.rnat // save RNaT before switching backing store area + adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp + + mov r18=ar.bspstore + mov ar.bspstore=r15 // switch over to new register backing store area + ;; + + .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF + st8 [r14]=r19 // save sc_ar_rnat + .body + mov.m r16=ar.bsp // sc_loadrs <- (new bsp - new bspstore) << 16 + adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp + ;; + invala + sub r15=r16,r15 + extr.u r20=r18,3,6 + ;; + mov ar.rsc=0xf // set RSE into eager mode, pl 3 + cmp.eq p8,p0=63,r20 + shl r15=r15,16 + ;; + st8 [r14]=r15 // save sc_loadrs +(p8) st8 [r18]=r19 // if bspstore points at RNaT slot, store RNaT there now + .restore sp // pop .prologue + br.cond.sptk back_from_setup_rbs + + .prologue + SIGTRAMP_SAVES + .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF + .body +restore_rbs: + // On input: + // r14 = bsp1 (bsp at the time of return from signal handler) + // r15 = bsp0 (bsp at the time the signal occurred) + // + // Here, we need to calculate bspstore0, the value that ar.bspstore needs + // to be set to, based on bsp0 and the size of the dirty partition on + // the alternate stack (sc_loadrs >> 16). 
This can be done with the + // following algorithm: + // + // bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1)); + // + // This is what the code below does. + // + alloc r2=ar.pfs,0,0,0,0 // alloc null frame + adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp + adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp + ;; + ld8 r17=[r16] + ld8 r16=[r18] // get new rnat + extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0) + ;; + mov ar.rsc=r17 // put RSE into enforced lazy mode + shr.u r17=r17,16 + ;; + sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16) + shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19) + ;; + loadrs // restore dirty partition + extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1) + ;; + add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19) + ;; + shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40 + ;; + sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1) + movl r17=0x8208208208208209 + ;; + add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1) + setf.sig f7=r17 + cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)? 
+ ;; +(p7) adds r18=-62,r18 // delta -= 62 + ;; + setf.sig f6=r18 + ;; + xmpy.h f6=f6,f7 + ;; + getf.sig r17=f6 + ;; + add r17=r17,r18 + shr r18=r18,63 + ;; + shr r17=r17,5 + ;; + sub r17=r17,r18 // r17 = delta/63 + ;; + add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1) + ;; + shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1)) + ;; + mov ar.bspstore=r15 // switch back to old register backing store area + ;; + mov ar.rnat=r16 // restore RNaT + mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc) + // invala not necessary as that will happen when returning to user-mode + br.cond.sptk back_from_restore_rbs +END(__kernel_sigtramp) + +/* + * On entry: + * r11 = saved ar.pfs + * r15 = system call # + * b0 = saved return address + * b6 = return address + * On exit: + * r11 = saved ar.pfs + * r15 = system call # + * b0 = saved return address + * all other "scratch" registers: undefined + * all "preserved" registers: same as on entry + */ + +GLOBAL_ENTRY(__kernel_syscall_via_epc) + .prologue + .altrp b6 + .body +{ + /* + * Note: the kernel cannot assume that the first two instructions in this + * bundle get executed. The remaining code must be safe even if + * they do not get executed. + */ + adds r17=-1024,r15 // A + mov r10=0 // A default to successful syscall execution + epc // B causes split-issue +} + ;; + RSM_PSR_BE_I(r20, r22) // M2 (5 cyc to srlz.d) + LOAD_FSYSCALL_TABLE(r14) // X + ;; + mov r16=IA64_KR(CURRENT) // M2 (12 cyc) + shladd r18=r17,3,r14 // A + mov r19=NR_syscalls-1 // A + ;; + lfetch [r18] // M0|1 + MOV_FROM_PSR(p0, r29, r8) // M2 (12 cyc) + // If r17 is a NaT, p6 will be zero + cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)? + ;; + mov r21=ar.fpsr // M2 (12 cyc) + tnat.nz p10,p9=r15 // I0 + mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...) 
+ ;; + srlz.d // M0 (forces split-issue) ensure PSR.BE==0 +(p6) ld8 r18=[r18] // M0|1 + nop.i 0 + ;; + nop.m 0 +(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!) + nop.i 0 + ;; + SSM_PSR_I(p8, p14, r25) +(p6) mov b7=r18 // I0 +(p8) br.dptk.many b7 // B + + mov r27=ar.rsc // M2 (12 cyc) +/* + * brl.cond doesn't work as intended because the linker would convert this branch + * into a branch to a PLT. Perhaps there will be a way to avoid this with some + * future version of the linker. In the meantime, we just use an indirect branch + * instead. + */ +#ifdef CONFIG_ITANIUM +(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry + ;; +(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down + ;; +(p6) mov b7=r14 +(p6) br.sptk.many b7 +#else + BRL_COND_FSYS_BUBBLE_DOWN(p6) +#endif + SSM_PSR_I(p0, p14, r10) + mov r10=-1 +(p10) mov r8=EINVAL +(p9) mov r8=ENOSYS + FSYS_RETURN + +#ifdef CONFIG_PARAVIRT + /* + * pad to make the size of this symbol constant + * independent of paravirtualization. + */ + .align PAGE_SIZE / 8 +#endif +END(__kernel_syscall_via_epc) diff --git a/kernel/arch/ia64/kernel/gate.lds.S b/kernel/arch/ia64/kernel/gate.lds.S new file mode 100644 index 000000000..e518f7902 --- /dev/null +++ b/kernel/arch/ia64/kernel/gate.lds.S @@ -0,0 +1,108 @@ +/* + * Linker script for gate DSO. The gate pages are an ELF shared object + * prelinked to its virtual address, with only one read-only segment and + * one execute-only segment (both fit in one page). This script controls + * its layout. + */ + +#include +#include "paravirt_patchlist.h" + +SECTIONS +{ + . 
= GATE_ADDR + SIZEOF_HEADERS; + + .hash : { *(.hash) } :readable + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note*) } :readable :note + + .dynamic : { *(.dynamic) } :readable :dynamic + + /* + * This linker script is used both with -r and with -shared. For + * the layouts to match, we need to skip more than enough space for + * the dynamic symbol table et al. If this amount is insufficient, + * ld -shared will barf. Just increase it here. + */ + . = GATE_ADDR + 0x600; + + .data..patch : { + __paravirt_start_gate_mckinley_e9_patchlist = .; + *(.data..patch.mckinley_e9) + __paravirt_end_gate_mckinley_e9_patchlist = .; + + __paravirt_start_gate_vtop_patchlist = .; + *(.data..patch.vtop) + __paravirt_end_gate_vtop_patchlist = .; + + __paravirt_start_gate_fsyscall_patchlist = .; + *(.data..patch.fsyscall_table) + __paravirt_end_gate_fsyscall_patchlist = .; + + __paravirt_start_gate_brl_fsys_bubble_down_patchlist = .; + *(.data..patch.brl_fsys_bubble_down) + __paravirt_end_gate_brl_fsys_bubble_down_patchlist = .; + } :readable + + .IA_64.unwind_info : { *(.IA_64.unwind_info*) } + .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind +#ifdef HAVE_BUGGY_SEGREL + .text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable +#else + . = ALIGN(PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1)); + .text : { *(.text) *(.text.*) } :epc +#endif + + /DISCARD/ : { + *(.got.plt) *(.got) + *(.data .data.* .gnu.linkonce.d.*) + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + *(__ex_table) + *(__mca_table) + } +} + +/* + * ld does not recognize this name token; use the constant. + */ +#define PT_IA_64_UNWIND 0x70000001 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. 
+ */ +PHDRS +{ + readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */ +#ifndef HAVE_BUGGY_SEGREL + epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */ +#endif + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + unwind PT_IA_64_UNWIND; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + LINUX_2.5 { + global: + __kernel_syscall_via_break; + __kernel_syscall_via_epc; + __kernel_sigtramp; + + local: *; + }; +} + +/* The ELF entry point can be used to set the AT_SYSINFO value. */ +ENTRY(__kernel_syscall_via_epc) diff --git a/kernel/arch/ia64/kernel/head.S b/kernel/arch/ia64/kernel/head.S new file mode 100644 index 000000000..a4acddad0 --- /dev/null +++ b/kernel/arch/ia64/kernel/head.S @@ -0,0 +1,1212 @@ +/* + * Here is where the ball gets rolling as far as the kernel is concerned. + * When control is transferred to _start, the bootloader has already + * loaded us to the correct address. All that's left to do here is + * to set up the kernel's global pointer and jump to the kernel + * entry point. + * + * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999 Intel Corp. + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + * Copyright (C) 2002 Fenghua Yu + * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2. 
+ * Copyright (C) 2004 Ashok Raj + * Support for CPU Hotplug + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_HOTPLUG_CPU +#define SAL_PSR_BITS_TO_SET \ + (IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_MFH | IA64_PSR_MFL) + +#define SAVE_FROM_REG(src, ptr, dest) \ + mov dest=src;; \ + st8 [ptr]=dest,0x08 + +#define RESTORE_REG(reg, ptr, _tmp) \ + ld8 _tmp=[ptr],0x08;; \ + mov reg=_tmp + +#define SAVE_BREAK_REGS(ptr, _idx, _breg, _dest)\ + mov ar.lc=IA64_NUM_DBG_REGS-1;; \ + mov _idx=0;; \ +1: \ + SAVE_FROM_REG(_breg[_idx], ptr, _dest);; \ + add _idx=1,_idx;; \ + br.cloop.sptk.many 1b + +#define RESTORE_BREAK_REGS(ptr, _idx, _breg, _tmp, _lbl)\ + mov ar.lc=IA64_NUM_DBG_REGS-1;; \ + mov _idx=0;; \ +_lbl: RESTORE_REG(_breg[_idx], ptr, _tmp);; \ + add _idx=1, _idx;; \ + br.cloop.sptk.many _lbl + +#define SAVE_ONE_RR(num, _reg, _tmp) \ + movl _tmp=(num<<61);; \ + mov _reg=rr[_tmp] + +#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ + SAVE_ONE_RR(0,_r0, _tmp);; \ + SAVE_ONE_RR(1,_r1, _tmp);; \ + SAVE_ONE_RR(2,_r2, _tmp);; \ + SAVE_ONE_RR(3,_r3, _tmp);; \ + SAVE_ONE_RR(4,_r4, _tmp);; \ + SAVE_ONE_RR(5,_r5, _tmp);; \ + SAVE_ONE_RR(6,_r6, _tmp);; \ + SAVE_ONE_RR(7,_r7, _tmp);; + +#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ + st8 [ptr]=_r0, 8;; \ + st8 [ptr]=_r1, 8;; \ + st8 [ptr]=_r2, 8;; \ + st8 [ptr]=_r3, 8;; \ + st8 [ptr]=_r4, 8;; \ + st8 [ptr]=_r5, 8;; \ + st8 [ptr]=_r6, 8;; \ + st8 [ptr]=_r7, 8;; + +#define RESTORE_REGION_REGS(ptr, _idx1, _idx2, _tmp) \ + mov ar.lc=0x08-1;; \ + movl _idx1=0x00;; \ +RestRR: \ + dep.z _idx2=_idx1,61,3;; \ + ld8 _tmp=[ptr],8;; \ + mov rr[_idx2]=_tmp;; \ + srlz.d;; \ + add _idx1=1,_idx1;; \ + br.cloop.sptk.few RestRR + +#define SET_AREA_FOR_BOOTING_CPU(reg1, reg2) \ + movl reg1=sal_state_for_booting_cpu;; \ + ld8 reg2=[reg1];; + +/* + * Adjust region registers saved before starting to save + 
* break regs and rest of the states that need to be preserved. + */ +#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(_reg1,_reg2,_pred) \ + SAVE_FROM_REG(b0,_reg1,_reg2);; \ + SAVE_FROM_REG(b1,_reg1,_reg2);; \ + SAVE_FROM_REG(b2,_reg1,_reg2);; \ + SAVE_FROM_REG(b3,_reg1,_reg2);; \ + SAVE_FROM_REG(b4,_reg1,_reg2);; \ + SAVE_FROM_REG(b5,_reg1,_reg2);; \ + st8 [_reg1]=r1,0x08;; \ + st8 [_reg1]=r12,0x08;; \ + st8 [_reg1]=r13,0x08;; \ + SAVE_FROM_REG(ar.fpsr,_reg1,_reg2);; \ + SAVE_FROM_REG(ar.pfs,_reg1,_reg2);; \ + SAVE_FROM_REG(ar.rnat,_reg1,_reg2);; \ + SAVE_FROM_REG(ar.unat,_reg1,_reg2);; \ + SAVE_FROM_REG(ar.bspstore,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.dcr,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.iva,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.pta,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.itv,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.pmv,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.cmcv,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.lrr0,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.lrr1,_reg1,_reg2);; \ + st8 [_reg1]=r4,0x08;; \ + st8 [_reg1]=r5,0x08;; \ + st8 [_reg1]=r6,0x08;; \ + st8 [_reg1]=r7,0x08;; \ + st8 [_reg1]=_pred,0x08;; \ + SAVE_FROM_REG(ar.lc, _reg1, _reg2);; \ + stf.spill.nta [_reg1]=f2,16;; \ + stf.spill.nta [_reg1]=f3,16;; \ + stf.spill.nta [_reg1]=f4,16;; \ + stf.spill.nta [_reg1]=f5,16;; \ + stf.spill.nta [_reg1]=f16,16;; \ + stf.spill.nta [_reg1]=f17,16;; \ + stf.spill.nta [_reg1]=f18,16;; \ + stf.spill.nta [_reg1]=f19,16;; \ + stf.spill.nta [_reg1]=f20,16;; \ + stf.spill.nta [_reg1]=f21,16;; \ + stf.spill.nta [_reg1]=f22,16;; \ + stf.spill.nta [_reg1]=f23,16;; \ + stf.spill.nta [_reg1]=f24,16;; \ + stf.spill.nta [_reg1]=f25,16;; \ + stf.spill.nta [_reg1]=f26,16;; \ + stf.spill.nta [_reg1]=f27,16;; \ + stf.spill.nta [_reg1]=f28,16;; \ + stf.spill.nta [_reg1]=f29,16;; \ + stf.spill.nta [_reg1]=f30,16;; \ + stf.spill.nta [_reg1]=f31,16;; + +#else +#define SET_AREA_FOR_BOOTING_CPU(a1, a2) +#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(a1,a2, a3) +#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, 
_r7) +#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) +#endif + +#define SET_ONE_RR(num, pgsize, _tmp1, _tmp2, vhpt) \ + movl _tmp1=(num << 61);; \ + mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \ + mov rr[_tmp1]=_tmp2 + + __PAGE_ALIGNED_DATA + + .global empty_zero_page +empty_zero_page: + .skip PAGE_SIZE + + .global swapper_pg_dir +swapper_pg_dir: + .skip PAGE_SIZE + + .rodata +halt_msg: + stringz "Halting kernel\n" + + __REF + + .global start_ap + + /* + * Start the kernel. When the bootloader passes control to _start(), r28 + * points to the address of the boot parameter area. Execution reaches + * here in physical mode. + */ +GLOBAL_ENTRY(_start) +start_ap: + .prologue + .save rp, r0 // terminate unwind chain with a NULL rp + .body + + rsm psr.i | psr.ic + ;; + srlz.i + ;; + { + flushrs // must be first insn in group + srlz.i + } + ;; + /* + * Save the region registers, predicate before they get clobbered + */ + SAVE_REGION_REGS(r2, r8,r9,r10,r11,r12,r13,r14,r15); + mov r25=pr;; + + /* + * Initialize kernel region registers: + * rr[0]: VHPT enabled, page size = PAGE_SHIFT + * rr[1]: VHPT enabled, page size = PAGE_SHIFT + * rr[2]: VHPT enabled, page size = PAGE_SHIFT + * rr[3]: VHPT enabled, page size = PAGE_SHIFT + * rr[4]: VHPT enabled, page size = PAGE_SHIFT + * rr[5]: VHPT enabled, page size = PAGE_SHIFT + * rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT + * rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT + * We initialize all of them to prevent inadvertently assuming + * something about the state of address translation early in boot. 
+ */ + SET_ONE_RR(0, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(1, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(2, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(3, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(4, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(5, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(6, IA64_GRANULE_SHIFT, r2, r16, 0);; + SET_ONE_RR(7, IA64_GRANULE_SHIFT, r2, r16, 0);; + /* + * Now pin mappings into the TLB for kernel text and data + */ + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + movl r17=KERNEL_START + ;; + mov cr.itir=r18 + mov cr.ifa=r17 + mov r16=IA64_TR_KERNEL + mov r3=ip + movl r18=PAGE_KERNEL + ;; + dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT + ;; + or r18=r2,r18 + ;; + srlz.i + ;; + itr.i itr[r16]=r18 + ;; + itr.d dtr[r16]=r18 + ;; + srlz.i + + /* + * Switch into virtual mode: + */ + movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \ + |IA64_PSR_DI) + ;; + mov cr.ipsr=r16 + movl r17=1f + ;; + mov cr.iip=r17 + mov cr.ifs=r0 + ;; + rfi + ;; +1: // now we are in virtual mode + + SET_AREA_FOR_BOOTING_CPU(r2, r16); + + STORE_REGION_REGS(r16, r8,r9,r10,r11,r12,r13,r14,r15); + SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(r16,r17,r25) + ;; + + // set IVT entry point---can't access I/O ports without it + movl r3=ia64_ivt + ;; + mov cr.iva=r3 + movl r2=FPSR_DEFAULT + ;; + srlz.i + movl gp=__gp + + mov ar.fpsr=r2 + ;; + +#define isAP p2 // are we an Application Processor? +#define isBP p3 // are we the Bootstrap Processor? + +#ifdef CONFIG_SMP + /* + * Find the init_task for the currently booting CPU. At poweron, and in + * UP mode, task_for_booting_cpu is NULL. 
+ */ + movl r3=task_for_booting_cpu + ;; + ld8 r3=[r3] + movl r2=init_task + ;; + cmp.eq isBP,isAP=r3,r0 + ;; +(isAP) mov r2=r3 +#else + movl r2=init_task + cmp.eq isBP,isAP=r0,r0 +#endif + ;; + tpa r3=r2 // r3 == phys addr of task struct + mov r16=-1 +(isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it + + // load mapping for stack (virtaddr in r2, physaddr in r3) + rsm psr.ic + movl r17=PAGE_KERNEL + ;; + srlz.d + dep r18=0,r3,0,12 + ;; + or r18=r17,r18 + dep r2=-1,r3,61,3 // IMVA of task + ;; + mov r17=rr[r2] + shr.u r16=r3,IA64_GRANULE_SHIFT + ;; + dep r17=0,r17,8,24 + ;; + mov cr.itir=r17 + mov cr.ifa=r2 + + mov r19=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r19]=r18 + ;; + ssm psr.ic + srlz.d + ;; + +.load_current: + // load the "current" pointer (r13) and ar.k6 with the current task + mov IA64_KR(CURRENT)=r2 // virtual address + mov IA64_KR(CURRENT_STACK)=r16 + mov r13=r2 + /* + * Reserve space at the top of the stack for "struct pt_regs". Kernel + * threads don't store interesting values in that structure, but the space + * still needs to be there because time-critical stuff such as the context + * switching can be implemented more efficiently (for example, __switch_to() + * always sets the psr.dfh bit of the task it is switching to). 
+ */ + + addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2 + addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE + mov ar.rsc=0 // place RSE in enforced lazy mode + ;; + loadrs // clear the dirty partition + movl r19=__phys_per_cpu_start + mov r18=PERCPU_PAGE_SIZE + ;; +#ifndef CONFIG_SMP + add r19=r19,r18 + ;; +#else +(isAP) br.few 2f + movl r20=__cpu0_per_cpu + ;; + shr.u r18=r18,3 +1: + ld8 r21=[r19],8;; + st8[r20]=r21,8 + adds r18=-1,r18;; + cmp4.lt p7,p6=0,r18 +(p7) br.cond.dptk.few 1b + mov r19=r20 + ;; +2: +#endif + tpa r19=r19 + ;; + .pred.rel.mutex isBP,isAP +(isBP) mov IA64_KR(PER_CPU_DATA)=r19 // per-CPU base for cpu0 +(isAP) mov IA64_KR(PER_CPU_DATA)=r0 // clear physical per-CPU base + ;; + mov ar.bspstore=r2 // establish the new RSE stack + ;; + mov ar.rsc=0x3 // place RSE in eager mode + +(isBP) dep r28=-1,r28,61,3 // make address virtual +(isBP) movl r2=ia64_boot_param + ;; +(isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader + +#ifdef CONFIG_PARAVIRT + + movl r14=hypervisor_setup_hooks + movl r15=hypervisor_type + mov r16=num_hypervisor_hooks + ;; + ld8 r2=[r15] + ;; + cmp.ltu p7,p0=r2,r16 // array size check + shladd r8=r2,3,r14 + ;; +(p7) ld8 r9=[r8] + ;; +(p7) mov b1=r9 +(p7) cmp.ne.unc p7,p0=r9,r0 // no actual branch to NULL + ;; +(p7) br.call.sptk.many rp=b1 + + __INITDATA + +default_setup_hook = 0 // Currently nothing needs to be done. + + .global hypervisor_type +hypervisor_type: + data8 PARAVIRT_HYPERVISOR_TYPE_DEFAULT + + // must have the same order with PARAVIRT_HYPERVISOR_TYPE_xxx + +hypervisor_setup_hooks: + data8 default_setup_hook +num_hypervisor_hooks = (. 
- hypervisor_setup_hooks) / 8 + .previous + +#endif + +#ifdef CONFIG_SMP +(isAP) br.call.sptk.many rp=start_secondary +.ret0: +(isAP) br.cond.sptk self +#endif + + // This is executed by the bootstrap processor (bsp) only: + +#ifdef CONFIG_IA64_FW_EMU + // initialize PAL & SAL emulator: + br.call.sptk.many rp=sys_fw_init +.ret1: +#endif + br.call.sptk.many rp=start_kernel +.ret2: addl r3=@ltoff(halt_msg),gp + ;; + alloc r2=ar.pfs,8,0,2,0 + ;; + ld8 out0=[r3] + br.call.sptk.many b0=console_print + +self: hint @pause + br.sptk.many self // endless loop +END(_start) + + .text + +GLOBAL_ENTRY(ia64_save_debug_regs) + alloc r16=ar.pfs,1,0,0,0 + mov r20=ar.lc // preserve ar.lc + mov ar.lc=IA64_NUM_DBG_REGS-1 + mov r18=0 + add r19=IA64_NUM_DBG_REGS*8,in0 + ;; +1: mov r16=dbr[r18] +#ifdef CONFIG_ITANIUM + ;; + srlz.d +#endif + mov r17=ibr[r18] + add r18=1,r18 + ;; + st8.nta [in0]=r16,8 + st8.nta [r19]=r17,8 + br.cloop.sptk.many 1b + ;; + mov ar.lc=r20 // restore ar.lc + br.ret.sptk.many rp +END(ia64_save_debug_regs) + +GLOBAL_ENTRY(ia64_load_debug_regs) + alloc r16=ar.pfs,1,0,0,0 + lfetch.nta [in0] + mov r20=ar.lc // preserve ar.lc + add r19=IA64_NUM_DBG_REGS*8,in0 + mov ar.lc=IA64_NUM_DBG_REGS-1 + mov r18=-1 + ;; +1: ld8.nta r16=[in0],8 + ld8.nta r17=[r19],8 + add r18=1,r18 + ;; + mov dbr[r18]=r16 +#ifdef CONFIG_ITANIUM + ;; + srlz.d // Errata 132 (NoFix status) +#endif + mov ibr[r18]=r17 + br.cloop.sptk.many 1b + ;; + mov ar.lc=r20 // restore ar.lc + br.ret.sptk.many rp +END(ia64_load_debug_regs) + +GLOBAL_ENTRY(__ia64_save_fpu) + alloc r2=ar.pfs,1,4,0,0 + adds loc0=96*16-16,in0 + adds loc1=96*16-16-128,in0 + ;; + stf.spill.nta [loc0]=f127,-256 + stf.spill.nta [loc1]=f119,-256 + ;; + stf.spill.nta [loc0]=f111,-256 + stf.spill.nta [loc1]=f103,-256 + ;; + stf.spill.nta [loc0]=f95,-256 + stf.spill.nta [loc1]=f87,-256 + ;; + stf.spill.nta [loc0]=f79,-256 + stf.spill.nta [loc1]=f71,-256 + ;; + stf.spill.nta [loc0]=f63,-256 + stf.spill.nta [loc1]=f55,-256 + adds 
loc2=96*16-32,in0 + ;; + stf.spill.nta [loc0]=f47,-256 + stf.spill.nta [loc1]=f39,-256 + adds loc3=96*16-32-128,in0 + ;; + stf.spill.nta [loc2]=f126,-256 + stf.spill.nta [loc3]=f118,-256 + ;; + stf.spill.nta [loc2]=f110,-256 + stf.spill.nta [loc3]=f102,-256 + ;; + stf.spill.nta [loc2]=f94,-256 + stf.spill.nta [loc3]=f86,-256 + ;; + stf.spill.nta [loc2]=f78,-256 + stf.spill.nta [loc3]=f70,-256 + ;; + stf.spill.nta [loc2]=f62,-256 + stf.spill.nta [loc3]=f54,-256 + adds loc0=96*16-48,in0 + ;; + stf.spill.nta [loc2]=f46,-256 + stf.spill.nta [loc3]=f38,-256 + adds loc1=96*16-48-128,in0 + ;; + stf.spill.nta [loc0]=f125,-256 + stf.spill.nta [loc1]=f117,-256 + ;; + stf.spill.nta [loc0]=f109,-256 + stf.spill.nta [loc1]=f101,-256 + ;; + stf.spill.nta [loc0]=f93,-256 + stf.spill.nta [loc1]=f85,-256 + ;; + stf.spill.nta [loc0]=f77,-256 + stf.spill.nta [loc1]=f69,-256 + ;; + stf.spill.nta [loc0]=f61,-256 + stf.spill.nta [loc1]=f53,-256 + adds loc2=96*16-64,in0 + ;; + stf.spill.nta [loc0]=f45,-256 + stf.spill.nta [loc1]=f37,-256 + adds loc3=96*16-64-128,in0 + ;; + stf.spill.nta [loc2]=f124,-256 + stf.spill.nta [loc3]=f116,-256 + ;; + stf.spill.nta [loc2]=f108,-256 + stf.spill.nta [loc3]=f100,-256 + ;; + stf.spill.nta [loc2]=f92,-256 + stf.spill.nta [loc3]=f84,-256 + ;; + stf.spill.nta [loc2]=f76,-256 + stf.spill.nta [loc3]=f68,-256 + ;; + stf.spill.nta [loc2]=f60,-256 + stf.spill.nta [loc3]=f52,-256 + adds loc0=96*16-80,in0 + ;; + stf.spill.nta [loc2]=f44,-256 + stf.spill.nta [loc3]=f36,-256 + adds loc1=96*16-80-128,in0 + ;; + stf.spill.nta [loc0]=f123,-256 + stf.spill.nta [loc1]=f115,-256 + ;; + stf.spill.nta [loc0]=f107,-256 + stf.spill.nta [loc1]=f99,-256 + ;; + stf.spill.nta [loc0]=f91,-256 + stf.spill.nta [loc1]=f83,-256 + ;; + stf.spill.nta [loc0]=f75,-256 + stf.spill.nta [loc1]=f67,-256 + ;; + stf.spill.nta [loc0]=f59,-256 + stf.spill.nta [loc1]=f51,-256 + adds loc2=96*16-96,in0 + ;; + stf.spill.nta [loc0]=f43,-256 + stf.spill.nta [loc1]=f35,-256 + adds 
loc3=96*16-96-128,in0 + ;; + stf.spill.nta [loc2]=f122,-256 + stf.spill.nta [loc3]=f114,-256 + ;; + stf.spill.nta [loc2]=f106,-256 + stf.spill.nta [loc3]=f98,-256 + ;; + stf.spill.nta [loc2]=f90,-256 + stf.spill.nta [loc3]=f82,-256 + ;; + stf.spill.nta [loc2]=f74,-256 + stf.spill.nta [loc3]=f66,-256 + ;; + stf.spill.nta [loc2]=f58,-256 + stf.spill.nta [loc3]=f50,-256 + adds loc0=96*16-112,in0 + ;; + stf.spill.nta [loc2]=f42,-256 + stf.spill.nta [loc3]=f34,-256 + adds loc1=96*16-112-128,in0 + ;; + stf.spill.nta [loc0]=f121,-256 + stf.spill.nta [loc1]=f113,-256 + ;; + stf.spill.nta [loc0]=f105,-256 + stf.spill.nta [loc1]=f97,-256 + ;; + stf.spill.nta [loc0]=f89,-256 + stf.spill.nta [loc1]=f81,-256 + ;; + stf.spill.nta [loc0]=f73,-256 + stf.spill.nta [loc1]=f65,-256 + ;; + stf.spill.nta [loc0]=f57,-256 + stf.spill.nta [loc1]=f49,-256 + adds loc2=96*16-128,in0 + ;; + stf.spill.nta [loc0]=f41,-256 + stf.spill.nta [loc1]=f33,-256 + adds loc3=96*16-128-128,in0 + ;; + stf.spill.nta [loc2]=f120,-256 + stf.spill.nta [loc3]=f112,-256 + ;; + stf.spill.nta [loc2]=f104,-256 + stf.spill.nta [loc3]=f96,-256 + ;; + stf.spill.nta [loc2]=f88,-256 + stf.spill.nta [loc3]=f80,-256 + ;; + stf.spill.nta [loc2]=f72,-256 + stf.spill.nta [loc3]=f64,-256 + ;; + stf.spill.nta [loc2]=f56,-256 + stf.spill.nta [loc3]=f48,-256 + ;; + stf.spill.nta [loc2]=f40 + stf.spill.nta [loc3]=f32 + br.ret.sptk.many rp +END(__ia64_save_fpu) + +GLOBAL_ENTRY(__ia64_load_fpu) + alloc r2=ar.pfs,1,2,0,0 + adds r3=128,in0 + adds r14=256,in0 + adds r15=384,in0 + mov loc0=512 + mov loc1=-1024+16 + ;; + ldf.fill.nta f32=[in0],loc0 + ldf.fill.nta f40=[ r3],loc0 + ldf.fill.nta f48=[r14],loc0 + ldf.fill.nta f56=[r15],loc0 + ;; + ldf.fill.nta f64=[in0],loc0 + ldf.fill.nta f72=[ r3],loc0 + ldf.fill.nta f80=[r14],loc0 + ldf.fill.nta f88=[r15],loc0 + ;; + ldf.fill.nta f96=[in0],loc1 + ldf.fill.nta f104=[ r3],loc1 + ldf.fill.nta f112=[r14],loc1 + ldf.fill.nta f120=[r15],loc1 + ;; + ldf.fill.nta f33=[in0],loc0 + ldf.fill.nta 
f41=[ r3],loc0 + ldf.fill.nta f49=[r14],loc0 + ldf.fill.nta f57=[r15],loc0 + ;; + ldf.fill.nta f65=[in0],loc0 + ldf.fill.nta f73=[ r3],loc0 + ldf.fill.nta f81=[r14],loc0 + ldf.fill.nta f89=[r15],loc0 + ;; + ldf.fill.nta f97=[in0],loc1 + ldf.fill.nta f105=[ r3],loc1 + ldf.fill.nta f113=[r14],loc1 + ldf.fill.nta f121=[r15],loc1 + ;; + ldf.fill.nta f34=[in0],loc0 + ldf.fill.nta f42=[ r3],loc0 + ldf.fill.nta f50=[r14],loc0 + ldf.fill.nta f58=[r15],loc0 + ;; + ldf.fill.nta f66=[in0],loc0 + ldf.fill.nta f74=[ r3],loc0 + ldf.fill.nta f82=[r14],loc0 + ldf.fill.nta f90=[r15],loc0 + ;; + ldf.fill.nta f98=[in0],loc1 + ldf.fill.nta f106=[ r3],loc1 + ldf.fill.nta f114=[r14],loc1 + ldf.fill.nta f122=[r15],loc1 + ;; + ldf.fill.nta f35=[in0],loc0 + ldf.fill.nta f43=[ r3],loc0 + ldf.fill.nta f51=[r14],loc0 + ldf.fill.nta f59=[r15],loc0 + ;; + ldf.fill.nta f67=[in0],loc0 + ldf.fill.nta f75=[ r3],loc0 + ldf.fill.nta f83=[r14],loc0 + ldf.fill.nta f91=[r15],loc0 + ;; + ldf.fill.nta f99=[in0],loc1 + ldf.fill.nta f107=[ r3],loc1 + ldf.fill.nta f115=[r14],loc1 + ldf.fill.nta f123=[r15],loc1 + ;; + ldf.fill.nta f36=[in0],loc0 + ldf.fill.nta f44=[ r3],loc0 + ldf.fill.nta f52=[r14],loc0 + ldf.fill.nta f60=[r15],loc0 + ;; + ldf.fill.nta f68=[in0],loc0 + ldf.fill.nta f76=[ r3],loc0 + ldf.fill.nta f84=[r14],loc0 + ldf.fill.nta f92=[r15],loc0 + ;; + ldf.fill.nta f100=[in0],loc1 + ldf.fill.nta f108=[ r3],loc1 + ldf.fill.nta f116=[r14],loc1 + ldf.fill.nta f124=[r15],loc1 + ;; + ldf.fill.nta f37=[in0],loc0 + ldf.fill.nta f45=[ r3],loc0 + ldf.fill.nta f53=[r14],loc0 + ldf.fill.nta f61=[r15],loc0 + ;; + ldf.fill.nta f69=[in0],loc0 + ldf.fill.nta f77=[ r3],loc0 + ldf.fill.nta f85=[r14],loc0 + ldf.fill.nta f93=[r15],loc0 + ;; + ldf.fill.nta f101=[in0],loc1 + ldf.fill.nta f109=[ r3],loc1 + ldf.fill.nta f117=[r14],loc1 + ldf.fill.nta f125=[r15],loc1 + ;; + ldf.fill.nta f38 =[in0],loc0 + ldf.fill.nta f46 =[ r3],loc0 + ldf.fill.nta f54 =[r14],loc0 + ldf.fill.nta f62 =[r15],loc0 + ;; + ldf.fill.nta f70 
=[in0],loc0 + ldf.fill.nta f78 =[ r3],loc0 + ldf.fill.nta f86 =[r14],loc0 + ldf.fill.nta f94 =[r15],loc0 + ;; + ldf.fill.nta f102=[in0],loc1 + ldf.fill.nta f110=[ r3],loc1 + ldf.fill.nta f118=[r14],loc1 + ldf.fill.nta f126=[r15],loc1 + ;; + ldf.fill.nta f39 =[in0],loc0 + ldf.fill.nta f47 =[ r3],loc0 + ldf.fill.nta f55 =[r14],loc0 + ldf.fill.nta f63 =[r15],loc0 + ;; + ldf.fill.nta f71 =[in0],loc0 + ldf.fill.nta f79 =[ r3],loc0 + ldf.fill.nta f87 =[r14],loc0 + ldf.fill.nta f95 =[r15],loc0 + ;; + ldf.fill.nta f103=[in0] + ldf.fill.nta f111=[ r3] + ldf.fill.nta f119=[r14] + ldf.fill.nta f127=[r15] + br.ret.sptk.many rp +END(__ia64_load_fpu) + +GLOBAL_ENTRY(__ia64_init_fpu) + stf.spill [sp]=f0 // M3 + mov f32=f0 // F + nop.b 0 + + ldfps f33,f34=[sp] // M0 + ldfps f35,f36=[sp] // M1 + mov f37=f0 // F + ;; + + setf.s f38=r0 // M2 + setf.s f39=r0 // M3 + mov f40=f0 // F + + ldfps f41,f42=[sp] // M0 + ldfps f43,f44=[sp] // M1 + mov f45=f0 // F + + setf.s f46=r0 // M2 + setf.s f47=r0 // M3 + mov f48=f0 // F + + ldfps f49,f50=[sp] // M0 + ldfps f51,f52=[sp] // M1 + mov f53=f0 // F + + setf.s f54=r0 // M2 + setf.s f55=r0 // M3 + mov f56=f0 // F + + ldfps f57,f58=[sp] // M0 + ldfps f59,f60=[sp] // M1 + mov f61=f0 // F + + setf.s f62=r0 // M2 + setf.s f63=r0 // M3 + mov f64=f0 // F + + ldfps f65,f66=[sp] // M0 + ldfps f67,f68=[sp] // M1 + mov f69=f0 // F + + setf.s f70=r0 // M2 + setf.s f71=r0 // M3 + mov f72=f0 // F + + ldfps f73,f74=[sp] // M0 + ldfps f75,f76=[sp] // M1 + mov f77=f0 // F + + setf.s f78=r0 // M2 + setf.s f79=r0 // M3 + mov f80=f0 // F + + ldfps f81,f82=[sp] // M0 + ldfps f83,f84=[sp] // M1 + mov f85=f0 // F + + setf.s f86=r0 // M2 + setf.s f87=r0 // M3 + mov f88=f0 // F + + /* + * When the instructions are cached, it would be faster to initialize + * the remaining registers with simply mov instructions (F-unit). + * This gets the time down to ~29 cycles. 
However, this would use up + * 33 bundles, whereas continuing with the above pattern yields + * 10 bundles and ~30 cycles. + */ + + ldfps f89,f90=[sp] // M0 + ldfps f91,f92=[sp] // M1 + mov f93=f0 // F + + setf.s f94=r0 // M2 + setf.s f95=r0 // M3 + mov f96=f0 // F + + ldfps f97,f98=[sp] // M0 + ldfps f99,f100=[sp] // M1 + mov f101=f0 // F + + setf.s f102=r0 // M2 + setf.s f103=r0 // M3 + mov f104=f0 // F + + ldfps f105,f106=[sp] // M0 + ldfps f107,f108=[sp] // M1 + mov f109=f0 // F + + setf.s f110=r0 // M2 + setf.s f111=r0 // M3 + mov f112=f0 // F + + ldfps f113,f114=[sp] // M0 + ldfps f115,f116=[sp] // M1 + mov f117=f0 // F + + setf.s f118=r0 // M2 + setf.s f119=r0 // M3 + mov f120=f0 // F + + ldfps f121,f122=[sp] // M0 + ldfps f123,f124=[sp] // M1 + mov f125=f0 // F + + setf.s f126=r0 // M2 + setf.s f127=r0 // M3 + br.ret.sptk.many rp // F +END(__ia64_init_fpu) + +/* + * Switch execution mode from virtual to physical + * + * Inputs: + * r16 = new psr to establish + * Output: + * r19 = old virtual address of ar.bsp + * r20 = old virtual address of sp + * + * Note: RSE must already be in enforced lazy mode + */ +GLOBAL_ENTRY(ia64_switch_mode_phys) + { + rsm psr.i | psr.ic // disable interrupts and interrupt collection + mov r15=ip + } + ;; + { + flushrs // must be first insn in group + srlz.i + } + ;; + mov cr.ipsr=r16 // set new PSR + add r3=1f-ia64_switch_mode_phys,r15 + + mov r19=ar.bsp + mov r20=sp + mov r14=rp // get return address into a general register + ;; + + // going to physical mode, use tpa to translate virt->phys + tpa r17=r19 + tpa r3=r3 + tpa sp=sp + tpa r14=r14 + ;; + + mov r18=ar.rnat // save ar.rnat + mov ar.bspstore=r17 // this steps on ar.rnat + mov cr.iip=r3 + mov cr.ifs=r0 + ;; + mov ar.rnat=r18 // restore ar.rnat + rfi // must be last insn in group + ;; +1: mov rp=r14 + br.ret.sptk.many rp +END(ia64_switch_mode_phys) + +/* + * Switch execution mode from physical to virtual + * + * Inputs: + * r16 = new psr to establish + * r19 = new 
bspstore to establish + * r20 = new sp to establish + * + * Note: RSE must already be in enforced lazy mode + */ +GLOBAL_ENTRY(ia64_switch_mode_virt) + { + rsm psr.i | psr.ic // disable interrupts and interrupt collection + mov r15=ip + } + ;; + { + flushrs // must be first insn in group + srlz.i + } + ;; + mov cr.ipsr=r16 // set new PSR + add r3=1f-ia64_switch_mode_virt,r15 + + mov r14=rp // get return address into a general register + ;; + + // going to virtual + // - for code addresses, set upper bits of addr to KERNEL_START + // - for stack addresses, copy from input argument + movl r18=KERNEL_START + dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT + dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT + mov sp=r20 + ;; + or r3=r3,r18 + or r14=r14,r18 + ;; + + mov r18=ar.rnat // save ar.rnat + mov ar.bspstore=r19 // this steps on ar.rnat + mov cr.iip=r3 + mov cr.ifs=r0 + ;; + mov ar.rnat=r18 // restore ar.rnat + rfi // must be last insn in group + ;; +1: mov rp=r14 + br.ret.sptk.many rp +END(ia64_switch_mode_virt) + +GLOBAL_ENTRY(ia64_delay_loop) + .prologue +{ nop 0 // work around GAS unwind info generation bug... + .save ar.lc,r2 + mov r2=ar.lc + .body + ;; + mov ar.lc=r32 +} + ;; + // force loop to be 32-byte aligned (GAS bug means we cannot use .align + // inside function body without corrupting unwind info). +{ nop 0 } +1: br.cloop.sptk.few 1b + ;; + mov ar.lc=r2 + br.ret.sptk.many rp +END(ia64_delay_loop) + +/* + * Return a CPU-local timestamp in nano-seconds. This timestamp is + * NOT synchronized across CPUs; its return value must never be + * compared against the values returned on another CPU. The usage in + * kernel/sched/core.c ensures that. + * + * The return-value of sched_clock() is NOT supposed to wrap-around. + * If it did, it would cause some scheduling hiccups (at the worst). + * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even + * that would happen only once every 5+ years. 
+ * + * The code below basically calculates: + * + * (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT + * + * except that the multiplication and the shift are done with 128-bit + * intermediate precision so that we can produce a full 64-bit result. + */ +GLOBAL_ENTRY(ia64_native_sched_clock) + addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 + mov.m r9=ar.itc // fetch cycle-counter (35 cyc) + ;; + ldf8 f8=[r8] + ;; + setf.sig f9=r9 // certain to stall, so issue it _after_ ldf8... + ;; + xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) + xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product + ;; + getf.sig r8=f10 // (5 cyc) + getf.sig r9=f11 + ;; + shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT + br.ret.sptk.many rp +END(ia64_native_sched_clock) +#ifndef CONFIG_PARAVIRT + //unsigned long long + //sched_clock(void) __attribute__((alias("ia64_native_sched_clock"))); + .global sched_clock +sched_clock = ia64_native_sched_clock +#endif + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +GLOBAL_ENTRY(cycle_to_cputime) + alloc r16=ar.pfs,1,0,0,0 + addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 + ;; + ldf8 f8=[r8] + ;; + setf.sig f9=r32 + ;; + xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) + xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product + ;; + getf.sig r8=f10 // (5 cyc) + getf.sig r9=f11 + ;; + shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT + br.ret.sptk.many rp +END(cycle_to_cputime) +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +#ifdef CONFIG_IA64_BRL_EMU + +/* + * Assembly routines used by brl_emu.c to set preserved register state. 
+ */ + +#define SET_REG(reg) \ + GLOBAL_ENTRY(ia64_set_##reg); \ + alloc r16=ar.pfs,1,0,0,0; \ + mov reg=r32; \ + ;; \ + br.ret.sptk.many rp; \ + END(ia64_set_##reg) + +SET_REG(b1); +SET_REG(b2); +SET_REG(b3); +SET_REG(b4); +SET_REG(b5); + +#endif /* CONFIG_IA64_BRL_EMU */ + +#ifdef CONFIG_SMP + +#ifdef CONFIG_HOTPLUG_CPU +GLOBAL_ENTRY(ia64_jump_to_sal) + alloc r16=ar.pfs,1,0,0,0;; + rsm psr.i | psr.ic +{ + flushrs + srlz.i +} + tpa r25=in0 + movl r18=tlb_purge_done;; + DATA_VA_TO_PA(r18);; + mov b1=r18 // Return location + movl r18=ia64_do_tlb_purge;; + DATA_VA_TO_PA(r18);; + mov b2=r18 // doing tlb_flush work + mov ar.rsc=0 // Put RSE in enforced lazy, LE mode + movl r17=1f;; + DATA_VA_TO_PA(r17);; + mov cr.iip=r17 + movl r16=SAL_PSR_BITS_TO_SET;; + mov cr.ipsr=r16 + mov cr.ifs=r0;; + rfi;; // note: this unmask MCA/INIT (psr.mc) +1: + /* + * Invalidate all TLB data/inst + */ + br.sptk.many b2;; // jump to tlb purge code + +tlb_purge_done: + RESTORE_REGION_REGS(r25, r17,r18,r19);; + RESTORE_REG(b0, r25, r17);; + RESTORE_REG(b1, r25, r17);; + RESTORE_REG(b2, r25, r17);; + RESTORE_REG(b3, r25, r17);; + RESTORE_REG(b4, r25, r17);; + RESTORE_REG(b5, r25, r17);; + ld8 r1=[r25],0x08;; + ld8 r12=[r25],0x08;; + ld8 r13=[r25],0x08;; + RESTORE_REG(ar.fpsr, r25, r17);; + RESTORE_REG(ar.pfs, r25, r17);; + RESTORE_REG(ar.rnat, r25, r17);; + RESTORE_REG(ar.unat, r25, r17);; + RESTORE_REG(ar.bspstore, r25, r17);; + RESTORE_REG(cr.dcr, r25, r17);; + RESTORE_REG(cr.iva, r25, r17);; + RESTORE_REG(cr.pta, r25, r17);; + srlz.d;; // required not to violate RAW dependency + RESTORE_REG(cr.itv, r25, r17);; + RESTORE_REG(cr.pmv, r25, r17);; + RESTORE_REG(cr.cmcv, r25, r17);; + RESTORE_REG(cr.lrr0, r25, r17);; + RESTORE_REG(cr.lrr1, r25, r17);; + ld8 r4=[r25],0x08;; + ld8 r5=[r25],0x08;; + ld8 r6=[r25],0x08;; + ld8 r7=[r25],0x08;; + ld8 r17=[r25],0x08;; + mov pr=r17,-1;; + RESTORE_REG(ar.lc, r25, r17);; + /* + * Now Restore floating point regs + */ + ldf.fill.nta f2=[r25],16;; + 
ldf.fill.nta f3=[r25],16;; + ldf.fill.nta f4=[r25],16;; + ldf.fill.nta f5=[r25],16;; + ldf.fill.nta f16=[r25],16;; + ldf.fill.nta f17=[r25],16;; + ldf.fill.nta f18=[r25],16;; + ldf.fill.nta f19=[r25],16;; + ldf.fill.nta f20=[r25],16;; + ldf.fill.nta f21=[r25],16;; + ldf.fill.nta f22=[r25],16;; + ldf.fill.nta f23=[r25],16;; + ldf.fill.nta f24=[r25],16;; + ldf.fill.nta f25=[r25],16;; + ldf.fill.nta f26=[r25],16;; + ldf.fill.nta f27=[r25],16;; + ldf.fill.nta f28=[r25],16;; + ldf.fill.nta f29=[r25],16;; + ldf.fill.nta f30=[r25],16;; + ldf.fill.nta f31=[r25],16;; + + /* + * Now that we have done all the register restores + * we are now ready for the big DIVE to SAL Land + */ + ssm psr.ic;; + srlz.d;; + br.ret.sptk.many b0;; +END(ia64_jump_to_sal) +#endif /* CONFIG_HOTPLUG_CPU */ + +#endif /* CONFIG_SMP */ diff --git a/kernel/arch/ia64/kernel/ia64_ksyms.c b/kernel/arch/ia64/kernel/ia64_ksyms.c new file mode 100644 index 000000000..5b7791dd3 --- /dev/null +++ b/kernel/arch/ia64/kernel/ia64_ksyms.c @@ -0,0 +1,98 @@ +/* + * Architecture-specific kernel symbols + * + * Don't put any exports here unless it's defined in an assembler file. + * All other exports should be put directly after the definition. 
+ */ + +#include + +#include +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(strlen); + +#include +EXPORT_SYMBOL_GPL(empty_zero_page); + +#include +EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */ +EXPORT_SYMBOL(csum_ipv6_magic); + +#include +EXPORT_SYMBOL(clear_page); +EXPORT_SYMBOL(copy_page); + +#ifdef CONFIG_VIRTUAL_MEM_MAP +#include +EXPORT_SYMBOL(min_low_pfn); /* defined by bootmem.c, but not exported by generic code */ +EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic code */ +#endif + +#include +EXPORT_SYMBOL(ia64_cpu_info); +#ifdef CONFIG_SMP +EXPORT_SYMBOL(local_per_cpu_offset); +#endif + +#include +EXPORT_SYMBOL(__copy_user); +EXPORT_SYMBOL(__do_clear_user); +EXPORT_SYMBOL(__strlen_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__strnlen_user); + +/* from arch/ia64/lib */ +extern void __divsi3(void); +extern void __udivsi3(void); +extern void __modsi3(void); +extern void __umodsi3(void); +extern void __divdi3(void); +extern void __udivdi3(void); +extern void __moddi3(void); +extern void __umoddi3(void); + +EXPORT_SYMBOL(__divsi3); +EXPORT_SYMBOL(__udivsi3); +EXPORT_SYMBOL(__modsi3); +EXPORT_SYMBOL(__umodsi3); +EXPORT_SYMBOL(__divdi3); +EXPORT_SYMBOL(__udivdi3); +EXPORT_SYMBOL(__moddi3); +EXPORT_SYMBOL(__umoddi3); + +#if defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE) +extern void xor_ia64_2(void); +extern void xor_ia64_3(void); +extern void xor_ia64_4(void); +extern void xor_ia64_5(void); + +EXPORT_SYMBOL(xor_ia64_2); +EXPORT_SYMBOL(xor_ia64_3); +EXPORT_SYMBOL(xor_ia64_4); +EXPORT_SYMBOL(xor_ia64_5); +#endif + +#include +EXPORT_SYMBOL(ia64_pal_call_phys_stacked); +EXPORT_SYMBOL(ia64_pal_call_phys_static); +EXPORT_SYMBOL(ia64_pal_call_stacked); +EXPORT_SYMBOL(ia64_pal_call_static); +EXPORT_SYMBOL(ia64_load_scratch_fpregs); +EXPORT_SYMBOL(ia64_save_scratch_fpregs); + +#include +EXPORT_SYMBOL(unw_init_running); + +#if defined(CONFIG_IA64_ESI) || defined(CONFIG_IA64_ESI_MODULE) 
+extern void esi_call_phys (void); +EXPORT_SYMBOL_GPL(esi_call_phys); +#endif +extern char ia64_ivt[]; +EXPORT_SYMBOL(ia64_ivt); + +#include +#ifdef CONFIG_FUNCTION_TRACER +/* mcount is defined in assembly */ +EXPORT_SYMBOL(_mcount); +#endif diff --git a/kernel/arch/ia64/kernel/init_task.c b/kernel/arch/ia64/kernel/init_task.c new file mode 100644 index 000000000..f9efe9739 --- /dev/null +++ b/kernel/arch/ia64/kernel/init_task.c @@ -0,0 +1,42 @@ +/* + * This is where we statically allocate and initialize the initial + * task. + * + * Copyright (C) 1999, 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +/* + * Initial task structure. + * + * We need to make sure that this is properly aligned due to the way process stacks are + * handled. This is done by having a special ".data..init_task" section... + */ +#define init_thread_info init_task_mem.s.thread_info + +union { + struct { + struct task_struct task; + struct thread_info thread_info; + } s; + unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)]; +} init_task_mem asm ("init_task") __init_task_data = + {{ + .task = INIT_TASK(init_task_mem.s.task), + .thread_info = INIT_THREAD_INFO(init_task_mem.s.task) +}}; + +EXPORT_SYMBOL(init_task); diff --git a/kernel/arch/ia64/kernel/iosapic.c b/kernel/arch/ia64/kernel/iosapic.c new file mode 100644 index 000000000..bc9501e36 --- /dev/null +++ b/kernel/arch/ia64/kernel/iosapic.c @@ -0,0 +1,1141 @@ +/* + * I/O SAPIC support. + * + * Copyright (C) 1999 Intel Corp. + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 2000-2002 J.I. Lee + * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co. + * David Mosberger-Tang + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999,2000 Walt Drummond + * + * 00/04/19 D. 
Mosberger Rewritten to mirror more closely the x86 I/O + * APIC code. In particular, we now have separate + * handlers for edge and level triggered + * interrupts. + * 00/10/27 Asit Mallick, Goutham Rao IRQ vector + * allocation PCI to vector mapping, shared PCI + * interrupts. + * 00/10/27 D. Mosberger Document things a bit more to make them more + * understandable. Clean up much of the old + * IOSAPIC cruft. + * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts + * and fixes for ACPI S5(SoftOff) support. + * 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT + * 02/01/07 E. Focht Redirectable interrupt + * vectors in iosapic_set_affinity(), + * initializations for /proc/irq/#/smp_affinity + * 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing. + * 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq + * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to + * IOSAPIC mapping error + * 02/07/29 T. Kochi Allocate interrupt vectors dynamically + * 02/08/04 T. Kochi Cleaned up terminology (irq, global system + * interrupt, vector, etc.) + * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's + * pci_irq code. + * 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC. + * Remove iosapic_address & gsi_base from + * external interfaces. Rationalize + * __init/__devinit attributes. + * 04/12/04 Ashok Raj Intel Corporation 2004 + * Updated to work with irq migration necessary + * for CPU Hotplug + */ +/* + * Here is what the interrupt logic between a PCI device and the kernel looks + * like: + * + * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, + * INTD). The device is uniquely identified by its bus- and slot-number + * (the function number does not matter here because all functions share + * the same interrupt lines). + * + * (2) The motherboard routes the interrupt line to a pin on an IOSAPIC + * controller. 
Multiple interrupt lines may have to share the same + * IOSAPIC pin (if they're level triggered and use the same polarity). + * Each interrupt line has a unique Global System Interrupt (GSI) number + * which can be calculated as the sum of the controller's base GSI number + * and the IOSAPIC pin number to which the line connects. + * + * (3) The IOSAPIC uses internal routing table entries (RTEs) to map the + * IOSAPIC pin into the IA-64 interrupt vector. This interrupt vector is then + * sent to the CPU. + * + * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is + * used as an architecture-independent interrupt handling mechanism in Linux. + * As an IRQ is a number, we have to have an + * IA-64 interrupt vector number <-> IRQ number mapping. On smaller + * systems, we use one-to-one mapping between IA-64 vector and IRQ. A + * platform can implement platform_irq_to_vector(irq) and + * platform_local_vector_to_irq(vector) APIs to differentiate the mapping. + * Please see also arch/ia64/include/asm/hw_irq.h for those APIs. + * + * To sum up, there are three levels of mappings involved: + * + * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ + * + * Note: The term "IRQ" is loosely used everywhere in the Linux kernel to + * describe interrupts. Now we use "IRQ" only for Linux IRQs. ISA IRQ + * (isa_irq) is the only exception in this source code. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG_INTERRUPT_ROUTING + +#ifdef DEBUG_INTERRUPT_ROUTING +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + +static DEFINE_SPINLOCK(iosapic_lock); + +/* + * These tables map IA-64 vectors to the IOSAPIC pin that generates this + * vector. 
+ */ + +#define NO_REF_RTE 0 + +static struct iosapic { + char __iomem *addr; /* base address of IOSAPIC */ + unsigned int gsi_base; /* GSI base */ + unsigned short num_rte; /* # of RTEs on this IOSAPIC */ + int rtes_inuse; /* # of RTEs in use on this IOSAPIC */ +#ifdef CONFIG_NUMA + unsigned short node; /* numa node association via pxm */ +#endif + spinlock_t lock; /* lock for indirect reg access */ +} iosapic_lists[NR_IOSAPICS]; + +struct iosapic_rte_info { + struct list_head rte_list; /* RTEs sharing the same vector */ + unsigned char rte_index; /* IOSAPIC RTE index, 0..255: must be unsigned, a signed char would go negative for pins >= 128 */ + int refcnt; /* reference counter */ + struct iosapic *iosapic; +} ____cacheline_aligned; + +static struct iosapic_intr_info { + struct list_head rtes; /* RTEs using this vector (empty => + * not an IOSAPIC interrupt) */ + int count; /* # of registered RTEs */ + u32 low32; /* current value of low word of + * Redirection table entry */ + unsigned int dest; /* destination CPU physical ID */ + unsigned char dmode : 3; /* delivery mode (see iosapic.h) */ + unsigned char polarity: 1; /* interrupt polarity + * (see iosapic.h) */ + unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ +} iosapic_intr_info[NR_IRQS]; + +static unsigned char pcat_compat; /* 8259 compatibility flag */ + +static inline void +iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val) +{ + unsigned long flags; + + spin_lock_irqsave(&iosapic->lock, flags); + __iosapic_write(iosapic->addr, reg, val); + spin_unlock_irqrestore(&iosapic->lock, flags); +} + +/* + * Find an IOSAPIC associated with a GSI + */ +static inline int +find_iosapic (unsigned int gsi) +{ + int i; + + for (i = 0; i < NR_IOSAPICS; i++) { + if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < + iosapic_lists[i].num_rte) + return i; + } + + return -1; +} + +static inline int __gsi_to_irq(unsigned int gsi) +{ + int irq; + struct iosapic_intr_info *info; + struct iosapic_rte_info *rte; + + for (irq = 0; irq < NR_IRQS; irq++) { + info = &iosapic_intr_info[irq]; 
+ list_for_each_entry(rte, &info->rtes, rte_list) + if (rte->iosapic->gsi_base + rte->rte_index == gsi) + return irq; + } + return -1; +} + +int +gsi_to_irq (unsigned int gsi) +{ + unsigned long flags; + int irq; + + spin_lock_irqsave(&iosapic_lock, flags); + irq = __gsi_to_irq(gsi); + spin_unlock_irqrestore(&iosapic_lock, flags); + return irq; +} + +static struct iosapic_rte_info *find_rte(unsigned int irq, unsigned int gsi) +{ + struct iosapic_rte_info *rte; + + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) + if (rte->iosapic->gsi_base + rte->rte_index == gsi) + return rte; + return NULL; +} + +static void +set_rte (unsigned int gsi, unsigned int irq, unsigned int dest, int mask) +{ + unsigned long pol, trigger, dmode; + u32 low32, high32; + int rte_index; + char redir; + struct iosapic_rte_info *rte; + ia64_vector vector = irq_to_vector(irq); + + DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest); + + rte = find_rte(irq, gsi); + if (!rte) + return; /* not an IOSAPIC interrupt */ + + rte_index = rte->rte_index; + pol = iosapic_intr_info[irq].polarity; + trigger = iosapic_intr_info[irq].trigger; + dmode = iosapic_intr_info[irq].dmode; + + redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0; + +#ifdef CONFIG_SMP + set_irq_affinity_info(irq, (int)(dest & 0xffff), redir); +#endif + + low32 = ((pol << IOSAPIC_POLARITY_SHIFT) | + (trigger << IOSAPIC_TRIGGER_SHIFT) | + (dmode << IOSAPIC_DELIVERY_SHIFT) | + ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) | + vector); + + /* dest contains both id and eid */ + high32 = (dest << IOSAPIC_DEST_SHIFT); + + iosapic_write(rte->iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); + iosapic_intr_info[irq].low32 = low32; + iosapic_intr_info[irq].dest = dest; +} + +static void +nop (struct irq_data *data) +{ + /* do nothing... 
*/ +} + + +#ifdef CONFIG_KEXEC +void +kexec_disable_iosapic(void) +{ + struct iosapic_intr_info *info; + struct iosapic_rte_info *rte; + ia64_vector vec; + int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + info = &iosapic_intr_info[irq]; + vec = irq_to_vector(irq); + list_for_each_entry(rte, &info->rtes, + rte_list) { + iosapic_write(rte->iosapic, + IOSAPIC_RTE_LOW(rte->rte_index), + IOSAPIC_MASK|vec); + iosapic_eoi(rte->iosapic->addr, vec); + } + } +} +#endif + +static void +mask_irq (struct irq_data *data) +{ + unsigned int irq = data->irq; + u32 low32; + int rte_index; + struct iosapic_rte_info *rte; + + if (!iosapic_intr_info[irq].count) + return; /* not an IOSAPIC interrupt! */ + + /* set only the mask bit */ + low32 = iosapic_intr_info[irq].low32 |= IOSAPIC_MASK; + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { + rte_index = rte->rte_index; + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); + } +} + +static void +unmask_irq (struct irq_data *data) +{ + unsigned int irq = data->irq; + u32 low32; + int rte_index; + struct iosapic_rte_info *rte; + + if (!iosapic_intr_info[irq].count) + return; /* not an IOSAPIC interrupt! */ + + low32 = iosapic_intr_info[irq].low32 &= ~IOSAPIC_MASK; + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { + rte_index = rte->rte_index; + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); + } +} + + +static int +iosapic_set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ +#ifdef CONFIG_SMP + unsigned int irq = data->irq; + u32 high32, low32; + int cpu, dest, rte_index; + int redir = (irq & IA64_IRQ_REDIRECTED) ? 
1 : 0; + struct iosapic_rte_info *rte; + struct iosapic *iosapic; + + irq &= (~IA64_IRQ_REDIRECTED); + + cpu = cpumask_first_and(cpu_online_mask, mask); + if (cpu >= nr_cpu_ids) + return -1; + + if (irq_prepare_move(irq, cpu)) + return -1; + + dest = cpu_physical_id(cpu); + + if (!iosapic_intr_info[irq].count) + return -1; /* not an IOSAPIC interrupt */ + + set_irq_affinity_info(irq, dest, redir); + + /* dest contains both id and eid */ + high32 = dest << IOSAPIC_DEST_SHIFT; + + low32 = iosapic_intr_info[irq].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT); + if (redir) + /* change delivery mode to lowest priority */ + low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); + else + /* change delivery mode to fixed */ + low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); + low32 &= IOSAPIC_VECTOR_MASK; + low32 |= irq_to_vector(irq); + + iosapic_intr_info[irq].low32 = low32; + iosapic_intr_info[irq].dest = dest; + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { + iosapic = rte->iosapic; + rte_index = rte->rte_index; + iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32); + } + +#endif + return 0; +} + +/* + * Handlers for level-triggered interrupts. 
+ */ + +static unsigned int +iosapic_startup_level_irq (struct irq_data *data) +{ + unmask_irq(data); + return 0; +} + +static void +iosapic_unmask_level_irq (struct irq_data *data) +{ + unsigned int irq = data->irq; + ia64_vector vec = irq_to_vector(irq); + struct iosapic_rte_info *rte; + int do_unmask_irq = 0; + + irq_complete_move(irq); + if (unlikely(irqd_is_setaffinity_pending(data))) { + do_unmask_irq = 1; + mask_irq(data); + } else + unmask_irq(data); + + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) + iosapic_eoi(rte->iosapic->addr, vec); + + if (unlikely(do_unmask_irq)) { + irq_move_masked_irq(data); + unmask_irq(data); + } +} + +#define iosapic_shutdown_level_irq mask_irq +#define iosapic_enable_level_irq unmask_irq +#define iosapic_disable_level_irq mask_irq +#define iosapic_ack_level_irq nop + +static struct irq_chip irq_type_iosapic_level = { + .name = "IO-SAPIC-level", + .irq_startup = iosapic_startup_level_irq, + .irq_shutdown = iosapic_shutdown_level_irq, + .irq_enable = iosapic_enable_level_irq, + .irq_disable = iosapic_disable_level_irq, + .irq_ack = iosapic_ack_level_irq, + .irq_mask = mask_irq, + .irq_unmask = iosapic_unmask_level_irq, + .irq_set_affinity = iosapic_set_affinity +}; + +/* + * Handlers for edge-triggered interrupts. + */ + +static unsigned int +iosapic_startup_edge_irq (struct irq_data *data) +{ + unmask_irq(data); + /* + * IOSAPIC simply drops interrupts pended while the + * corresponding pin was masked, so we can't know if an + * interrupt is pending already. Let's hope not... 
+ */ + return 0; +} + +static void +iosapic_ack_edge_irq (struct irq_data *data) +{ + irq_complete_move(data->irq); + irq_move_irq(data); +} + +#define iosapic_enable_edge_irq unmask_irq +#define iosapic_disable_edge_irq nop + +static struct irq_chip irq_type_iosapic_edge = { + .name = "IO-SAPIC-edge", + .irq_startup = iosapic_startup_edge_irq, + .irq_shutdown = iosapic_disable_edge_irq, + .irq_enable = iosapic_enable_edge_irq, + .irq_disable = iosapic_disable_edge_irq, + .irq_ack = iosapic_ack_edge_irq, + .irq_mask = mask_irq, + .irq_unmask = unmask_irq, + .irq_set_affinity = iosapic_set_affinity +}; + +static unsigned int +iosapic_version (char __iomem *addr) +{ + /* + * The IOSAPIC Version Register returns a 32-bit structure like: + * { + * unsigned int version : 8; + * unsigned int reserved1 : 8; + * unsigned int max_redir : 8; + * unsigned int reserved2 : 8; + * } + */ + return __iosapic_read(addr, IOSAPIC_VERSION); +} + +static int iosapic_find_sharable_irq(unsigned long trigger, unsigned long pol) +{ + int i, irq = -ENOSPC, min_count = -1; + struct iosapic_intr_info *info; + + /* + * shared vectors for edge-triggered interrupts are not + * supported yet + */ + if (trigger == IOSAPIC_EDGE) + return -EINVAL; + + for (i = 0; i < NR_IRQS; i++) { + info = &iosapic_intr_info[i]; + if (info->trigger == trigger && info->polarity == pol && + (info->dmode == IOSAPIC_FIXED || + info->dmode == IOSAPIC_LOWEST_PRIORITY) && + can_request_irq(i, IRQF_SHARED)) { + if (min_count == -1 || info->count < min_count) { + irq = i; + min_count = info->count; + } + } + } + return irq; +} + +/* + * If the given vector is already owned by another user, + * assign a new vector to that user and make the given vector available + */ +static void __init +iosapic_reassign_vector (int irq) +{ + int new_irq; + + if (iosapic_intr_info[irq].count) { + new_irq = create_irq(); + if (new_irq < 0) + panic("%s: out of interrupt vectors!\n", __func__); + printk(KERN_INFO "Reassigning vector %d to %d\n", + 
irq_to_vector(irq), irq_to_vector(new_irq)); + memcpy(&iosapic_intr_info[new_irq], &iosapic_intr_info[irq], + sizeof(struct iosapic_intr_info)); + INIT_LIST_HEAD(&iosapic_intr_info[new_irq].rtes); + list_move(iosapic_intr_info[irq].rtes.next, + &iosapic_intr_info[new_irq].rtes); + memset(&iosapic_intr_info[irq], 0, + sizeof(struct iosapic_intr_info)); + iosapic_intr_info[irq].low32 = IOSAPIC_MASK; + INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes); + } +} + +static inline int irq_is_shared (int irq) +{ + return (iosapic_intr_info[irq].count > 1); +} + +struct irq_chip* +ia64_native_iosapic_get_irq_chip(unsigned long trigger) +{ + if (trigger == IOSAPIC_EDGE) + return &irq_type_iosapic_edge; + else + return &irq_type_iosapic_level; +} + +static int +register_intr (unsigned int gsi, int irq, unsigned char delivery, + unsigned long polarity, unsigned long trigger) +{ + struct irq_chip *chip, *irq_type; + int index; + struct iosapic_rte_info *rte; + + index = find_iosapic(gsi); + if (index < 0) { + printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", + __func__, gsi); + return -ENODEV; + } + + rte = find_rte(irq, gsi); + if (!rte) { + rte = kzalloc(sizeof (*rte), GFP_ATOMIC); + if (!rte) { + printk(KERN_WARNING "%s: cannot allocate memory\n", + __func__); + return -ENOMEM; + } + + rte->iosapic = &iosapic_lists[index]; + rte->rte_index = gsi - rte->iosapic->gsi_base; + rte->refcnt++; + list_add_tail(&rte->rte_list, &iosapic_intr_info[irq].rtes); + iosapic_intr_info[irq].count++; + iosapic_lists[index].rtes_inuse++; + } + else if (rte->refcnt == NO_REF_RTE) { + struct iosapic_intr_info *info = &iosapic_intr_info[irq]; + if (info->count > 0 && + (info->trigger != trigger || info->polarity != polarity)){ + printk (KERN_WARNING + "%s: cannot override the interrupt\n", + __func__); + return -EINVAL; + } + rte->refcnt++; + iosapic_intr_info[irq].count++; + iosapic_lists[index].rtes_inuse++; + } + + iosapic_intr_info[irq].polarity = polarity; + iosapic_intr_info[irq].dmode = 
delivery; + iosapic_intr_info[irq].trigger = trigger; + + irq_type = iosapic_get_irq_chip(trigger); + + chip = irq_get_chip(irq); + if (irq_type != NULL && chip != irq_type) { + if (chip != &no_irq_chip) + printk(KERN_WARNING + "%s: changing vector %d from %s to %s\n", + __func__, irq_to_vector(irq), + chip->name, irq_type->name); + chip = irq_type; + } + __irq_set_chip_handler_name_locked(irq, chip, trigger == IOSAPIC_EDGE ? + handle_edge_irq : handle_level_irq, + NULL); + return 0; +} + +static unsigned int +get_target_cpu (unsigned int gsi, int irq) +{ +#ifdef CONFIG_SMP + static int cpu = -1; + extern int cpe_vector; + cpumask_t domain = irq_to_domain(irq); + + /* + * In case of vector shared by multiple RTEs, all RTEs that + * share the vector need to use the same destination CPU. + */ + if (iosapic_intr_info[irq].count) + return iosapic_intr_info[irq].dest; + + /* + * If the platform supports redirection via XTP, let it + * distribute interrupts. + */ + if (smp_int_redirect & SMP_IRQ_REDIRECTION) + return cpu_physical_id(smp_processor_id()); + + /* + * Some interrupts (ACPI SCI, for instance) are registered + * before the BSP is marked as online. 
+ */ + if (!cpu_online(smp_processor_id())) + return cpu_physical_id(smp_processor_id()); + +#ifdef CONFIG_ACPI + if (cpe_vector > 0 && irq_to_vector(irq) == IA64_CPEP_VECTOR) + return get_cpei_target_cpu(); +#endif + +#ifdef CONFIG_NUMA + { + int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; + const struct cpumask *cpu_mask; + + iosapic_index = find_iosapic(gsi); + if (iosapic_index < 0 || + iosapic_lists[iosapic_index].node == MAX_NUMNODES) + goto skip_numa_setup; + + cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node); + num_cpus = 0; + for_each_cpu_and(numa_cpu, cpu_mask, &domain) { + if (cpu_online(numa_cpu)) + num_cpus++; + } + + if (!num_cpus) + goto skip_numa_setup; + + /* Use irq assignment to distribute across cpus in node */ + cpu_index = irq % num_cpus; + + for_each_cpu_and(numa_cpu, cpu_mask, &domain) + if (cpu_online(numa_cpu) && i++ >= cpu_index) + break; + + if (numa_cpu < nr_cpu_ids) + return cpu_physical_id(numa_cpu); + } +skip_numa_setup: +#endif + /* + * Otherwise, round-robin interrupt vectors across all the + * processors. (It'd be nice if we could be smarter in the + * case of NUMA.) + */ + do { + if (++cpu >= nr_cpu_ids) + cpu = 0; + } while (!cpu_online(cpu) || !cpumask_test_cpu(cpu, &domain)); + + return cpu_physical_id(cpu); +#else /* CONFIG_SMP */ + return cpu_physical_id(smp_processor_id()); +#endif +} + +static inline unsigned char choose_dmode(void) +{ +#ifdef CONFIG_SMP + if (smp_int_redirect & SMP_IRQ_REDIRECTION) + return IOSAPIC_LOWEST_PRIORITY; +#endif + return IOSAPIC_FIXED; +} + +/* + * ACPI can describe IOSAPIC interrupts via static tables and namespace + * methods. This provides an interface to register those interrupts and + * program the IOSAPIC RTE. 
+ */ +int +iosapic_register_intr (unsigned int gsi, + unsigned long polarity, unsigned long trigger) +{ + int irq, mask = 1, err; + unsigned int dest; + unsigned long flags; + struct iosapic_rte_info *rte; + u32 low32; + unsigned char dmode; + struct irq_desc *desc; + + /* + * If this GSI has already been registered (i.e., it's a + * shared interrupt, or we lost a race to register it), + * don't touch the RTE. + */ + spin_lock_irqsave(&iosapic_lock, flags); + irq = __gsi_to_irq(gsi); + if (irq > 0) { + rte = find_rte(irq, gsi); + if(iosapic_intr_info[irq].count == 0) { + assign_irq_vector(irq); + irq_init_desc(irq); + } else if (rte->refcnt != NO_REF_RTE) { + rte->refcnt++; + goto unlock_iosapic_lock; + } + } else + irq = create_irq(); + + /* If vector is running out, we try to find a sharable vector */ + if (irq < 0) { + irq = iosapic_find_sharable_irq(trigger, polarity); + if (irq < 0) + goto unlock_iosapic_lock; + } + + desc = irq_to_desc(irq); + raw_spin_lock(&desc->lock); + dest = get_target_cpu(gsi, irq); + dmode = choose_dmode(); + err = register_intr(gsi, irq, dmode, polarity, trigger); + if (err < 0) { + raw_spin_unlock(&desc->lock); + irq = err; + goto unlock_iosapic_lock; + } + + /* + * If the vector is shared and already unmasked for other + * interrupt sources, don't mask it. + */ + low32 = iosapic_intr_info[irq].low32; + if (irq_is_shared(irq) && !(low32 & IOSAPIC_MASK)) + mask = 0; + set_rte(gsi, irq, dest, mask); + + printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", + gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), + (polarity == IOSAPIC_POL_HIGH ? 
"high" : "low"), + cpu_logical_id(dest), dest, irq_to_vector(irq)); + + raw_spin_unlock(&desc->lock); + unlock_iosapic_lock: + spin_unlock_irqrestore(&iosapic_lock, flags); + return irq; +} + +void +iosapic_unregister_intr (unsigned int gsi) +{ + unsigned long flags; + int irq, index; + u32 low32; + unsigned long trigger, polarity; + unsigned int dest; + struct iosapic_rte_info *rte; + + /* + * If the irq associated with the gsi is not found, + * iosapic_unregister_intr() is unbalanced. We need to check + * this again after getting locks. + */ + irq = gsi_to_irq(gsi); + if (irq < 0) { + printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", + gsi); + WARN_ON(1); + return; + } + + spin_lock_irqsave(&iosapic_lock, flags); + if ((rte = find_rte(irq, gsi)) == NULL) { + printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", + gsi); + WARN_ON(1); + goto out; + } + + if (--rte->refcnt > 0) + goto out; + + rte->refcnt = NO_REF_RTE; + + /* Mask the interrupt */ + low32 = iosapic_intr_info[irq].low32 | IOSAPIC_MASK; + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), low32); + + iosapic_intr_info[irq].count--; + index = find_iosapic(gsi); + iosapic_lists[index].rtes_inuse--; + WARN_ON(iosapic_lists[index].rtes_inuse < 0); + + trigger = iosapic_intr_info[irq].trigger; + polarity = iosapic_intr_info[irq].polarity; + dest = iosapic_intr_info[irq].dest; + printk(KERN_INFO + "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n", + gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), + (polarity == IOSAPIC_POL_HIGH ? 
"high" : "low"), + cpu_logical_id(dest), dest, irq_to_vector(irq)); + + if (iosapic_intr_info[irq].count == 0) { +#ifdef CONFIG_SMP + /* Clear affinity */ + cpumask_setall(irq_get_irq_data(irq)->affinity); +#endif + /* Clear the interrupt information */ + iosapic_intr_info[irq].dest = 0; + iosapic_intr_info[irq].dmode = 0; + iosapic_intr_info[irq].polarity = 0; + iosapic_intr_info[irq].trigger = 0; + iosapic_intr_info[irq].low32 |= IOSAPIC_MASK; + + /* Destroy and reserve IRQ */ + destroy_and_reserve_irq(irq); + } + out: + spin_unlock_irqrestore(&iosapic_lock, flags); +} + +/* + * ACPI calls this when it finds an entry for a platform interrupt. + */ +int __init +iosapic_register_platform_intr (u32 int_type, unsigned int gsi, + int iosapic_vector, u16 eid, u16 id, + unsigned long polarity, unsigned long trigger) +{ + static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"}; + unsigned char delivery; + int irq, vector, mask = 0; + unsigned int dest = ((id << 8) | eid) & 0xffff; + + switch (int_type) { + case ACPI_INTERRUPT_PMI: + irq = vector = iosapic_vector; + bind_irq_vector(irq, vector, CPU_MASK_ALL); + /* + * since PMI vector is alloc'd by FW(ACPI) not by kernel, + * we need to make sure the vector is available + */ + iosapic_reassign_vector(irq); + delivery = IOSAPIC_PMI; + break; + case ACPI_INTERRUPT_INIT: + irq = create_irq(); + if (irq < 0) + panic("%s: out of interrupt vectors!\n", __func__); + vector = irq_to_vector(irq); + delivery = IOSAPIC_INIT; + break; + case ACPI_INTERRUPT_CPEI: + irq = vector = IA64_CPE_VECTOR; + BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL)); + delivery = IOSAPIC_FIXED; + mask = 1; + break; + default: + printk(KERN_ERR "%s: invalid int type 0x%x\n", __func__, + int_type); + return -1; + } + + register_intr(gsi, irq, delivery, polarity, trigger); + + printk(KERN_INFO + "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)" + " vector %d\n", + int_type < ARRAY_SIZE(name) ? 
name[int_type] : "unknown", + int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), + (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), + cpu_logical_id(dest), dest, vector); + + set_rte(gsi, irq, dest, mask); + return vector; +} + +/* + * ACPI calls this when it finds an entry for a legacy ISA IRQ override. + */ +void iosapic_override_isa_irq(unsigned int isa_irq, unsigned int gsi, + unsigned long polarity, unsigned long trigger) +{ + int vector, irq; + unsigned int dest = cpu_physical_id(smp_processor_id()); + unsigned char dmode; + + irq = vector = isa_irq_to_vector(isa_irq); + BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL)); + dmode = choose_dmode(); + register_intr(gsi, irq, dmode, polarity, trigger); + + DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n", + isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level", + polarity == IOSAPIC_POL_HIGH ? "high" : "low", + cpu_logical_id(dest), dest, vector); + + set_rte(gsi, irq, dest, 1); +} + +void __init +ia64_native_iosapic_pcat_compat_init(void) +{ + if (pcat_compat) { + /* + * Disable the compatibility mode interrupts (8259 style), + * needs IN/OUT support enabled. 
+ */ + printk(KERN_INFO + "%s: Disabling PC-AT compatible 8259 interrupts\n", + __func__); + outb(0xff, 0xA1); + outb(0xff, 0x21); + } +} + +void __init +iosapic_system_init (int system_pcat_compat) +{ + int irq; + + for (irq = 0; irq < NR_IRQS; ++irq) { + iosapic_intr_info[irq].low32 = IOSAPIC_MASK; + /* mark as unused */ + INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes); + + iosapic_intr_info[irq].count = 0; + } + + pcat_compat = system_pcat_compat; + if (pcat_compat) + iosapic_pcat_compat_init(); +} + +static inline int +iosapic_alloc (void) +{ + int index; + + for (index = 0; index < NR_IOSAPICS; index++) + if (!iosapic_lists[index].addr) + return index; + + printk(KERN_WARNING "%s: failed to allocate iosapic\n", __func__); + return -1; +} + +static inline void +iosapic_free (int index) +{ + memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0])); +} + +static inline int +iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver) +{ + int index; + unsigned int gsi_end, base, end; + + /* check gsi range */ + gsi_end = gsi_base + ((ver >> 16) & 0xff); + for (index = 0; index < NR_IOSAPICS; index++) { + if (!iosapic_lists[index].addr) + continue; + + base = iosapic_lists[index].gsi_base; + end = base + iosapic_lists[index].num_rte - 1; + + if (gsi_end < base || end < gsi_base) + continue; /* OK */ + + return -EBUSY; + } + return 0; +} + +static int +iosapic_delete_rte(unsigned int irq, unsigned int gsi) +{ + struct iosapic_rte_info *rte, *temp; + + list_for_each_entry_safe(rte, temp, &iosapic_intr_info[irq].rtes, + rte_list) { + if (rte->iosapic->gsi_base + rte->rte_index == gsi) { + if (rte->refcnt) + return -EBUSY; + + list_del(&rte->rte_list); + kfree(rte); + return 0; + } + } + + return -EINVAL; +} + +int iosapic_init(unsigned long phys_addr, unsigned int gsi_base) +{ + int num_rte, err, index; + unsigned int isa_irq, ver; + char __iomem *addr; + unsigned long flags; + + spin_lock_irqsave(&iosapic_lock, flags); + index = find_iosapic(gsi_base); + if 
(index >= 0) { + spin_unlock_irqrestore(&iosapic_lock, flags); + return -EBUSY; + } + + addr = ioremap(phys_addr, 0); + if (addr == NULL) { + spin_unlock_irqrestore(&iosapic_lock, flags); + return -ENOMEM; + } + ver = iosapic_version(addr); + if ((err = iosapic_check_gsi_range(gsi_base, ver))) { + iounmap(addr); + spin_unlock_irqrestore(&iosapic_lock, flags); + return err; + } + + /* + * The MAX_REDIR register holds the highest input pin number + * (starting from 0). We add 1 so that we can use it for + * number of pins (= RTEs) + */ + num_rte = ((ver >> 16) & 0xff) + 1; + + index = iosapic_alloc(); + iosapic_lists[index].addr = addr; + iosapic_lists[index].gsi_base = gsi_base; + iosapic_lists[index].num_rte = num_rte; +#ifdef CONFIG_NUMA + iosapic_lists[index].node = MAX_NUMNODES; +#endif + spin_lock_init(&iosapic_lists[index].lock); + spin_unlock_irqrestore(&iosapic_lock, flags); + + if ((gsi_base == 0) && pcat_compat) { + /* + * Map the legacy ISA devices into the IOSAPIC data. Some of + * these may get reprogrammed later on with data from the ACPI + * Interrupt Source Override table. 
+ */ + for (isa_irq = 0; isa_irq < 16; ++isa_irq) + iosapic_override_isa_irq(isa_irq, isa_irq, + IOSAPIC_POL_HIGH, + IOSAPIC_EDGE); + } + return 0; +} + +int iosapic_remove(unsigned int gsi_base) +{ + int i, irq, index, err = 0; + unsigned long flags; + + spin_lock_irqsave(&iosapic_lock, flags); + index = find_iosapic(gsi_base); + if (index < 0) { + printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n", + __func__, gsi_base); + goto out; + } + + if (iosapic_lists[index].rtes_inuse) { + err = -EBUSY; + printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n", + __func__, gsi_base); + goto out; + } + + for (i = gsi_base; i < gsi_base + iosapic_lists[index].num_rte; i++) { + irq = __gsi_to_irq(i); + if (irq < 0) + continue; + + err = iosapic_delete_rte(irq, i); + if (err) + goto out; + } + + iounmap(iosapic_lists[index].addr); + iosapic_free(index); + out: + spin_unlock_irqrestore(&iosapic_lock, flags); + return err; +} + +#ifdef CONFIG_NUMA +void map_iosapic_to_node(unsigned int gsi_base, int node) +{ + int index; + + index = find_iosapic(gsi_base); + if (index < 0) { + printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", + __func__, gsi_base); + return; + } + iosapic_lists[index].node = node; + return; +} +#endif diff --git a/kernel/arch/ia64/kernel/irq.c b/kernel/arch/ia64/kernel/irq.c new file mode 100644 index 000000000..812a1e6b3 --- /dev/null +++ b/kernel/arch/ia64/kernel/irq.c @@ -0,0 +1,202 @@ +/* + * linux/arch/ia64/kernel/irq.c + * + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQs should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. 
+ * + * Copyright (C) Ashok Raj, Intel Corporation 2004 + * + * 4/14/2004: Added code to handle cpu migration and do safe irq + * migration without losing interrupts for iosapic + * architecture. + */ + +#include +#include +#include +#include +#include +#include + +#include + +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id()); +} + +#ifdef CONFIG_IA64_GENERIC +ia64_vector __ia64_irq_to_vector(int irq) +{ + return irq_cfg[irq].vector; +} + +unsigned int __ia64_local_vector_to_irq (ia64_vector vec) +{ + return __this_cpu_read(vector_irq[vec]); +} +#endif + +/* + * Interrupt statistics: + */ + +atomic_t irq_err_count; + +/* + * /proc/interrupts printing: + */ +int arch_show_interrupts(struct seq_file *p, int prec) +{ + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); + return 0; +} + +#ifdef CONFIG_SMP +static char irq_redir [NR_IRQS]; // = { [0 ... 
NR_IRQS-1] = 1 }; + +void set_irq_affinity_info (unsigned int irq, int hwid, int redir) +{ + if (irq < NR_IRQS) { + cpumask_copy(irq_get_irq_data(irq)->affinity, + cpumask_of(cpu_logical_id(hwid))); + irq_redir[irq] = (char) (redir & 0xff); + } +} + +bool is_affinity_mask_valid(const struct cpumask *cpumask) +{ + if (ia64_platform_is("sn2")) { + /* Only allow one CPU to be specified in the smp_affinity mask */ + if (cpumask_weight(cpumask) != 1) + return false; + } + return true; +} + +#endif /* CONFIG_SMP */ + +int __init arch_early_irq_init(void) +{ + ia64_mca_irq_init(); + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +unsigned int vectors_in_migration[NR_IRQS]; + +/* + * Since cpu_online_mask is already updated, we just need to check for + * affinity that has zeros + */ +static void migrate_irqs(void) +{ + int irq, new_cpu; + + for (irq=0; irq < NR_IRQS; irq++) { + struct irq_desc *desc = irq_to_desc(irq); + struct irq_data *data = irq_desc_get_irq_data(desc); + struct irq_chip *chip = irq_data_get_irq_chip(data); + + if (irqd_irq_disabled(data)) + continue; + + /* + * No handling for now. + * TBD: Implement a disable function so we can now + * tell CPU not to respond to these local intr sources. + * such as ITV,CPEI,MCA etc. + */ + if (irqd_is_per_cpu(data)) + continue; + + if (cpumask_any_and(data->affinity, cpu_online_mask) + >= nr_cpu_ids) { + /* + * Save it for phase 2 processing + */ + vectors_in_migration[irq] = irq; + + new_cpu = cpumask_any(cpu_online_mask); + + /* + * Al three are essential, currently WARN_ON.. maybe panic? 
+ */ + if (chip && chip->irq_disable && + chip->irq_enable && chip->irq_set_affinity) { + chip->irq_disable(data); + chip->irq_set_affinity(data, + cpumask_of(new_cpu), false); + chip->irq_enable(data); + } else { + WARN_ON((!chip || !chip->irq_disable || + !chip->irq_enable || + !chip->irq_set_affinity)); + } + } + } +} + +void fixup_irqs(void) +{ + unsigned int irq; + extern void ia64_process_pending_intr(void); + extern volatile int time_keeper_id; + + /* Mask ITV to disable timer */ + ia64_set_itv(1 << 16); + + /* + * Find a new timesync master + */ + if (smp_processor_id() == time_keeper_id) { + time_keeper_id = cpumask_first(cpu_online_mask); + printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id); + } + + /* + * Phase 1: Locate IRQs bound to this cpu and + * relocate them for cpu removal. + */ + migrate_irqs(); + + /* + * Phase 2: Perform interrupt processing for all entries reported in + * local APIC. + */ + ia64_process_pending_intr(); + + /* + * Phase 3: Now handle any interrupts not captured in local APIC. + * This is to account for cases that device interrupted during the time the + * rte was being disabled and re-programmed. + */ + for (irq=0; irq < NR_IRQS; irq++) { + if (vectors_in_migration[irq]) { + struct pt_regs *old_regs = set_irq_regs(NULL); + + vectors_in_migration[irq]=0; + generic_handle_irq(irq); + set_irq_regs(old_regs); + } + } + + /* + * Now let processor die. We do irq disable and max_xtp() to + * ensure there is no more interrupts routed to this processor. + * But the local timer interrupt can have 1 pending which we + * take care in timer_interrupt(). 
+ */ + max_xtp(); + local_irq_disable(); +} +#endif diff --git a/kernel/arch/ia64/kernel/irq_ia64.c b/kernel/arch/ia64/kernel/irq_ia64.c new file mode 100644 index 000000000..eaa3199f9 --- /dev/null +++ b/kernel/arch/ia64/kernel/irq_ia64.c @@ -0,0 +1,670 @@ +/* + * linux/arch/ia64/kernel/irq_ia64.c + * + * Copyright (C) 1998-2001 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger-Tang + * + * 6/10/99: Updated to bring in sync with x86 version to facilitate + * support for SMP and different interrupt controllers. + * + * 09/15/00 Goutham Rao Implemented pci_irq_to_vector + * PCI to vector allocation routine. + * 04/14/2004 Ashok Raj + * Added CPU Hotplug handling for IPF. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PERFMON +# include +#endif + +#define IRQ_DEBUG 0 + +#define IRQ_VECTOR_UNASSIGNED (0) + +#define IRQ_UNUSED (0) +#define IRQ_USED (1) +#define IRQ_RSVD (2) + +/* These can be overridden in platform_irq_init */ +int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR; +int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR; + +/* default base addr of IPI table */ +void __iomem *ipi_base_addr = ((void __iomem *) + (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR)); + +static cpumask_t vector_allocation_domain(int cpu); + +/* + * Legacy IRQ to IA-64 vector translation table. + */ +__u8 isa_irq_to_vector_map[16] = { + /* 8259 IRQ translation, first 16 entries */ + 0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, + 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21 +}; +EXPORT_SYMBOL(isa_irq_to_vector_map); + +DEFINE_SPINLOCK(vector_lock); + +struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { + [0 ... 
NR_IRQS - 1] = { + .vector = IRQ_VECTOR_UNASSIGNED, + .domain = CPU_MASK_NONE + } +}; + +DEFINE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq) = { + [0 ... IA64_NUM_VECTORS - 1] = -1 +}; + +static cpumask_t vector_table[IA64_NUM_VECTORS] = { + [0 ... IA64_NUM_VECTORS - 1] = CPU_MASK_NONE +}; + +static int irq_status[NR_IRQS] = { + [0 ... NR_IRQS -1] = IRQ_UNUSED +}; + +static inline int find_unassigned_irq(void) +{ + int irq; + + for (irq = IA64_FIRST_DEVICE_VECTOR; irq < NR_IRQS; irq++) + if (irq_status[irq] == IRQ_UNUSED) + return irq; + return -ENOSPC; +} + +static inline int find_unassigned_vector(cpumask_t domain) +{ + cpumask_t mask; + int pos, vector; + + cpumask_and(&mask, &domain, cpu_online_mask); + if (cpumask_empty(&mask)) + return -EINVAL; + + for (pos = 0; pos < IA64_NUM_DEVICE_VECTORS; pos++) { + vector = IA64_FIRST_DEVICE_VECTOR + pos; + cpumask_and(&mask, &domain, &vector_table[vector]); + if (!cpumask_empty(&mask)) + continue; + return vector; + } + return -ENOSPC; +} + +static int __bind_irq_vector(int irq, int vector, cpumask_t domain) +{ + cpumask_t mask; + int cpu; + struct irq_cfg *cfg = &irq_cfg[irq]; + + BUG_ON((unsigned)irq >= NR_IRQS); + BUG_ON((unsigned)vector >= IA64_NUM_VECTORS); + + cpumask_and(&mask, &domain, cpu_online_mask); + if (cpumask_empty(&mask)) + return -EINVAL; + if ((cfg->vector == vector) && cpumask_equal(&cfg->domain, &domain)) + return 0; + if (cfg->vector != IRQ_VECTOR_UNASSIGNED) + return -EBUSY; + for_each_cpu(cpu, &mask) + per_cpu(vector_irq, cpu)[vector] = irq; + cfg->vector = vector; + cfg->domain = domain; + irq_status[irq] = IRQ_USED; + cpumask_or(&vector_table[vector], &vector_table[vector], &domain); + return 0; +} + +int bind_irq_vector(int irq, int vector, cpumask_t domain) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&vector_lock, flags); + ret = __bind_irq_vector(irq, vector, domain); + spin_unlock_irqrestore(&vector_lock, flags); + return ret; +} + +static void __clear_irq_vector(int irq) +{ + 
int vector, cpu; + cpumask_t domain; + struct irq_cfg *cfg = &irq_cfg[irq]; + + BUG_ON((unsigned)irq >= NR_IRQS); + BUG_ON(cfg->vector == IRQ_VECTOR_UNASSIGNED); + vector = cfg->vector; + domain = cfg->domain; + for_each_cpu_and(cpu, &cfg->domain, cpu_online_mask) + per_cpu(vector_irq, cpu)[vector] = -1; + cfg->vector = IRQ_VECTOR_UNASSIGNED; + cfg->domain = CPU_MASK_NONE; + irq_status[irq] = IRQ_UNUSED; + cpumask_andnot(&vector_table[vector], &vector_table[vector], &domain); +} + +static void clear_irq_vector(int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); + __clear_irq_vector(irq); + spin_unlock_irqrestore(&vector_lock, flags); +} + +int +ia64_native_assign_irq_vector (int irq) +{ + unsigned long flags; + int vector, cpu; + cpumask_t domain = CPU_MASK_NONE; + + vector = -ENOSPC; + + spin_lock_irqsave(&vector_lock, flags); + for_each_online_cpu(cpu) { + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector >= 0) + break; + } + if (vector < 0) + goto out; + if (irq == AUTO_ASSIGN) + irq = vector; + BUG_ON(__bind_irq_vector(irq, vector, domain)); + out: + spin_unlock_irqrestore(&vector_lock, flags); + return vector; +} + +void +ia64_native_free_irq_vector (int vector) +{ + if (vector < IA64_FIRST_DEVICE_VECTOR || + vector > IA64_LAST_DEVICE_VECTOR) + return; + clear_irq_vector(vector); +} + +int +reserve_irq_vector (int vector) +{ + if (vector < IA64_FIRST_DEVICE_VECTOR || + vector > IA64_LAST_DEVICE_VECTOR) + return -EINVAL; + return !!bind_irq_vector(vector, vector, CPU_MASK_ALL); +} + +/* + * Initialize vector_irq on a new cpu. This function must be called + * with vector_lock held. 
+ */ +void __setup_vector_irq(int cpu) +{ + int irq, vector; + + /* Clear vector_irq */ + for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) + per_cpu(vector_irq, cpu)[vector] = -1; + /* Mark the inuse vectors */ + for (irq = 0; irq < NR_IRQS; ++irq) { + if (!cpumask_test_cpu(cpu, &irq_cfg[irq].domain)) + continue; + vector = irq_to_vector(irq); + per_cpu(vector_irq, cpu)[vector] = irq; + } +} + +#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG)) + +static enum vector_domain_type { + VECTOR_DOMAIN_NONE, + VECTOR_DOMAIN_PERCPU +} vector_domain_type = VECTOR_DOMAIN_NONE; + +static cpumask_t vector_allocation_domain(int cpu) +{ + if (vector_domain_type == VECTOR_DOMAIN_PERCPU) + return *cpumask_of(cpu); + return CPU_MASK_ALL; +} + +static int __irq_prepare_move(int irq, int cpu) +{ + struct irq_cfg *cfg = &irq_cfg[irq]; + int vector; + cpumask_t domain; + + if (cfg->move_in_progress || cfg->move_cleanup_count) + return -EBUSY; + if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu)) + return -EINVAL; + if (cpumask_test_cpu(cpu, &cfg->domain)) + return 0; + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector < 0) + return -ENOSPC; + cfg->move_in_progress = 1; + cfg->old_domain = cfg->domain; + cfg->vector = IRQ_VECTOR_UNASSIGNED; + cfg->domain = CPU_MASK_NONE; + BUG_ON(__bind_irq_vector(irq, vector, domain)); + return 0; +} + +int irq_prepare_move(int irq, int cpu) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&vector_lock, flags); + ret = __irq_prepare_move(irq, cpu); + spin_unlock_irqrestore(&vector_lock, flags); + return ret; +} + +void irq_complete_move(unsigned irq) +{ + struct irq_cfg *cfg = &irq_cfg[irq]; + cpumask_t cleanup_mask; + int i; + + if (likely(!cfg->move_in_progress)) + return; + + if (unlikely(cpumask_test_cpu(smp_processor_id(), &cfg->old_domain))) + return; + + cpumask_and(&cleanup_mask, &cfg->old_domain, cpu_online_mask); + cfg->move_cleanup_count 
= cpumask_weight(&cleanup_mask); + for_each_cpu(i, &cleanup_mask) + platform_send_ipi(i, IA64_IRQ_MOVE_VECTOR, IA64_IPI_DM_INT, 0); + cfg->move_in_progress = 0; +} + +static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) +{ + int me = smp_processor_id(); + ia64_vector vector; + unsigned long flags; + + for (vector = IA64_FIRST_DEVICE_VECTOR; + vector < IA64_LAST_DEVICE_VECTOR; vector++) { + int irq; + struct irq_desc *desc; + struct irq_cfg *cfg; + irq = __this_cpu_read(vector_irq[vector]); + if (irq < 0) + continue; + + desc = irq_to_desc(irq); + cfg = irq_cfg + irq; + raw_spin_lock(&desc->lock); + if (!cfg->move_cleanup_count) + goto unlock; + + if (!cpumask_test_cpu(me, &cfg->old_domain)) + goto unlock; + + spin_lock_irqsave(&vector_lock, flags); + __this_cpu_write(vector_irq[vector], -1); + cpumask_clear_cpu(me, &vector_table[vector]); + spin_unlock_irqrestore(&vector_lock, flags); + cfg->move_cleanup_count--; + unlock: + raw_spin_unlock(&desc->lock); + } + return IRQ_HANDLED; +} + +static struct irqaction irq_move_irqaction = { + .handler = smp_irq_move_cleanup_interrupt, + .name = "irq_move" +}; + +static int __init parse_vector_domain(char *arg) +{ + if (!arg) + return -EINVAL; + if (!strcmp(arg, "percpu")) { + vector_domain_type = VECTOR_DOMAIN_PERCPU; + no_int_routing = 1; + } + return 0; +} +early_param("vector", parse_vector_domain); +#else +static cpumask_t vector_allocation_domain(int cpu) +{ + return CPU_MASK_ALL; +} +#endif + + +void destroy_and_reserve_irq(unsigned int irq) +{ + unsigned long flags; + + irq_init_desc(irq); + spin_lock_irqsave(&vector_lock, flags); + __clear_irq_vector(irq); + irq_status[irq] = IRQ_RSVD; + spin_unlock_irqrestore(&vector_lock, flags); +} + +/* + * Dynamic irq allocate and deallocation for MSI + */ +int create_irq(void) +{ + unsigned long flags; + int irq, vector, cpu; + cpumask_t domain = CPU_MASK_NONE; + + irq = vector = -ENOSPC; + spin_lock_irqsave(&vector_lock, flags); + for_each_online_cpu(cpu) 
/*
 * ia64_handle_irq - dispatch an external interrupt and drain further ones.
 * @vector: first vector to handle
 * @regs:   register state at the time of the interrupt; installed with
 *          set_irq_regs() for the duration of the call
 *
 * Loops until ia64_get_ivr() returns IA64_SPURIOUS_INT_VECTOR, sending an
 * EOI after every vector handled.
 */
void
ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs(regs);
	unsigned long saved_tpr;

#if IRQ_DEBUG
	{
		unsigned long bsp, sp;

		/*
		 * Note: if the interrupt happened while executing in
		 * the context switch routine (ia64_switch_to), we may
		 * get a spurious stack overflow here.  This is
		 * because the register and the memory stack are not
		 * switched atomically.
		 */
		bsp = ia64_getreg(_IA64_REG_AR_BSP);
		sp = ia64_getreg(_IA64_REG_SP);

		/* Warn (rate-limited) when sp and bsp are within 1KB of each other. */
		if ((sp - bsp) < 1024) {
			static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5);

			if (__ratelimit(&ratelimit)) {
				printk("ia64_handle_irq: DANGER: less than "
				       "1KB of free stack space!!\n"
				       "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
			}
		}
	}
#endif /* IRQ_DEBUG */

	/*
	 * Always set TPR to limit maximum interrupt nesting depth to
	 * 16 (without this, it would be ~240, which could easily lead
	 * to kernel stack overflows).
	 */
	irq_enter();
	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
	ia64_srlz_d();
	while (vector != IA64_SPURIOUS_INT_VECTOR) {
		int irq = local_vector_to_irq(vector);

		/*
		 * The TLB-flush and reschedule IPIs are handled right here
		 * and only accounted; they do not go through
		 * generic_handle_irq() and leave TPR untouched.
		 */
		if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
			smp_local_flush_tlb();
			kstat_incr_irq_this_cpu(irq);
		} else if (unlikely(IS_RESCHEDULE(vector))) {
			scheduler_ipi();
			kstat_incr_irq_this_cpu(irq);
		} else {
			/* Raise TPR to this vector while its handler runs. */
			ia64_setreg(_IA64_REG_CR_TPR, vector);
			ia64_srlz_d();

			if (unlikely(irq < 0)) {
				printk(KERN_ERR "%s: Unexpected interrupt "
				       "vector %d on CPU %d is not mapped "
				       "to any IRQ!\n", __func__, vector,
				       smp_processor_id());
			} else
				generic_handle_irq(irq);

			/*
			 * Disable interrupts and send EOI:
			 */
			local_irq_disable();
			ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
		}
		ia64_eoi();
		/* Fetch the next vector; the loop ends on the spurious vector. */
		vector = ia64_get_ivr();
	}
	/*
	 * This must be done *after* the ia64_eoi().  For example, the keyboard softirq
	 * handler needs to be able to wait for further keyboard interrupts, which can't
	 * come through until ia64_eoi() has been done.
	 */
	irq_exit();
	set_irq_regs(old_regs);
}
/*
 * ia64_process_pending_intr - drain the interrupts still pending on this
 * CPU, using the same loop structure as ia64_handle_irq().
 *
 * Differences visible here: the first vector is read from ia64_get_ivr()
 * instead of being passed in, handlers run with a NULL pt_regs, the
 * reschedule IPI is only accounted, and every irq handled this way is
 * cleared from vectors_in_migration[].
 */
void ia64_process_pending_intr(void)
{
	ia64_vector vector;
	unsigned long saved_tpr;
	extern unsigned int vectors_in_migration[NR_IRQS];

	vector = ia64_get_ivr();

	irq_enter();
	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
	ia64_srlz_d();

	 /*
	  * Perform normal interrupt style processing
	  */
	while (vector != IA64_SPURIOUS_INT_VECTOR) {
		int irq = local_vector_to_irq(vector);

		if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
			smp_local_flush_tlb();
			kstat_incr_irq_this_cpu(irq);
		} else if (unlikely(IS_RESCHEDULE(vector))) {
			/* Only counted here; scheduler_ipi() is not called. */
			kstat_incr_irq_this_cpu(irq);
		} else {
			struct pt_regs *old_regs = set_irq_regs(NULL);

			/* Raise TPR to this vector while its handler runs. */
			ia64_setreg(_IA64_REG_CR_TPR, vector);
			ia64_srlz_d();

			/*
			 * Now try calling normal ia64_handle_irq as it would have got called
			 * from a real intr handler. Try passing null for pt_regs, hopefully
			 * it will work. I hope it works!.
			 * Probably could share code.
			 */
			if (unlikely(irq < 0)) {
				printk(KERN_ERR "%s: Unexpected interrupt "
				       "vector %d on CPU %d not being mapped "
				       "to any IRQ!!\n", __func__, vector,
				       smp_processor_id());
			} else {
				/* Handled now, so fixup_irqs() need not replay it. */
				vectors_in_migration[irq]=0;
				generic_handle_irq(irq);
			}
			set_irq_regs(old_regs);

			/*
			 * Disable interrupts and send EOI
			 */
			local_irq_disable();
			ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
		}
		ia64_eoi();
		/* Fetch the next vector; the loop ends on the spurious vector. */
		vector = ia64_get_ivr();
	}
	irq_exit();
}
BUG_ON(bind_irq_vector(irq, vec, CPU_MASK_ALL)); + irq_set_status_flags(irq, IRQ_PER_CPU); + irq_set_chip(irq, &irq_type_ia64_lsapic); + if (action) + setup_irq(irq, action); + irq_set_handler(irq, handle_percpu_irq); +} + +void __init +ia64_native_register_ipi(void) +{ +#ifdef CONFIG_SMP + register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); + register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction); + register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction); +#endif +} + +void __init +init_IRQ (void) +{ +#ifdef CONFIG_ACPI + acpi_boot_init(); +#endif + ia64_register_ipi(); + register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL); +#ifdef CONFIG_SMP +#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG) + if (vector_domain_type != VECTOR_DOMAIN_NONE) + register_percpu_irq(IA64_IRQ_MOVE_VECTOR, &irq_move_irqaction); +#endif +#endif +#ifdef CONFIG_PERFMON + pfm_init_percpu(); +#endif + platform_irq_init(); +} + +void +ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect) +{ + void __iomem *ipi_addr; + unsigned long ipi_data; + unsigned long phys_cpu_id; + + phys_cpu_id = cpu_physical_id(cpu); + + /* + * cpu number is in 8bit ID and 8bit EID + */ + + ipi_data = (delivery_mode << 8) | (vector & 0xff); + ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3)); + + writeq(ipi_data, ipi_addr); +} diff --git a/kernel/arch/ia64/kernel/irq_lsapic.c b/kernel/arch/ia64/kernel/irq_lsapic.c new file mode 100644 index 000000000..1b3a776e5 --- /dev/null +++ b/kernel/arch/ia64/kernel/irq_lsapic.c @@ -0,0 +1,44 @@ +/* + * LSAPIC Interrupt Controller + * + * This takes care of interrupts that are generated by the CPU's + * internal Streamlined Advanced Programmable Interrupt Controller + * (LSAPIC), such as the ITC and IPI interrupts. 
/*
 * irq_chip for interrupts delivered through the LSAPIC.  Startup, shutdown,
 * enable, disable and ack are all no-ops (lsapic_noop_startup() just
 * returns 0); the only callback that does anything is retrigger, which
 * resends the interrupt via ia64_resend_irq().
 */
struct irq_chip irq_type_ia64_lsapic = {
	.name =			"LSAPIC",
	.irq_startup =		lsapic_noop_startup,
	.irq_shutdown =		lsapic_noop,
	.irq_enable =		lsapic_noop,
	.irq_disable =		lsapic_noop,
	.irq_ack =		lsapic_noop,
	.irq_retrigger =	lsapic_retrigger,	/* returns 1 after resending */
};
+ * + * For each entry, the comment is as follows: + * + * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) + * entry offset ----/ / / / / + * entry number ---------/ / / / + * size of the entry -------------/ / / + * vector name -------------------------------------/ / + * interruptions triggering this vector ----------------------/ + * + * The table is 32KB in size and must be aligned on 32KB boundary. + * (The CPU ignores the 15 lower bits of the address) + * + * Table is based upon EAS2.6 (Oct 1999) + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if 0 +# define PSR_DEFAULT_BITS psr.ac +#else +# define PSR_DEFAULT_BITS 0 +#endif + +#if 0 + /* + * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't + * needed for something else before enabling this... + */ +# define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16 +#else +# define DBG_FAULT(i) +#endif + +#include "minstate.h" + +#define FAULT(n) \ + mov r31=pr; \ + mov r19=n;; /* prepare to save predicates */ \ + br.sptk.many dispatch_to_fault_handler + + .section .text..ivt,"ax" + + .align 32768 // align on 32KB boundary + .global ia64_ivt +ia64_ivt: +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) +ENTRY(vhpt_miss) + DBG_FAULT(0) + /* + * The VHPT vector is invoked when the TLB entry for the virtual page table + * is missing. This happens only as a result of a previous + * (the "original") TLB miss, which may either be caused by an instruction + * fetch or a data access (or non-access). + * + * What we do here is normal TLB miss handing for the _original_ miss, + * followed by inserting the TLB entry for the virtual page table page + * that the VHPT walker was attempting to access. 
The latter gets + * inserted as long as page table entry above pte level have valid + * mappings for the faulting address. The TLB entry for the original + * miss gets inserted only if the pte entry indicates that the page is + * present. + * + * do_page_fault gets invoked in the following cases: + * - the faulting virtual address uses unimplemented address bits + * - the faulting virtual address has no valid page table mapping + */ + MOV_FROM_IFA(r16) // get address that caused the TLB miss +#ifdef CONFIG_HUGETLB_PAGE + movl r18=PAGE_SHIFT + MOV_FROM_ITIR(r25) +#endif + ;; + RSM_PSR_DT // use physical addressing for data + mov r31=pr // save the predicate registers + mov r19=IA64_KR(PT_BASE) // get page table base address + shl r21=r16,3 // shift bit 60 into sign bit + shr.u r17=r16,61 // get the region number into r17 + ;; + shr.u r22=r21,3 +#ifdef CONFIG_HUGETLB_PAGE + extr.u r26=r25,2,6 + ;; + cmp.ne p8,p0=r18,r26 + sub r27=r26,r18 + ;; +(p8) dep r25=r18,r25,2,6 +(p8) shr r22=r22,r27 +#endif + ;; + cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? + shr.u r18=r22,PGDIR_SHIFT // get bottom portion of pgd index bit + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + + srlz.d + LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir + + .pred.rel "mutex", p6, p7 +(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT +(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? +#if CONFIG_PGTABLE_LEVELS == 4 + shr.u r28=r22,PUD_SHIFT // shift pud index into position +#else + shr.u r18=r22,PMD_SHIFT // shift pmd index into position +#endif + ;; + ld8 r17=[r17] // get *pgd (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? 
+#if CONFIG_PGTABLE_LEVELS == 4 + dep r28=r28,r17,3,(PAGE_SHIFT-3) // r28=pud_offset(pgd,addr) + ;; + shr.u r18=r22,PMD_SHIFT // shift pmd index into position +(p7) ld8 r29=[r28] // get *pud (may be 0) + ;; +(p7) cmp.eq.or.andcm p6,p7=r29,r0 // was pud_present(*pud) == NULL? + dep r17=r18,r29,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) +#else + dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pgd,addr) +#endif + ;; +(p7) ld8 r20=[r17] // get *pmd (may be 0) + shr.u r19=r22,PAGE_SHIFT // shift pte index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was pmd_present(*pmd) == NULL? + dep r21=r19,r20,3,(PAGE_SHIFT-3) // r21=pte_offset(pmd,addr) + ;; +(p7) ld8 r18=[r21] // read *pte + MOV_FROM_ISR(r19) // cr.isr bit 32 tells us if this is an insn miss + ;; +(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? + MOV_FROM_IHA(r22) // get the VHPT address that caused the TLB miss + ;; // avoid RAW on p7 +(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? + dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address + ;; + ITC_I_AND_D(p10, p11, r18, r24) // insert the instruction TLB entry and + // insert the data TLB entry +(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) + MOV_TO_IFA(r22, r24) + +#ifdef CONFIG_HUGETLB_PAGE + MOV_TO_ITIR(p8, r25, r24) // change to default page-size for VHPT +#endif + + /* + * Now compute and insert the TLB entry for the virtual page table. We never + * execute in a page table page so there is no need to set the exception deferral + * bit. + */ + adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23 + ;; + ITC_D(p7, r24, r25) + ;; +#ifdef CONFIG_SMP + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + /* + * Re-check pagetable entry. If they changed, we may have received a ptc.g + * between reading the pagetable and the "itc". 
If so, flush the entry we + * inserted and retry. At this point, we have: + * + * r28 = equivalent of pud_offset(pgd, ifa) + * r17 = equivalent of pmd_offset(pud, ifa) + * r21 = equivalent of pte_offset(pmd, ifa) + * + * r29 = *pud + * r20 = *pmd + * r18 = *pte + */ + ld8 r25=[r21] // read *pte again + ld8 r26=[r17] // read *pmd again +#if CONFIG_PGTABLE_LEVELS == 4 + ld8 r19=[r28] // read *pud again +#endif + cmp.ne p6,p7=r0,r0 + ;; + cmp.ne.or.andcm p6,p7=r26,r20 // did *pmd change +#if CONFIG_PGTABLE_LEVELS == 4 + cmp.ne.or.andcm p6,p7=r19,r29 // did *pud change +#endif + mov r27=PAGE_SHIFT<<2 + ;; +(p6) ptc.l r22,r27 // purge PTE page translation +(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did *pte change + ;; +(p6) ptc.l r16,r27 // purge translation +#endif + + mov pr=r31,-1 // restore predicate registers + RFI +END(vhpt_miss) + + .org ia64_ivt+0x400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0400 Entry 1 (size 64 bundles) ITLB (21) +ENTRY(itlb_miss) + DBG_FAULT(1) + /* + * The ITLB handler accesses the PTE via the virtually mapped linear + * page table. If a nested TLB miss occurs, we switch into physical + * mode, walk the page table, and then re-execute the PTE read and + * go on normally after that. + */ + MOV_FROM_IFA(r16) // get virtual address + mov r29=b0 // save b0 + mov r31=pr // save predicates +.itlb_fault: + MOV_FROM_IHA(r17) // get virtual address of PTE + movl r30=1f // load nested fault continuation point + ;; +1: ld8 r18=[r17] // read *pte + ;; + mov b0=r29 + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? 
+(p6) br.cond.spnt page_fault + ;; + ITC_I(p0, r18, r19) + ;; +#ifdef CONFIG_SMP + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r19=[r17] // read *pte again and see if same + mov r20=PAGE_SHIFT<<2 // setup page size for purge + ;; + cmp.ne p7,p0=r18,r19 + ;; +(p7) ptc.l r16,r20 +#endif + mov pr=r31,-1 + RFI +END(itlb_miss) + + .org ia64_ivt+0x0800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) +ENTRY(dtlb_miss) + DBG_FAULT(2) + /* + * The DTLB handler accesses the PTE via the virtually mapped linear + * page table. If a nested TLB miss occurs, we switch into physical + * mode, walk the page table, and then re-execute the PTE read and + * go on normally after that. + */ + MOV_FROM_IFA(r16) // get virtual address + mov r29=b0 // save b0 + mov r31=pr // save predicates +dtlb_fault: + MOV_FROM_IHA(r17) // get virtual address of PTE + movl r30=1f // load nested fault continuation point + ;; +1: ld8 r18=[r17] // read *pte + ;; + mov b0=r29 + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? 
+(p6) br.cond.spnt page_fault + ;; + ITC_D(p0, r18, r19) + ;; +#ifdef CONFIG_SMP + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r19=[r17] // read *pte again and see if same + mov r20=PAGE_SHIFT<<2 // setup page size for purge + ;; + cmp.ne p7,p0=r18,r19 + ;; +(p7) ptc.l r16,r20 +#endif + mov pr=r31,-1 + RFI +END(dtlb_miss) + + .org ia64_ivt+0x0c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) +ENTRY(alt_itlb_miss) + DBG_FAULT(3) + MOV_FROM_IFA(r16) // get address that caused the TLB miss + movl r17=PAGE_KERNEL + MOV_FROM_IPSR(p0, r21) + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r31=pr + ;; +#ifdef CONFIG_DISABLE_VHPT + shr.u r22=r16,61 // get the region number into r22 + ;; + cmp.gt p8,p0=6,r22 // user mode + ;; + THASH(p8, r17, r16, r23) + ;; + MOV_TO_IHA(p8, r17, r23) +(p8) mov r29=b0 // save b0 +(p8) br.cond.dptk .itlb_fault +#endif + extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + shr.u r18=r16,57 // move address bit 61 to bit 4 + ;; + andcm r18=0x10,r18 // bit 4=~address-bit(61) + cmp.ne p8,p0=r0,r23 // psr.cpl != 0? 
+ or r19=r17,r19 // insert PTE control bits into r19 + ;; + or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 +(p8) br.cond.spnt page_fault + ;; + ITC_I(p0, r19, r18) // insert the TLB entry + mov pr=r31,-1 + RFI +END(alt_itlb_miss) + + .org ia64_ivt+0x1000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) +ENTRY(alt_dtlb_miss) + DBG_FAULT(4) + MOV_FROM_IFA(r16) // get address that caused the TLB miss + movl r17=PAGE_KERNEL + MOV_FROM_ISR(r20) + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + MOV_FROM_IPSR(p0, r21) + mov r31=pr + mov r24=PERCPU_ADDR + ;; +#ifdef CONFIG_DISABLE_VHPT + shr.u r22=r16,61 // get the region number into r22 + ;; + cmp.gt p8,p0=6,r22 // access to region 0-5 + ;; + THASH(p8, r17, r16, r25) + ;; + MOV_TO_IHA(p8, r17, r25) +(p8) mov r29=b0 // save b0 +(p8) br.cond.dptk dtlb_fault +#endif + cmp.ge p10,p11=r16,r24 // access to per_cpu_data? + tbit.z p12,p0=r16,61 // access to region 6? + mov r25=PERCPU_PAGE_SHIFT << 2 + mov r26=PERCPU_PAGE_SIZE + nop.m 0 + nop.b 0 + ;; +(p10) mov r19=IA64_KR(PER_CPU_DATA) +(p11) and r19=r19,r16 // clear non-ppn fields + extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl + and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field + tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? + tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? 
+ ;; +(p10) sub r19=r19,r26 + MOV_TO_ITIR(p10, r25, r24) + cmp.ne p8,p0=r0,r23 +(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field +(p12) dep r17=-1,r17,4,1 // set ma=UC for region 6 addr +(p8) br.cond.spnt page_fault + + dep r21=-1,r21,IA64_PSR_ED_BIT,1 + ;; + or r19=r19,r17 // insert PTE control bits into r19 + MOV_TO_IPSR(p6, r21, r24) + ;; + ITC_D(p7, r19, r18) // insert the TLB entry + mov pr=r31,-1 + RFI +END(alt_dtlb_miss) + + .org ia64_ivt+0x1400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) +ENTRY(nested_dtlb_miss) + /* + * In the absence of kernel bugs, we get here when the virtually mapped linear + * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction + * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page + * table is missing, a nested TLB miss fault is triggered and control is + * transferred to this point. When this happens, we lookup the pte for the + * faulting address by walking the page table in physical mode and return to the + * continuation point passed in register r30 (or call page_fault if the address is + * not mapped). + * + * Input: r16: faulting address + * r29: saved b0 + * r30: continuation address + * r31: saved pr + * + * Output: r17: physical address of PTE of faulting address + * r29: saved b0 + * r30: continuation address + * r31: saved pr + * + * Clobbered: b0, r18, r19, r21, r22, psr.dt (cleared) + */ + RSM_PSR_DT // switch to using physical data addressing + mov r19=IA64_KR(PT_BASE) // get the page table base address + shl r21=r16,3 // shift bit 60 into sign bit + MOV_FROM_ITIR(r18) + ;; + shr.u r17=r16,61 // get the region number into r17 + extr.u r18=r18,2,6 // get the faulting page size + ;; + cmp.eq p6,p7=5,r17 // is faulting address in region 5? 
+ add r22=-PAGE_SHIFT,r18 // adjustment for hugetlb address + add r18=PGDIR_SHIFT-PAGE_SHIFT,r18 + ;; + shr.u r22=r16,r22 + shr.u r18=r16,r18 +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + + srlz.d + LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir + + .pred.rel "mutex", p6, p7 +(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT +(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? +#if CONFIG_PGTABLE_LEVELS == 4 + shr.u r18=r22,PUD_SHIFT // shift pud index into position +#else + shr.u r18=r22,PMD_SHIFT // shift pmd index into position +#endif + ;; + ld8 r17=[r17] // get *pgd (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=p[u|m]d_offset(pgd,addr) + ;; +#if CONFIG_PGTABLE_LEVELS == 4 +(p7) ld8 r17=[r17] // get *pud (may be 0) + shr.u r18=r22,PMD_SHIFT // shift pmd index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pud_present(*pud) == NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) + ;; +#endif +(p7) ld8 r17=[r17] // get *pmd (may be 0) + shr.u r19=r22,PAGE_SHIFT // shift pte index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pmd_present(*pmd) == NULL? 
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // r17=pte_offset(pmd,addr); +(p6) br.cond.spnt page_fault + mov b0=r30 + br.sptk.many b0 // return to continuation point +END(nested_dtlb_miss) + + .org ia64_ivt+0x1800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) +ENTRY(ikey_miss) + DBG_FAULT(6) + FAULT(6) +END(ikey_miss) + + .org ia64_ivt+0x1c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) +ENTRY(dkey_miss) + DBG_FAULT(7) + FAULT(7) +END(dkey_miss) + + .org ia64_ivt+0x2000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) +ENTRY(dirty_bit) + DBG_FAULT(8) + /* + * What we do here is to simply turn on the dirty bit in the PTE. We need to + * update both the page-table and the TLB entry. To efficiently access the PTE, + * we address it through the virtual page table. Most likely, the TLB entry for + * the relevant virtual page table page is still present in the TLB so we can + * normally do this without additional TLB misses. In case the necessary virtual + * page table TLB entry isn't present, we take a nested TLB miss hit where we look + * up the physical address of the L3 PTE and then continue at label 1 below. 
+ */ + MOV_FROM_IFA(r16) // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + ;; + THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault + mov r31=pr // save pr +#ifdef CONFIG_SMP + mov r28=ar.ccv // save ar.ccv + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + mov ar.ccv=r18 // set compare value for cmpxchg + or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits + tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit + ;; +(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only update if page is present + mov r24=PAGE_SHIFT<<2 + ;; +(p6) cmp.eq p6,p7=r26,r18 // Only compare if page is present + ;; + ITC_D(p6, r25, r18) // install updated PTE + ;; + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r18=[r17] // read PTE again + ;; + cmp.eq p6,p7=r18,r25 // is it same as the newly installed + ;; +(p7) ptc.l r16,r24 + mov b0=r29 // restore b0 + mov ar.ccv=r28 +#else + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + ITC_D(p0, r18, r16) // install updated PTE +#endif + mov pr=r31,-1 // restore pr + RFI +END(dirty_bit) + + .org ia64_ivt+0x2400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) +ENTRY(iaccess_bit) + DBG_FAULT(9) + // Like Entry 8, except for instruction access + MOV_FROM_IFA(r16) // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + mov r31=pr // save predicates +#ifdef CONFIG_ITANIUM + /* + * Erratum 10 (IFA may contain incorrect address) has "NoFix" status. 
+ */ + MOV_FROM_IPSR(p0, r17) + ;; + MOV_FROM_IIP(r18) + tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set? + ;; +(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa +#endif /* CONFIG_ITANIUM */ + ;; + THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault) +#ifdef CONFIG_SMP + mov r28=ar.ccv // save ar.ccv + ;; +1: ld8 r18=[r17] + ;; + mov ar.ccv=r18 // set compare value for cmpxchg + or r25=_PAGE_A,r18 // set the accessed bit + tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit + ;; +(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page present + mov r24=PAGE_SHIFT<<2 + ;; +(p6) cmp.eq p6,p7=r26,r18 // Only if page present + ;; + ITC_I(p6, r25, r26) // install updated PTE + ;; + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r18=[r17] // read PTE again + ;; + cmp.eq p6,p7=r18,r25 // is it same as the newly installed + ;; +(p7) ptc.l r16,r24 + mov b0=r29 // restore b0 + mov ar.ccv=r28 +#else /* !CONFIG_SMP */ + ;; +1: ld8 r18=[r17] + ;; + or r18=_PAGE_A,r18 // set the accessed bit + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + ITC_I(p0, r18, r16) // install updated PTE +#endif /* !CONFIG_SMP */ + mov pr=r31,-1 + RFI +END(iaccess_bit) + + .org ia64_ivt+0x2800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) +ENTRY(daccess_bit) + DBG_FAULT(10) + // Like Entry 8, except for data access + MOV_FROM_IFA(r16) // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + ;; + THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE + mov r31=pr + mov r29=b0 // save b0 in case of nested fault) +#ifdef CONFIG_SMP + mov r28=ar.ccv // save ar.ccv + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + mov 
ar.ccv=r18 // set compare value for cmpxchg + or r25=_PAGE_A,r18 // set the accessed bit + tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit + ;; +(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page is present + mov r24=PAGE_SHIFT<<2 + ;; +(p6) cmp.eq p6,p7=r26,r18 // Only if page is present + ;; + ITC_D(p6, r25, r26) // install updated PTE + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + ;; + ld8 r18=[r17] // read PTE again + ;; + cmp.eq p6,p7=r18,r25 // is it same as the newly installed + ;; +(p7) ptc.l r16,r24 + mov ar.ccv=r28 +#else + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_A,r18 // set the accessed bit + ;; + st8 [r17]=r18 // store back updated PTE + ITC_D(p0, r18, r16) // install updated PTE +#endif + mov b0=r29 // restore b0 + mov pr=r31,-1 + RFI +END(daccess_bit) + + .org ia64_ivt+0x2c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) +ENTRY(break_fault) + /* + * The streamlined system call entry/exit paths only save/restore the initial part + * of pt_regs. This implies that the callers of system-calls must adhere to the + * normal procedure calling conventions. + * + * Registers to be saved & restored: + * CR registers: cr.ipsr, cr.iip, cr.ifs + * AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr + * others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15 + * Registers to be restored only: + * r8-r11: output value from the system call. + * + * During system call exit, scratch registers (including r15) are modified/cleared + * to prevent leaking bits from kernel to user level. 
+ */ + DBG_FAULT(11) + mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc) + MOV_FROM_IPSR(p0, r29) // M2 (12 cyc) + mov r31=pr // I0 (2 cyc) + + MOV_FROM_IIM(r17) // M2 (2 cyc) + mov.m r27=ar.rsc // M2 (12 cyc) + mov r18=__IA64_BREAK_SYSCALL // A + + mov.m ar.rsc=0 // M2 + mov.m r21=ar.fpsr // M2 (12 cyc) + mov r19=b6 // I0 (2 cyc) + ;; + mov.m r23=ar.bspstore // M2 (12 cyc) + mov.m r24=ar.rnat // M2 (5 cyc) + mov.i r26=ar.pfs // I0 (2 cyc) + + invala // M0|1 + nop.m 0 // M + mov r20=r1 // A save r1 + + nop.m 0 + movl r30=sys_call_table // X + + MOV_FROM_IIP(r28) // M2 (2 cyc) + cmp.eq p0,p7=r18,r17 // I0 is this a system call? +(p7) br.cond.spnt non_syscall // B no -> + // + // From this point on, we are definitely on the syscall-path + // and we can use (non-banked) scratch registers. + // +/////////////////////////////////////////////////////////////////////// + mov r1=r16 // A move task-pointer to "addl"-addressable reg + mov r2=r16 // A setup r2 for ia64_syscall_setup + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // A r9 = &current_thread_info()->flags + + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 + adds r15=-1024,r15 // A subtract 1024 from syscall number + mov r3=NR_syscalls - 1 + ;; + ld1.bias r17=[r16] // M0|1 r17 = current->thread.on_ustack flag + ld4 r9=[r9] // M0|1 r9 = current_thread_info()->flags + extr.u r8=r29,41,2 // I0 extract ei field from cr.ipsr + + shladd r30=r15,3,r30 // A r30 = sys_call_table + 8*(syscall-1024) + addl r22=IA64_RBS_OFFSET,r1 // A compute base of RBS + cmp.leu p6,p7=r15,r3 // A syscall number in range? + ;; + + lfetch.fault.excl.nt1 [r22] // M0|1 prefetch RBS +(p6) ld8 r30=[r30] // M0|1 load address of syscall entry point + tnat.nz.or p7,p0=r15 // I0 is syscall nr a NaT? + + mov.m ar.bspstore=r22 // M2 switch to kernel RBS + cmp.eq p8,p9=2,r8 // A ipsr.ei==2? 
+ ;; + +(p8) mov r8=0 // A clear ei to 0 +(p7) movl r30=sys_ni_syscall // X + +(p8) adds r28=16,r28 // A switch cr.iip to next bundle +(p9) adds r8=1,r8 // A increment ei to next slot +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + ;; + mov b6=r30 // I0 setup syscall handler branch reg early +#else + nop.i 0 + ;; +#endif + + mov.m r25=ar.unat // M2 (5 cyc) + dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr + adds r15=1024,r15 // A restore original syscall number + // + // If any of the above loads miss in L1D, we'll stall here until + // the data arrives. + // +/////////////////////////////////////////////////////////////////////// + st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting +#else + mov b6=r30 // I0 setup syscall handler branch reg early +#endif + cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already? + + and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit + mov r18=ar.bsp // M2 (12 cyc) +(pKStk) br.cond.spnt .break_fixup // B we're already in kernel-mode -- fix up RBS + ;; +.back_from_break_fixup: +(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A compute base of memory stack + cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? 
+ br.call.sptk.many b7=ia64_syscall_setup // B +1: +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + // mov.m r30=ar.itc is called in advance, and r13 is current + add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A + add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A +(pKStk) br.cond.spnt .skip_accounting // B unlikely skip + ;; + ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // M get last stamp + ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // M time at leave + ;; + ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // M cumulated stime + ld8 r21=[r17] // M cumulated utime + sub r22=r19,r18 // A stime before leave + ;; + st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // M update stamp + sub r18=r30,r19 // A elapsed time in user + ;; + add r20=r20,r22 // A sum stime + add r21=r21,r18 // A sum utime + ;; + st8 [r16]=r20 // M update stime + st8 [r17]=r21 // M update utime + ;; +.skip_accounting: +#endif + mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 + nop 0 + BSW_1(r2, r14) // B (6 cyc) regs are saved, switch to bank 1 + ;; + + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r16) // M2 now it's safe to re-enable intr.-collection + // M0 ensure interruption collection is on + movl r3=ia64_ret_from_syscall // X + ;; + mov rp=r3 // I0 set the real return addr +(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT + + SSM_PSR_I(p15, p15, r16) // M2 restore psr.i +(p14) br.call.sptk.many b6=b6 // B invoke syscall-handler (ignore return addr) + br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic + // NOT REACHED +/////////////////////////////////////////////////////////////////////// + // On entry, we optimistically assumed that we're coming from user-space. 
+ // For the rare cases where a system-call is done from within the kernel, + // we fix things up at this point: +.break_fixup: + add r1=-IA64_PT_REGS_SIZE,sp // A allocate space for pt_regs structure + mov ar.rnat=r24 // M2 restore kernel's AR.RNAT + ;; + mov ar.bspstore=r23 // M2 restore kernel's AR.BSPSTORE + br.cond.sptk .back_from_break_fixup +END(break_fault) + + .org ia64_ivt+0x3000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) +ENTRY(interrupt) + /* interrupt handler has become too big to fit this area. */ + br.sptk.many __interrupt +END(interrupt) + + .org ia64_ivt+0x3400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3400 Entry 13 (size 64 bundles) Reserved + DBG_FAULT(13) + FAULT(13) + + .org ia64_ivt+0x3800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3800 Entry 14 (size 64 bundles) Reserved + DBG_FAULT(14) + FAULT(14) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + * + * ia64_syscall_setup() is a separate subroutine so that it can + * allocate stacked registers so it can safely demine any + * potential NaT values from the input registers. 
+ * + * On entry: + * - executing on bank 0 or bank 1 register set (doesn't matter) + * - r1: stack pointer + * - r2: current task pointer + * - r3: preserved + * - r11: original contents (saved ar.pfs to be saved) + * - r12: original contents (sp to be saved) + * - r13: original contents (tp to be saved) + * - r15: original contents (syscall # to be saved) + * - r18: saved bsp (after switching to kernel stack) + * - r19: saved b6 + * - r20: saved r1 (gp) + * - r21: saved ar.fpsr + * - r22: kernel's register backing store base (krbs_base) + * - r23: saved ar.bspstore + * - r24: saved ar.rnat + * - r25: saved ar.unat + * - r26: saved ar.pfs + * - r27: saved ar.rsc + * - r28: saved cr.iip + * - r29: saved cr.ipsr + * - r30: ar.itc for accounting (don't touch) + * - r31: saved pr + * - b0: original contents (to be saved) + * On exit: + * - p10: TRUE if syscall is invoked with more than 8 out + * registers or r15's Nat is true + * - r1: kernel's gp + * - r3: preserved (same as on entry) + * - r8: -EINVAL if p10 is true + * - r12: points to kernel stack + * - r13: points to current task + * - r14: preserved (same as on entry) + * - p13: preserved + * - p15: TRUE if interrupts need to be re-enabled + * - ar.fpsr: set to kernel settings + * - b6: preserved (same as on entry) + */ +#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE +GLOBAL_ENTRY(ia64_syscall_setup) +#if PT(B6) != 0 +# error This code assumes that b6 is the first field in pt_regs. 
+#endif + st8 [r1]=r19 // save b6 + add r16=PT(CR_IPSR),r1 // initialize first base pointer + add r17=PT(R11),r1 // initialize second base pointer + ;; + alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable + st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr + tnat.nz p8,p0=in0 + + st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11 + tnat.nz p9,p0=in1 +(pKStk) mov r18=r0 // make sure r18 isn't NaT + ;; + + st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs + st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip + mov r28=b0 // save b0 (2 cyc) + ;; + + st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat + dep r19=0,r19,38,26 // clear all bits but 0..37 [I0] +(p8) mov in0=-1 + ;; + + st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs + extr.u r11=r19,7,7 // I0 // get sol of ar.pfs + and r8=0x7f,r19 // A // get sof of ar.pfs + + st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc + tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0 +(p9) mov in1=-1 + ;; + +(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8 + tnat.nz p10,p0=in2 + add r11=8,r11 + ;; +(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field +(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field + tnat.nz p11,p0=in3 + ;; +(p10) mov in2=-1 + tnat.nz p12,p0=in4 // [I0] +(p11) mov in3=-1 + ;; +(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat +(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore + shl r18=r18,16 // compute ar.rsc to be used for "loadrs" + ;; + st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates + st8 [r17]=r28,PT(R1)-PT(B0) // save b0 + tnat.nz p13,p0=in5 // [I0] + ;; + st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs" + st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1 +(p12) mov in4=-1 + ;; + +.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12 +.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13 +(p13) mov in5=-1 + ;; + st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr + tnat.nz 
p13,p0=in6 + cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 + ;; + mov r8=1 +(p9) tnat.nz p10,p0=r15 + adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch) + + st8.spill [r17]=r15 // save r15 + tnat.nz p8,p0=in7 + nop.i 0 + + mov r13=r2 // establish `current' + movl r1=__gp // establish kernel global pointer + ;; + st8 [r16]=r8 // ensure pt_regs.r8 != 0 (see handle_syscall_error) +(p13) mov in6=-1 +(p8) mov in7=-1 + + cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 + movl r17=FPSR_DEFAULT + ;; + mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value +(p10) mov r8=-EINVAL + br.ret.sptk.many b7 +END(ia64_syscall_setup) +#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ + + .org ia64_ivt+0x3c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3c00 Entry 15 (size 64 bundles) Reserved + DBG_FAULT(15) + FAULT(15) + + .org ia64_ivt+0x4000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4000 Entry 16 (size 64 bundles) Reserved + DBG_FAULT(16) + FAULT(16) + +#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE) + /* + * There is no particular reason for this code to be here, other than + * that there happens to be space here that would go unused otherwise. + * If this fault ever gets "unreserved", simply moved the following + * code to a more suitable spot... + * + * account_sys_enter is called from SAVE_MIN* macros if accounting is + * enabled and if the macro is entered from user mode. 
+ */ +GLOBAL_ENTRY(account_sys_enter) + // mov.m r20=ar.itc is called in advance, and r13 is current + add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 + add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 + ;; + ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel + ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at left from kernel + ;; + ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime + ld8 r21=[r17] // cumulated utime + sub r22=r19,r18 // stime before leave kernel + ;; + st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP // update stamp + sub r18=r20,r19 // elapsed time in user mode + ;; + add r23=r23,r22 // sum stime + add r21=r21,r18 // sum utime + ;; + st8 [r16]=r23 // update stime + st8 [r17]=r21 // update utime + ;; + br.ret.sptk.many rp +END(account_sys_enter) +#endif + + .org ia64_ivt+0x4400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4400 Entry 17 (size 64 bundles) Reserved + DBG_FAULT(17) + FAULT(17) + + .org ia64_ivt+0x4800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4800 Entry 18 (size 64 bundles) Reserved + DBG_FAULT(18) + FAULT(18) + + .org ia64_ivt+0x4c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4c00 Entry 19 (size 64 bundles) Reserved + DBG_FAULT(19) + FAULT(19) + +// +// --- End of long entries, Beginning of short entries +// + + .org ia64_ivt+0x5000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) +ENTRY(page_not_present) + DBG_FAULT(20) + MOV_FROM_IFA(r16) + RSM_PSR_DT + /* + * The Linux page fault handler doesn't expect non-present pages to be in + * the TLB. Flush the existing entry now, so we meet that expectation. 
+ */ + mov r17=PAGE_SHIFT<<2 + ;; + ptc.l r16,r17 + ;; + mov r31=pr + srlz.d + br.sptk.many page_fault +END(page_not_present) + + .org ia64_ivt+0x5100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) +ENTRY(key_permission) + DBG_FAULT(21) + MOV_FROM_IFA(r16) + RSM_PSR_DT + mov r31=pr + ;; + srlz.d + br.sptk.many page_fault +END(key_permission) + + .org ia64_ivt+0x5200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) +ENTRY(iaccess_rights) + DBG_FAULT(22) + MOV_FROM_IFA(r16) + RSM_PSR_DT + mov r31=pr + ;; + srlz.d + br.sptk.many page_fault +END(iaccess_rights) + + .org ia64_ivt+0x5300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) +ENTRY(daccess_rights) + DBG_FAULT(23) + MOV_FROM_IFA(r16) + RSM_PSR_DT + mov r31=pr + ;; + srlz.d + br.sptk.many page_fault +END(daccess_rights) + + .org ia64_ivt+0x5400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) +ENTRY(general_exception) + DBG_FAULT(24) + MOV_FROM_ISR(r16) + mov r31=pr + ;; + cmp4.eq p6,p0=0,r16 +(p6) br.sptk.many dispatch_illegal_op_fault + ;; + mov r19=24 // fault number + br.sptk.many dispatch_to_fault_handler +END(general_exception) + + .org ia64_ivt+0x5500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) +ENTRY(disabled_fp_reg) + DBG_FAULT(25) + rsm psr.dfh // ensure we can access fph + ;; + srlz.d + mov r31=pr + mov r19=25 + br.sptk.many dispatch_to_fault_handler +END(disabled_fp_reg) + + .org ia64_ivt+0x5600 
+///////////////////////////////////////////////////////////////////////////////////////// +// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) +ENTRY(nat_consumption) // lfetch with isr.na set: set the ED bit in ipsr and RFI; anything else: FAULT(26) + DBG_FAULT(26) + + MOV_FROM_IPSR(p0, r16) + MOV_FROM_ISR(r17) + mov r31=pr // save PR + ;; + and r18=0xf,r17 // r18 = cr.isr.code{3:0} + tbit.z p6,p0=r17,IA64_ISR_NA_BIT // p6 = (cr.isr.na == 0) + ;; + cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18 // p6 |= (code{3:0} != LFETCH) + dep r16=-1,r16,IA64_PSR_ED_BIT,1 // set the ED bit in the ipsr copy held in r16 +(p6) br.cond.spnt 1f // branch if (cr.isr.na == 0 || cr.isr.code{3:0} != LFETCH) + ;; + MOV_TO_IPSR(p0, r16, r18) + mov pr=r31,-1 + ;; + RFI + +1: mov pr=r31,-1 // restore predicates before taking the FAULT(26) path + ;; + FAULT(26) +END(nat_consumption) + + .org ia64_ivt+0x5700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5700 Entry 27 (size 16 bundles) Speculation (40) +ENTRY(speculation_vector) + DBG_FAULT(27) + /* + * A [f]chk.[as] instruction needs to take the branch to the recovery code but + * this part of the architecture is not implemented in hardware on some CPUs, such + * as Itanium. Thus, in general we need to emulate the behavior. IIM contains + * the relative target (not yet sign extended). So after sign extending it we + * simply add it to IIP. We also need to reset the EI field of the IPSR to zero, + * i.e., the slot to restart into. 
+ * + * cr.iim contains zero_ext(imm21) + */ + MOV_FROM_IIM(r18) + ;; + MOV_FROM_IIP(r17) + shl r18=r18,43 // put sign bit in position (43=64-21) + ;; + + MOV_FROM_IPSR(p0, r16) + shr r18=r18,39 // sign extend, leaving the offset multiplied by 16 (39=43-4) + ;; + + add r17=r17,r18 // now add the offset + ;; + MOV_TO_IIP(r17, r19) + dep r16=0,r16,41,2 // clear EI + ;; + + MOV_TO_IPSR(p0, r16, r19) + ;; + + RFI +END(speculation_vector) + + .org ia64_ivt+0x5800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5800 Entry 28 (size 16 bundles) Reserved + DBG_FAULT(28) + FAULT(28) + + .org ia64_ivt+0x5900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) +ENTRY(debug_vector) // no vector-specific work: only DBG_FAULT(29) and FAULT(29) + DBG_FAULT(29) + FAULT(29) +END(debug_vector) + + .org ia64_ivt+0x5a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) +ENTRY(unaligned_access) // saves pr in r31, then branches to dispatch_unaligned_handler + DBG_FAULT(30) + mov r31=pr // prepare to save predicates + ;; + br.sptk.many dispatch_unaligned_handler +END(unaligned_access) + + .org ia64_ivt+0x5b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) -- NOTE(review): same number as Unaligned Reference at 0x5a00; confirm +ENTRY(unsupported_data_reference) // no vector-specific work: only DBG_FAULT(31) and FAULT(31) + DBG_FAULT(31) + FAULT(31) +END(unsupported_data_reference) + + .org ia64_ivt+0x5c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) +ENTRY(floating_point_fault) // no vector-specific work: only DBG_FAULT(32) and FAULT(32) + DBG_FAULT(32) + FAULT(32) +END(floating_point_fault) + + .org ia64_ivt+0x5d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) +ENTRY(floating_point_trap) // no vector-specific work: only DBG_FAULT(33) and FAULT(33) + DBG_FAULT(33) + FAULT(33) +END(floating_point_trap) + + .org ia64_ivt+0x5e00 
+///////////////////////////////////////////////////////////////////////////////////////// +// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) -- NOTE(review): 66 is also listed for Entry 33 (Floating Point Trap); confirm this interruption number +ENTRY(lower_privilege_trap) + DBG_FAULT(34) + FAULT(34) +END(lower_privilege_trap) + + .org ia64_ivt+0x5f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) +ENTRY(taken_branch_trap) + DBG_FAULT(35) + FAULT(35) +END(taken_branch_trap) + + .org ia64_ivt+0x6000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) +ENTRY(single_step_trap) + DBG_FAULT(36) + FAULT(36) +END(single_step_trap) + + .org ia64_ivt+0x6100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6100 Entry 37 (size 16 bundles) Reserved (like the reserved entries below: no ENTRY()/END() symbol, only DBG_FAULT(n) and FAULT(n)) + DBG_FAULT(37) + FAULT(37) + + .org ia64_ivt+0x6200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6200 Entry 38 (size 16 bundles) Reserved + DBG_FAULT(38) + FAULT(38) + + .org ia64_ivt+0x6300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6300 Entry 39 (size 16 bundles) Reserved + DBG_FAULT(39) + FAULT(39) + + .org ia64_ivt+0x6400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6400 Entry 40 (size 16 bundles) Reserved + DBG_FAULT(40) + FAULT(40) + + .org ia64_ivt+0x6500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6500 Entry 41 (size 16 bundles) Reserved + DBG_FAULT(41) + FAULT(41) + + .org ia64_ivt+0x6600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6600 Entry 42 (size 16 bundles) Reserved + DBG_FAULT(42) + FAULT(42) + + .org ia64_ivt+0x6700
+///////////////////////////////////////////////////////////////////////////////////////// +// 0x6700 Entry 43 (size 16 bundles) Reserved + DBG_FAULT(43) + FAULT(43) + + .org ia64_ivt+0x6800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6800 Entry 44 (size 16 bundles) Reserved + DBG_FAULT(44) + FAULT(44) + + .org ia64_ivt+0x6900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) +ENTRY(ia32_exception) + DBG_FAULT(45) + FAULT(45) +END(ia32_exception) + + .org ia64_ivt+0x6a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) +ENTRY(ia32_intercept) + DBG_FAULT(46) + FAULT(46) +END(ia32_intercept) + + .org ia64_ivt+0x6b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) +ENTRY(ia32_interrupt) + DBG_FAULT(47) + FAULT(47) +END(ia32_interrupt) + + .org ia64_ivt+0x6c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6c00 Entry 48 (size 16 bundles) Reserved + DBG_FAULT(48) + FAULT(48) + + .org ia64_ivt+0x6d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6d00 Entry 49 (size 16 bundles) Reserved + DBG_FAULT(49) + FAULT(49) + + .org ia64_ivt+0x6e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6e00 Entry 50 (size 16 bundles) Reserved + DBG_FAULT(50) + FAULT(50) + + .org ia64_ivt+0x6f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6f00 Entry 51 (size 16 bundles) Reserved + DBG_FAULT(51) + FAULT(51) + + .org ia64_ivt+0x7000
+///////////////////////////////////////////////////////////////////////////////////////// +// 0x7000 Entry 52 (size 16 bundles) Reserved (like the reserved entries below: no ENTRY()/END() symbol, only DBG_FAULT(n) and FAULT(n)) + DBG_FAULT(52) + FAULT(52) + + .org ia64_ivt+0x7100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7100 Entry 53 (size 16 bundles) Reserved + DBG_FAULT(53) + FAULT(53) + + .org ia64_ivt+0x7200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7200 Entry 54 (size 16 bundles) Reserved + DBG_FAULT(54) + FAULT(54) + + .org ia64_ivt+0x7300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7300 Entry 55 (size 16 bundles) Reserved + DBG_FAULT(55) + FAULT(55) + + .org ia64_ivt+0x7400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7400 Entry 56 (size 16 bundles) Reserved + DBG_FAULT(56) + FAULT(56) + + .org ia64_ivt+0x7500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7500 Entry 57 (size 16 bundles) Reserved + DBG_FAULT(57) + FAULT(57) + + .org ia64_ivt+0x7600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7600 Entry 58 (size 16 bundles) Reserved + DBG_FAULT(58) + FAULT(58) + + .org ia64_ivt+0x7700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7700 Entry 59 (size 16 bundles) Reserved + DBG_FAULT(59) + FAULT(59) + + .org ia64_ivt+0x7800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7800 Entry 60 (size 16 bundles) Reserved + DBG_FAULT(60) + FAULT(60) + + .org ia64_ivt+0x7900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7900 Entry 61 (size 16 bundles) Reserved + DBG_FAULT(61) + FAULT(61) + + .org ia64_ivt+0x7a00
+///////////////////////////////////////////////////////////////////////////////////////// +// 0x7a00 Entry 62 (size 16 bundles) Reserved + DBG_FAULT(62) + FAULT(62) + + .org ia64_ivt+0x7b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7b00 Entry 63 (size 16 bundles) Reserved + DBG_FAULT(63) + FAULT(63) + + .org ia64_ivt+0x7c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7c00 Entry 64 (size 16 bundles) Reserved + DBG_FAULT(64) + FAULT(64) + + .org ia64_ivt+0x7d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7d00 Entry 65 (size 16 bundles) Reserved + DBG_FAULT(65) + FAULT(65) + + .org ia64_ivt+0x7e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7e00 Entry 66 (size 16 bundles) Reserved + DBG_FAULT(66) + FAULT(66) + + .org ia64_ivt+0x7f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7f00 Entry 67 (size 16 bundles) Reserved + DBG_FAULT(67) + FAULT(67) + + //----------------------------------------------------------------------------------- + // call ia64_do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) +ENTRY(page_fault) + SSM_PSR_DT_AND_SRLZ_I + ;; + SAVE_MIN_WITH_COVER + alloc r15=ar.pfs,0,0,3,0 + MOV_FROM_IFA(out0) // first argument + MOV_FROM_ISR(out1) // second argument + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3) + adds r3=8,r2 // set up second base pointer + SSM_PSR_I(p15, p15, r14) // restore psr.i + movl r14=ia64_leave_kernel + ;; + SAVE_REST + mov rp=r14 // rp = ia64_leave_kernel + ;; + adds out2=16,r12 // out2 = pointer to pt_regs + br.call.sptk.many b6=ia64_do_page_fault // ignore return address +END(page_fault) + +ENTRY(non_syscall) + mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER + ;; + SAVE_MIN_WITH_COVER + + // There is no particular reason for this code to be here, other than that + // there happens to be space
here that would go unused otherwise. If this + // fault ever gets "unreserved", simply move the following code to a more + // suitable spot... + + alloc r14=ar.pfs,0,0,2,0 + MOV_FROM_IIM(out0) // first argument + add out1=16,sp // second argument: sp + 16 + adds r3=8,r2 // set up second base pointer for SAVE_REST + + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24) + // guarantee that interruption collection is on + SSM_PSR_I(p15, p15, r15) // restore psr.i + movl r15=ia64_leave_kernel + ;; + SAVE_REST + mov rp=r15 // rp = ia64_leave_kernel + ;; + br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr +END(non_syscall) + +ENTRY(__interrupt) + DBG_FAULT(12) + mov r31=pr // prepare to save predicates + ;; + SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14) + // ensure everybody knows psr.ic is back on + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + SAVE_REST + ;; + MCA_RECOVER_RANGE(interrupt) + alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group + MOV_FROM_IVR(out0, r8) // pass cr.ivr as first arg + add out1=16,sp // pass pointer to pt_regs as second arg + ;; + srlz.d // make sure we see the effect of cr.ivr + movl r14=ia64_leave_kernel + ;; + mov rp=r14 // rp = ia64_leave_kernel + br.call.sptk.many b6=ia64_handle_irq +END(__interrupt) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply move the following code to a more + * suitable spot... + */ + +ENTRY(dispatch_unaligned_handler) + SAVE_MIN_WITH_COVER + ;; + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
+ MOV_FROM_IFA(out0) // first argument + adds out1=16,sp // second argument: sp + 16 + + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) + // guarantee that interruption collection is on + SSM_PSR_I(p15, p15, r3) // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 // rp = ia64_leave_kernel + br.sptk.many ia64_prepare_handle_unaligned // plain branch, not br.call +END(dispatch_unaligned_handler) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply move the following code to a more + * suitable spot... + */ + +ENTRY(dispatch_to_fault_handler) + /* + * Input: + * psr.ic: off + * r19: fault vector number (e.g., 24 for General Exception) + * r31: contains saved predicates (pr) + */ + SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + MOV_FROM_ISR(out1) + MOV_FROM_IFA(out2) + MOV_FROM_IIM(out3) + MOV_FROM_ITIR(out4) + ;; + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0) + // guarantee that interruption collection is on + mov out0=r15 // NOTE(review): presumably the vector number that SAVE_MIN_WITH_COVER_R19 copied from r19 -- confirm in the macro definition + ;; + SSM_PSR_I(p15, p15, r3) // restore psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 // rp = ia64_leave_kernel + br.call.sptk.many b6=ia64_fault +END(dispatch_to_fault_handler) + + /* + * Squatting in this space ... + * + * This special case dispatcher for illegal operation faults allows preserved + * registers to be modified through a callback function (asm only) that is handed + * back from the fault handler in r8. Up to three arguments can be passed to the + * callback function by returning an aggregate with the callback as its first + * element, followed by the arguments.
+ */ +ENTRY(dispatch_illegal_op_fault) + .prologue + .body + SAVE_MIN_WITH_COVER + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) + // guarantee that interruption collection is on + ;; + SSM_PSR_I(p15, p15, r3) // restore psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in insn group + mov out0=ar.ec // single argument: ar.ec + ;; + SAVE_REST + PT_REGS_UNWIND_INFO(0) + ;; + br.call.sptk.many rp=ia64_illegal_op_fault +.ret0: ;; + alloc r14=ar.pfs,0,0,3,0 // must be first in insn group + mov out0=r9 // r9-r11 become the (up to three) callback arguments + mov out1=r10 + mov out2=r11 + movl r15=ia64_leave_kernel + ;; + mov rp=r15 // rp = ia64_leave_kernel + mov b6=r8 // b6 = callback handed back in r8 + ;; + cmp.ne p6,p0=0,r8 // p6 = (a callback was returned) +(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel + br.sptk.many ia64_leave_kernel +END(dispatch_illegal_op_fault) diff --git a/kernel/arch/ia64/kernel/jprobes.S b/kernel/arch/ia64/kernel/jprobes.S new file mode 100644 index 000000000..f69389c7b --- /dev/null +++ b/kernel/arch/ia64/kernel/jprobes.S @@ -0,0 +1,90 @@ +/* + * Jprobe specific operations + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) Intel Corporation, 2005 + * + * 2005-May Rusty Lynch and Anil S Keshavamurthy + * initial implementation + * + * Jprobes (a.k.a.
"jump probes", which are built on top of kprobes) allow a + * probe to be inserted into the beginning of a function call. The fundamental + * difference between a jprobe and a kprobe is that the jprobe handler is executed + * in the same context as the target function, while the kprobe handlers + * are executed in interrupt context. + * + * For jprobes we initially gain control by placing a break point in the + * first instruction of the targeted function. When we catch that specific + * break, we: + * * set the return address to our jprobe_inst_return() function + * * jump to the jprobe handler function + * + * Since we fixed up the return address, the jprobe handler will return to our + * jprobe_inst_return() function, giving us control again. At this point we + * are back in the parent's frame marker, so we do yet another call to our + * jprobe_break() function to fix up the frame marker as it would normally + * exist in the target function. + * + * Our jprobe_return function then transfers control back to kprobes.c by + * executing a break instruction using one of our reserved numbers. When we + * catch that break in kprobes.c, we continue like we do for a normal kprobe + * by single stepping the emulated instruction, and then returning execution + * to the correct location.
+ */ +#include +#include + + /* + * void jprobe_break(void) + * + * Executes break.m with the __IA64_BREAK_JPROBE immediate. + */ + .section .kprobes.text, "ax" +ENTRY(jprobe_break) + break.m __IA64_BREAK_JPROBE +END(jprobe_break) + + /* + * void jprobe_inst_return(void) + * + * Calls jprobe_break, using b0 as the return link. + */ +GLOBAL_ENTRY(jprobe_inst_return) + br.call.sptk.many b0=jprobe_break +END(jprobe_inst_return) + +GLOBAL_ENTRY(invalidate_stacked_regs) + movl r16=invalidate_restore_cfm + ;; + mov b6=r16 + ;; + br.ret.sptk.many b6 // br.ret to the label below instead of to the caller + ;; +invalidate_restore_cfm: + mov r16=ar.rsc // save ar.rsc + ;; + mov ar.rsc=r0 // ar.rsc = 0 while loadrs executes + ;; + loadrs + ;; + mov ar.rsc=r16 // restore the saved ar.rsc + ;; + br.cond.sptk.many rp // back to the caller through rp +END(invalidate_stacked_regs) + +GLOBAL_ENTRY(flush_register_stack) + // flush dirty regs to backing store (must be first in insn group) + flushrs + ;; + br.ret.sptk.many rp +END(flush_register_stack) + diff --git a/kernel/arch/ia64/kernel/kprobes.c b/kernel/arch/ia64/kernel/kprobes.c new file mode 100644 index 000000000..c7c51445c --- /dev/null +++ b/kernel/arch/ia64/kernel/kprobes.c @@ -0,0 +1,1129 @@ +/* + * Kernel Probes (KProbes) + * arch/ia64/kernel/kprobes.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ * + * Copyright (C) IBM Corporation, 2002, 2004 + * Copyright (C) Intel Corporation, 2005 + * + * 2005-Apr Rusty Lynch and Anil S Keshavamurthy + * adapted from i386 + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +extern void jprobe_inst_return(void); + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; + +enum instruction_type {A, I, M, F, B, L, X, u}; /* slot types; u marks the rows of bundle_encoding that have no defined template */ +static enum instruction_type bundle_encoding[32][3] = { /* indexed [template][slot]; the trailing comment on each row is the template number in hex */ + { M, I, I }, /* 00 */ + { M, I, I }, /* 01 */ + { M, I, I }, /* 02 */ + { M, I, I }, /* 03 */ + { M, L, X }, /* 04 */ + { M, L, X }, /* 05 */ + { u, u, u }, /* 06 */ + { u, u, u }, /* 07 */ + { M, M, I }, /* 08 */ + { M, M, I }, /* 09 */ + { M, M, I }, /* 0A */ + { M, M, I }, /* 0B */ + { M, F, I }, /* 0C */ + { M, F, I }, /* 0D */ + { M, M, F }, /* 0E */ + { M, M, F }, /* 0F */ + { M, I, B }, /* 10 */ + { M, I, B }, /* 11 */ + { M, B, B }, /* 12 */ + { M, B, B }, /* 13 */ + { u, u, u }, /* 14 */ + { u, u, u }, /* 15 */ + { B, B, B }, /* 16 */ + { B, B, B }, /* 17 */ + { M, M, B }, /* 18 */ + { M, M, B }, /* 19 */ + { u, u, u }, /* 1A */ + { u, u, u }, /* 1B */ + { M, F, B }, /* 1C */ + { M, F, B }, /* 1D */ + { u, u, u }, /* 1E */ + { u, u, u }, /* 1F */ +}; + +/* + * Insert a long branch code: overwrite the 16-byte bundle containing 'from' + * with an MLX bundle (nop.m + brl) that branches to 'to'. + */ +static void __kprobes set_brl_inst(void *from, void *to) +{ + s64 rel = ((s64) to - (s64) from) >> 4; /* displacement in 16-byte bundles */ + bundle_t *brl; + brl = (bundle_t *) ((u64) from & ~0xf); /* round down to the bundle start */ + brl->quad0.template = 0x05; /* [MLX](stop) */ + brl->quad0.slot0 = NOP_M_INST; /* nop.m 0x0 */ + brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2; /* slot 1 is split across the two quads */ + brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46); + /* brl.cond.sptk.many.clr rel<<4 (qp=0) */ + brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff); +} + +/* + * In this function we check to see if the instruction + * is IP relative
instruction and update the kprobe + * inst flag accordingly + */ +static void __kprobes update_kprobe_inst_flag(uint template, uint slot, + uint major_opcode, + unsigned long kprobe_inst, + struct kprobe *p) +{ + p->ainsn.inst_flag = 0; /* start from a clean state on every call */ + p->ainsn.target_br_reg = 0; + p->ainsn.slot = slot; + + /* Check for Break instruction + * Bits 37:40 Major opcode to be zero + * Bits 27:32 X6 to be zero + * Bits 33:35 X3 to be zero + * (the 0x1FF mask below covers bits 27:35, i.e. X6 and X3 together) + */ + if ((!major_opcode) && (!((kprobe_inst >> 27) & 0x1FF)) ) { + /* is a break instruction */ + p->ainsn.inst_flag |= INST_FLAG_BREAK_INST; + return; + } + + if (bundle_encoding[template][slot] == B) { + switch (major_opcode) { + case INDIRECT_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); /* 3-bit field at bits 6:8 */ + break; + case IP_RELATIVE_PREDICT_OPCODE: + case IP_RELATIVE_BRANCH_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; + break; + case IP_RELATIVE_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + } + } else if (bundle_encoding[template][slot] == X) { + switch (major_opcode) { + case LONG_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + } + } + return; +} + +/* + * In this function we check to see if the instruction + * (qp) cmpx.crel.ctype p1,p2=r2,r3 + * on which we are inserting kprobe is cmp instruction + * with ctype as unc.
+ */ +static uint __kprobes is_cmp_ctype_unc_inst(uint template, uint slot, + uint major_opcode, + unsigned long kprobe_inst) +{ + cmp_inst_t cmp_inst; + uint ctype_unc = 0; + + /* only I- and M-type slots with major opcode 0xC, 0xD or 0xE are examined */ + if (!((bundle_encoding[template][slot] == I) || + (bundle_encoding[template][slot] == M))) + goto out; + + if (!((major_opcode == 0xC) || (major_opcode == 0xD) || + (major_opcode == 0xE))) + goto out; + + cmp_inst.l = kprobe_inst; + if ((cmp_inst.f.x2 == 0) || (cmp_inst.f.x2 == 1)) { + /* Integer compare - Register Register (A6 type)*/ + if ((cmp_inst.f.tb == 0) && (cmp_inst.f.ta == 0) + &&(cmp_inst.f.c == 1)) + ctype_unc = 1; + } else if ((cmp_inst.f.x2 == 2)||(cmp_inst.f.x2 == 3)) { + /* Integer compare - Immediate Register (A8 type)*/ + if ((cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1)) + ctype_unc = 1; + } +out: + return ctype_unc; +} + +/* + * In this function we check to see if the instruction + * on which we are inserting kprobe is supported. + * Returns qp value if supported (the low 6 bits of the instruction, + * or 0 for the unc compare, test bit, fcmp/fclass and float approximation + * cases handled below) + * Returns -EINVAL if unsupported + */ +static int __kprobes unsupported_inst(uint template, uint slot, + uint major_opcode, + unsigned long kprobe_inst, + unsigned long addr) +{ + int qp; + + qp = kprobe_inst & 0x3f; + if (is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst)) { + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on cmp unc " + "instruction on slot 1 at <0x%lx> " + "is not supported\n", addr); + return -EINVAL; + + } + qp = 0; + } + else if (bundle_encoding[template][slot] == I) { + if (major_opcode == 0) { + /* + * Check for Integer speculation instruction + * - Bit 33-35 to be equal to 0x1 + */ + if (((kprobe_inst >> 33) & 0x7) == 1) { + printk(KERN_WARNING + "Kprobes on speculation inst at <0x%lx> not supported\n", + addr); + return -EINVAL; + } + /* + * IP relative mov instruction + * - Bit 27-35 to be equal to 0x30 + */ + if (((kprobe_inst >> 27) & 0x1FF) == 0x30) { + printk(KERN_WARNING + "Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n", + addr); + return -EINVAL; + + }
+ } + else if ((major_opcode == 5) && !(kprobe_inst & (0xFUl << 33)) && + (kprobe_inst & (0x1UL << 12))) { + /* test bit instructions, tbit,tnat,tf + * bit 33-36 to be equal to 0 + * bit 12 to be equal to 1 + */ + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on test bit " + "instruction on slot 1 at <0x%lx> " + "is not supported\n", addr); + return -EINVAL; + } + qp = 0; + } + } + else if (bundle_encoding[template][slot] == B) { + if (major_opcode == 7) { + /* IP-Relative Predict major code is 7 */ + printk(KERN_WARNING "Kprobes on IP-Relative " + "Predict is not supported\n"); + return -EINVAL; + } + else if (major_opcode == 2) { + /* Indirect Predict, major code is 2 + * bit 27-32 to be equal to 0x10 or 0x11 + */ + int x6=(kprobe_inst >> 27) & 0x3F; + if ((x6 == 0x10) || (x6 == 0x11)) { + printk(KERN_WARNING "Kprobes on " + "Indirect Predict is not supported\n"); + return -EINVAL; + } + } + } + /* kernel does not use float instruction, here for safety kprobe + * will judge whether it is fcmp/fclass/float approximation instruction + */ + else if (unlikely(bundle_encoding[template][slot] == F)) { + if ((major_opcode == 4 || major_opcode == 5) && + (kprobe_inst & (0x1 << 12))) { + /* fcmp/fclass unc instruction */ + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on fcmp/fclass " + "instruction on slot 1 at <0x%lx> " + "is not supported\n", addr); + return -EINVAL; + + } + qp = 0; + } + if ((major_opcode == 0 || major_opcode == 1) && + (kprobe_inst & (0x1UL << 33))) { + /* float Approximation instruction */ + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on float Approx " + "instr at <0x%lx> is not supported\n", + addr); + return -EINVAL; + } + qp = 0; + } + } + return qp; +} + +/* + * In this function we override the bundle with + * the break instruction at the given slot.
+ */ +static void __kprobes prepare_break_inst(uint template, uint slot, + uint major_opcode, + unsigned long kprobe_inst, + struct kprobe *p, + int qp) +{ + unsigned long break_inst = BREAK_INST; + bundle_t *bundle = &p->opcode.bundle; + + /* + * Copy the original kprobe_inst qualifying predicate(qp) + * to the break instruction + */ + break_inst |= qp; + + switch (slot) { + case 0: + bundle->quad0.slot0 = break_inst; + break; + case 1: + /* slot 1 is split across the two quads of the bundle */ + bundle->quad0.slot1_p0 = break_inst; + bundle->quad1.slot1_p1 = break_inst >> (64-46); + break; + case 2: + bundle->quad1.slot2 = break_inst; + break; + } + + /* + * Update the instruction flag, so that we can + * emulate the instruction properly after we + * single step on original instruction + */ + update_kprobe_inst_flag(template, slot, major_opcode, kprobe_inst, p); +} + +/* Extract the instruction in 'slot' of 'bundle' and its major opcode into *kprobe_inst and *major_opcode. */ +static void __kprobes get_kprobe_inst(bundle_t *bundle, uint slot, + unsigned long *kprobe_inst, uint *major_opcode) +{ + unsigned long kprobe_inst_p0, kprobe_inst_p1; + unsigned int template; + + template = bundle->quad0.template; /* assigned but not read again in this function */ + + switch (slot) { + case 0: + *major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT); + *kprobe_inst = bundle->quad0.slot0; + break; + case 1: + /* reassemble slot 1 from its two halves */ + *major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT); + kprobe_inst_p0 = bundle->quad0.slot1_p0; + kprobe_inst_p1 = bundle->quad1.slot1_p1; + *kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46)); + break; + case 2: + *major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT); + *kprobe_inst = bundle->quad1.slot2; + break; + } +} + +/* Returns non-zero if the addr is in the Interrupt Vector Table, i.e. in [__start_ivt_text, __end_ivt_text) */ +static int __kprobes in_ivt_functions(unsigned long addr) +{ + return (addr >= (unsigned long)__start_ivt_text + && addr < (unsigned long)__end_ivt_text); +} + +/* Returns 0 if a probe may be placed at addr, -EINVAL if the slot number is invalid or addr lies in the IVT. */ +static int __kprobes valid_kprobe_addr(int template, int slot, + unsigned long addr) +{ + if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) { + printk(KERN_WARNING "Attempting to insert
unaligned kprobe " + "at 0x%lx\n", addr); + return -EINVAL; + } + + if (in_ivt_functions(addr)) { + printk(KERN_WARNING "Kprobes can't be inserted inside " + "IVT functions at 0x%lx\n", addr); + return -EINVAL; + } + + return 0; +} + +/* Push the running kprobe and the current status onto kcb->prev_kprobe[]. */ +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + unsigned int i; + i = atomic_add_return(1, &kcb->prev_kprobe_index); /* NOTE(review): no bound check against the size of prev_kprobe[] is visible here */ + kcb->prev_kprobe[i-1].kp = kprobe_running(); + kcb->prev_kprobe[i-1].status = kcb->kprobe_status; +} + +/* Pop the most recently saved kprobe and status and make them current again. */ +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + unsigned int i; + i = atomic_read(&kcb->prev_kprobe_index); + __this_cpu_write(current_kprobe, kcb->prev_kprobe[i-1].kp); + kcb->kprobe_status = kcb->prev_kprobe[i-1].status; + atomic_sub(1, &kcb->prev_kprobe_index); +} + +/* Record p as this CPU's current kprobe; kcb is not used here. */ +static void __kprobes set_current_kprobe(struct kprobe *p, + struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, p); +} + +/* Empty on purpose: only its entry address is used, as the substitute return address. */ +static void kretprobe_trampoline(void) +{ +} + +/* + * At this point the target function has been tricked into + * returning into our trampoline. Lookup the associated instance + * and then: + * - call the handler function + * - cleanup by marking the instance as unused + * - long jump back to the original return address + */ +int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address = + ((struct fnptr *)kretprobe_trampoline)->ip; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because multiple functions in the call path + * have a return probe installed on them, and/or more than one return
+ * probe was registered for a target function. + * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + * + * First pass: only locate the real return address so that + * regs->cr_iip is already correct when the handlers run. + */ + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + regs->cr_iip = orig_ret_address; + + /* Second pass: run the handlers and recycle the instances. */ + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + + orig_ret_address = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address.
Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + reset_current_kprobe(); + kretprobe_hash_unlock(current, &flags); + preempt_enable_no_resched(); + + /* free the recycled instances only after the hash lock has been dropped */ + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ + return 1; +} + +/* Save the real return address (regs->b0) in ri and redirect the return to kretprobe_trampoline. */ +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *)regs->b0; + + /* Replace the return addr with trampoline addr */ + regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; +} + +/* Check the instruction in the slot is break */ +static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot) +{ + unsigned int major_opcode; + unsigned int template = bundle->quad0.template; + unsigned long kprobe_inst; + + /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ + if (slot == 1 && bundle_encoding[template][1] == L) + slot++; + + /* Get Kprobe probe instruction at given slot*/ + get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); + + /* For break instruction, + * Bits 37:40 Major opcode to be zero + * Bits 27:32 X6 to be zero + * Bits 33:35 X3 to be zero + * (the 0x1FF mask below covers bits 27:35, i.e. X6 and X3 together) + */ + if (major_opcode || ((kprobe_inst >> 27) & 0x1FF)) { + /* Not a break instruction */ + return 0; + } + + /* Is a break instruction */ + return 1; +} + +/* + * In this function, we check whether the target bundle modifies IP or + * it triggers an exception. If so, it cannot be boostable.
+ */ +static int __kprobes can_boost(bundle_t *bundle, uint slot, + unsigned long bundle_addr) +{ + unsigned int template = bundle->quad0.template; + + do { + if (search_exception_tables(bundle_addr + slot) || + __is_ia64_break_inst(bundle, slot)) + return 0; /* exception may occur in this bundle */ + } while ((++slot) < 3); + template &= 0x1e; + if (template >= 0x10 /* including B unit */ || + template == 0x04 /* including X unit */ || + template == 0x06) /* undefined */ + return 0; + + return 1; +} + +/* + * Prepare the long-jump bundle if the copied bundle can be boosted, and + * disable boosting for probes registered on earlier slots of the same + * bundle. + */ +static void __kprobes prepare_booster(struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr & ~0xFULL; + unsigned int slot = (unsigned long)p->addr & 0xf; + struct kprobe *other_kp; + + if (can_boost(&p->ainsn.insn[0].bundle, slot, addr)) { + set_brl_inst(&p->ainsn.insn[1].bundle, (bundle_t *)addr + 1); + p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE; + } + + /* + * Disable boosters in previous slots: walk the addresses from the + * bundle base up to (not including) p->addr and clear the flag on + * any probe found there. + */ + for (; addr < (unsigned long)p->addr; addr++) { + other_kp = get_kprobe((void *)addr); + if (other_kp) + other_kp->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE; + } +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long) p->addr; + unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL); + unsigned long kprobe_inst=0; + unsigned int slot = addr & 0xf, template, major_opcode = 0; + bundle_t *bundle; + int qp; + + bundle = &((kprobe_opcode_t *)kprobe_addr)->bundle; + template = bundle->quad0.template; + + if(valid_kprobe_addr(template, slot, addr)) + return -EINVAL; + + /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ + if (slot == 1 && bundle_encoding[template][1] == L) + slot++; + + /* Get kprobe_inst and major_opcode from the bundle */ + get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); + + qp = unsupported_inst(template, slot, major_opcode, kprobe_inst, addr); + if (qp < 0) + return -EINVAL; + + p->ainsn.insn = 
get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t)); + memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t)); + + prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp); + + prepare_booster(p); + + return 0; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + unsigned long arm_addr; + bundle_t *src, *dest; + + arm_addr = ((unsigned long)p->addr) & ~0xFUL; + dest = &((kprobe_opcode_t *)arm_addr)->bundle; + src = &p->opcode.bundle; + + flush_icache_range((unsigned long)p->ainsn.insn, + (unsigned long)p->ainsn.insn + + sizeof(kprobe_opcode_t) * MAX_INSN_SIZE); + + switch (p->ainsn.slot) { + case 0: + dest->quad0.slot0 = src->quad0.slot0; + break; + case 1: + dest->quad1.slot1_p1 = src->quad1.slot1_p1; + break; + case 2: + dest->quad1.slot2 = src->quad1.slot2; + break; + } + flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + unsigned long arm_addr; + bundle_t *src, *dest; + + arm_addr = ((unsigned long)p->addr) & ~0xFUL; + dest = &((kprobe_opcode_t *)arm_addr)->bundle; + /* p->ainsn.insn contains the original unaltered kprobe_opcode_t */ + src = &p->ainsn.insn->bundle; + switch (p->ainsn.slot) { + case 0: + dest->quad0.slot0 = src->quad0.slot0; + break; + case 1: + dest->quad1.slot1_p1 = src->quad1.slot1_p1; + break; + case 2: + dest->quad1.slot2 = src->quad1.slot2; + break; + } + flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, + p->ainsn.inst_flag & INST_FLAG_BOOSTABLE); + p->ainsn.insn = NULL; + } +} +/* + * We are resuming execution after a single step fault, so the pt_regs + * structure reflects the register state after we executed the instruction + * located in the kprobe (p->ainsn.insn->bundle). 
We still need to adjust + * the ip to point back to the original stack address. To set the IP address + * to original stack address, handle the case where we need to fixup the + * relative IP address and/or fixup branch register. + */ +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long bundle_addr = (unsigned long) (&p->ainsn.insn->bundle); + unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL; + unsigned long template; + int slot = ((unsigned long)p->addr & 0xf); + + template = p->ainsn.insn->bundle.quad0.template; + + if (slot == 1 && bundle_encoding[template][1] == L) + slot = 2; + + if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) { + + if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) { + /* Fix relative IP address */ + regs->cr_iip = (regs->cr_iip - bundle_addr) + + resume_addr; + } + + if (p->ainsn.inst_flag & INST_FLAG_FIX_BRANCH_REG) { + /* + * Fix target branch register, software convention is + * to use either b0 or b6 or b7, so just checking + * only those registers + */ + switch (p->ainsn.target_br_reg) { + case 0: + if ((regs->b0 == bundle_addr) || + (regs->b0 == bundle_addr + 0x10)) { + regs->b0 = (regs->b0 - bundle_addr) + + resume_addr; + } + break; + case 6: + if ((regs->b6 == bundle_addr) || + (regs->b6 == bundle_addr + 0x10)) { + regs->b6 = (regs->b6 - bundle_addr) + + resume_addr; + } + break; + case 7: + if ((regs->b7 == bundle_addr) || + (regs->b7 == bundle_addr + 0x10)) { + regs->b7 = (regs->b7 - bundle_addr) + + resume_addr; + } + break; + } /* end switch */ + } + goto turn_ss_off; + } + + if (slot == 2) { + if (regs->cr_iip == bundle_addr + 0x10) { + regs->cr_iip = resume_addr + 0x10; + } + } else { + if (regs->cr_iip == bundle_addr) { + regs->cr_iip = resume_addr; + } + } + +turn_ss_off: + /* Turn off Single Step bit */ + ia64_psr(regs)->ss = 0; +} + +static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long bundle_addr = (unsigned long) 
&p->ainsn.insn->bundle; + unsigned long slot = (unsigned long)p->addr & 0xf; + + /* single step inline if break instruction */ + if (p->ainsn.inst_flag == INST_FLAG_BREAK_INST) + regs->cr_iip = (unsigned long)p->addr & ~0xFULL; + else + regs->cr_iip = bundle_addr & ~0xFULL; + + if (slot > 2) + slot = 0; + + ia64_psr(regs)->ri = slot; + + /* turn on single stepping */ + ia64_psr(regs)->ss = 1; +} + +static int __kprobes is_ia64_break_inst(struct pt_regs *regs) +{ + unsigned int slot = ia64_psr(regs)->ri; + unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip; + bundle_t bundle; + + memcpy(&bundle, kprobe_addr, sizeof(bundle_t)); + + return __is_ia64_break_inst(&bundle, slot); +} + +static int __kprobes pre_kprobes_handler(struct die_args *args) +{ + struct kprobe *p; + int ret = 0; + struct pt_regs *regs = args->regs; + kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs); + struct kprobe_ctlblk *kcb; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + + /* Handle recursion cases */ + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + if ((kcb->kprobe_status == KPROBE_HIT_SS) && + (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) { + ia64_psr(regs)->ss = 0; + goto no_kprobe; + } + /* We have reentered the pre_kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. 
+ */ + save_previous_kprobe(kcb); + set_current_kprobe(p, kcb); + kprobes_inc_nmissed_count(p); + prepare_ss(p, regs); + kcb->kprobe_status = KPROBE_REENTER; + return 1; + } else if (args->err == __IA64_BREAK_JPROBE) { + /* + * jprobe instrumented function just completed + */ + p = __this_cpu_read(current_kprobe); + if (p->break_handler && p->break_handler(p, regs)) { + goto ss_probe; + } + } else if (!is_ia64_break_inst(regs)) { + /* The breakpoint instruction was removed by + * another cpu right after we hit, no further + * handling of this interrupt is appropriate + */ + ret = 1; + goto no_kprobe; + } else { + /* Not our break */ + goto no_kprobe; + } + } + + p = get_kprobe(addr); + if (!p) { + if (!is_ia64_break_inst(regs)) { + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + */ + ret = 1; + + } + + /* Not one of our break, let kernel handle it */ + goto no_kprobe; + } + + set_current_kprobe(p, kcb); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + if (p->pre_handler && p->pre_handler(p, regs)) + /* + * Our pre-handler is specifically requesting that we just + * do a return. 
This is used for both the jprobe pre-handler + * and the kretprobe trampoline + */ + return 1; + +ss_probe: +#if !defined(CONFIG_PREEMPT) + if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) { + /* Boost up -- we can execute copied instructions directly */ + ia64_psr(regs)->ri = p->ainsn.slot; + regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL; + /* turn single stepping off */ + ia64_psr(regs)->ss = 0; + + reset_current_kprobe(); + preempt_enable_no_resched(); + return 1; + } +#endif + prepare_ss(p, regs); + kcb->kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + preempt_enable_no_resched(); + return ret; +} + +static int __kprobes post_kprobes_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) + return 0; + + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + resume_execution(cur, regs); + + /*Restore back the original saved kprobes variables and continue. */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + goto out; + } + reset_current_kprobe(); + +out: + preempt_enable_no_resched(); + return 1; +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the instruction pointer points back to + * the probe address and allow the page fault handler + * to continue as a normal page fault. 
+ */ + regs->cr_iip = ((unsigned long)cur->addr) & ~0xFULL; + ia64_psr(regs)->ri = ((unsigned long)cur->addr) & 0xf; + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); + preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accounting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (ia64_done_with_exception(regs)) + return 1; + + /* + * Let ia64_do_page_fault() fix it. 
+ */ + break; + default: + break; + } + + return 0; +} + +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + if (args->regs && user_mode(args->regs)) + return ret; + + switch(val) { + case DIE_BREAK: + /* err is break number from ia64_bad_break() */ + if ((args->err >> 12) == (__IA64_BREAK_KPROBE >> 12) + || args->err == __IA64_BREAK_JPROBE + || args->err == 0) + if (pre_kprobes_handler(args)) + ret = NOTIFY_STOP; + break; + case DIE_FAULT: + /* err is vector number from ia64_fault() */ + if (args->err == 36) + if (post_kprobes_handler(args->regs)) + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; +} + +struct param_bsp_cfm { + unsigned long ip; + unsigned long *bsp; + unsigned long cfm; +}; + +static void ia64_get_bsp_cfm(struct unw_frame_info *info, void *arg) +{ + unsigned long ip; + struct param_bsp_cfm *lp = arg; + + do { + unw_get_ip(info, &ip); + if (ip == 0) + break; + if (ip == lp->ip) { + unw_get_bsp(info, (unsigned long*)&lp->bsp); + unw_get_cfm(info, (unsigned long*)&lp->cfm); + return; + } + } while (unw_unwind(info) >= 0); + lp->bsp = NULL; + lp->cfm = 0; + return; +} + +unsigned long arch_deref_entry_point(void *entry) +{ + return ((struct fnptr *)entry)->ip; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + unsigned long addr = arch_deref_entry_point(jp->entry); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct param_bsp_cfm pa; + int bytes; + + /* + * Callee owns the argument space and could overwrite it, eg + * tail call optimization. 
So to be absolutely safe + * we save the argument space before transferring the control + * to instrumented jprobe function which runs in + * the process context + */ + pa.ip = regs->cr_iip; + unw_init_running(ia64_get_bsp_cfm, &pa); + bytes = (char *)ia64_rse_skip_regs(pa.bsp, pa.cfm & 0x3f) + - (char *)pa.bsp; + memcpy( kcb->jprobes_saved_stacked_regs, + pa.bsp, + bytes ); + kcb->bsp = pa.bsp; + kcb->cfm = pa.cfm; + + /* save architectural state */ + kcb->jprobe_saved_regs = *regs; + + /* after rfi, execute the jprobe instrumented function */ + regs->cr_iip = addr & ~0xFULL; + ia64_psr(regs)->ri = addr & 0xf; + regs->r1 = ((struct fnptr *)(jp->entry))->gp; + + /* + * fix the return address to our jprobe_inst_return() function + * in the jprobes.S file + */ + regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip; + + return 1; +} + +/* ia64 does not need this */ +void __kprobes jprobe_return(void) +{ +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + int bytes; + + /* restoring architectural state */ + *regs = kcb->jprobe_saved_regs; + + /* restoring the original argument space */ + flush_register_stack(); + bytes = (char *)ia64_rse_skip_regs(kcb->bsp, kcb->cfm & 0x3f) + - (char *)kcb->bsp; + memcpy( kcb->bsp, + kcb->jprobes_saved_stacked_regs, + bytes ); + invalidate_stacked_regs(); + + preempt_enable_no_resched(); + return 1; +} + +static struct kprobe trampoline_p = { + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + trampoline_p.addr = + (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip; + return register_kprobe(&trampoline_p); +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + if (p->addr == + (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip) + return 1; + + return 0; +} diff --git a/kernel/arch/ia64/kernel/machine_kexec.c b/kernel/arch/ia64/kernel/machine_kexec.c new file mode 100644 index 
000000000..b72cd7a07 --- /dev/null +++ b/kernel/arch/ia64/kernel/machine_kexec.c @@ -0,0 +1,170 @@ +/* + * arch/ia64/kernel/machine_kexec.c + * + * Handle transition of Linux booting another kernel + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2005 Khalid Aziz + * Copyright (C) 2006 Intel Corp, Zou Nan hai + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef void (*relocate_new_kernel_t)( + unsigned long indirection_page, + unsigned long start_address, + struct ia64_boot_param *boot_param, + unsigned long pal_addr) __noreturn; + +struct kimage *ia64_kimage; + +struct resource efi_memmap_res = { + .name = "EFI Memory Map", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +struct resource boot_param_res = { + .name = "Boot parameter", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + + +/* + * Do whatever setup is needed on image and the + * reboot code buffer to allow us to avoid allocations + * later. + * + * Copies relocate_new_kernel_size bytes of relocation code into the + * image's control code page, flushes the icache for that range and + * records the image in ia64_kimage. Always returns 0. + */ +int machine_kexec_prepare(struct kimage *image) +{ + void *control_code_buffer; + const unsigned long *func; + + func = (unsigned long *)&relocate_new_kernel; + /* + * Pre-load control code buffer to minimize work in kexec path. + * func[0], the first word stored at &relocate_new_kernel, is used + * as the copy source. + * NOTE(review): presumably this is the entry address held in the + * ia64 function descriptor - confirm. + */ + control_code_buffer = page_address(image->control_code_page); + memcpy((void *)control_code_buffer, (const void *)func[0], + relocate_new_kernel_size); + flush_icache_range((unsigned long)control_code_buffer, + (unsigned long)control_code_buffer + relocate_new_kernel_size); + ia64_kimage = image; + + return 0; +} + +void machine_kexec_cleanup(struct kimage *image) +{ +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). 
+ * We are past the point of no return, committed to rebooting now. + */ +static void ia64_machine_kexec(struct unw_frame_info *info, void *arg) +{ + struct kimage *image = arg; + relocate_new_kernel_t rnk; + void *pal_addr = efi_get_pal_addr(); + unsigned long code_addr; + int ii; + u64 fp, gp; + ia64_fptr_t *init_handler = (ia64_fptr_t *)ia64_os_init_on_kdump; + + BUG_ON(!image); + code_addr = (unsigned long)page_address(image->control_code_page); + if (image->type == KEXEC_TYPE_CRASH) { + crash_save_this_cpu(); + current->thread.ksp = (__u64)info->sw - 16; + + /* Register noop init handler */ + fp = ia64_tpa(init_handler->fp); + gp = ia64_tpa(ia64_getreg(_IA64_REG_GP)); + ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, fp, gp, 0, fp, gp, 0); + } else { + /* Unregister init handlers of current kernel */ + ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, 0, 0, 0, 0, 0, 0); + } + + /* Unregister mca handler - No more recovery on current kernel */ + ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, 0, 0, 0, 0, 0, 0); + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + + /* Mask CMC and Performance Monitor interrupts */ + ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); + ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); + + /* Mask ITV and Local Redirect Registers */ + ia64_set_itv(1 << 16); + ia64_set_lrr0(1 << 16); + ia64_set_lrr1(1 << 16); + + /* terminate possible nested in-service interrupts */ + for (ii = 0; ii < 16; ii++) + ia64_eoi(); + + /* unmask TPR and clear any pending interrupts */ + ia64_setreg(_IA64_REG_CR_TPR, 0); + ia64_srlz_d(); + while (ia64_get_ivr() != IA64_SPURIOUS_INT_VECTOR) + ia64_eoi(); + platform_kernel_launch_event(); + rnk = (relocate_new_kernel_t)&code_addr; + (*rnk)(image->head, image->start, ia64_boot_param, + GRANULEROUNDDOWN((unsigned long) pal_addr)); + BUG(); +} + +void machine_kexec(struct kimage *image) +{ + BUG_ON(!image); + unw_init_running(ia64_machine_kexec, image); + for(;;); +} + +void arch_crash_save_vmcoreinfo(void) +{ +#if 
defined(CONFIG_DISCONTIGMEM) || defined(CONFIG_SPARSEMEM) + VMCOREINFO_SYMBOL(pgdat_list); + VMCOREINFO_LENGTH(pgdat_list, MAX_NUMNODES); +#endif +#ifdef CONFIG_NUMA + VMCOREINFO_SYMBOL(node_memblk); + VMCOREINFO_LENGTH(node_memblk, NR_NODE_MEMBLKS); + VMCOREINFO_STRUCT_SIZE(node_memblk_s); + VMCOREINFO_OFFSET(node_memblk_s, start_paddr); + VMCOREINFO_OFFSET(node_memblk_s, size); +#endif +#if CONFIG_PGTABLE_LEVELS == 3 + VMCOREINFO_CONFIG(PGTABLE_3); +#elif CONFIG_PGTABLE_LEVELS == 4 + VMCOREINFO_CONFIG(PGTABLE_4); +#endif +} + +unsigned long paddr_vmcoreinfo_note(void) +{ + return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note); +} + diff --git a/kernel/arch/ia64/kernel/machvec.c b/kernel/arch/ia64/kernel/machvec.c new file mode 100644 index 000000000..f5a1e5246 --- /dev/null +++ b/kernel/arch/ia64/kernel/machvec.c @@ -0,0 +1,90 @@ +#include +#include +#include + +#ifdef CONFIG_IA64_GENERIC + +#include +#include + +#include + +struct ia64_machine_vector ia64_mv; +EXPORT_SYMBOL(ia64_mv); + +static struct ia64_machine_vector * __init +lookup_machvec (const char *name) +{ + extern struct ia64_machine_vector machvec_start[]; + extern struct ia64_machine_vector machvec_end[]; + struct ia64_machine_vector *mv; + + for (mv = machvec_start; mv < machvec_end; ++mv) + if (strcmp (mv->name, name) == 0) + return mv; + + return 0; +} + +void __init +machvec_init (const char *name) +{ + struct ia64_machine_vector *mv; + + if (!name) + name = acpi_get_sysname(); + mv = lookup_machvec(name); + if (!mv) + panic("generic kernel failed to find machine vector for" + " platform %s!", name); + + ia64_mv = *mv; + printk(KERN_INFO "booting generic kernel on platform %s\n", name); +} + +void __init +machvec_init_from_cmdline(const char *cmdline) +{ + char str[64]; + const char *start; + char *end; + + if (! 
(start = strstr(cmdline, "machvec=")) ) + return machvec_init(NULL); + + strlcpy(str, start + strlen("machvec="), sizeof(str)); + if ( (end = strchr(str, ' ')) ) + *end = '\0'; + + return machvec_init(str); +} + +#endif /* CONFIG_IA64_GENERIC */ + +void +machvec_setup (char **arg) +{ +} +EXPORT_SYMBOL(machvec_setup); + +void +machvec_timer_interrupt (int irq, void *dev_id) +{ +} +EXPORT_SYMBOL(machvec_timer_interrupt); + +void +machvec_dma_sync_single(struct device *hwdev, dma_addr_t dma_handle, size_t size, + enum dma_data_direction dir) +{ + mb(); +} +EXPORT_SYMBOL(machvec_dma_sync_single); + +void +machvec_dma_sync_sg(struct device *hwdev, struct scatterlist *sg, int n, + enum dma_data_direction dir) +{ + mb(); +} +EXPORT_SYMBOL(machvec_dma_sync_sg); diff --git a/kernel/arch/ia64/kernel/mca.c b/kernel/arch/ia64/kernel/mca.c new file mode 100644 index 000000000..dd5801eb4 --- /dev/null +++ b/kernel/arch/ia64/kernel/mca.c @@ -0,0 +1,2166 @@ +/* + * File: mca.c + * Purpose: Generic MCA handling layer + * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * Copyright (C) 2002 Dell Inc. + * Copyright (C) Matt Domsch + * + * Copyright (C) 2002 Intel + * Copyright (C) Jenna Hall + * + * Copyright (C) 2001 Intel + * Copyright (C) Fred Lewis + * + * Copyright (C) 2000 Intel + * Copyright (C) Chuck Fleckenstein + * + * Copyright (C) 1999, 2004-2008 Silicon Graphics, Inc. + * Copyright (C) Vijay Chander + * + * Copyright (C) 2006 FUJITSU LIMITED + * Copyright (C) Hidetoshi Seto + * + * 2000-03-29 Chuck Fleckenstein + * Fixed PAL/SAL update issues, began MCA bug fixes, logging issues, + * added min save state dump, added INIT handler. + * + * 2001-01-03 Fred Lewis + * Added setup of CMCI and CPEI IRQs, logging of corrected platform + * errors, completed code for logging of corrected & uncorrected + * machine check errors, and updated for conformance with Nov. 2000 + * revision of the SAL 3.0 spec. 
+ * + * 2002-01-04 Jenna Hall + * Aligned MCA stack to 16 bytes, added platform vs. CPU error flag, + * set SAL default return values, changed error record structure to + * linked list, added init call to sal_get_state_info_size(). + * + * 2002-03-25 Matt Domsch + * GUID cleanups. + * + * 2003-04-15 David Mosberger-Tang + * Added INIT backtrace support. + * + * 2003-12-08 Keith Owens + * smp_call_function() must not be called from interrupt context + * (can deadlock on tasklist_lock). + * Use keventd to call smp_call_function(). + * + * 2004-02-01 Keith Owens + * Avoid deadlock when using printk() for MCA and INIT records. + * Delete all record printing code, moved to salinfo_decode in user + * space. Mark variables and functions static where possible. + * Delete dead variables and functions. Reorder to remove the need + * for forward declarations and to consolidate related code. + * + * 2005-08-12 Keith Owens + * Convert MCA/INIT handlers to use per event stacks and SAL/OS + * state. + * + * 2005-10-07 Keith Owens + * Add notify_die() hooks. + * + * 2006-09-15 Hidetoshi Seto + * Add printing support for MCA/INIT. + * + * 2007-04-27 Russ Anderson + * Support multiple cpus going through OS_MCA in the same event. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mca_drv.h" +#include "entry.h" + +#if defined(IA64_MCA_DEBUG_INFO) +# define IA64_MCA_DEBUG(fmt...) printk(fmt) +#else +# define IA64_MCA_DEBUG(fmt...) 
+#endif + +#define NOTIFY_INIT(event, regs, arg, spin) \ +do { \ + if ((notify_die((event), "INIT", (regs), (arg), 0, 0) \ + == NOTIFY_STOP) && ((spin) == 1)) \ + ia64_mca_spin(__func__); \ +} while (0) + +#define NOTIFY_MCA(event, regs, arg, spin) \ +do { \ + if ((notify_die((event), "MCA", (regs), (arg), 0, 0) \ + == NOTIFY_STOP) && ((spin) == 1)) \ + ia64_mca_spin(__func__); \ +} while (0) + +/* Used by mca_asm.S */ +DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */ +DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */ +DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */ +DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */ +DEFINE_PER_CPU(u64, ia64_mca_tr_reload); /* Flag for TR reload */ + +unsigned long __per_cpu_mca[NR_CPUS]; + +/* In mca_asm.S */ +extern void ia64_os_init_dispatch_monarch (void); +extern void ia64_os_init_dispatch_slave (void); + +static int monarch_cpu = -1; + +static ia64_mc_info_t ia64_mc_info; + +#define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */ +#define MIN_CPE_POLL_INTERVAL (2*60*HZ) /* 2 minutes */ +#define CMC_POLL_INTERVAL (1*60*HZ) /* 1 minute */ +#define CPE_HISTORY_LENGTH 5 +#define CMC_HISTORY_LENGTH 5 + +#ifdef CONFIG_ACPI +static struct timer_list cpe_poll_timer; +#endif +static struct timer_list cmc_poll_timer; +/* + * This variable tells whether we are currently in polling mode. + * Start with this in the wrong state so we won't play w/ timers + * before the system is ready. + */ +static int cmc_polling_enabled = 1; + +/* + * Clearing this variable prevents CPE polling from getting activated + * in mca_late_init. Use it if your system doesn't provide a CPEI, + * but encounters problems retrieving CPE logs. This should only be + * necessary for debugging. 
+ */ +static int cpe_poll_enabled = 1; + +extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); + +static int mca_init __initdata; + +/* + * limited & delayed printing support for MCA/INIT handler + */ + +#define mprintk(fmt...) ia64_mca_printk(fmt) + +#define MLOGBUF_SIZE (512+256*NR_CPUS) +#define MLOGBUF_MSGMAX 256 +static char mlogbuf[MLOGBUF_SIZE]; +static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */ +static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */ +static unsigned long mlogbuf_start; +static unsigned long mlogbuf_end; +static unsigned int mlogbuf_finished = 0; +static unsigned long mlogbuf_timestamp = 0; + +static int loglevel_save = -1; +#define BREAK_LOGLEVEL(__console_loglevel) \ + oops_in_progress = 1; \ + if (loglevel_save < 0) \ + loglevel_save = __console_loglevel; \ + __console_loglevel = 15; + +#define RESTORE_LOGLEVEL(__console_loglevel) \ + if (loglevel_save >= 0) { \ + __console_loglevel = loglevel_save; \ + loglevel_save = -1; \ + } \ + mlogbuf_finished = 0; \ + oops_in_progress = 0; + +/* + * Push messages into buffer, print them later if not urgent. + */ +void ia64_mca_printk(const char *fmt, ...) +{ + va_list args; + int printed_len; + char temp_buf[MLOGBUF_MSGMAX]; + char *p; + + va_start(args, fmt); + printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args); + va_end(args); + + /* Copy the output into mlogbuf */ + if (oops_in_progress) { + /* mlogbuf was abandoned, use printk directly instead. */ + printk("%s", temp_buf); + } else { + spin_lock(&mlogbuf_wlock); + for (p = temp_buf; *p; p++) { + unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE; + if (next != mlogbuf_start) { + mlogbuf[mlogbuf_end] = *p; + mlogbuf_end = next; + } else { + /* buffer full */ + break; + } + } + mlogbuf[mlogbuf_end] = '\0'; + spin_unlock(&mlogbuf_wlock); + } +} +EXPORT_SYMBOL(ia64_mca_printk); + +/* + * Print buffered messages. + * NOTE: call this after returning normal context. (ex. 
from salinfod) + */ +void ia64_mlogbuf_dump(void) +{ + char temp_buf[MLOGBUF_MSGMAX]; + char *p; + unsigned long index; + unsigned long flags; + unsigned int printed_len; + + /* Get output from mlogbuf */ + while (mlogbuf_start != mlogbuf_end) { + temp_buf[0] = '\0'; + p = temp_buf; + printed_len = 0; + + spin_lock_irqsave(&mlogbuf_rlock, flags); + + index = mlogbuf_start; + while (index != mlogbuf_end) { + *p = mlogbuf[index]; + index = (index + 1) % MLOGBUF_SIZE; + if (!*p) + break; + p++; + if (++printed_len >= MLOGBUF_MSGMAX - 1) + break; + } + *p = '\0'; + if (temp_buf[0]) + printk("%s", temp_buf); + mlogbuf_start = index; + + mlogbuf_timestamp = 0; + spin_unlock_irqrestore(&mlogbuf_rlock, flags); + } +} +EXPORT_SYMBOL(ia64_mlogbuf_dump); + +/* + * Call this if system is going to down or if immediate flushing messages to + * console is required. (ex. recovery was failed, crash dump is going to be + * invoked, long-wait rendezvous etc.) + * NOTE: this should be called from monarch. + */ +static void ia64_mlogbuf_finish(int wait) +{ + BREAK_LOGLEVEL(console_loglevel); + + spin_lock_init(&mlogbuf_rlock); + ia64_mlogbuf_dump(); + printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, " + "MCA/INIT might be dodgy or fail.\n"); + + if (!wait) + return; + + /* wait for console */ + printk("Delaying for 5 seconds...\n"); + udelay(5*1000000); + + mlogbuf_finished = 1; +} + +/* + * Print buffered messages from INIT context. + */ +static void ia64_mlogbuf_dump_from_init(void) +{ + if (mlogbuf_finished) + return; + + if (mlogbuf_timestamp && + time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) { + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " + " and the system seems to be messed up.\n"); + ia64_mlogbuf_finish(0); + return; + } + + if (!spin_trylock(&mlogbuf_rlock)) { + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. 
" + "Generated messages other than stack dump will be " + "buffered to mlogbuf and will be printed later.\n"); + printk(KERN_ERR "INIT: If messages would not printed after " + "this INIT, wait 30sec and assert INIT again.\n"); + if (!mlogbuf_timestamp) + mlogbuf_timestamp = jiffies; + return; + } + spin_unlock(&mlogbuf_rlock); + ia64_mlogbuf_dump(); +} + +static void inline +ia64_mca_spin(const char *func) +{ + if (monarch_cpu == smp_processor_id()) + ia64_mlogbuf_finish(0); + mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); + while (1) + cpu_relax(); +} +/* + * IA64_MCA log support + */ +#define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */ +#define IA64_MAX_LOG_TYPES 4 /* MCA, INIT, CMC, CPE */ + +typedef struct ia64_state_log_s +{ + spinlock_t isl_lock; + int isl_index; + unsigned long isl_count; + ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */ +} ia64_state_log_t; + +static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES]; + +#define IA64_LOG_ALLOCATE(it, size) \ + {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \ + (ia64_err_rec_t *)alloc_bootmem(size); \ + ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \ + (ia64_err_rec_t *)alloc_bootmem(size);} +#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock) +#define IA64_LOG_LOCK(it) spin_lock_irqsave(&ia64_state_log[it].isl_lock, s) +#define IA64_LOG_UNLOCK(it) spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s) +#define IA64_LOG_NEXT_INDEX(it) ia64_state_log[it].isl_index +#define IA64_LOG_CURR_INDEX(it) 1 - ia64_state_log[it].isl_index +#define IA64_LOG_INDEX_INC(it) \ + {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \ + ia64_state_log[it].isl_count++;} +#define IA64_LOG_INDEX_DEC(it) \ + ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index +#define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)])) +#define IA64_LOG_CURR_BUFFER(it) (void 
*)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)])) +#define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count + +/* + * ia64_log_init + * Reset the OS ia64 log buffer + * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) + * Outputs : None + */ +static void __init +ia64_log_init(int sal_info_type) +{ + u64 max_size = 0; + + IA64_LOG_NEXT_INDEX(sal_info_type) = 0; + IA64_LOG_LOCK_INIT(sal_info_type); + + // SAL will tell us the maximum size of any error record of this type + max_size = ia64_sal_get_state_info_size(sal_info_type); + if (!max_size) + /* alloc_bootmem() doesn't like zero-sized allocations! */ + return; + + // set up OS data structures to hold error info + IA64_LOG_ALLOCATE(sal_info_type, max_size); + memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, max_size); + memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, max_size); +} + +/* + * ia64_log_get + * + * Get the current MCA log from SAL and copy it into the OS log buffer. + * + * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) + * irq_safe whether you can use printk at this point + * Outputs : size (total record length) + * *buffer (ptr to error record) + * + */ +static u64 +ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe) +{ + sal_log_record_header_t *log_buffer; + u64 total_len = 0; + unsigned long s; + + IA64_LOG_LOCK(sal_info_type); + + /* Get the process state information */ + log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type); + + total_len = ia64_sal_get_state_info(sal_info_type, (u64 *)log_buffer); + + if (total_len) { + IA64_LOG_INDEX_INC(sal_info_type); + IA64_LOG_UNLOCK(sal_info_type); + if (irq_safe) { + IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. 
Record length = %ld\n", + __func__, sal_info_type, total_len); + } + *buffer = (u8 *) log_buffer; + return total_len; + } else { + IA64_LOG_UNLOCK(sal_info_type); + return 0; + } +} + +/* + * ia64_mca_log_sal_error_record + * + * This function retrieves a specified error record type from SAL + * and wakes up any processes waiting for error records. + * + * Inputs : sal_info_type (Type of error record MCA/CMC/CPE) + * FIXME: remove MCA and irq_safe. + */ +static void +ia64_mca_log_sal_error_record(int sal_info_type) +{ + u8 *buffer; + sal_log_record_header_t *rh; + u64 size; + int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA; +#ifdef IA64_MCA_DEBUG_INFO + static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" }; +#endif + + size = ia64_log_get(sal_info_type, &buffer, irq_safe); + if (!size) + return; + + salinfo_log_wakeup(sal_info_type, buffer, size, irq_safe); + + if (irq_safe) + IA64_MCA_DEBUG("CPU %d: SAL log contains %s error record\n", + smp_processor_id(), + sal_info_type < ARRAY_SIZE(rec_name) ? rec_name[sal_info_type] : "UNKNOWN"); + + /* Clear logs from corrected errors in case there's no user-level logger */ + rh = (sal_log_record_header_t *)buffer; + if (rh->severity == sal_log_severity_corrected) + ia64_sal_clear_state_info(sal_info_type); +} + +/* + * search_mca_table + * See if the MCA surfaced in an instruction range + * that has been tagged as recoverable. 
+ *
+ * Inputs
+ * first First address range to check
+ * last Last address range to check
+ * ip Instruction pointer, address we are looking for
+ *
+ * Return value:
+ * 1 on Success (in the table)/ 0 on Failure (not in the table)
+ *
+ * Both first and last are inclusive.  start_addr/end_addr are stored
+ * relative to their own location, hence the "&field + field" sums.
+ */
+int
+search_mca_table (const struct mca_table_entry *first,
+		const struct mca_table_entry *last,
+		unsigned long ip)
+{
+	const struct mca_table_entry *curr;
+	u64 curr_start, curr_end;
+
+	curr = first;
+	while (curr <= last) {
+		curr_start = (u64) &curr->start_addr + curr->start_addr;
+		curr_end = (u64) &curr->end_addr + curr->end_addr;
+
+		if ((ip >= curr_start) && (ip <= curr_end)) {
+			return 1;
+		}
+		curr++;
+	}
+	return 0;
+}
+
+/* Given an address, look for it in the mca tables. */
+int mca_recover_range(unsigned long addr)
+{
+	extern struct mca_table_entry __start___mca_table[];
+	extern struct mca_table_entry __stop___mca_table[];
+
+	/* __stop___mca_table is one past the end, the search is inclusive */
+	return search_mca_table(__start___mca_table, __stop___mca_table-1, addr);
+}
+EXPORT_SYMBOL_GPL(mca_recover_range);
+
+#ifdef CONFIG_ACPI
+
+int cpe_vector = -1;
+int ia64_cpe_irq = -1;
+
+/*
+ * ia64_mca_cpe_int_handler
+ *
+ * Corrected platform error interrupt handler.  Keeps the timestamps of
+ * the last CPE_HISTORY_LENGTH interrupts; once that many (including the
+ * current one) fall within one second (HZ jiffies), the interrupt is
+ * disabled and the poll timer is armed instead.  In every case the CPE
+ * record is then fetched and logged.
+ *
+ * Inputs : cpe_irq (interrupt number), arg (unused)
+ * Outputs : IRQ_HANDLED
+ */
+static irqreturn_t
+ia64_mca_cpe_int_handler (int cpe_irq, void *arg)
+{
+	static unsigned long cpe_history[CPE_HISTORY_LENGTH];
+	static int index;
+	static DEFINE_SPINLOCK(cpe_history_lock);
+
+	IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
+		__func__, cpe_irq, smp_processor_id());
+
+	/* SAL spec states this should run w/ interrupts enabled */
+	local_irq_enable();
+
+	spin_lock(&cpe_history_lock);
+	if (!cpe_poll_enabled && cpe_vector >= 0) {
+
+		int i, count = 1; /* we know 1 happened now */
+		unsigned long now = jiffies;
+
+		for (i = 0; i < CPE_HISTORY_LENGTH; i++) {
+			if (now - cpe_history[i] <= HZ)
+				count++;
+		}
+
+		IA64_MCA_DEBUG(KERN_INFO "CPE threshold %d/%d\n", count, CPE_HISTORY_LENGTH);
+		if (count >= CPE_HISTORY_LENGTH) {
+
+			cpe_poll_enabled = 1;
+			spin_unlock(&cpe_history_lock);
+			disable_irq_nosync(local_vector_to_irq(IA64_CPE_VECTOR));
+
+			/*
+			 * Corrected errors will still be corrected, but
+			 * make sure there's a log somewhere that indicates
+			 * something is generating more than we can handle.
+			 */
+			printk(KERN_WARNING "WARNING: Switching to polling CPE handler; error records may be lost\n");
+
+			mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL);
+
+			/* lock already released, get out now */
+			goto out;
+		} else {
+			/* record this interrupt in the circular history */
+			cpe_history[index++] = now;
+			if (index == CPE_HISTORY_LENGTH)
+				index = 0;
+		}
+	}
+	spin_unlock(&cpe_history_lock);
+out:
+	/* Get the CPE error record and log it */
+	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);
+
+	local_irq_disable();
+
+	return IRQ_HANDLED;
+}
+
+#endif /* CONFIG_ACPI */
+
+#ifdef CONFIG_ACPI
+/*
+ * ia64_mca_register_cpev
+ *
+ * Register the corrected platform error vector with SAL.
+ *
+ * Inputs
+ * cpev Corrected Platform Error Vector number
+ *
+ * Outputs
+ * None
+ *
+ * A non-zero SAL status is only reported with printk; nothing is
+ * returned to the caller.
+ */
+void
+ia64_mca_register_cpev (int cpev)
+{
+	/* Register the CPE interrupt vector with SAL */
+	struct ia64_sal_retval isrv;
+
+	isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0);
+	if (isrv.status) {
+		printk(KERN_ERR "Failed to register Corrected Platform "
+			"Error interrupt vector with SAL (status %ld)\n", isrv.status);
+		return;
+	}
+
+	IA64_MCA_DEBUG("%s: corrected platform error "
+		"vector %#x registered\n", __func__, cpev);
+}
+#endif /* CONFIG_ACPI */
+
+/*
+ * ia64_mca_cmc_vector_setup
+ *
+ * Setup the corrected machine check vector register in the processor.
+ * (The interrupt is masked on boot. ia64_mca_late_init unmask this.)
+ * This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ * None
+ *
+ * Outputs
+ * None
+ */
+void
+ia64_mca_cmc_vector_setup (void)
+{
+	cmcv_reg_t cmcv;
+
+	/* build the register value from scratch: masked, vector set */
+	cmcv.cmcv_regval = 0;
+	cmcv.cmcv_mask = 1; /* Mask/disable interrupt at first */
+	cmcv.cmcv_vector = IA64_CMC_VECTOR;
+	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
+
+	IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x registered.\n",
+		__func__, smp_processor_id(), IA64_CMC_VECTOR);
+
+	IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n",
+		__func__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV));
+}
+
+/*
+ * ia64_mca_cmc_vector_disable
+ *
+ * Mask the corrected machine check vector register in the processor.
+ * This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ * dummy(unused)
+ *
+ * Outputs
+ * None
+ */
+static void
+ia64_mca_cmc_vector_disable (void *dummy)
+{
+	cmcv_reg_t cmcv;
+
+	/* read-modify-write: only the mask bit changes */
+	cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV);
+
+	cmcv.cmcv_mask = 1; /* Mask/disable interrupt */
+	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
+
+	IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x disabled.\n",
+		__func__, smp_processor_id(), cmcv.cmcv_vector);
+}
+
+/*
+ * ia64_mca_cmc_vector_enable
+ *
+ * Unmask the corrected machine check vector register in the processor.
+ * This function is invoked on a per-processor basis.
+ *
+ * Inputs
+ * dummy(unused)
+ *
+ * Outputs
+ * None
+ */
+static void
+ia64_mca_cmc_vector_enable (void *dummy)
+{
+	cmcv_reg_t cmcv;
+
+	/* read-modify-write: only the mask bit changes */
+	cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV);
+
+	cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */
+	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
+
+	IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x enabled.\n",
+		__func__, smp_processor_id(), cmcv.cmcv_vector);
+}
+
+/*
+ * ia64_mca_cmc_vector_disable_keventd
+ *
+ * Called via keventd (smp_call_function() is not safe in interrupt context) to
+ * disable the cmc interrupt vector.
+ */ +static void +ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused) +{ + on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 0); +} + +/* + * ia64_mca_cmc_vector_enable_keventd + * + * Called via keventd (smp_call_function() is not safe in interrupt context) to + * enable the cmc interrupt vector. + */ +static void +ia64_mca_cmc_vector_enable_keventd(struct work_struct *unused) +{ + on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 0); +} + +/* + * ia64_mca_wakeup + * + * Send an inter-cpu interrupt to wake-up a particular cpu. + * + * Inputs : cpuid + * Outputs : None + */ +static void +ia64_mca_wakeup(int cpu) +{ + platform_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0); +} + +/* + * ia64_mca_wakeup_all + * + * Wakeup all the slave cpus which have rendez'ed previously. + * + * Inputs : None + * Outputs : None + */ +static void +ia64_mca_wakeup_all(void) +{ + int cpu; + + /* Clear the Rendez checkin flag for all cpus */ + for_each_online_cpu(cpu) { + if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE) + ia64_mca_wakeup(cpu); + } + +} + +/* + * ia64_mca_rendez_interrupt_handler + * + * This is handler used to put slave processors into spinloop + * while the monarch processor does the mca handling and later + * wake each slave up once the monarch is done. The state + * IA64_MCA_RENDEZ_CHECKIN_DONE indicates the cpu is rendez'ed + * in SAL. The state IA64_MCA_RENDEZ_CHECKIN_NOTDONE indicates + * the cpu has come out of OS rendezvous. 
+ *
+ * Inputs : None
+ * Outputs : None
+ *
+ * Runs with local interrupts disabled from entry until just before it
+ * returns.  Each NOTIFY_MCA() below may park the cpu for good if a die
+ * notifier answers NOTIFY_STOP.
+ */
+static irqreturn_t
+ia64_mca_rendez_int_handler(int rendez_irq, void *arg)
+{
+	unsigned long flags;
+	int cpu = smp_processor_id();
+	struct ia64_mca_notify_die nd =
+		{ .sos = NULL, .monarch_cpu = &monarch_cpu };
+
+	/* Mask all interrupts */
+	local_irq_save(flags);
+
+	NOTIFY_MCA(DIE_MCA_RENDZVOUS_ENTER, get_irq_regs(), (long)&nd, 1);
+
+	/* publish the check-in before entering SAL so the monarch can see it */
+	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE;
+	/* Register with the SAL monarch that the slave has
+	 * reached SAL
+	 */
+	ia64_sal_mc_rendez();
+
+	NOTIFY_MCA(DIE_MCA_RENDZVOUS_PROCESS, get_irq_regs(), (long)&nd, 1);
+
+	/* Wait for the monarch cpu to exit. */
+	while (monarch_cpu != -1)
+		cpu_relax(); /* spin until monarch leaves */
+
+	NOTIFY_MCA(DIE_MCA_RENDZVOUS_LEAVE, get_irq_regs(), (long)&nd, 1);
+
+	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+	/* Enable all interrupts */
+	local_irq_restore(flags);
+	return IRQ_HANDLED;
+}
+
+/*
+ * ia64_mca_wakeup_int_handler
+ *
+ * The interrupt handler for processing the inter-cpu interrupt to the
+ * slave cpu which was spinning in the rendez loop.
+ * Since this spinning is done by turning off the interrupts and
+ * polling on the wakeup-interrupt bit in the IRR, there is
+ * nothing useful to be done in the handler.
+ * + * Inputs : wakeup_irq (Wakeup-interrupt bit) + * arg (Interrupt handler specific argument) + * Outputs : None + * + */ +static irqreturn_t +ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg) +{ + return IRQ_HANDLED; +} + +/* Function pointer for extra MCA recovery */ +int (*ia64_mca_ucmc_extension) + (void*,struct ia64_sal_os_state*) + = NULL; + +int +ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *)) +{ + if (ia64_mca_ucmc_extension) + return 1; + + ia64_mca_ucmc_extension = fn; + return 0; +} + +void +ia64_unreg_MCA_extension(void) +{ + if (ia64_mca_ucmc_extension) + ia64_mca_ucmc_extension = NULL; +} + +EXPORT_SYMBOL(ia64_reg_MCA_extension); +EXPORT_SYMBOL(ia64_unreg_MCA_extension); + + +static inline void +copy_reg(const u64 *fr, u64 fnat, unsigned long *tr, unsigned long *tnat) +{ + u64 fslot, tslot, nat; + *tr = *fr; + fslot = ((unsigned long)fr >> 3) & 63; + tslot = ((unsigned long)tr >> 3) & 63; + *tnat &= ~(1UL << tslot); + nat = (fnat >> fslot) & 1; + *tnat |= (nat << tslot); +} + +/* Change the comm field on the MCA/INT task to include the pid that + * was interrupted, it makes for easier debugging. If that pid was 0 + * (swapper or nested MCA/INIT) then use the start of the previous comm + * field suffixed with its cpu. 
+ */ + +static void +ia64_mca_modify_comm(const struct task_struct *previous_current) +{ + char *p, comm[sizeof(current->comm)]; + if (previous_current->pid) + snprintf(comm, sizeof(comm), "%s %d", + current->comm, previous_current->pid); + else { + int l; + if ((p = strchr(previous_current->comm, ' '))) + l = p - previous_current->comm; + else + l = strlen(previous_current->comm); + snprintf(comm, sizeof(comm), "%s %*s %d", + current->comm, l, previous_current->comm, + task_thread_info(previous_current)->cpu); + } + memcpy(current->comm, comm, sizeof(current->comm)); +} + +static void +finish_pt_regs(struct pt_regs *regs, struct ia64_sal_os_state *sos, + unsigned long *nat) +{ + const pal_min_state_area_t *ms = sos->pal_min_state; + const u64 *bank; + + /* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use + * pmsa_{xip,xpsr,xfs} + */ + if (ia64_psr(regs)->ic) { + regs->cr_iip = ms->pmsa_iip; + regs->cr_ipsr = ms->pmsa_ipsr; + regs->cr_ifs = ms->pmsa_ifs; + } else { + regs->cr_iip = ms->pmsa_xip; + regs->cr_ipsr = ms->pmsa_xpsr; + regs->cr_ifs = ms->pmsa_xfs; + + sos->iip = ms->pmsa_iip; + sos->ipsr = ms->pmsa_ipsr; + sos->ifs = ms->pmsa_ifs; + } + regs->pr = ms->pmsa_pr; + regs->b0 = ms->pmsa_br0; + regs->ar_rsc = ms->pmsa_rsc; + copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, ®s->r1, nat); + copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, ®s->r2, nat); + copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, ®s->r3, nat); + copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, ®s->r8, nat); + copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, ®s->r9, nat); + copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, ®s->r10, nat); + copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, ®s->r11, nat); + copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, ®s->r12, nat); + copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, ®s->r13, nat); + copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, ®s->r14, nat); + copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, ®s->r15, nat); + if (ia64_psr(regs)->bn) + bank = 
ms->pmsa_bank1_gr; + else + bank = ms->pmsa_bank0_gr; + copy_reg(&bank[16-16], ms->pmsa_nat_bits, ®s->r16, nat); + copy_reg(&bank[17-16], ms->pmsa_nat_bits, ®s->r17, nat); + copy_reg(&bank[18-16], ms->pmsa_nat_bits, ®s->r18, nat); + copy_reg(&bank[19-16], ms->pmsa_nat_bits, ®s->r19, nat); + copy_reg(&bank[20-16], ms->pmsa_nat_bits, ®s->r20, nat); + copy_reg(&bank[21-16], ms->pmsa_nat_bits, ®s->r21, nat); + copy_reg(&bank[22-16], ms->pmsa_nat_bits, ®s->r22, nat); + copy_reg(&bank[23-16], ms->pmsa_nat_bits, ®s->r23, nat); + copy_reg(&bank[24-16], ms->pmsa_nat_bits, ®s->r24, nat); + copy_reg(&bank[25-16], ms->pmsa_nat_bits, ®s->r25, nat); + copy_reg(&bank[26-16], ms->pmsa_nat_bits, ®s->r26, nat); + copy_reg(&bank[27-16], ms->pmsa_nat_bits, ®s->r27, nat); + copy_reg(&bank[28-16], ms->pmsa_nat_bits, ®s->r28, nat); + copy_reg(&bank[29-16], ms->pmsa_nat_bits, ®s->r29, nat); + copy_reg(&bank[30-16], ms->pmsa_nat_bits, ®s->r30, nat); + copy_reg(&bank[31-16], ms->pmsa_nat_bits, ®s->r31, nat); +} + +/* On entry to this routine, we are running on the per cpu stack, see + * mca_asm.h. The original stack has not been touched by this event. Some of + * the original stack's registers will be in the RBS on this stack. This stack + * also contains a partial pt_regs and switch_stack, the rest of the data is in + * PAL minstate. + * + * The first thing to do is modify the original stack to look like a blocked + * task so we can run backtrace on the original task. Also mark the per cpu + * stack as current to ensure that we use the correct task state, it also means + * that we can do backtrace on the MCA/INIT handler code itself. 
+ */
+
+/*
+ * regs: partial pt_regs built by mca_asm.S on the MCA/INIT stack
+ * sw:   partial switch_stack built by mca_asm.S
+ * sos:  SAL to OS state, gives access to the PAL min-state area
+ * type: event name used in the diagnostic message ("MCA" from
+ *       ia64_mca_handler())
+ *
+ * Returns the task that was current on this cpu before the event.  When
+ * any sanity check fails the original stack is left untouched and only
+ * regs is completed (label no_mod).
+ */
+static struct task_struct *
+ia64_mca_modify_original_stack(struct pt_regs *regs,
+		const struct switch_stack *sw,
+		struct ia64_sal_os_state *sos,
+		const char *type)
+{
+	char *p;
+	ia64_va va;
+	extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */
+	const pal_min_state_area_t *ms = sos->pal_min_state;
+	struct task_struct *previous_current;
+	struct pt_regs *old_regs;
+	struct switch_stack *old_sw;
+	unsigned size = sizeof(struct pt_regs) +
+			sizeof(struct switch_stack) + 16;
+	unsigned long *old_bspstore, *old_bsp;
+	unsigned long *new_bspstore, *new_bsp;
+	unsigned long old_unat, old_rnat, new_rnat, nat;
+	u64 slots, loadrs = regs->loadrs;
+	u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1];
+	u64 ar_bspstore = regs->ar_bspstore;
+	u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16);
+	const char *msg;
+	int cpu = smp_processor_id();
+
+	previous_current = curr_task(cpu);
+	set_curr_task(cpu, current);
+	/* strip any suffix a previous event appended to our own comm */
+	if ((p = strchr(current->comm, ' ')))
+		*p = '\0';
+
+	/* Best effort attempt to cope with MCA/INIT delivered while in
+	 * physical mode.
+	 */
+	regs->cr_ipsr = ms->pmsa_ipsr;
+	if (ia64_psr(regs)->dt == 0) {
+		/* region field 0: rewrite the address into region 7 */
+		va.l = r12;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			r12 = va.l;
+		}
+		va.l = r13;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			r13 = va.l;
+		}
+	}
+	if (ia64_psr(regs)->rt == 0) {
+		va.l = ar_bspstore;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			ar_bspstore = va.l;
+		}
+		va.l = ar_bsp;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			ar_bsp = va.l;
+		}
+	}
+
+	/* mca_asm.S ia64_old_stack() cannot assume that the dirty registers
+	 * have been copied to the old stack, the old stack may fail the
+	 * validation tests below. So ia64_old_stack() must restore the dirty
+	 * registers from the new stack. The old and new bspstore probably
+	 * have different alignments, so loadrs calculated on the old bsp
+	 * cannot be used to restore from the new bsp. Calculate a suitable
+	 * loadrs for the new stack and save it in the new pt_regs, where
+	 * ia64_old_stack() can get it.
+	 */
+	old_bspstore = (unsigned long *)ar_bspstore;
+	old_bsp = (unsigned long *)ar_bsp;
+	slots = ia64_rse_num_regs(old_bspstore, old_bsp);
+	new_bspstore = (unsigned long *)((u64)current + IA64_RBS_OFFSET);
+	new_bsp = ia64_rse_skip_regs(new_bspstore, slots);
+	regs->loadrs = (new_bsp - new_bspstore) * 8 << 16;
+
+	/* Verify the previous stack state before we change it */
+	if (user_mode(regs)) {
+		msg = "occurred in user space";
+		/* previous_current is guaranteed to be valid when the task was
+		 * in user space, so ...
+		 */
+		ia64_mca_modify_comm(previous_current);
+		goto no_mod;
+	}
+
+	if (r13 != sos->prev_IA64_KR_CURRENT) {
+		msg = "inconsistent previous current and r13";
+		goto no_mod;
+	}
+
+	/* the range checks are skipped for ips tagged as recoverable */
+	if (!mca_recover_range(ms->pmsa_iip)) {
+		if ((r12 - r13) >= KERNEL_STACK_SIZE) {
+			msg = "inconsistent r12 and r13";
+			goto no_mod;
+		}
+		if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
+			msg = "inconsistent ar.bspstore and r13";
+			goto no_mod;
+		}
+		va.p = old_bspstore;
+		if (va.f.reg < 5) {
+			msg = "old_bspstore is in the wrong region";
+			goto no_mod;
+		}
+		if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
+			msg = "inconsistent ar.bsp and r13";
+			goto no_mod;
+		}
+		/* pt_regs + switch_stack + copied RBS must fit below r12 */
+		size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
+		if (ar_bspstore + size > r12) {
+			msg = "no room for blocked state";
+			goto no_mod;
+		}
+	}
+
+	ia64_mca_modify_comm(previous_current);
+
+	/* Make the original task look blocked. First stack a struct pt_regs,
+	 * describing the state at the time of interrupt. mca_asm.S built a
+	 * partial pt_regs, copy it and fill in the blanks using minstate.
+	 */
+	p = (char *)r12 - sizeof(*regs);
+	old_regs = (struct pt_regs *)p;
+	memcpy(old_regs, regs, sizeof(*regs));
+	/* the copy keeps the original loadrs, not the one computed above */
+	old_regs->loadrs = loadrs;
+	old_unat = old_regs->ar_unat;
+	finish_pt_regs(old_regs, sos, &old_unat);
+
+	/* Next stack a struct switch_stack. mca_asm.S built a partial
+	 * switch_stack, copy it and fill in the blanks using pt_regs and
+	 * minstate.
+	 *
+	 * In the synthesized switch_stack, b0 points to ia64_leave_kernel,
+	 * ar.pfs is set to 0.
+	 *
+	 * unwind.c::unw_unwind() does special processing for interrupt frames.
+	 * It checks if the PRED_NON_SYSCALL predicate is set, if the predicate
+	 * is clear then unw_unwind() does _not_ adjust bsp over pt_regs. Not
+	 * that this is documented, of course. Set PRED_NON_SYSCALL in the
+	 * switch_stack on the original stack so it will unwind correctly when
+	 * unwind.c reads pt_regs.
+	 *
+	 * thread.ksp is updated to point to the synthesized switch_stack.
+	 */
+	p -= sizeof(struct switch_stack);
+	old_sw = (struct switch_stack *)p;
+	memcpy(old_sw, sw, sizeof(*sw));
+	old_sw->caller_unat = old_unat;
+	old_sw->ar_fpsr = old_regs->ar_fpsr;
+	copy_reg(&ms->pmsa_gr[4-1], ms->pmsa_nat_bits, &old_sw->r4, &old_unat);
+	copy_reg(&ms->pmsa_gr[5-1], ms->pmsa_nat_bits, &old_sw->r5, &old_unat);
+	copy_reg(&ms->pmsa_gr[6-1], ms->pmsa_nat_bits, &old_sw->r6, &old_unat);
+	copy_reg(&ms->pmsa_gr[7-1], ms->pmsa_nat_bits, &old_sw->r7, &old_unat);
+	old_sw->b0 = (u64)ia64_leave_kernel;
+	old_sw->b1 = ms->pmsa_br1;
+	old_sw->ar_pfs = 0;
+	old_sw->ar_unat = old_unat;
+	old_sw->pr = old_regs->pr | (1UL << PRED_NON_SYSCALL);
+	previous_current->thread.ksp = (u64)p - 16;
+
+	/* Finally copy the original stack's registers back to its RBS.
+	 * Registers from ar.bspstore through ar.bsp at the time of the event
+	 * are in the current RBS, copy them back to the original stack. The
+	 * copy must be done register by register because the original bspstore
+	 * and the current one have different alignments, so the saved RNAT
+	 * data occurs at different places.
+	 *
+	 * mca_asm does cover, so the old_bsp already includes all registers at
+	 * the time of MCA/INIT. It also does flushrs, so all registers before
+	 * this function have been written to backing store on the MCA/INIT
+	 * stack.
+	 */
+	new_rnat = ia64_get_rnat(ia64_rse_rnat_addr(new_bspstore));
+	old_rnat = regs->ar_rnat;
+	while (slots--) {
+		/* an RNAT slot on the source side is consumed, not copied */
+		if (ia64_rse_is_rnat_slot(new_bspstore)) {
+			new_rnat = ia64_get_rnat(new_bspstore++);
+		}
+		/* an RNAT slot on the destination side is written out */
+		if (ia64_rse_is_rnat_slot(old_bspstore)) {
+			*old_bspstore++ = old_rnat;
+			old_rnat = 0;
+		}
+		nat = (new_rnat >> ia64_rse_slot_num(new_bspstore)) & 1UL;
+		old_rnat &= ~(1UL << ia64_rse_slot_num(old_bspstore));
+		old_rnat |= (nat << ia64_rse_slot_num(old_bspstore));
+		*old_bspstore++ = *new_bspstore++;
+	}
+	old_sw->ar_bspstore = (unsigned long)old_bspstore;
+	old_sw->ar_rnat = old_rnat;
+
+	sos->prev_task = previous_current;
+	return previous_current;
+
+no_mod:
+	mprintk(KERN_INFO "cpu %d, %s %s, original stack not modified\n",
+			smp_processor_id(), type, msg);
+	old_unat = regs->ar_unat;
+	finish_pt_regs(regs, sos, &old_unat);
+	return previous_current;
+}
+
+/* The monarch/slave interaction is based on monarch_cpu and requires that all
+ * slaves have entered rendezvous before the monarch leaves. If any cpu has
+ * not entered rendezvous yet then wait a bit. The assumption is that any
+ * slave that has not rendezvoused after a reasonable time is never going to do
+ * so. In this context, slave includes cpus that respond to the MCA rendezvous
+ * interrupt, as well as cpus that receive the INIT slave event.
+ *
+ * monarch: cpu id that is skipped when checking the check-in states
+ * type:    event name used in the messages
+ */
+
+static void
+ia64_wait_for_slaves(int monarch, const char *type)
+{
+	int c, i , wait;
+
+	/*
+	 * wait 5 seconds total for slaves (arbitrary)
+	 */
+	for (i = 0; i < 5000; i++) {
+		wait = 0;
+		for_each_online_cpu(c) {
+			if (c == monarch)
+				continue;
+			if (ia64_mc_info.imi_rendez_checkin[c]
+					== IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
+				udelay(1000); /* short wait */
+				wait = 1;
+				break;
+			}
+		}
+		if (!wait)
+			goto all_in;
+	}
+
+	/*
+	 * Maybe slave(s) dead. Print buffered messages immediately.
+	 */
+	ia64_mlogbuf_finish(0);
+	mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type);
+	for_each_online_cpu(c) {
+		if (c == monarch)
+			continue;
+		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
+			mprintk(" %d", c);
+	}
+	mprintk("\n");
+	return;
+
+all_in:
+	mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type);
+	return;
+}
+
+/* mca_insert_tr
+ *
+ * Switch rid when TR reload and needed!
+ * iord: 1: itr, 2: presumably dtr (the original text said "itr" for
+ * both values) - TODO confirm
+ *
+ * Does nothing when this cpu has no ia64_idtrs[] table.
+*/
+static void mca_insert_tr(u64 iord)
+{
+
+	int i;
+	u64 old_rr;
+	struct ia64_tr_entry *p;
+	unsigned long psr;
+	int cpu = smp_processor_id();
+
+	if (!ia64_idtrs[cpu])
+		return;
+
+	psr = ia64_clear_ic();
+	for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) {
+		/*
+		 * NOTE(review): p does not depend on i, so every pass
+		 * looks at the same table entry while i only selects the
+		 * TR slot written below.  Looks like "+ i" may be
+		 * missing - confirm against the code that fills
+		 * ia64_idtrs[].
+		 */
+		p = ia64_idtrs[cpu] + (iord - 1) * IA64_TR_ALLOC_MAX;
+		if (p->pte & 0x1) {
+			/* temporarily switch the region register if it differs */
+			old_rr = ia64_get_rr(p->ifa);
+			if (old_rr != p->rr) {
+				ia64_set_rr(p->ifa, p->rr);
+				ia64_srlz_d();
+			}
+			ia64_ptr(iord, p->ifa, p->itir >> 2);
+			ia64_srlz_i();
+			if (iord & 0x1) {
+				ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2);
+				ia64_srlz_i();
+			}
+			if (iord & 0x2) {
+				ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2);
+				ia64_srlz_i();
+			}
+			if (old_rr != p->rr) {
+				ia64_set_rr(p->ifa, old_rr);
+				ia64_srlz_d();
+			}
+		}
+	}
+	ia64_set_psr(psr);
+}
+
+/*
+ * ia64_mca_handler
+ *
+ * This is uncorrectable machine check handler called from OS_MCA
+ * dispatch code which is in turn called from SAL_CHECK().
+ * This is the place where the core of OS MCA handling is done.
+ * Right now the logs are extracted and displayed in a well-defined
+ * format. This handler code is supposed to be run only on the
+ * monarch processor. Once the monarch is done with MCA handling
+ * further MCA logging is enabled by clearing logs.
+ * Monarch also has the duty of sending wakeup-IPIs to pull the
+ * slave processors out of rendezvous spinloop.
+ *
+ * If multiple processors call into OS_MCA, the first will become
+ * the monarch.
Subsequent cpus will be recorded in the mca_cpu
+ * bitmask. After the first monarch has processed its MCA, it
+ * will wake up the next cpu in the mca_cpu bitmask and then go
+ * into the rendezvous loop. When all processors have serviced
+ * their MCA, the last monarch frees up the rest of the processors.
+ *
+ * Inputs : regs, sw (partial pt_regs/switch_stack from mca_asm.S)
+ * sos (SAL to OS state; monarch and os_status are written)
+ */
+void
+ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
+		struct ia64_sal_os_state *sos)
+{
+	/*
+	 * NOTE(review): nd.data points at recover, which is only assigned
+	 * after the DIE_MCA_MONARCH_PROCESS notification below.
+	 */
+	int recover, cpu = smp_processor_id();
+	struct task_struct *previous_current;
+	struct ia64_mca_notify_die nd =
+		{ .sos = sos, .monarch_cpu = &monarch_cpu, .data = &recover };
+	static atomic_t mca_count;
+	static cpumask_t mca_cpu;
+
+	/* the first cpu to get here becomes monarch, the others queue up */
+	if (atomic_add_return(1, &mca_count) == 1) {
+		monarch_cpu = cpu;
+		sos->monarch = 1;
+	} else {
+		cpumask_set_cpu(cpu, &mca_cpu);
+		sos->monarch = 0;
+	}
+	mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d "
+		"monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch);
+
+	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
+
+	NOTIFY_MCA(DIE_MCA_MONARCH_ENTER, regs, (long)&nd, 1);
+
+	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
+	if (sos->monarch) {
+		ia64_wait_for_slaves(cpu, "MCA");
+
+		/* Wakeup all the processors which are spinning in the
+		 * rendezvous loop. They will leave SAL, then spin in the OS
+		 * with interrupts disabled until this monarch cpu leaves the
+		 * MCA handler. That gets control back to the OS so we can
+		 * backtrace the other cpus, backtrace when spinning in SAL
+		 * does not work.
+		 */
+		ia64_mca_wakeup_all();
+	} else {
+		while (cpumask_test_cpu(cpu, &mca_cpu))
+			cpu_relax(); /* spin until monarch wakes us */
+	}
+
+	NOTIFY_MCA(DIE_MCA_MONARCH_PROCESS, regs, (long)&nd, 1);
+
+	/* Get the MCA error record and log it */
+	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
+
+	/* MCA error recovery */
+	recover = (ia64_mca_ucmc_extension
+		&& ia64_mca_ucmc_extension(
+			IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA),
+			sos));
+
+	if (recover) {
+		/* downgrade the record to "corrected" and clear it in SAL */
+		sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA);
+		rh->severity = sal_log_severity_corrected;
+		ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
+		sos->os_status = IA64_MCA_CORRECTED;
+	} else {
+		/* Dump buffered message to console */
+		ia64_mlogbuf_finish(1);
+	}
+
+	if (__this_cpu_read(ia64_mca_tr_reload)) {
+		mca_insert_tr(0x1); /*Reload dynamic itrs*/
+		mca_insert_tr(0x2); /*Reload dynamic dtrs (presumably; TODO confirm)*/
+	}
+
+	NOTIFY_MCA(DIE_MCA_MONARCH_LEAVE, regs, (long)&nd, 1);
+
+	if (atomic_dec_return(&mca_count) > 0) {
+		int i;
+
+		/* wake up the next monarch cpu,
+		 * and put this cpu in the rendez loop.
+		 */
+		for_each_online_cpu(i) {
+			if (cpumask_test_cpu(i, &mca_cpu)) {
+				monarch_cpu = i;
+				cpumask_clear_cpu(i, &mca_cpu); /* wake next cpu */
+				while (monarch_cpu != -1)
+					cpu_relax(); /* spin until last cpu leaves */
+				set_curr_task(cpu, previous_current);
+				ia64_mc_info.imi_rendez_checkin[cpu]
+						= IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+				return;
+			}
+		}
+	}
+	/* last cpu out: restore the task and release everybody */
+	set_curr_task(cpu, previous_current);
+	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+	monarch_cpu = -1; /* This frees the slaves and previous monarchs */
+}
+
+/* Work items that mask/unmask the CMC vector on every cpu from process context */
+static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd);
+static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd);
+
+/*
+ * ia64_mca_cmc_int_handler
+ *
+ * This is corrected machine check interrupt handler.
+ * Right now the logs are extracted and displayed in a well-defined
+ * format.
+ *
+ * Inputs
+ *	interrupt number
+ *	client data arg ptr
+ *
+ * Outputs
+ *	None
+ */
+static irqreturn_t
+ia64_mca_cmc_int_handler(int cmc_irq, void *arg)
+{
+	static unsigned long cmc_history[CMC_HISTORY_LENGTH];
+	static int index;
+	static DEFINE_SPINLOCK(cmc_history_lock);
+	int switch_to_polling = 0;
+
+	IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
+		       __func__, cmc_irq, smp_processor_id());
+
+	/* SAL spec states this should run w/ interrupts enabled */
+	local_irq_enable();
+
+	/*
+	 * Rate-limit check: count how many of the remembered CMC timestamps
+	 * fall within the last HZ jiffies. Once the history is full of
+	 * recent entries, flip to polling mode; otherwise just remember
+	 * this event in the ring.
+	 */
+	spin_lock(&cmc_history_lock);
+	if (!cmc_polling_enabled) {
+		unsigned long now = jiffies;
+		int recent = 1;	/* the CMC being handled right now */
+		int slot;
+
+		for (slot = 0; slot < CMC_HISTORY_LENGTH; slot++)
+			if (now - cmc_history[slot] <= HZ)
+				recent++;
+
+		IA64_MCA_DEBUG(KERN_INFO "CMC threshold %d/%d\n", recent, CMC_HISTORY_LENGTH);
+		if (recent < CMC_HISTORY_LENGTH) {
+			cmc_history[index++] = now;
+			if (index == CMC_HISTORY_LENGTH)
+				index = 0;
+		} else {
+			cmc_polling_enabled = 1;
+			switch_to_polling = 1;
+		}
+	}
+	spin_unlock(&cmc_history_lock);
+
+	if (switch_to_polling) {
+		/* If we're being hit with CMC interrupts, we won't
+		 * ever execute the schedule_work() below. Need to
+		 * disable CMC interrupts on this processor now.
+		 */
+		ia64_mca_cmc_vector_disable(NULL);
+		schedule_work(&cmc_disable_work);
+
+		/*
+		 * Corrected errors will still be corrected, but
+		 * make sure there's a log somewhere that indicates
+		 * something is generating more than we can handle.
+		 */
+		printk(KERN_WARNING "WARNING: Switching to polling CMC handler; error records may be lost\n");
+
+		mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
+	}
+
+	/* Get the CMC error record and log it */
+	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC);
+
+	local_irq_disable();
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * ia64_mca_cmc_int_caller
+ *
+ * Triggered by sw interrupt from CMC polling routine.
Calls
+ * real interrupt handler and either triggers a sw interrupt
+ * on the next cpu or does cleanup at the end.
+ *
+ * Inputs
+ *	interrupt number
+ *	client data arg ptr
+ * Outputs
+ *	handled
+ */
+static irqreturn_t
+ia64_mca_cmc_int_caller(int cmc_irq, void *arg)
+{
+	static int start_count = -1;
+	unsigned int cpuid;
+
+	cpuid = smp_processor_id();
+
+	/* If first cpu, update count */
+	if (start_count == -1)
+		start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC);
+
+	ia64_mca_cmc_int_handler(cmc_irq, arg);
+
+	/*
+	 * cpumask_next(n, mask) already returns the first cpu strictly
+	 * after n, so pass the current cpu itself. Passing cpuid+1 would
+	 * skip the cpu right after this one and leave it unpolled.
+	 */
+	cpuid = cpumask_next(cpuid, cpu_online_mask);
+
+	if (cpuid < nr_cpu_ids) {
+		/* More online cpus left: pass the poll on to the next one */
+		platform_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
+	} else {
+		/* If no log record, switch out of polling mode */
+		if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) {
+
+			printk(KERN_WARNING "Returning to interrupt driven CMC handler\n");
+			schedule_work(&cmc_enable_work);
+			cmc_polling_enabled = 0;
+
+		} else {
+
+			mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
+		}
+
+		/* Cascade finished: the next pass re-samples the log count */
+		start_count = -1;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * ia64_mca_cmc_poll
+ *
+ * Poll for Corrected Machine Checks (CMCs)
+ *
+ * Inputs : dummy(unused)
+ * Outputs : None
+ *
+ */
+static void
+ia64_mca_cmc_poll (unsigned long dummy)
+{
+	/* Trigger a CMC interrupt cascade */
+	platform_send_ipi(cpumask_first(cpu_online_mask), IA64_CMCP_VECTOR,
+							IA64_IPI_DM_INT, 0);
+}
+
+/*
+ * ia64_mca_cpe_int_caller
+ *
+ * Triggered by sw interrupt from CPE polling routine. Calls
+ * real interrupt handler and either triggers a sw interrupt
+ * on the next cpu or does cleanup at the end.
+ *
+ * Inputs
+ *	interrupt number
+ *	client data arg ptr
+ * Outputs
+ *	handled
+ */
+#ifdef CONFIG_ACPI
+
+static irqreturn_t
+ia64_mca_cpe_int_caller(int cpe_irq, void *arg)
+{
+	static int start_count = -1;
+	static int poll_time = MIN_CPE_POLL_INTERVAL;
+	unsigned int cpuid;
+
+	cpuid = smp_processor_id();
+
+	/* If first cpu, update count */
+	if (start_count == -1)
+		start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE);
+
+	ia64_mca_cpe_int_handler(cpe_irq, arg);
+
+	/*
+	 * cpumask_next(n, mask) already returns the first cpu strictly
+	 * after n, so pass the current cpu itself. Passing cpuid+1 would
+	 * skip the cpu right after this one and leave it unpolled.
+	 */
+	cpuid = cpumask_next(cpuid, cpu_online_mask);
+
+	/* Same end-of-mask test as the CMC cascade (nr_cpu_ids) */
+	if (cpuid < nr_cpu_ids) {
+		platform_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
+	} else {
+		/*
+		 * If a log was recorded, increase our polling frequency,
+		 * otherwise, backoff or return to interrupt mode.
+		 */
+		if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) {
+			poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time / 2);
+		} else if (cpe_vector < 0) {
+			/* No CPE vector to fall back on: keep polling, slower */
+			poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2);
+		} else {
+			poll_time = MIN_CPE_POLL_INTERVAL;
+
+			printk(KERN_WARNING "Returning to interrupt driven CPE handler\n");
+			enable_irq(local_vector_to_irq(IA64_CPE_VECTOR));
+			cpe_poll_enabled = 0;
+		}
+
+		if (cpe_poll_enabled)
+			mod_timer(&cpe_poll_timer, jiffies + poll_time);
+		/* Cascade finished: the next pass re-samples the log count */
+		start_count = -1;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * ia64_mca_cpe_poll
+ *
+ * Poll for Corrected Platform Errors (CPEs), trigger interrupt
+ * on first cpu, from there it will trickle through all the cpus.
+ * + * Inputs : dummy(unused) + * Outputs : None + * + */ +static void +ia64_mca_cpe_poll (unsigned long dummy) +{ + /* Trigger a CPE interrupt cascade */ + platform_send_ipi(cpumask_first(cpu_online_mask), IA64_CPEP_VECTOR, + IA64_IPI_DM_INT, 0); +} + +#endif /* CONFIG_ACPI */ + +static int +default_monarch_init_process(struct notifier_block *self, unsigned long val, void *data) +{ + int c; + struct task_struct *g, *t; + if (val != DIE_INIT_MONARCH_PROCESS) + return NOTIFY_DONE; +#ifdef CONFIG_KEXEC + if (atomic_read(&kdump_in_progress)) + return NOTIFY_DONE; +#endif + + /* + * FIXME: mlogbuf will brim over with INIT stack dumps. + * To enable show_stack from INIT, we use oops_in_progress which should + * be used in real oops. This would cause something wrong after INIT. + */ + BREAK_LOGLEVEL(console_loglevel); + ia64_mlogbuf_dump_from_init(); + + printk(KERN_ERR "Processes interrupted by INIT -"); + for_each_online_cpu(c) { + struct ia64_sal_os_state *s; + t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET); + s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET); + g = s->prev_task; + if (g) { + if (g->pid) + printk(" %d", g->pid); + else + printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g); + } + } + printk("\n\n"); + if (read_trylock(&tasklist_lock)) { + do_each_thread (g, t) { + printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); + show_stack(t, NULL); + } while_each_thread (g, t); + read_unlock(&tasklist_lock); + } + /* FIXME: This will not restore zapped printk locks. */ + RESTORE_LOGLEVEL(console_loglevel); + return NOTIFY_DONE; +} + +/* + * C portion of the OS INIT handler + * + * Called from ia64_os_init_dispatch + * + * Inputs: pointer to pt_regs where processor info was saved. SAL/OS state for + * this event. This code is used for both monarch and slave INIT events, see + * sos->monarch. + * + * All INIT events switch to the INIT stack and change the previous process to + * blocked status. 
If one of the INIT events is the monarch then we are
+ * probably processing the nmi button/command. Use the monarch cpu to dump all
+ * the processes. The slave INIT events all spin until the monarch cpu
+ * returns. We can also get INIT slave events for MCA, in which case the MCA
+ * process is the monarch.
+ */
+
+void
+ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
+		  struct ia64_sal_os_state *sos)
+{
+	static atomic_t slaves;		/* cpus currently handling INIT as slave */
+	static atomic_t monarchs;	/* cpus currently handling INIT as monarch */
+	struct task_struct *previous_current;
+	int cpu = smp_processor_id();
+	struct ia64_mca_notify_die nd =
+		{ .sos = sos, .monarch_cpu = &monarch_cpu };
+
+	NOTIFY_INIT(DIE_INIT_ENTER, regs, (long)&nd, 0);
+
+	mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
+		sos->proc_state_param, cpu, sos->monarch);
+	salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0);
+
+	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "INIT");
+	sos->os_status = IA64_INIT_RESUME;
+
+	/* FIXME: Workaround for broken proms that drive all INIT events as
+	 * slaves. The last slave that enters is promoted to be a monarch.
+	 * Remove this code in September 2006, that gives platforms a year to
+	 * fix their proms and get their customers updated.
+	 */
+	if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) {
+		mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
+			__func__, cpu);
+		atomic_dec(&slaves);
+		sos->monarch = 1;
+	}
+
+	/* FIXME: Workaround for broken proms that drive all INIT events as
+	 * monarchs. Second and subsequent monarchs are demoted to slaves.
+	 * Remove this code in September 2006, that gives platforms a year to
+	 * fix their proms and get their customers updated.
+	 */
+	if (sos->monarch && atomic_add_return(1, &monarchs) > 1) {
+		mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
+			__func__, cpu);
+		atomic_dec(&monarchs);
+		sos->monarch = 0;
+	}
+
+	if (!sos->monarch) {
+		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
+
+#ifdef CONFIG_KEXEC
+		while (monarch_cpu == -1 && !atomic_read(&kdump_in_progress))
+			udelay(1000);	/* also give up waiting once kdump starts */
+#else
+		while (monarch_cpu == -1)
+			cpu_relax();	/* spin until monarch enters */
+#endif
+
+		NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1);
+		NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1);
+
+#ifdef CONFIG_KEXEC
+		while (monarch_cpu != -1 && !atomic_read(&kdump_in_progress))
+			udelay(1000);	/* also give up waiting once kdump starts */
+#else
+		while (monarch_cpu != -1)
+			cpu_relax();	/* spin until monarch leaves */
+#endif
+
+		NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1);
+
+		mprintk("Slave on cpu %d returning to normal service.\n", cpu);
+		set_curr_task(cpu, previous_current);
+		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+		atomic_dec(&slaves);
+		return;
+	}
+
+	monarch_cpu = cpu;	/* releases slaves spinning on monarch_cpu == -1 */
+	NOTIFY_INIT(DIE_INIT_MONARCH_ENTER, regs, (long)&nd, 1);
+
+	/*
+	 * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000), INIT can be
+	 * generated via the BMC's command-line interface, but since the console is on the
+	 * same serial line, the user will need some time to switch out of the BMC before
+	 * the dump begins.
+	 */
+	mprintk("Delaying for 5 seconds...\n");
+	udelay(5*1000000);
+	ia64_wait_for_slaves(cpu, "INIT");
+	/* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through
+	 * to default_monarch_init_process() above and just print all the
+	 * tasks.
+	 */
+	NOTIFY_INIT(DIE_INIT_MONARCH_PROCESS, regs, (long)&nd, 1);
+	NOTIFY_INIT(DIE_INIT_MONARCH_LEAVE, regs, (long)&nd, 1);
+
+	mprintk("\nINIT dump complete.
Monarch on cpu %d returning to normal service.\n", cpu);	/* NOTE(review): the line break inside this literal looks like an extraction artifact - confirm against upstream */
+	atomic_dec(&monarchs);
+	set_curr_task(cpu, previous_current);
+	monarch_cpu = -1;	/* releases slaves spinning on monarch_cpu != -1 */
+	return;
+}
+
+static int __init
+ia64_mca_disable_cpe_polling(char *str)
+{
+	cpe_poll_enabled = 0;	/* "disable_cpe_poll" boot option: never poll for CPEs */
+	return 1;
+}
+
+__setup("disable_cpe_poll", ia64_mca_disable_cpe_polling);
+
+static struct irqaction cmci_irqaction = {
+	.handler =	ia64_mca_cmc_int_handler,
+	.name =		"cmc_hndlr"
+};
+
+static struct irqaction cmcp_irqaction = {
+	.handler =	ia64_mca_cmc_int_caller,
+	.name =		"cmc_poll"
+};
+
+static struct irqaction mca_rdzv_irqaction = {
+	.handler =	ia64_mca_rendez_int_handler,
+	.name =		"mca_rdzv"
+};
+
+static struct irqaction mca_wkup_irqaction = {
+	.handler =	ia64_mca_wakeup_int_handler,
+	.name =		"mca_wkup"
+};
+
+#ifdef CONFIG_ACPI
+static struct irqaction mca_cpe_irqaction = {
+	.handler =	ia64_mca_cpe_int_handler,
+	.name =		"cpe_hndlr"
+};
+
+static struct irqaction mca_cpep_irqaction = {
+	.handler =	ia64_mca_cpe_int_caller,
+	.name =		"cpe_poll"
+};
+#endif /* CONFIG_ACPI */
+
+/* Minimal format of the MCA/INIT stacks. The pseudo processes that run on
+ * these stacks can never sleep, they cannot return from the kernel to user
+ * space, they do not appear in a normal ps listing. So there is no need to
+ * format most of the fields.
+ */ + +static void +format_mca_init_stack(void *mca_data, unsigned long offset, + const char *type, int cpu) +{ + struct task_struct *p = (struct task_struct *)((char *)mca_data + offset); + struct thread_info *ti; + memset(p, 0, KERNEL_STACK_SIZE); + ti = task_thread_info(p); + ti->flags = _TIF_MCA_INIT; + ti->preempt_count = 1; + ti->task = p; + ti->cpu = cpu; + p->stack = ti; + p->state = TASK_UNINTERRUPTIBLE; + cpumask_set_cpu(cpu, &p->cpus_allowed); + INIT_LIST_HEAD(&p->tasks); + p->parent = p->real_parent = p->group_leader = p; + INIT_LIST_HEAD(&p->children); + INIT_LIST_HEAD(&p->sibling); + strncpy(p->comm, type, sizeof(p->comm)-1); +} + +/* Caller prevents this from being called after init */ +static void * __init_refok mca_bootmem(void) +{ + return __alloc_bootmem(sizeof(struct ia64_mca_cpu), + KERNEL_STACK_SIZE, 0); +} + +/* Do per-CPU MCA-related initialization. */ +void +ia64_mca_cpu_init(void *cpu_data) +{ + void *pal_vaddr; + void *data; + long sz = sizeof(struct ia64_mca_cpu); + int cpu = smp_processor_id(); + static int first_time = 1; + + /* + * Structure will already be allocated if cpu has been online, + * then offlined. + */ + if (__per_cpu_mca[cpu]) { + data = __va(__per_cpu_mca[cpu]); + } else { + if (first_time) { + data = mca_bootmem(); + first_time = 0; + } else + data = (void *)__get_free_pages(GFP_KERNEL, + get_order(sz)); + if (!data) + panic("Could not allocate MCA memory for cpu %d\n", + cpu); + } + format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, mca_stack), + "MCA", cpu); + format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, init_stack), + "INIT", cpu); + __this_cpu_write(ia64_mca_data, (__per_cpu_mca[cpu] = __pa(data))); + + /* + * Stash away a copy of the PTE needed to map the per-CPU page. + * We may need it during MCA recovery. + */ + __this_cpu_write(ia64_mca_per_cpu_pte, + pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL))); + + /* + * Also, stash away a copy of the PAL address and the PTE + * needed to map it. 
+ */ + pal_vaddr = efi_get_pal_addr(); + if (!pal_vaddr) + return; + __this_cpu_write(ia64_mca_pal_base, + GRANULEROUNDDOWN((unsigned long) pal_vaddr)); + __this_cpu_write(ia64_mca_pal_pte, pte_val(mk_pte_phys(__pa(pal_vaddr), + PAGE_KERNEL))); +} + +static void ia64_mca_cmc_vector_adjust(void *dummy) +{ + unsigned long flags; + + local_irq_save(flags); + if (!cmc_polling_enabled) + ia64_mca_cmc_vector_enable(NULL); + local_irq_restore(flags); +} + +static int mca_cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + int hotcpu = (unsigned long) hcpu; + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + smp_call_function_single(hotcpu, ia64_mca_cmc_vector_adjust, + NULL, 0); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block mca_cpu_notifier = { + .notifier_call = mca_cpu_callback +}; + +/* + * ia64_mca_init + * + * Do all the system level mca specific initialization. + * + * 1. Register spinloop and wakeup request interrupt vectors + * + * 2. Register OS_MCA handler entry point + * + * 3. Register OS_INIT handler entry point + * + * 4. Initialize MCA/CMC/INIT related log buffers maintained by the OS. + * + * Note that this initialization is done very early before some kernel + * services are available. 
+ *
+ * Inputs : None
+ *
+ * Outputs : None
+ */
+void __init
+ia64_mca_init(void)
+{
+	ia64_fptr_t *init_hldlr_ptr_monarch = (ia64_fptr_t *)ia64_os_init_dispatch_monarch;
+	ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave;
+	ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch;
+	int i;
+	long rc;
+	struct ia64_sal_retval isrv;
+	unsigned long timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */
+	static struct notifier_block default_init_monarch_nb = {
+		.notifier_call = default_monarch_init_process,
+		.priority = 0/* we need to be notified last */
+	};
+
+	IA64_MCA_DEBUG("%s: begin\n", __func__);
+
+	/* Clear the Rendez checkin flag for all cpus */
+	for(i = 0 ; i < NR_CPUS; i++)
+		ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+
+	/*
+	 * Register the rendezvous spinloop and wakeup mechanism with SAL
+	 */
+
+	/* Register the rendezvous interrupt vector with SAL.
+	 * Status -2 means SAL wants a different timeout (returned in v0):
+	 * adopt it and retry. Any other non-zero status aborts MCA init.
+	 */
+	while (1) {
+		isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT,
+					      SAL_MC_PARAM_MECHANISM_INT,
+					      IA64_MCA_RENDEZ_VECTOR,
+					      timeout,
+					      SAL_MC_PARAM_RZ_ALWAYS);
+		rc = isrv.status;
+		if (rc == 0)
+			break;
+		if (rc == -2) {
+			printk(KERN_INFO "Increasing MCA rendezvous timeout from "
+				"%ld to %ld milliseconds\n", timeout, isrv.v0);
+			timeout = isrv.v0;
+			NOTIFY_MCA(DIE_MCA_NEW_TIMEOUT, NULL, timeout, 0);
+			continue;
+		}
+		printk(KERN_ERR "Failed to register rendezvous interrupt "
+		       "with SAL (status %ld)\n", rc);
+		return;
+	}
+
+	/* Register the wakeup interrupt vector with SAL */
+	isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP,
+				      SAL_MC_PARAM_MECHANISM_INT,
+				      IA64_MCA_WAKEUP_VECTOR,
+				      0, 0);
+	rc = isrv.status;
+	if (rc) {
+		printk(KERN_ERR "Failed to register wakeup interrupt with SAL "
+		       "(status %ld)\n", rc);
+		return;
+	}
+
+	IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __func__);
+
+	ia64_mc_info.imi_mca_handler = ia64_tpa(mca_hldlr_ptr->fp);
+	/*
+	 * XXX - disable SAL checksum by setting size to 0; should be
+	 * ia64_tpa(ia64_os_mca_dispatch_end) - ia64_tpa(ia64_os_mca_dispatch);
+	 */
+	ia64_mc_info.imi_mca_handler_size = 0;
+
+	/* Register the os mca handler with SAL */
+	if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA,
+				       ia64_mc_info.imi_mca_handler,
+				       ia64_tpa(mca_hldlr_ptr->gp),
+				       ia64_mc_info.imi_mca_handler_size,
+				       0, 0, 0)))
+	{
+		printk(KERN_ERR "Failed to register OS MCA handler with SAL "
+		       "(status %ld)\n", rc);
+		return;
+	}
+
+	IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __func__,
+		       ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp));
+
+	/*
+	 * XXX - disable SAL checksum by setting size to 0, should be
+	 * size of the actual init handler in mca_asm.S.
+	 */
+	ia64_mc_info.imi_monarch_init_handler = ia64_tpa(init_hldlr_ptr_monarch->fp);
+	ia64_mc_info.imi_monarch_init_handler_size = 0;
+	ia64_mc_info.imi_slave_init_handler = ia64_tpa(init_hldlr_ptr_slave->fp);
+	ia64_mc_info.imi_slave_init_handler_size = 0;
+
+	IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __func__,
+		       ia64_mc_info.imi_monarch_init_handler);
+
+	/* Register the os init handler with SAL (monarch and slave entry
+	 * points are registered in the same call)
+	 */
+	if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT,
+				       ia64_mc_info.imi_monarch_init_handler,
+				       ia64_tpa(ia64_getreg(_IA64_REG_GP)),
+				       ia64_mc_info.imi_monarch_init_handler_size,
+				       ia64_mc_info.imi_slave_init_handler,
+				       ia64_tpa(ia64_getreg(_IA64_REG_GP)),
+				       ia64_mc_info.imi_slave_init_handler_size)))
+	{
+		printk(KERN_ERR "Failed to register m/s INIT handlers with SAL "
+		       "(status %ld)\n", rc);
+		return;
+	}
+	if (register_die_notifier(&default_init_monarch_nb)) {
+		printk(KERN_ERR "Failed to register default monarch INIT process\n");
+		return;
+	}
+
+	IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __func__);
+
+	/* Initialize the areas set aside by the OS to buffer the
+	 * platform/processor error states for MCA/INIT/CMC
+	 * handling.
+	 */
+	ia64_log_init(SAL_INFO_TYPE_MCA);
+	ia64_log_init(SAL_INFO_TYPE_INIT);
+	ia64_log_init(SAL_INFO_TYPE_CMC);
+	ia64_log_init(SAL_INFO_TYPE_CPE);
+
+	mca_init = 1;	/* only set when every registration above succeeded */
+	printk(KERN_INFO "MCA related initialization done\n");
+}
+
+
+/*
+ * These pieces cannot be done in ia64_mca_init() because it is called before
+ * early_irq_init() which would wipe out our percpu irq registrations. But we
+ * cannot leave them until ia64_mca_late_init() because by then all the other
+ * processors have been brought online and have set their own CMC vectors to
+ * point at a non-existent action. Called from arch_early_irq_init().
+ */
+void __init ia64_mca_irq_init(void)
+{
+	/*
+	 * Configure the CMCI/P vector and handler. Interrupts for CMC are
+	 * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c).
+	 */
+	register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction);
+	register_percpu_irq(IA64_CMCP_VECTOR, &cmcp_irqaction);
+	ia64_mca_cmc_vector_setup();	/* Setup vector on BSP */
+
+	/* Setup the MCA rendezvous interrupt vector */
+	register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, &mca_rdzv_irqaction);
+
+	/* Setup the MCA wakeup interrupt vector */
+	register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction);
+
+#ifdef CONFIG_ACPI
+	/* Setup the CPEI/P handler */
+	register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
+#endif
+}
+
+/*
+ * ia64_mca_late_init
+ *
+ * Opportunity to setup things that require initialization later
+ * than ia64_mca_init. Setup a timer to poll for CPEs if the
+ * platform doesn't support an interrupt driven mechanism.
+ *
+ * Inputs : None
+ * Outputs : Status
+ */
+static int __init
+ia64_mca_late_init(void)
+{
+	if (!mca_init)
+		return 0;	/* ia64_mca_init() did not complete: nothing to set up */
+
+	register_hotcpu_notifier(&mca_cpu_notifier);
+
+	/* Setup the CMCI/P vector and handler */
+	init_timer(&cmc_poll_timer);
+	cmc_poll_timer.function = ia64_mca_cmc_poll;
+
+	/* Unmask/enable the vector */
+	cmc_polling_enabled = 0;
+	schedule_work(&cmc_enable_work);
+
+	IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __func__);
+
+#ifdef CONFIG_ACPI
+	/* Setup the CPEI/P vector and handler */
+	cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI);
+	init_timer(&cpe_poll_timer);
+	cpe_poll_timer.function = ia64_mca_cpe_poll;
+
+	{
+		unsigned int irq;
+
+		if (cpe_vector >= 0) {
+			/* If platform supports CPEI, enable the irq. */
+			irq = local_vector_to_irq(cpe_vector);
+			if (irq > 0) {
+				cpe_poll_enabled = 0;	/* interrupt driven: no polling */
+				irq_set_status_flags(irq, IRQ_PER_CPU);
+				setup_irq(irq, &mca_cpe_irqaction);
+				ia64_cpe_irq = irq;
+				ia64_mca_register_cpev(cpe_vector);
+				IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n",
+					__func__);
+				return 0;
+			}
+			printk(KERN_ERR "%s: Failed to find irq for CPE "
+					"interrupt handler, vector %d\n",
+					__func__, cpe_vector);
+		}
+		/* If platform doesn't support CPEI, get the timer going.
*/ + if (cpe_poll_enabled) { + ia64_mca_cpe_poll(0UL); + IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __func__); + } + } +#endif + + return 0; +} + +device_initcall(ia64_mca_late_init); diff --git a/kernel/arch/ia64/kernel/mca_asm.S b/kernel/arch/ia64/kernel/mca_asm.S new file mode 100644 index 000000000..d5bdf9de3 --- /dev/null +++ b/kernel/arch/ia64/kernel/mca_asm.S @@ -0,0 +1,1122 @@ +/* + * File: mca_asm.S + * Purpose: assembly portion of the IA64 MCA handling + * + * Mods by cfleck to integrate into kernel build + * + * 2000-03-15 David Mosberger-Tang + * Added various stop bits to get a clean compile + * + * 2000-03-29 Chuck Fleckenstein + * Added code to save INIT handoff state in pt_regs format, + * switch to temp kstack, switch modes, jump to C INIT handler + * + * 2002-01-04 J.Hall + * Before entering virtual mode code: + * 1. Check for TLB CPU error + * 2. Restore current thread pointer to kr6 + * 3. Move stack ptr 16 bytes to conform to C calling convention + * + * 2004-11-12 Russ Anderson + * Added per cpu MCA/INIT stack save areas. + * + * 2005-12-08 Keith Owens + * Use per cpu MCA/INIT stacks for all data. + */ +#include + +#include +#include +#include +#include +#include + +#include "entry.h" + +#define GET_IA64_MCA_DATA(reg) \ + GET_THIS_PADDR(reg, ia64_mca_data) \ + ;; \ + ld8 reg=[reg] + + .global ia64_do_tlb_purge + .global ia64_os_mca_dispatch + .global ia64_os_init_on_kdump + .global ia64_os_init_dispatch_monarch + .global ia64_os_init_dispatch_slave + + .text + .align 16 + +//StartMain//////////////////////////////////////////////////////////////////// + +/* + * Just the TLB purge part is moved to a separate function + * so we can re-use the code for cpu hotplug code as well + * Caller should now setup b1, so we can branch once the + * tlb flush is complete. 
+ */ + +ia64_do_tlb_purge: +#define O(member) IA64_CPUINFO_##member##_OFFSET + + GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2 + ;; + addl r17=O(PTCE_STRIDE),r2 + addl r2=O(PTCE_BASE),r2 + ;; + ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base + ld4 r19=[r2],4 // r19=ptce_count[0] + ld4 r21=[r17],4 // r21=ptce_stride[0] + ;; + ld4 r20=[r2] // r20=ptce_count[1] + ld4 r22=[r17] // r22=ptce_stride[1] + mov r24=0 + ;; + adds r20=-1,r20 + ;; +#undef O + +2: + cmp.ltu p6,p7=r24,r19 +(p7) br.cond.dpnt.few 4f + mov ar.lc=r20 +3: + ptc.e r18 + ;; + add r18=r22,r18 + br.cloop.sptk.few 3b + ;; + add r18=r21,r18 + add r24=1,r24 + ;; + br.sptk.few 2b +4: + srlz.i // srlz.i implies srlz.d + ;; + + // Now purge addresses formerly mapped by TR registers + // 1. Purge ITR&DTR for kernel. + movl r16=KERNEL_START + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + ;; + ptr.i r16, r18 + ptr.d r16, r18 + ;; + srlz.i + ;; + srlz.d + ;; + // 3. Purge ITR for PAL code. + GET_THIS_PADDR(r2, ia64_mca_pal_base) + ;; + ld8 r16=[r2] + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.i r16,r18 + ;; + srlz.i + ;; + // 4. Purge DTR for stack. + mov r16=IA64_KR(CURRENT_STACK) + ;; + shl r16=r16,IA64_GRANULE_SHIFT + movl r19=PAGE_OFFSET + ;; + add r16=r19,r16 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.i + ;; + // Now branch away to caller. 
+ br.sptk.many b1 + ;; + +//EndMain////////////////////////////////////////////////////////////////////// + +//StartMain//////////////////////////////////////////////////////////////////// + +ia64_os_mca_dispatch: + mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack + LOAD_PHYSICAL(p0,r2,1f) // return address + mov r19=1 // All MCA events are treated as monarch (for now) + br.sptk ia64_state_save // save the state that is not in minstate +1: + + GET_IA64_MCA_DATA(r2) + // Using MCA stack, struct ia64_sal_os_state, variable proc_state_param + ;; + add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+SOS(PROC_STATE_PARAM), r2 + ;; + ld8 r18=[r3] // Get processor state parameter on existing PALE_CHECK. + ;; + tbit.nz p6,p7=r18,60 +(p7) br.spnt done_tlb_purge_and_reload + + // The following code purges TC and TR entries. Then reload all TC entries. + // Purge percpu data TC entries. +begin_tlb_purge_and_reload: + movl r18=ia64_reload_tr;; + LOAD_PHYSICAL(p0,r18,ia64_reload_tr);; + mov b1=r18;; + br.sptk.many ia64_do_tlb_purge;; + +ia64_reload_tr: + // Finally reload the TR registers. + // 1. Reload DTR/ITR registers for kernel. + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + movl r17=KERNEL_START + ;; + mov cr.itir=r18 + mov cr.ifa=r17 + mov r16=IA64_TR_KERNEL + mov r19=ip + movl r18=PAGE_KERNEL + ;; + dep r17=0,r19,0, KERNEL_TR_PAGE_SHIFT + ;; + or r18=r17,r18 + ;; + itr.i itr[r16]=r18 + ;; + itr.d dtr[r16]=r18 + ;; + srlz.i + srlz.d + ;; + // 3. Reload ITR for PAL code. + GET_THIS_PADDR(r2, ia64_mca_pal_pte) + ;; + ld8 r18=[r2] // load PAL PTE + ;; + GET_THIS_PADDR(r2, ia64_mca_pal_base) + ;; + ld8 r16=[r2] // load PAL vaddr + mov r19=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r19 + mov cr.ifa=r16 + mov r20=IA64_TR_PALCODE + ;; + itr.i itr[r20]=r18 + ;; + srlz.i + ;; + // 4. Reload DTR for stack. 
+ mov r16=IA64_KR(CURRENT_STACK) + ;; + shl r16=r16,IA64_GRANULE_SHIFT + movl r19=PAGE_OFFSET + ;; + add r18=r19,r16 + movl r20=PAGE_KERNEL + ;; + add r16=r20,r16 + mov r19=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r19 + mov cr.ifa=r18 + mov r20=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r20]=r16 + GET_THIS_PADDR(r2, ia64_mca_tr_reload) + mov r18 = 1 + ;; + srlz.d + ;; + st8 [r2] =r18 + ;; + +done_tlb_purge_and_reload: + + // switch to per cpu MCA stack + mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_new_stack +1: + + // everything saved, now we can set the kernel registers + mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_set_kernel_registers +1: + + // This must be done in physical mode + GET_IA64_MCA_DATA(r2) + ;; + mov r7=r2 + + // Enter virtual mode from physical mode + VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4) + + // This code returns to SAL via SOS r2, in general SAL has no unwind + // data. To get a clean termination when backtracing the C MCA/INIT + // handler, set a dummy return address of 0 in this routine. That + // requires that ia64_os_mca_virtual_begin be a global function. 
+ENTRY(ia64_os_mca_virtual_begin) + .prologue + .save rp,r0 + .body + + mov ar.rsc=3 // set eager mode for C handler + mov r2=r7 // see GET_IA64_MCA_DATA above + ;; + + // Call virtual mode handler + alloc r14=ar.pfs,0,0,3,0 + ;; + DATA_PA_TO_VA(r2,r7) + ;; + add out0=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2 + add out1=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2 + add out2=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET, r2 + br.call.sptk.many b0=ia64_mca_handler + + // Revert back to physical mode before going back to SAL + PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4) +ia64_os_mca_virtual_end: + +END(ia64_os_mca_virtual_begin) + + // switch back to previous stack + alloc r14=ar.pfs,0,0,0,0 // remove the MCA handler frame + mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_old_stack +1: + + mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_state_restore // restore the SAL state +1: + + mov b0=r12 // SAL_CHECK return address + + br b0 + +//EndMain////////////////////////////////////////////////////////////////////// + +//StartMain//////////////////////////////////////////////////////////////////// + +// +// NOP init handler for kdump. In panic situation, we may receive INIT +// while kernel transition. Since we initialize registers on leave from +// current kernel, no longer monarch/slave handlers of current kernel in +// virtual mode are called safely. +// We can unregister these init handlers from SAL, however then the INIT +// will result in warmboot by SAL and we cannot retrieve the crashdump. +// Therefore register this NOP function to SAL, to prevent entering virtual +// mode and resulting warmboot by SAL. 
+// +ia64_os_init_on_kdump: + mov r8=r0 // IA64_INIT_RESUME + mov r9=r10 // SAL_GP + mov r22=r17 // *minstate + ;; + mov r10=r0 // return to same context + mov b0=r12 // SAL_CHECK return address + br b0 + +// +// SAL to OS entry point for INIT on all processors. This has been defined for +// registration purposes with SAL as a part of ia64_mca_init. Monarch and +// slave INIT have identical processing, except for the value of the +// sos->monarch flag in r19. +// + +ia64_os_init_dispatch_monarch: + mov r19=1 // Bow, bow, ye lower middle classes! + br.sptk ia64_os_init_dispatch + +ia64_os_init_dispatch_slave: + mov r19=0 // yeth, mathter + +ia64_os_init_dispatch: + + mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_state_save // save the state that is not in minstate +1: + + // switch to per cpu INIT stack + mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_new_stack +1: + + // everything saved, now we can set the kernel registers + mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_set_kernel_registers +1: + + // This must be done in physical mode + GET_IA64_MCA_DATA(r2) + ;; + mov r7=r2 + + // Enter virtual mode from physical mode + VIRTUAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_begin, r4) + + // This code returns to SAL via SOS r2, in general SAL has no unwind + // data. To get a clean termination when backtracing the C MCA/INIT + // handler, set a dummy return address of 0 in this routine. That + // requires that ia64_os_init_virtual_begin be a global function. 
+ENTRY(ia64_os_init_virtual_begin) + .prologue + .save rp,r0 + .body + + mov ar.rsc=3 // set eager mode for C handler + mov r2=r7 // see GET_IA64_MCA_DATA above + ;; + + // Call virtual mode handler + alloc r14=ar.pfs,0,0,3,0 + ;; + DATA_PA_TO_VA(r2,r7) + ;; + add out0=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2 + add out1=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2 + add out2=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SOS_OFFSET, r2 + br.call.sptk.many b0=ia64_init_handler + + // Revert back to physical mode before going back to SAL + PHYSICAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_end, r4) +ia64_os_init_virtual_end: + +END(ia64_os_init_virtual_begin) + + mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_state_restore // restore the SAL state +1: + + // switch back to previous stack + alloc r14=ar.pfs,0,0,0,0 // remove the INIT handler frame + mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_old_stack +1: + + mov b0=r12 // SAL_CHECK return address + br b0 + +//EndMain////////////////////////////////////////////////////////////////////// + +// common defines for the stubs +#define ms r4 +#define regs r5 +#define temp1 r2 /* careful, it overlaps with input registers */ +#define temp2 r3 /* careful, it overlaps with input registers */ +#define temp3 r7 +#define temp4 r14 + + +//++ +// Name: +// ia64_state_save() +// +// Stub Description: +// +// Save the state that is not in minstate. This is sensitive to the layout of +// struct ia64_sal_os_state in mca.h. +// +// r2 contains the return address, r3 contains either +// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. +// +// The OS to SAL section of struct ia64_sal_os_state is set to a default +// value of cold boot (MCA) or warm boot (INIT) and return to the same +// context. 
ia64_sal_os_state is also used to hold some registers that +// need to be saved and restored across the stack switches. +// +// Most input registers to this stub come from PAL/SAL +// r1 os gp, physical +// r8 pal_proc entry point +// r9 sal_proc entry point +// r10 sal gp +// r11 MCA - rendevzous state, INIT - reason code +// r12 sal return address +// r17 pal min_state +// r18 processor state parameter +// r19 monarch flag, set by the caller of this routine +// +// In addition to the SAL to OS state, this routine saves all the +// registers that appear in struct pt_regs and struct switch_stack, +// excluding those that are already in the PAL minstate area. This +// results in a partial pt_regs and switch_stack, the C code copies the +// remaining registers from PAL minstate to pt_regs and switch_stack. The +// resulting structures contain all the state of the original process when +// MCA/INIT occurred. +// +//-- + +ia64_state_save: + add regs=MCA_SOS_OFFSET, r3 + add ms=MCA_SOS_OFFSET+8, r3 + mov b0=r2 // save return address + cmp.eq p1,p2=IA64_MCA_CPU_MCA_STACK_OFFSET, r3 + ;; + GET_IA64_MCA_DATA(temp2) + ;; + add temp1=temp2, regs // struct ia64_sal_os_state on MCA or INIT stack + add temp2=temp2, ms // struct ia64_sal_os_state+8 on MCA or INIT stack + ;; + mov regs=temp1 // save the start of sos + st8 [temp1]=r1,16 // os_gp + st8 [temp2]=r8,16 // pal_proc + ;; + st8 [temp1]=r9,16 // sal_proc + st8 [temp2]=r11,16 // rv_rc + mov r11=cr.iipa + ;; + st8 [temp1]=r18 // proc_state_param + st8 [temp2]=r19 // monarch + mov r6=IA64_KR(CURRENT) + add temp1=SOS(SAL_RA), regs + add temp2=SOS(SAL_GP), regs + ;; + st8 [temp1]=r12,16 // sal_ra + st8 [temp2]=r10,16 // sal_gp + mov r12=cr.isr + ;; + st8 [temp1]=r17,16 // pal_min_state + st8 [temp2]=r6,16 // prev_IA64_KR_CURRENT + mov r6=IA64_KR(CURRENT_STACK) + ;; + st8 [temp1]=r6,16 // prev_IA64_KR_CURRENT_STACK + st8 [temp2]=r0,16 // prev_task, starts off as NULL + mov r6=cr.ifa + ;; + st8 [temp1]=r12,16 // cr.isr + st8 
[temp2]=r6,16 // cr.ifa + mov r12=cr.itir + ;; + st8 [temp1]=r12,16 // cr.itir + st8 [temp2]=r11,16 // cr.iipa + mov r12=cr.iim + ;; + st8 [temp1]=r12 // cr.iim +(p1) mov r12=IA64_MCA_COLD_BOOT +(p2) mov r12=IA64_INIT_WARM_BOOT + mov r6=cr.iha + add temp1=SOS(OS_STATUS), regs + ;; + st8 [temp2]=r6 // cr.iha + add temp2=SOS(CONTEXT), regs + st8 [temp1]=r12 // os_status, default is cold boot + mov r6=IA64_MCA_SAME_CONTEXT + ;; + st8 [temp2]=r6 // context, default is same context + + // Save the pt_regs data that is not in minstate. The previous code + // left regs at sos. + add regs=MCA_PT_REGS_OFFSET-MCA_SOS_OFFSET, regs + ;; + add temp1=PT(B6), regs + mov temp3=b6 + mov temp4=b7 + add temp2=PT(B7), regs + ;; + st8 [temp1]=temp3,PT(AR_CSD)-PT(B6) // save b6 + st8 [temp2]=temp4,PT(AR_SSD)-PT(B7) // save b7 + mov temp3=ar.csd + mov temp4=ar.ssd + cover // must be last in group + ;; + st8 [temp1]=temp3,PT(AR_UNAT)-PT(AR_CSD) // save ar.csd + st8 [temp2]=temp4,PT(AR_PFS)-PT(AR_SSD) // save ar.ssd + mov temp3=ar.unat + mov temp4=ar.pfs + ;; + st8 [temp1]=temp3,PT(AR_RNAT)-PT(AR_UNAT) // save ar.unat + st8 [temp2]=temp4,PT(AR_BSPSTORE)-PT(AR_PFS) // save ar.pfs + mov temp3=ar.rnat + mov temp4=ar.bspstore + ;; + st8 [temp1]=temp3,PT(LOADRS)-PT(AR_RNAT) // save ar.rnat + st8 [temp2]=temp4,PT(AR_FPSR)-PT(AR_BSPSTORE) // save ar.bspstore + mov temp3=ar.bsp + ;; + sub temp3=temp3, temp4 // ar.bsp - ar.bspstore + mov temp4=ar.fpsr + ;; + shl temp3=temp3,16 // compute ar.rsc to be used for "loadrs" + ;; + st8 [temp1]=temp3,PT(AR_CCV)-PT(LOADRS) // save loadrs + st8 [temp2]=temp4,PT(F6)-PT(AR_FPSR) // save ar.fpsr + mov temp3=ar.ccv + ;; + st8 [temp1]=temp3,PT(F7)-PT(AR_CCV) // save ar.ccv + stf.spill [temp2]=f6,PT(F8)-PT(F6) + ;; + stf.spill [temp1]=f7,PT(F9)-PT(F7) + stf.spill [temp2]=f8,PT(F10)-PT(F8) + ;; + stf.spill [temp1]=f9,PT(F11)-PT(F9) + stf.spill [temp2]=f10 + ;; + stf.spill [temp1]=f11 + + // Save the switch_stack data that is not in minstate nor pt_regs. 
The + // previous code left regs at pt_regs. + add regs=MCA_SWITCH_STACK_OFFSET-MCA_PT_REGS_OFFSET, regs + ;; + add temp1=SW(F2), regs + add temp2=SW(F3), regs + ;; + stf.spill [temp1]=f2,32 + stf.spill [temp2]=f3,32 + ;; + stf.spill [temp1]=f4,32 + stf.spill [temp2]=f5,32 + ;; + stf.spill [temp1]=f12,32 + stf.spill [temp2]=f13,32 + ;; + stf.spill [temp1]=f14,32 + stf.spill [temp2]=f15,32 + ;; + stf.spill [temp1]=f16,32 + stf.spill [temp2]=f17,32 + ;; + stf.spill [temp1]=f18,32 + stf.spill [temp2]=f19,32 + ;; + stf.spill [temp1]=f20,32 + stf.spill [temp2]=f21,32 + ;; + stf.spill [temp1]=f22,32 + stf.spill [temp2]=f23,32 + ;; + stf.spill [temp1]=f24,32 + stf.spill [temp2]=f25,32 + ;; + stf.spill [temp1]=f26,32 + stf.spill [temp2]=f27,32 + ;; + stf.spill [temp1]=f28,32 + stf.spill [temp2]=f29,32 + ;; + stf.spill [temp1]=f30,SW(B2)-SW(F30) + stf.spill [temp2]=f31,SW(B3)-SW(F31) + mov temp3=b2 + mov temp4=b3 + ;; + st8 [temp1]=temp3,16 // save b2 + st8 [temp2]=temp4,16 // save b3 + mov temp3=b4 + mov temp4=b5 + ;; + st8 [temp1]=temp3,SW(AR_LC)-SW(B4) // save b4 + st8 [temp2]=temp4 // save b5 + mov temp3=ar.lc + ;; + st8 [temp1]=temp3 // save ar.lc + + // FIXME: Some proms are incorrectly accessing the minstate area as + // cached data. The C code uses region 6, uncached virtual. Ensure + // that there is no cache data lying around for the first 1K of the + // minstate area. + // Remove this code in September 2006, that gives platforms a year to + // fix their proms and get their customers updated. 
+ + add r1=32*1,r17 + add r2=32*2,r17 + add r3=32*3,r17 + add r4=32*4,r17 + add r5=32*5,r17 + add r6=32*6,r17 + add r7=32*7,r17 + ;; + fc r17 + fc r1 + fc r2 + fc r3 + fc r4 + fc r5 + fc r6 + fc r7 + add r17=32*8,r17 + add r1=32*8,r1 + add r2=32*8,r2 + add r3=32*8,r3 + add r4=32*8,r4 + add r5=32*8,r5 + add r6=32*8,r6 + add r7=32*8,r7 + ;; + fc r17 + fc r1 + fc r2 + fc r3 + fc r4 + fc r5 + fc r6 + fc r7 + add r17=32*8,r17 + add r1=32*8,r1 + add r2=32*8,r2 + add r3=32*8,r3 + add r4=32*8,r4 + add r5=32*8,r5 + add r6=32*8,r6 + add r7=32*8,r7 + ;; + fc r17 + fc r1 + fc r2 + fc r3 + fc r4 + fc r5 + fc r6 + fc r7 + add r17=32*8,r17 + add r1=32*8,r1 + add r2=32*8,r2 + add r3=32*8,r3 + add r4=32*8,r4 + add r5=32*8,r5 + add r6=32*8,r6 + add r7=32*8,r7 + ;; + fc r17 + fc r1 + fc r2 + fc r3 + fc r4 + fc r5 + fc r6 + fc r7 + + br.sptk b0 + +//EndStub////////////////////////////////////////////////////////////////////// + + +//++ +// Name: +// ia64_state_restore() +// +// Stub Description: +// +// Restore the SAL/OS state. This is sensitive to the layout of struct +// ia64_sal_os_state in mca.h. +// +// r2 contains the return address, r3 contains either +// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. +// +// In addition to the SAL to OS state, this routine restores all the +// registers that appear in struct pt_regs and struct switch_stack, +// excluding those in the PAL minstate area. +// +//-- + +ia64_state_restore: + // Restore the switch_stack data that is not in minstate nor pt_regs. 
+ add regs=MCA_SWITCH_STACK_OFFSET, r3 + mov b0=r2 // save return address + ;; + GET_IA64_MCA_DATA(temp2) + ;; + add regs=temp2, regs + ;; + add temp1=SW(F2), regs + add temp2=SW(F3), regs + ;; + ldf.fill f2=[temp1],32 + ldf.fill f3=[temp2],32 + ;; + ldf.fill f4=[temp1],32 + ldf.fill f5=[temp2],32 + ;; + ldf.fill f12=[temp1],32 + ldf.fill f13=[temp2],32 + ;; + ldf.fill f14=[temp1],32 + ldf.fill f15=[temp2],32 + ;; + ldf.fill f16=[temp1],32 + ldf.fill f17=[temp2],32 + ;; + ldf.fill f18=[temp1],32 + ldf.fill f19=[temp2],32 + ;; + ldf.fill f20=[temp1],32 + ldf.fill f21=[temp2],32 + ;; + ldf.fill f22=[temp1],32 + ldf.fill f23=[temp2],32 + ;; + ldf.fill f24=[temp1],32 + ldf.fill f25=[temp2],32 + ;; + ldf.fill f26=[temp1],32 + ldf.fill f27=[temp2],32 + ;; + ldf.fill f28=[temp1],32 + ldf.fill f29=[temp2],32 + ;; + ldf.fill f30=[temp1],SW(B2)-SW(F30) + ldf.fill f31=[temp2],SW(B3)-SW(F31) + ;; + ld8 temp3=[temp1],16 // restore b2 + ld8 temp4=[temp2],16 // restore b3 + ;; + mov b2=temp3 + mov b3=temp4 + ld8 temp3=[temp1],SW(AR_LC)-SW(B4) // restore b4 + ld8 temp4=[temp2] // restore b5 + ;; + mov b4=temp3 + mov b5=temp4 + ld8 temp3=[temp1] // restore ar.lc + ;; + mov ar.lc=temp3 + + // Restore the pt_regs data that is not in minstate. The previous code + // left regs at switch_stack. + add regs=MCA_PT_REGS_OFFSET-MCA_SWITCH_STACK_OFFSET, regs + ;; + add temp1=PT(B6), regs + add temp2=PT(B7), regs + ;; + ld8 temp3=[temp1],PT(AR_CSD)-PT(B6) // restore b6 + ld8 temp4=[temp2],PT(AR_SSD)-PT(B7) // restore b7 + ;; + mov b6=temp3 + mov b7=temp4 + ld8 temp3=[temp1],PT(AR_UNAT)-PT(AR_CSD) // restore ar.csd + ld8 temp4=[temp2],PT(AR_PFS)-PT(AR_SSD) // restore ar.ssd + ;; + mov ar.csd=temp3 + mov ar.ssd=temp4 + ld8 temp3=[temp1] // restore ar.unat + add temp1=PT(AR_CCV)-PT(AR_UNAT), temp1 + ld8 temp4=[temp2],PT(AR_FPSR)-PT(AR_PFS) // restore ar.pfs + ;; + mov ar.unat=temp3 + mov ar.pfs=temp4 + // ar.rnat, ar.bspstore, loadrs are restore in ia64_old_stack. 
+ ld8 temp3=[temp1],PT(F6)-PT(AR_CCV) // restore ar.ccv + ld8 temp4=[temp2],PT(F7)-PT(AR_FPSR) // restore ar.fpsr + ;; + mov ar.ccv=temp3 + mov ar.fpsr=temp4 + ldf.fill f6=[temp1],PT(F8)-PT(F6) + ldf.fill f7=[temp2],PT(F9)-PT(F7) + ;; + ldf.fill f8=[temp1],PT(F10)-PT(F8) + ldf.fill f9=[temp2],PT(F11)-PT(F9) + ;; + ldf.fill f10=[temp1] + ldf.fill f11=[temp2] + + // Restore the SAL to OS state. The previous code left regs at pt_regs. + add regs=MCA_SOS_OFFSET-MCA_PT_REGS_OFFSET, regs + ;; + add temp1=SOS(SAL_RA), regs + add temp2=SOS(SAL_GP), regs + ;; + ld8 r12=[temp1],16 // sal_ra + ld8 r9=[temp2],16 // sal_gp + ;; + ld8 r22=[temp1],16 // pal_min_state, virtual + ld8 r13=[temp2],16 // prev_IA64_KR_CURRENT + ;; + ld8 r16=[temp1],16 // prev_IA64_KR_CURRENT_STACK + ld8 r20=[temp2],16 // prev_task + ;; + ld8 temp3=[temp1],16 // cr.isr + ld8 temp4=[temp2],16 // cr.ifa + ;; + mov cr.isr=temp3 + mov cr.ifa=temp4 + ld8 temp3=[temp1],16 // cr.itir + ld8 temp4=[temp2],16 // cr.iipa + ;; + mov cr.itir=temp3 + mov cr.iipa=temp4 + ld8 temp3=[temp1] // cr.iim + ld8 temp4=[temp2] // cr.iha + add temp1=SOS(OS_STATUS), regs + add temp2=SOS(CONTEXT), regs + ;; + mov cr.iim=temp3 + mov cr.iha=temp4 + dep r22=0,r22,62,1 // pal_min_state, physical, uncached + mov IA64_KR(CURRENT)=r13 + ld8 r8=[temp1] // os_status + ld8 r10=[temp2] // context + + /* Wire IA64_TR_CURRENT_STACK to the stack that we are resuming to. To + * avoid any dependencies on the algorithm in ia64_switch_to(), just + * purge any existing CURRENT_STACK mapping and insert the new one. + * + * r16 contains prev_IA64_KR_CURRENT_STACK, r13 contains + * prev_IA64_KR_CURRENT, these values may have been changed by the C + * code. Do not use r8, r9, r10, r22, they contain values ready for + * the return to SAL. 
+ */ + + mov r15=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK + ;; + shl r15=r15,IA64_GRANULE_SHIFT + ;; + dep r15=-1,r15,61,3 // virtual granule + mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps + ;; + ptr.d r15,r18 + ;; + srlz.d + + extr.u r19=r13,61,3 // r13 = prev_IA64_KR_CURRENT + shl r20=r16,IA64_GRANULE_SHIFT // r16 = prev_IA64_KR_CURRENT_STACK + movl r21=PAGE_KERNEL // page properties + ;; + mov IA64_KR(CURRENT_STACK)=r16 + cmp.ne p6,p0=RGN_KERNEL,r19 // new stack is in the kernel region? + or r21=r20,r21 // construct PA | page properties +(p6) br.spnt 1f // the dreaded cpu 0 idle task in region 5:( + ;; + mov cr.itir=r18 + mov cr.ifa=r13 + mov r20=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r20]=r21 + ;; + srlz.d +1: + + br.sptk b0 + +//EndStub////////////////////////////////////////////////////////////////////// + + +//++ +// Name: +// ia64_new_stack() +// +// Stub Description: +// +// Switch to the MCA/INIT stack. +// +// r2 contains the return address, r3 contains either +// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. +// +// On entry RBS is still on the original stack, this routine switches RBS +// to use the MCA/INIT stack. +// +// On entry, sos->pal_min_state is physical, on exit it is virtual. +// +//-- + +ia64_new_stack: + add regs=MCA_PT_REGS_OFFSET, r3 + add temp2=MCA_SOS_OFFSET+SOS(PAL_MIN_STATE), r3 + mov b0=r2 // save return address + GET_IA64_MCA_DATA(temp1) + invala + ;; + add temp2=temp2, temp1 // struct ia64_sal_os_state.pal_min_state on MCA or INIT stack + add regs=regs, temp1 // struct pt_regs on MCA or INIT stack + ;; + // Address of minstate area provided by PAL is physical, uncacheable. + // Convert to Linux virtual address in region 6 for C code. 
+ ld8 ms=[temp2] // pal_min_state, physical + ;; + dep temp1=-1,ms,62,2 // set region 6 + mov temp3=IA64_RBS_OFFSET-MCA_PT_REGS_OFFSET + ;; + st8 [temp2]=temp1 // pal_min_state, virtual + + add temp4=temp3, regs // start of bspstore on new stack + ;; + mov ar.bspstore=temp4 // switch RBS to MCA/INIT stack + ;; + flushrs // must be first in group + br.sptk b0 + +//EndStub////////////////////////////////////////////////////////////////////// + + +//++ +// Name: +// ia64_old_stack() +// +// Stub Description: +// +// Switch to the old stack. +// +// r2 contains the return address, r3 contains either +// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. +// +// On entry, pal_min_state is virtual, on exit it is physical. +// +// On entry RBS is on the MCA/INIT stack, this routine switches RBS +// back to the previous stack. +// +// The psr is set to all zeroes. SAL return requires either all zeroes or +// just psr.mc set. Leaving psr.mc off allows INIT to be issued if this +// code does not perform correctly. +// +// The dirty registers at the time of the event were flushed to the +// MCA/INIT stack in ia64_pt_regs_save(). Restore the dirty registers +// before reverting to the previous bspstore. 
+//-- + +ia64_old_stack: + add regs=MCA_PT_REGS_OFFSET, r3 + mov b0=r2 // save return address + GET_IA64_MCA_DATA(temp2) + LOAD_PHYSICAL(p0,temp1,1f) + ;; + mov cr.ipsr=r0 + mov cr.ifs=r0 + mov cr.iip=temp1 + ;; + invala + rfi +1: + + add regs=regs, temp2 // struct pt_regs on MCA or INIT stack + ;; + add temp1=PT(LOADRS), regs + ;; + ld8 temp2=[temp1],PT(AR_BSPSTORE)-PT(LOADRS) // restore loadrs + ;; + ld8 temp3=[temp1],PT(AR_RNAT)-PT(AR_BSPSTORE) // restore ar.bspstore + mov ar.rsc=temp2 + ;; + loadrs + ld8 temp4=[temp1] // restore ar.rnat + ;; + mov ar.bspstore=temp3 // back to old stack + ;; + mov ar.rnat=temp4 + ;; + + br.sptk b0 + +//EndStub////////////////////////////////////////////////////////////////////// + + +//++ +// Name: +// ia64_set_kernel_registers() +// +// Stub Description: +// +// Set the registers that are required by the C code in order to run on an +// MCA/INIT stack. +// +// r2 contains the return address, r3 contains either +// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. +// +//-- + +ia64_set_kernel_registers: + add temp3=MCA_SP_OFFSET, r3 + mov b0=r2 // save return address + GET_IA64_MCA_DATA(temp1) + ;; + add r12=temp1, temp3 // kernel stack pointer on MCA/INIT stack + add r13=temp1, r3 // set current to start of MCA/INIT stack + add r20=temp1, r3 // physical start of MCA/INIT stack + ;; + DATA_PA_TO_VA(r12,temp2) + DATA_PA_TO_VA(r13,temp3) + ;; + mov IA64_KR(CURRENT)=r13 + + /* Wire IA64_TR_CURRENT_STACK to the MCA/INIT handler stack. To avoid + * any dependencies on the algorithm in ia64_switch_to(), just purge + * any existing CURRENT_STACK mapping and insert the new one. 
+ */ + + mov r16=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK + ;; + shl r16=r16,IA64_GRANULE_SHIFT + ;; + dep r16=-1,r16,61,3 // virtual granule + mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps + ;; + ptr.d r16,r18 + ;; + srlz.d + + shr.u r16=r20,IA64_GRANULE_SHIFT // r20 = physical start of MCA/INIT stack + movl r21=PAGE_KERNEL // page properties + ;; + mov IA64_KR(CURRENT_STACK)=r16 + or r21=r20,r21 // construct PA | page properties + ;; + mov cr.itir=r18 + mov cr.ifa=r13 + mov r20=IA64_TR_CURRENT_STACK + + movl r17=FPSR_DEFAULT + ;; + mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value + ;; + itr.d dtr[r20]=r21 + ;; + srlz.d + + br.sptk b0 + +//EndStub////////////////////////////////////////////////////////////////////// + +#undef ms +#undef regs +#undef temp1 +#undef temp2 +#undef temp3 +#undef temp4 + + +// Support function for mca.c, it is here to avoid using inline asm. Given the +// address of an rnat slot, if that address is below the current ar.bspstore +// then return the contents of that slot, otherwise return the contents of +// ar.rnat. +GLOBAL_ENTRY(ia64_get_rnat) + alloc r14=ar.pfs,1,0,0,0 + mov ar.rsc=0 + ;; + mov r14=ar.bspstore + ;; + cmp.lt p6,p7=in0,r14 + ;; +(p6) ld8 r8=[in0] +(p7) mov r8=ar.rnat + mov ar.rsc=3 + br.ret.sptk.many rp +END(ia64_get_rnat) + + +// void ia64_set_psr_mc(void) +// +// Set psr.mc bit to mask MCA/INIT. 
+GLOBAL_ENTRY(ia64_set_psr_mc) + rsm psr.i | psr.ic // disable interrupts + ;; + srlz.d + ;; + mov r14 = psr // get psr{36:35,31:0} + movl r15 = 1f + ;; + dep r14 = -1, r14, PSR_MC, 1 // set psr.mc + ;; + dep r14 = -1, r14, PSR_IC, 1 // set psr.ic + ;; + dep r14 = -1, r14, PSR_BN, 1 // keep bank1 in use + ;; + mov cr.ipsr = r14 + mov cr.ifs = r0 + mov cr.iip = r15 + ;; + rfi +1: + br.ret.sptk.many rp +END(ia64_set_psr_mc) diff --git a/kernel/arch/ia64/kernel/mca_drv.c b/kernel/arch/ia64/kernel/mca_drv.c new file mode 100644 index 000000000..94f8bf777 --- /dev/null +++ b/kernel/arch/ia64/kernel/mca_drv.c @@ -0,0 +1,795 @@ +/* + * File: mca_drv.c + * Purpose: Generic MCA handling layer + * + * Copyright (C) 2004 FUJITSU LIMITED + * Copyright (C) 2004 Hidetoshi Seto + * Copyright (C) 2005 Silicon Graphics, Inc + * Copyright (C) 2005 Keith Owens + * Copyright (C) 2006 Russ Anderson + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mca_drv.h" + +/* max size of SAL error record (default) */ +static int sal_rec_max = 10000; + +/* from mca_drv_asm.S */ +extern void *mca_handler_bhhook(void); + +static DEFINE_SPINLOCK(mca_bh_lock); + +typedef enum { + MCA_IS_LOCAL = 0, + MCA_IS_GLOBAL = 1 +} mca_type_t; + +#define MAX_PAGE_ISOLATE 1024 + +static struct page *page_isolate[MAX_PAGE_ISOLATE]; +static int num_page_isolate = 0; + +typedef enum { + ISOLATE_NG, + ISOLATE_OK, + ISOLATE_NONE +} isolate_status_t; + +typedef enum { + MCA_NOT_RECOVERED = 0, + MCA_RECOVERED = 1 +} recovery_status_t; + +/* + * This pool keeps pointers to the section part of SAL error record + */ +static struct { + slidx_list_t *buffer; /* section pointer list pool */ + int cur_idx; /* Current index of section pointer list pool */ + int max_idx; /* Maximum index of section pointer list pool */ +} 
slidx_pool; + +static int +fatal_mca(const char *fmt, ...) +{ + va_list args; + char buf[256]; + + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf); + + return MCA_NOT_RECOVERED; +} + +static int +mca_recovered(const char *fmt, ...) +{ + va_list args; + char buf[256]; + + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + ia64_mca_printk(KERN_INFO "MCA: %s\n", buf); + + return MCA_RECOVERED; +} + +/** + * mca_page_isolate - isolate a poisoned page in order not to use it later + * @paddr: poisoned memory location + * + * Return value: + * one of isolate_status_t, ISOLATE_OK/NG/NONE. + */ + +static isolate_status_t +mca_page_isolate(unsigned long paddr) +{ + int i; + struct page *p; + + /* whether physical address is valid or not */ + if (!ia64_phys_addr_valid(paddr)) + return ISOLATE_NONE; + + if (!pfn_valid(paddr >> PAGE_SHIFT)) + return ISOLATE_NONE; + + /* convert physical address to physical page number */ + p = pfn_to_page(paddr>>PAGE_SHIFT); + + /* check whether a page number have been already registered or not */ + for (i = 0; i < num_page_isolate; i++) + if (page_isolate[i] == p) + return ISOLATE_OK; /* already listed */ + + /* limitation check */ + if (num_page_isolate == MAX_PAGE_ISOLATE) + return ISOLATE_NG; + + /* kick pages having attribute 'SLAB' or 'Reserved' */ + if (PageSlab(p) || PageReserved(p)) + return ISOLATE_NG; + + /* add attribute 'Reserved' and register the page */ + get_page(p); + SetPageReserved(p); + page_isolate[num_page_isolate++] = p; + + return ISOLATE_OK; +} + +/** + * mca_hanlder_bh - Kill the process which occurred memory read error + * @paddr: poisoned address received from MCA Handler + */ + +void +mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) +{ + ia64_mlogbuf_dump(); + printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " + "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", + 
raw_smp_processor_id(), current->pid, + from_kuid(&init_user_ns, current_uid()), + iip, ipsr, paddr, current->comm); + + spin_lock(&mca_bh_lock); + switch (mca_page_isolate(paddr)) { + case ISOLATE_OK: + printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr); + break; + case ISOLATE_NG: + printk(KERN_CRIT "Page isolation: ( %lx ) failure.\n", paddr); + break; + default: + break; + } + spin_unlock(&mca_bh_lock); + + /* This process is about to be killed itself */ + do_exit(SIGKILL); +} + +/** + * mca_make_peidx - Make index of processor error section + * @slpi: pointer to record of processor error section + * @peidx: pointer to index of processor error section + */ + +static void +mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx) +{ + /* + * calculate the start address of + * "struct cpuid_info" and "sal_processor_static_info_t". + */ + u64 total_check_num = slpi->valid.num_cache_check + + slpi->valid.num_tlb_check + + slpi->valid.num_bus_check + + slpi->valid.num_reg_file_check + + slpi->valid.num_ms_check; + u64 head_size = sizeof(sal_log_mod_error_info_t) * total_check_num + + sizeof(sal_log_processor_info_t); + u64 mid_size = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info); + + peidx_head(peidx) = slpi; + peidx_mid(peidx) = (struct sal_cpuid_info *) + (slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL); + peidx_bottom(peidx) = (sal_processor_static_info_t *) + (slpi->valid.psi_static_struct ? 
+		((char*)slpi + head_size + mid_size) : NULL);
+}
+
+/**
+ * mca_make_slidx - Make index of SAL error record
+ * @buffer: pointer to SAL error record
+ * @slidx: pointer to index of SAL error record
+ *
+ * Return value:
+ *	1 if record has platform error / 0 if not
+ */
+#define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \
+	{slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \
+	hl->hdr = ptr; \
+	list_add(&hl->list, &(sect)); \
+	slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; }
+
+static int
+mca_make_slidx(void *buffer, slidx_table_t *slidx)
+{
+	int platform_err = 0;
+	int record_len = ((sal_log_record_header_t*)buffer)->len;
+	u32 ercd_pos;
+	int sects;
+	sal_log_section_hdr_t *sp;
+
+	/*
+	 * Initialize index referring current record
+	 */
+	INIT_LIST_HEAD(&(slidx->proc_err));
+	INIT_LIST_HEAD(&(slidx->mem_dev_err));
+	INIT_LIST_HEAD(&(slidx->sel_dev_err));
+	INIT_LIST_HEAD(&(slidx->pci_bus_err));
+	INIT_LIST_HEAD(&(slidx->smbios_dev_err));
+	INIT_LIST_HEAD(&(slidx->pci_comp_err));
+	INIT_LIST_HEAD(&(slidx->plat_specific_err));
+	INIT_LIST_HEAD(&(slidx->host_ctlr_err));
+	INIT_LIST_HEAD(&(slidx->plat_bus_err));
+	INIT_LIST_HEAD(&(slidx->unsupported));
+
+	/*
+	 * Extract a Record Header
+	 */
+	slidx->header = buffer;
+
+	/*
+	 * Extract each section records
+	 * (arranged from "int ia64_log_platform_info_print()")
+	 */
+	for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0;
+		ercd_pos < record_len; ercd_pos += sp->len, sects++) {
+		sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos);
+		if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) {
+			LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp);
+		} else if (!efi_guidcmp(sp->guid,
+				SAL_PLAT_BUS_ERR_SECT_GUID)) {
+			platform_err = 1;
+			LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp);
+		} else {
+			LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp);
+		}
+	}
+	slidx->n_sections = sects;
+
+	return platform_err;
+}
+
+/**
+ * init_record_index_pools - Initialize pool of lists for SAL record index
+ *
+ * Return value:
+ *	0 on Success / -ENOMEM on Failure
+ */
+static int
+init_record_index_pools(void)
+{
+	int i;
+	int rec_max_size;  /* Maximum size of SAL error records */
+	int sect_min_size; /* Minimum size of SAL error sections */
+	/* minimum size table of each section */
+	static int sal_log_sect_min_sizes[] = {
+		sizeof(sal_log_processor_info_t)
+		+ sizeof(sal_processor_static_info_t),
+		sizeof(sal_log_mem_dev_err_info_t),
+		sizeof(sal_log_sel_dev_err_info_t),
+		sizeof(sal_log_pci_bus_err_info_t),
+		sizeof(sal_log_smbios_dev_err_info_t),
+		sizeof(sal_log_pci_comp_err_info_t),
+		sizeof(sal_log_plat_specific_err_info_t),
+		sizeof(sal_log_host_ctlr_err_info_t),
+		sizeof(sal_log_plat_bus_err_info_t),
+	};
+
+	/*
+	 * MCA handler cannot allocate new memory on flight,
+	 * so we preallocate enough memory to handle a SAL record.
+	 *
+	 * Initialize a handling set of slidx_pool:
+	 *   1. Pick up the max size of SAL error records
+	 *   2. Pick up the min size of SAL error sections
+	 *   3. Allocate the pool as enough to 2 SAL records
+	 *     (now we can estimate the maximum number of sections in a record.)
+	 */
+
+	/* - 1 - */
+	rec_max_size = sal_rec_max;
+
+	/* - 2 - scan every table entry: divide by the element size, not sizeof(size_t) */
+	sect_min_size = sal_log_sect_min_sizes[0];
+	for (i = 1; i < sizeof(sal_log_sect_min_sizes)/sizeof(sal_log_sect_min_sizes[0]); i++)
+		if (sect_min_size > sal_log_sect_min_sizes[i])
+			sect_min_size = sal_log_sect_min_sizes[i];
+
+	/* - 3 - */
+	slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
+	slidx_pool.buffer =
+		kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL);
+
+	return slidx_pool.buffer ? 0 : -ENOMEM;
+}
+
+
+/*****************************************************************************
+ * Recovery functions                                                        *
+ *****************************************************************************/
+
+/**
+ * is_mca_global - Check whether this MCA is global or not
+ * @peidx: pointer of index of processor error section
+ * @pbci: pointer to pal_bus_check_info_t
+ * @sos: pointer to hand off struct between SAL and OS
+ *
+ * Return value:
+ *	MCA_IS_LOCAL / MCA_IS_GLOBAL
+ */
+
+static mca_type_t
+is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+	      struct ia64_sal_os_state *sos)
+{
+	pal_processor_state_info_t *psp =
+		(pal_processor_state_info_t*)peidx_psp(peidx);
+
+	/*
+	 * PAL can request a rendezvous, if the MCA has a global scope.
+	 * If "rz_always" flag is set, SAL requests MCA rendezvous
+	 * in spite of global MCA.
+	 * Therefore it is local MCA when rendezvous has not been requested.
+	 * Failed to rendezvous, the system must be down.
+	 */
+	switch (sos->rv_rc) {
+		case -1: /* SAL rendezvous unsuccessful */
+			return MCA_IS_GLOBAL;
+		case  0: /* SAL rendezvous not required */
+			return MCA_IS_LOCAL;
+		case  1: /* SAL rendezvous successful int */
+		case  2: /* SAL rendezvous successful int with init */
+		default:
+			break;
+	}
+
+	/*
+	 * If One or more Cache/TLB/Reg_File/Uarch_Check is here,
+	 * it would be a local MCA. (i.e. processor internal error)
+	 */
+	if (psp->tc || psp->cc || psp->rc || psp->uc)
+		return MCA_IS_LOCAL;
+
+	/*
+	 * Bus_Check structure with Bus_Check.ib (internal bus error) flag set
+	 * would be a global MCA. (e.g. a system bus address parity error)
+	 */
+	if (!pbci || pbci->ib)
+		return MCA_IS_GLOBAL;
+
+	/*
+	 * Bus_Check structure with Bus_Check.eb (external bus error) flag set
+	 * could be either a local MCA or a global MCA.
+	 *
+	 * Referring Bus_Check.bsi:
+	 *   0: Unknown/unclassified
+	 *   1: BERR#
+	 *   2: BINIT#
+	 *   3: Hard Fail
+	 * (FIXME: Are these SGI specific or generic bsi values?)
+	 */
+	if (pbci->eb)
+		switch (pbci->bsi) {
+			case 0:
+				/* e.g. a load from poisoned memory */
+				return MCA_IS_LOCAL;
+			case 1:
+			case 2:
+			case 3:
+				return MCA_IS_GLOBAL;
+		}
+
+	return MCA_IS_GLOBAL;
+}
+
+/**
+ * get_target_identifier - Get the valid Cache or Bus check target identifier.
+ * @peidx: pointer of index of processor error section
+ *
+ * Return value:
+ *	target address on Success / 0 on Failure
+ */
+static u64
+get_target_identifier(peidx_table_t *peidx)
+{
+	u64 target_address = 0;
+	sal_log_mod_error_info_t *smei;
+	pal_cache_check_info_t *pcci;
+	int i, level = 9;
+
+	/*
+	 * Look through the cache checks for a valid target identifier
+	 * If more than one valid target identifier, return the one
+	 * with the lowest cache level.
+	 */
+	for (i = 0; i < peidx_cache_check_num(peidx); i++) {
+		smei = (sal_log_mod_error_info_t *)peidx_cache_check(peidx, i);
+		if (smei->valid.target_identifier && smei->target_identifier) {
+			pcci = (pal_cache_check_info_t *)&(smei->check_info);
+			if (!target_address || (pcci->level < level)) {
+				target_address = smei->target_identifier;
+				level = pcci->level;
+				continue;
+			}
+		}
+	}
+	if (target_address)
+		return target_address;
+
+	/*
+	 * Look at the bus check for a valid target identifier
+	 */
+	smei = peidx_bus_check(peidx, 0);
+	if (smei && smei->valid.target_identifier)
+		return smei->target_identifier;
+
+	return 0;
+}
+
+/**
+ * recover_from_read_error - Try to recover from errors whose type is "read".
+ * @slidx: pointer of index of SAL error record
+ * @peidx: pointer of index of processor error section
+ * @pbci: pointer of pal_bus_check_info
+ * @sos: pointer to hand off struct between SAL and OS
+ *
+ * Return value:
+ *	1 on Success / 0 on Failure
+ */
+
+static int
+recover_from_read_error(slidx_table_t *slidx,
+			peidx_table_t *peidx, pal_bus_check_info_t *pbci,
+			struct ia64_sal_os_state *sos)
+{
+	u64 target_identifier;
+	pal_min_state_area_t *pmsa;
+	struct ia64_psr *psr1, *psr2;
+	ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook;
+
+	/* Is target address valid? */
+	target_identifier = get_target_identifier(peidx);
+	if (!target_identifier)
+		return fatal_mca("target address not valid");
+
+	/*
+	 * cpu read or memory-mapped io read
+	 *
+	 *    offending process  affected process  OS MCA do
+	 *     kernel mode        kernel mode       down system
+	 *     kernel mode        user   mode       kill the process
+	 *     user   mode        kernel mode       down system (*)
+	 *     user   mode        user   mode       kill the process
+	 *
+	 * (*) You could terminate the offending user-mode process
+	 *    if (pbci->pv && pbci->pl != 0) *and* if you are sure
+	 *    the process does not hold any kernel locks.
+	 */
+
+	/* Is minstate valid? */
+	if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
+		return fatal_mca("minstate not valid");
+	psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
+	psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
+
+	/*
+	 * Check the privilege level of interrupted context.
+	 *   If it is user-mode, then terminate affected process.
+	 */
+
+	pmsa = sos->pal_min_state;
+	if (psr1->cpl != 0 ||
+	   ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) {
+		/*
+		 *  setup for resume to bottom half of MCA,
+		 * "mca_handler_bhhook"
+		 */
+		/* pass to bhhook as argument (gr8, ...) */
+		pmsa->pmsa_gr[8-1] = target_identifier;
+		pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
+		pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
+		/* set interrupted return address (but no use) */
+		pmsa->pmsa_br0 = pmsa->pmsa_iip;
+		/* change resume address to bottom half */
+		pmsa->pmsa_iip = mca_hdlr_bh->fp;
+		pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp;
+		/* set cpl with kernel mode */
+		psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
+		psr2->cpl = 0;
+		psr2->ri  = 0;
+		psr2->bn  = 1;
+		psr2->i  = 0;
+
+		return mca_recovered("user memory corruption. "
+				"kill affected process - recovered.");
+	}
+
+	return fatal_mca("kernel context not recovered, iip 0x%lx",
+			 pmsa->pmsa_iip);	/* fatal_mca() appends the newline itself */
+}
+
+/**
+ * recover_from_platform_error - Recover from platform error.
+ * @slidx: pointer of index of SAL error record + * @peidx: pointer of index of processor error section + * @pbci: pointer of pal_bus_check_info + * @sos: pointer to hand off struct between SAL and OS + * + * Return value: + * 1 on Success / 0 on Failure + */ + +static int +recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, + pal_bus_check_info_t *pbci, + struct ia64_sal_os_state *sos) +{ + int status = 0; + pal_processor_state_info_t *psp = + (pal_processor_state_info_t*)peidx_psp(peidx); + + if (psp->bc && pbci->eb && pbci->bsi == 0) { + switch(pbci->type) { + case 1: /* partial read */ + case 3: /* full line(cpu) read */ + case 9: /* I/O space read */ + status = recover_from_read_error(slidx, peidx, pbci, + sos); + break; + case 0: /* unknown */ + case 2: /* partial write */ + case 4: /* full line write */ + case 5: /* implicit or explicit write-back operation */ + case 6: /* snoop probe */ + case 7: /* incoming or outgoing ptc.g */ + case 8: /* write coalescing transactions */ + case 10: /* I/O space write */ + case 11: /* inter-processor interrupt message(IPI) */ + case 12: /* interrupt acknowledge or + external task priority cycle */ + default: + break; + } + } else if (psp->cc && !psp->bc) { /* Cache error */ + status = recover_from_read_error(slidx, peidx, pbci, sos); + } + + return status; +} + +/* + * recover_from_tlb_check + * @peidx: pointer of index of processor error section + * + * Return value: + * 1 on Success / 0 on Failure + */ +static int +recover_from_tlb_check(peidx_table_t *peidx) +{ + sal_log_mod_error_info_t *smei; + pal_tlb_check_info_t *ptci; + + smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0); + ptci = (pal_tlb_check_info_t *)&(smei->check_info); + + /* + * Look for signature of a duplicate TLB DTC entry, which is + * a SW bug and always fatal. 
+ */ + if (ptci->op == PAL_TLB_CHECK_OP_PURGE + && !(ptci->itr || ptci->dtc || ptci->itc)) + return fatal_mca("Duplicate TLB entry"); + + return mca_recovered("TLB check recovered"); +} + +/** + * recover_from_processor_error + * @platform: whether there are some platform error section or not + * @slidx: pointer of index of SAL error record + * @peidx: pointer of index of processor error section + * @pbci: pointer of pal_bus_check_info + * @sos: pointer to hand off struct between SAL and OS + * + * Return value: + * 1 on Success / 0 on Failure + */ + +static int +recover_from_processor_error(int platform, slidx_table_t *slidx, + peidx_table_t *peidx, pal_bus_check_info_t *pbci, + struct ia64_sal_os_state *sos) +{ + pal_processor_state_info_t *psp = + (pal_processor_state_info_t*)peidx_psp(peidx); + + /* + * Processor recovery status must key off of the PAL recovery + * status in the Processor State Parameter. + */ + + /* + * The machine check is corrected. + */ + if (psp->cm == 1) + return mca_recovered("machine check is already corrected."); + + /* + * The error was not contained. Software must be reset. + */ + if (psp->us || psp->ci == 0) + return fatal_mca("error not contained"); + + /* + * Look for recoverable TLB check + */ + if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc)) + return recover_from_tlb_check(peidx); + + /* + * The cache check and bus check bits have four possible states + * cc bc + * 1 1 Memory error, attempt recovery + * 1 0 Cache error, attempt recovery + * 0 1 I/O error, attempt recovery + * 0 0 Other error type, not recovered + */ + if (psp->cc == 0 && (psp->bc == 0 || pbci == NULL)) + return fatal_mca("No cache or bus check"); + + /* + * Cannot handle more than one bus check. 
+ */ + if (peidx_bus_check_num(peidx) > 1) + return fatal_mca("Too many bus checks"); + + if (pbci->ib) + return fatal_mca("Internal Bus error"); + if (pbci->eb && pbci->bsi > 0) + return fatal_mca("External bus check fatal status"); + + /* + * This is a local MCA and estimated as a recoverable error. + */ + if (platform) + return recover_from_platform_error(slidx, peidx, pbci, sos); + + /* + * On account of strange SAL error record, we cannot recover. + */ + return fatal_mca("Strange SAL record"); +} + +/** + * mca_try_to_recover - Try to recover from MCA + * @rec: pointer to a SAL error record + * @sos: pointer to hand off struct between SAL and OS + * + * Return value: + * 1 on Success / 0 on Failure + */ + +static int +mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) +{ + int platform_err; + int n_proc_err; + slidx_table_t slidx; + peidx_table_t peidx; + pal_bus_check_info_t pbci; + + /* Make index of SAL error record */ + platform_err = mca_make_slidx(rec, &slidx); + + /* Count processor error sections */ + n_proc_err = slidx_count(&slidx, proc_err); + + /* Now, OS can recover when there is one processor error section */ + if (n_proc_err > 1) + return fatal_mca("Too Many Errors"); + else if (n_proc_err == 0) + /* Weird SAL record ... 
We can't do anything */ + return fatal_mca("Weird SAL record"); + + /* Make index of processor error section */ + mca_make_peidx((sal_log_processor_info_t*) + slidx_first_entry(&slidx.proc_err)->hdr, &peidx); + + /* Extract Processor BUS_CHECK[0] */ + *((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0); + + /* Check whether MCA is global or not */ + if (is_mca_global(&peidx, &pbci, sos)) + return fatal_mca("global MCA"); + + /* Try to recover a processor error */ + return recover_from_processor_error(platform_err, &slidx, &peidx, + &pbci, sos); +} + +/* + * ============================================================================= + */ + +int __init mca_external_handler_init(void) +{ + if (init_record_index_pools()) + return -ENOMEM; + + /* register external mca handlers */ + if (ia64_reg_MCA_extension(mca_try_to_recover)) { + printk(KERN_ERR "ia64_reg_MCA_extension failed.\n"); + kfree(slidx_pool.buffer); + return -EFAULT; + } + return 0; +} + +void __exit mca_external_handler_exit(void) +{ + /* unregister external mca handlers */ + ia64_unreg_MCA_extension(); + kfree(slidx_pool.buffer); +} + +module_init(mca_external_handler_init); +module_exit(mca_external_handler_exit); + +module_param(sal_rec_max, int, 0644); +MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record"); + +MODULE_DESCRIPTION("ia64 platform dependent mca handler driver"); +MODULE_LICENSE("GPL"); diff --git a/kernel/arch/ia64/kernel/mca_drv.h b/kernel/arch/ia64/kernel/mca_drv.h new file mode 100644 index 000000000..53b8ecb5b --- /dev/null +++ b/kernel/arch/ia64/kernel/mca_drv.h @@ -0,0 +1,122 @@ +/* + * File: mca_drv.h + * Purpose: Define helpers for Generic MCA handling + * + * Copyright (C) 2004 FUJITSU LIMITED + * Copyright (C) 2004 Hidetoshi Seto + */ +/* + * Processor error section: + * + * +-sal_log_processor_info_t *info-------------+ + * | sal_log_section_hdr_t header; | + * | ... 
| + * | sal_log_mod_error_info_t info[0]; | + * +-+----------------+-------------------------+ + * | CACHE_CHECK | ^ num_cache_check v + * +----------------+ + * | TLB_CHECK | ^ num_tlb_check v + * +----------------+ + * | BUS_CHECK | ^ num_bus_check v + * +----------------+ + * | REG_FILE_CHECK | ^ num_reg_file_check v + * +----------------+ + * | MS_CHECK | ^ num_ms_check v + * +-struct cpuid_info *id----------------------+ + * | regs[5]; | + * | reserved; | + * +-sal_processor_static_info_t *regs----------+ + * | valid; | + * | ... | + * | fr[128]; | + * +--------------------------------------------+ + */ + +/* peidx: index of processor error section */ +typedef struct peidx_table { + sal_log_processor_info_t *info; + struct sal_cpuid_info *id; + sal_processor_static_info_t *regs; +} peidx_table_t; + +#define peidx_head(p) (((p)->info)) +#define peidx_mid(p) (((p)->id)) +#define peidx_bottom(p) (((p)->regs)) + +#define peidx_psp(p) (&(peidx_head(p)->proc_state_parameter)) +#define peidx_field_valid(p) (&(peidx_head(p)->valid)) +#define peidx_minstate_area(p) (&(peidx_bottom(p)->min_state_area)) + +#define peidx_cache_check_num(p) (peidx_head(p)->valid.num_cache_check) +#define peidx_tlb_check_num(p) (peidx_head(p)->valid.num_tlb_check) +#define peidx_bus_check_num(p) (peidx_head(p)->valid.num_bus_check) +#define peidx_reg_file_check_num(p) (peidx_head(p)->valid.num_reg_file_check) +#define peidx_ms_check_num(p) (peidx_head(p)->valid.num_ms_check) + +#define peidx_cache_check_idx(p, n) (n) +#define peidx_tlb_check_idx(p, n) (peidx_cache_check_idx(p, peidx_cache_check_num(p)) + n) +#define peidx_bus_check_idx(p, n) (peidx_tlb_check_idx(p, peidx_tlb_check_num(p)) + n) +#define peidx_reg_file_check_idx(p, n) (peidx_bus_check_idx(p, peidx_bus_check_num(p)) + n) +#define peidx_ms_check_idx(p, n) (peidx_reg_file_check_idx(p, peidx_reg_file_check_num(p)) + n) + +#define peidx_mod_error_info(p, name, n) \ +({ int __idx = peidx_##name##_idx(p, n); \ + 
sal_log_mod_error_info_t *__ret = NULL; \ + if (peidx_##name##_num(p) > n) /*BUG*/ \ + __ret = &(peidx_head(p)->info[__idx]); \ + __ret; }) + +#define peidx_cache_check(p, n) peidx_mod_error_info(p, cache_check, n) +#define peidx_tlb_check(p, n) peidx_mod_error_info(p, tlb_check, n) +#define peidx_bus_check(p, n) peidx_mod_error_info(p, bus_check, n) +#define peidx_reg_file_check(p, n) peidx_mod_error_info(p, reg_file_check, n) +#define peidx_ms_check(p, n) peidx_mod_error_info(p, ms_check, n) + +#define peidx_check_info(proc, name, n) \ +({ \ + sal_log_mod_error_info_t *__info = peidx_mod_error_info(proc, name, n);\ + u64 __temp = __info && __info->valid.check_info \ + ? __info->check_info : 0; \ + __temp; }) + +/* slidx: index of SAL log error record */ + +typedef struct slidx_list { + struct list_head list; + sal_log_section_hdr_t *hdr; +} slidx_list_t; + +typedef struct slidx_table { + sal_log_record_header_t *header; + int n_sections; /* # of section headers */ + struct list_head proc_err; + struct list_head mem_dev_err; + struct list_head sel_dev_err; + struct list_head pci_bus_err; + struct list_head smbios_dev_err; + struct list_head pci_comp_err; + struct list_head plat_specific_err; + struct list_head host_ctlr_err; + struct list_head plat_bus_err; + struct list_head unsupported; /* list of unsupported sections */ +} slidx_table_t; + +#define slidx_foreach_entry(pos, head) \ + list_for_each_entry(pos, head, list) +#define slidx_first_entry(head) \ + (((head)->next != (head)) ? 
list_entry((head)->next, typeof(slidx_list_t), list) : NULL) +#define slidx_count(slidx, sec) \ +({ int __count = 0; \ + slidx_list_t *__pos; \ + slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\ + __count; }) + +struct mca_table_entry { + int start_addr; /* location-relative starting address of MCA recoverable range */ + int end_addr; /* location-relative ending address of MCA recoverable range */ +}; + +extern const struct mca_table_entry *search_mca_tables (unsigned long addr); +extern int mca_recover_range(unsigned long); +extern void ia64_mlogbuf_dump(void); + diff --git a/kernel/arch/ia64/kernel/mca_drv_asm.S b/kernel/arch/ia64/kernel/mca_drv_asm.S new file mode 100644 index 000000000..767ac2c20 --- /dev/null +++ b/kernel/arch/ia64/kernel/mca_drv_asm.S @@ -0,0 +1,55 @@ +/* + * File: mca_drv_asm.S + * Purpose: Assembly portion of Generic MCA handling + * + * Copyright (C) 2004 FUJITSU LIMITED + * Copyright (C) 2004 Hidetoshi Seto + */ +#include + +#include +#include +#include + +GLOBAL_ENTRY(mca_handler_bhhook) + invala // clear RSE ? + cover + ;; + clrrrb + ;; + alloc r16=ar.pfs,0,2,3,0 // make a new frame + mov ar.rsc=0 + mov r13=IA64_KR(CURRENT) // current task pointer + ;; + mov r2=r13 + ;; + addl r22=IA64_RBS_OFFSET,r2 + ;; + mov ar.bspstore=r22 + addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 + ;; + adds r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 + ;; + st1 [r2]=r0 // clear current->thread.on_ustack flag + mov loc0=r16 + movl loc1=mca_handler_bh // recovery C function + ;; + mov out0=r8 // poisoned address + mov out1=r9 // iip + mov out2=r10 // psr + mov b6=loc1 + ;; + mov loc1=rp + ssm psr.ic + ;; + srlz.i + ;; + ssm psr.i + br.call.sptk.many rp=b6 // does not return ... 
+ ;; + mov ar.pfs=loc0 + mov rp=loc1 + ;; + mov r8=r0 + br.ret.sptk.many rp +END(mca_handler_bhhook) diff --git a/kernel/arch/ia64/kernel/minstate.h b/kernel/arch/ia64/kernel/minstate.h new file mode 100644 index 000000000..cc82a7d74 --- /dev/null +++ b/kernel/arch/ia64/kernel/minstate.h @@ -0,0 +1,250 @@ + +#include + +#include "entry.h" +#include "paravirt_inst.h" + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +/* read ar.itc in advance, and use it before leaving bank 0 */ +#define ACCOUNT_GET_STAMP \ +(pUStk) mov.m r20=ar.itc; +#define ACCOUNT_SYS_ENTER \ +(pUStk) br.call.spnt rp=account_sys_enter \ + ;; +#else +#define ACCOUNT_GET_STAMP +#define ACCOUNT_SYS_ENTER +#endif + +.section ".data..patch.rse", "a" +.previous + +/* + * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves + * the minimum state necessary that allows us to turn psr.ic back + * on. + * + * Assumed state upon entry: + * psr.ic: off + * r31: contains saved predicates (pr) + * + * Upon exit, the state is as follows: + * psr.ic: off + * r2 = points to &pt_regs.r16 + * r8 = contents of ar.ccv + * r9 = contents of ar.csd + * r10 = contents of ar.ssd + * r11 = FPSR_DEFAULT + * r12 = kernel sp (kernel virtual address) + * r13 = points to current task_struct (kernel virtual address) + * p15 = TRUE if psr.i is set in cr.ipsr + * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: + * preserved + * + * Note that psr.ic is NOT turned on by this macro. This is so that + * we can pass interruption state as arguments to a handler. 
+ */ +#define IA64_NATIVE_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND) \ + mov r16=IA64_KR(CURRENT); /* M */ \ + mov r27=ar.rsc; /* M */ \ + mov r20=r1; /* A */ \ + mov r25=ar.unat; /* M */ \ + MOV_FROM_IPSR(p0,r29); /* M */ \ + mov r26=ar.pfs; /* I */ \ + MOV_FROM_IIP(r28); /* M */ \ + mov r21=ar.fpsr; /* M */ \ + __COVER; /* B;; (or nothing) */ \ + ;; \ + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \ + ;; \ + ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \ + st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \ + adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \ + /* switch from user to kernel RBS: */ \ + ;; \ + invala; /* M */ \ + SAVE_IFS; \ + cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \ + ;; \ +(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ + ;; \ +(pUStk) mov.m r24=ar.rnat; \ +(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \ +(pKStk) mov r1=sp; /* get sp */ \ + ;; \ +(pUStk) lfetch.fault.excl.nt1 [r22]; \ +(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \ + ;; \ +(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \ +(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ + ;; \ +(pUStk) mov r18=ar.bsp; \ +(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ + adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \ + adds r16=PT(CR_IPSR),r1; \ + ;; \ + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ + st8 [r16]=r29; /* save cr.ipsr */ \ + ;; \ + lfetch.fault.excl.nt1 [r17]; \ + tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \ + mov r29=b0 \ + ;; \ + WORKAROUND; \ + adds r16=PT(R8),r1; /* initialize first base pointer */ \ + adds r17=PT(R9),r1; /* initialize second base pointer */ \ +(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r8,16; \ +.mem.offset 8,0; st8.spill 
[r17]=r9,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r10,24; \ +.mem.offset 8,0; st8.spill [r17]=r11,24; \ + ;; \ + st8 [r16]=r28,16; /* save cr.iip */ \ + st8 [r17]=r30,16; /* save cr.ifs */ \ +(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \ + mov r8=ar.ccv; \ + mov r9=ar.csd; \ + mov r10=ar.ssd; \ + movl r11=FPSR_DEFAULT; /* L-unit */ \ + ;; \ + st8 [r16]=r25,16; /* save ar.unat */ \ + st8 [r17]=r26,16; /* save ar.pfs */ \ + shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ + ;; \ + st8 [r16]=r27,16; /* save ar.rsc */ \ +(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \ +(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \ + ;; /* avoid RAW on r16 & r17 */ \ +(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \ + st8 [r17]=r31,16; /* save predicates */ \ +(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \ + ;; \ + st8 [r16]=r29,16; /* save b0 */ \ + st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \ + cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \ +.mem.offset 8,0; st8.spill [r17]=r12,16; \ + adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r13,16; \ +.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \ + mov r13=IA64_KR(CURRENT); /* establish `current' */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r15,16; \ +.mem.offset 8,0; st8.spill [r17]=r14,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r2,16; \ +.mem.offset 8,0; st8.spill [r17]=r3,16; \ + ACCOUNT_GET_STAMP \ + adds r2=IA64_PT_REGS_R16_OFFSET,r1; \ + ;; \ + EXTRA; \ + movl r1=__gp; /* establish kernel global pointer */ \ + ;; \ + ACCOUNT_SYS_ENTER \ + bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ + ;; + +/* + * SAVE_REST saves the remainder of pt_regs (with psr.ic on). 
+ * + * Assumed state upon entry: + * psr.ic: on + * r2: points to &pt_regs.r16 + * r3: points to &pt_regs.r17 + * r8: contents of ar.ccv + * r9: contents of ar.csd + * r10: contents of ar.ssd + * r11: FPSR_DEFAULT + * + * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. + */ +#define SAVE_REST \ +.mem.offset 0,0; st8.spill [r2]=r16,16; \ +.mem.offset 8,0; st8.spill [r3]=r17,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r18,16; \ +.mem.offset 8,0; st8.spill [r3]=r19,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r20,16; \ +.mem.offset 8,0; st8.spill [r3]=r21,16; \ + mov r18=b6; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r22,16; \ +.mem.offset 8,0; st8.spill [r3]=r23,16; \ + mov r19=b7; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r24,16; \ +.mem.offset 8,0; st8.spill [r3]=r25,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r26,16; \ +.mem.offset 8,0; st8.spill [r3]=r27,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r28,16; \ +.mem.offset 8,0; st8.spill [r3]=r29,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r30,16; \ +.mem.offset 8,0; st8.spill [r3]=r31,32; \ + ;; \ + mov ar.fpsr=r11; /* M-unit */ \ + st8 [r2]=r8,8; /* ar.ccv */ \ + adds r24=PT(B6)-PT(F7),r3; \ + ;; \ + stf.spill [r2]=f6,32; \ + stf.spill [r3]=f7,32; \ + ;; \ + stf.spill [r2]=f8,32; \ + stf.spill [r3]=f9,32; \ + ;; \ + stf.spill [r2]=f10; \ + stf.spill [r3]=f11; \ + adds r25=PT(B7)-PT(F11),r3; \ + ;; \ + st8 [r24]=r18,16; /* b6 */ \ + st8 [r25]=r19,16; /* b7 */ \ + ;; \ + st8 [r24]=r9; /* ar.csd */ \ + st8 [r25]=r10; /* ar.ssd */ \ + ;; + +#define RSE_WORKAROUND \ +(pUStk) extr.u r17=r18,3,6; \ +(pUStk) sub r16=r18,r22; \ +[1:](pKStk) br.cond.sptk.many 1f; \ + .xdata4 ".data..patch.rse",1b-. 
\ + ;; \ + cmp.ge p6,p7 = 33,r17; \ + ;; \ +(p6) mov r17=0x310; \ +(p7) mov r17=0x308; \ + ;; \ + cmp.leu p1,p0=r16,r17; \ +(p1) br.cond.sptk.many 1f; \ + dep.z r17=r26,0,62; \ + movl r16=2f; \ + ;; \ + mov ar.pfs=r17; \ + dep r27=r0,r27,16,14; \ + mov b0=r16; \ + ;; \ + br.ret.sptk b0; \ + ;; \ +2: \ + mov ar.rsc=r0 \ + ;; \ + flushrs; \ + ;; \ + mov ar.bspstore=r22 \ + ;; \ + mov r18=ar.bsp; \ + ;; \ +1: \ + .pred.rel "mutex", pKStk, pUStk + +#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(COVER, mov r30=cr.ifs, , RSE_WORKAROUND) +#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(COVER, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND) +#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, , ) diff --git a/kernel/arch/ia64/kernel/module.c b/kernel/arch/ia64/kernel/module.c new file mode 100644 index 000000000..29754aae5 --- /dev/null +++ b/kernel/arch/ia64/kernel/module.c @@ -0,0 +1,951 @@ +/* + * IA-64-specific support for kernel module loader. + * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * Loosely based on patch by Rusty Russell. + */ + +/* relocs tested so far: + + DIR64LSB + FPTR64LSB + GPREL22 + LDXMOV + LDXMOV + LTOFF22 + LTOFF22X + LTOFF22X + LTOFF_FPTR22 + PCREL21B (for br.call only; br.cond is not supported out of modules!) + PCREL60B (for brl.cond only; brl.call is not supported for modules!) + PCREL64LSB + SECREL32LSB + SEGREL64LSB + */ + + +#include +#include +#include +#include +#include +#include + +#include +#include + +#define ARCH_MODULE_DEBUG 0 + +#if ARCH_MODULE_DEBUG +# define DEBUGP printk +# define inline +#else +# define DEBUGP(fmt , a...) +#endif + +#ifdef CONFIG_ITANIUM +# define USE_BRL 0 +#else +# define USE_BRL 1 +#endif + +#define MAX_LTOFF ((uint64_t) (1 << 22)) /* max. 
allowable linkage-table offset */ + +/* Define some relocation helper macros/types: */ + +#define FORMAT_SHIFT 0 +#define FORMAT_BITS 3 +#define FORMAT_MASK ((1 << FORMAT_BITS) - 1) +#define VALUE_SHIFT 3 +#define VALUE_BITS 5 +#define VALUE_MASK ((1 << VALUE_BITS) - 1) + +enum reloc_target_format { + /* direct encoded formats: */ + RF_NONE = 0, + RF_INSN14 = 1, + RF_INSN22 = 2, + RF_INSN64 = 3, + RF_32MSB = 4, + RF_32LSB = 5, + RF_64MSB = 6, + RF_64LSB = 7, + + /* formats that cannot be directly decoded: */ + RF_INSN60, + RF_INSN21B, /* imm21 form 1 */ + RF_INSN21M, /* imm21 form 2 */ + RF_INSN21F /* imm21 form 3 */ +}; + +enum reloc_value_formula { + RV_DIRECT = 4, /* S + A */ + RV_GPREL = 5, /* @gprel(S + A) */ + RV_LTREL = 6, /* @ltoff(S + A) */ + RV_PLTREL = 7, /* @pltoff(S + A) */ + RV_FPTR = 8, /* @fptr(S + A) */ + RV_PCREL = 9, /* S + A - P */ + RV_LTREL_FPTR = 10, /* @ltoff(@fptr(S + A)) */ + RV_SEGREL = 11, /* @segrel(S + A) */ + RV_SECREL = 12, /* @secrel(S + A) */ + RV_BDREL = 13, /* BD + A */ + RV_LTV = 14, /* S + A (like RV_DIRECT, except frozen at static link-time) */ + RV_PCREL2 = 15, /* S + A - P */ + RV_SPECIAL = 16, /* various (see below) */ + RV_RSVD17 = 17, + RV_TPREL = 18, /* @tprel(S + A) */ + RV_LTREL_TPREL = 19, /* @ltoff(@tprel(S + A)) */ + RV_DTPMOD = 20, /* @dtpmod(S + A) */ + RV_LTREL_DTPMOD = 21, /* @ltoff(@dtpmod(S + A)) */ + RV_DTPREL = 22, /* @dtprel(S + A) */ + RV_LTREL_DTPREL = 23, /* @ltoff(@dtprel(S + A)) */ + RV_RSVD24 = 24, + RV_RSVD25 = 25, + RV_RSVD26 = 26, + RV_RSVD27 = 27 + /* 28-31 reserved for implementation-specific purposes. 
*/ +}; + +#define N(reloc) [R_IA64_##reloc] = #reloc + +static const char *reloc_name[256] = { + N(NONE), N(IMM14), N(IMM22), N(IMM64), + N(DIR32MSB), N(DIR32LSB), N(DIR64MSB), N(DIR64LSB), + N(GPREL22), N(GPREL64I), N(GPREL32MSB), N(GPREL32LSB), + N(GPREL64MSB), N(GPREL64LSB), N(LTOFF22), N(LTOFF64I), + N(PLTOFF22), N(PLTOFF64I), N(PLTOFF64MSB), N(PLTOFF64LSB), + N(FPTR64I), N(FPTR32MSB), N(FPTR32LSB), N(FPTR64MSB), + N(FPTR64LSB), N(PCREL60B), N(PCREL21B), N(PCREL21M), + N(PCREL21F), N(PCREL32MSB), N(PCREL32LSB), N(PCREL64MSB), + N(PCREL64LSB), N(LTOFF_FPTR22), N(LTOFF_FPTR64I), N(LTOFF_FPTR32MSB), + N(LTOFF_FPTR32LSB), N(LTOFF_FPTR64MSB), N(LTOFF_FPTR64LSB), N(SEGREL32MSB), + N(SEGREL32LSB), N(SEGREL64MSB), N(SEGREL64LSB), N(SECREL32MSB), + N(SECREL32LSB), N(SECREL64MSB), N(SECREL64LSB), N(REL32MSB), + N(REL32LSB), N(REL64MSB), N(REL64LSB), N(LTV32MSB), + N(LTV32LSB), N(LTV64MSB), N(LTV64LSB), N(PCREL21BI), + N(PCREL22), N(PCREL64I), N(IPLTMSB), N(IPLTLSB), + N(COPY), N(LTOFF22X), N(LDXMOV), N(TPREL14), + N(TPREL22), N(TPREL64I), N(TPREL64MSB), N(TPREL64LSB), + N(LTOFF_TPREL22), N(DTPMOD64MSB), N(DTPMOD64LSB), N(LTOFF_DTPMOD22), + N(DTPREL14), N(DTPREL22), N(DTPREL64I), N(DTPREL32MSB), + N(DTPREL32LSB), N(DTPREL64MSB), N(DTPREL64LSB), N(LTOFF_DTPREL22) +}; + +#undef N + +/* Opaque struct for insns, to protect against derefs. 
*/
+struct insn;
+
+/* Address of the 16-byte bundle holding INSN: the pointer with bits 0-3 cleared. */
+static inline uint64_t
+bundle (const struct insn *insn)
+{
+	const uint64_t addr = (uint64_t) insn;
+
+	return addr & ~0xfUL;
+}
+
+/* Slot number of INSN, carried in the two low bits of the pointer. */
+static inline int
+slot (const struct insn *insn)
+{
+	const uint64_t addr = (uint64_t) insn;
+
+	return addr & 0x3;
+}
+
+/*
+ * Patch a 64-bit immediate into INSN, which must sit in slot 2.
+ * Returns 1 on success, 0 (after logging) otherwise.
+ */
+static int
+apply_imm64 (struct module *mod, struct insn *insn, uint64_t val)
+{
+	const int where = slot(insn);
+
+	if (where == 2) {
+		ia64_patch_imm64((u64) insn, val);
+		return 1;
+	}
+
+	printk(KERN_ERR "%s: invalid slot number %d for IMM64\n",
+	       mod->name, where);
+	return 0;
+}
+
+/*
+ * Patch a 60-bit immediate into INSN, which must sit in slot 2.  VAL has
+ * to lie in [-2^59, 2^59).  Returns 1 on success, 0 (after logging) otherwise.
+ */
+static int
+apply_imm60 (struct module *mod, struct insn *insn, uint64_t val)
+{
+	const int where = slot(insn);
+	const uint64_t biased = val + ((uint64_t) 1 << 59);
+
+	if (where != 2) {
+		printk(KERN_ERR "%s: invalid slot number %d for IMM60\n",
+		       mod->name, where);
+		return 0;
+	}
+	if (biased >= (1UL << 60)) {
+		printk(KERN_ERR "%s: value %ld out of IMM60 range\n",
+		       mod->name, (long) val);
+		return 0;
+	}
+
+	ia64_patch_imm60((u64) insn, val);
+	return 1;
+}
+
+/*
+ * Patch a 22-bit immediate into INSN.  VAL has to lie in [-2^21, 2^21).
+ * Returns 1 on success, 0 (after logging) otherwise.
+ */
+static int
+apply_imm22 (struct module *mod, struct insn *insn, uint64_t val)
+{
+	const uint64_t biased = val + (1 << 21);
+	uint64_t bit21, bits16_20, bits7_15, bits0_6;
+
+	if (biased >= (1 << 22)) {
+		printk(KERN_ERR "%s: value %li out of IMM22 range\n",
+		       mod->name, (long)val);
+		return 0;
+	}
+
+	/* scatter the value over the instruction's immediate fields */
+	bit21     = (val & 0x200000UL) << 15;	/* bit 21 -> 36 */
+	bits16_20 = (val & 0x1f0000UL) <<  6;	/* bit 16 -> 22 */
+	bits7_15  = (val & 0x00ff80UL) << 20;	/* bit  7 -> 27 */
+	bits0_6   = (val & 0x00007fUL) << 13;	/* bit  0 -> 13 */
+
+	ia64_patch((u64) insn, 0x01fffcfe000UL,
+		   bit21 | bits16_20 | bits7_15 | bits0_6);
+	return 1;
+}
+
+/*
+ * Patch a 21-bit immediate (form "b") into INSN.  VAL has to lie in
+ * [-2^20, 2^20).  Returns 1 on success, 0 (after logging) otherwise.
+ */
+static int
+apply_imm21b (struct module *mod, struct insn *insn, uint64_t val)
+{
+	const uint64_t biased = val + (1 << 20);
+	uint64_t bit20, bits0_19;
+
+	if (biased >= (1 << 21)) {
+		printk(KERN_ERR "%s: value %li out of IMM21b range\n",
+		       mod->name, (long)val);
+		return 0;
+	}
+
+	bit20    = (val & 0x100000UL) << 16;	/* bit 20 -> 36 */
+	bits0_19 = (val & 0x0fffffUL) << 13;	/* bit  0 -> 13 */
+
+	ia64_patch((u64) insn, 0x11ffffe000UL, bit20 | bits0_19);
+	return 1;
+}
+
+#if USE_BRL
+
+struct plt_entry {
+	/* Two instruction bundles in PLT. */
+ 	unsigned char bundle[2][16];
+};
+
+static const struct plt_entry ia64_plt_template = {
+	{
+		{
+			0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /*	     movl gp=TARGET_GP */
+			0x00, 0x00, 0x00, 0x60
+		},
+		{
+			0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*	     brl.many (displacement to TARGET_IP set by patch_plt) */
+			0x08, 0x00, 0x00, 0xc0
+		}
+	}
+};
+
+/*
+ * Fill in PLT: TARGET_GP goes into bundle 0, the bundle-granular distance
+ * from bundle 1 to TARGET_IP goes into bundle 1.  Returns 1 when both
+ * immediates could be applied, 0 otherwise.
+ */
+static int
+patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp)
+{
+	if (!apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_gp))
+		return 0;
+	if (!apply_imm60(mod, (struct insn *) (plt->bundle[1] + 2),
+			 (target_ip - (int64_t) plt->bundle[1]) / 16))
+		return 0;
+	return 1;
+}
+
+/* Decode the branch displacement stored in bundle 1 and return the target address. */
+unsigned long
+plt_target (struct plt_entry *plt)
+{
+	const uint64_t *words = (uint64_t *) plt->bundle[1];
+	const uint64_t w0 = words[0];
+	const uint64_t w1 = words[1];
+	long off;
+
+	off = (  ((w1 & 0x00fffff000000000UL) >> 36)		/* imm20b -> bit 0 */
+	       | ((w0 >> 48) << 20) | ((w1 & 0x7fffffUL) << 36)	/* imm39 -> bit 20 */
+	       | ((w1 & 0x0800000000000000UL) << 0));		/* i -> bit 59 */
+	return (long) plt->bundle[1] + 16*off;
+}
+
+#else /* !USE_BRL */
+
+struct plt_entry {
+	/* Three instruction bundles in PLT. */
+ 	unsigned char bundle[3][16];
+};
+
+static const struct plt_entry ia64_plt_template = {
+	{
+		{
+			0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*	     movl r16=TARGET_IP */
+			0x02, 0x00, 0x00, 0x60
+		},
+		{
+			0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /*	     movl gp=TARGET_GP */
+			0x00, 0x00, 0x00, 0x60
+		},
+		{
+			0x11, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MIB] nop.m 0 */
+			0x60, 0x80, 0x04, 0x80, 0x03, 0x00, /*	     mov b6=r16 */
+			0x60, 0x00, 0x80, 0x00		    /*	     br.few b6 */
+		}
+	}
+};
+
+/*
+ * Fill in PLT: TARGET_IP goes into bundle 0, TARGET_GP into bundle 1.
+ * Returns 1 when both immediates could be applied, 0 otherwise.
+ */
+static int
+patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp)
+{
+	if (!apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_ip))
+		return 0;
+	if (!apply_imm64(mod, (struct insn *) (plt->bundle[1] + 2), target_gp))
+		return 0;
+	return 1;
+}
+
+/* Reassemble the 64-bit immediate stored in bundle 0, i.e. the target address. */
+unsigned long
+plt_target (struct plt_entry *plt)
+{
+	const uint64_t *words = (uint64_t *) plt->bundle[0];
+	const uint64_t w0 = words[0];
+	const uint64_t w1 = words[1];
+
+	return (  ((w1 & 0x000007f000000000) >> 36)		/* imm7b -> bit 0 */
+		| ((w1 & 0x07fc000000000000) >> 43)		/* imm9d -> bit 7 */
+		| ((w1 & 0x0003e00000000000) >> 29)		/* imm5c -> bit 16 */
+		| ((w1 & 0x0000100000000000) >> 23)		/* ic -> bit 21 */
+		| ((w0 >> 46) << 22) | ((w1 & 0x7fffff) << 40)	/* imm41 -> bit 22 */
+		| ((w1 & 0x0800000000000000) <<  4));		/* i -> bit 63 */
+}
+
+#endif /* !USE_BRL */
+
+/* Drop the unwind table registered for the module's init code, if there is one. */
+void
+module_arch_freeing_init (struct module *mod)
+{
+	if (!mod->arch.init_unw_table)
+		return;
+
+	unw_remove_unwind_table(mod->arch.init_unw_table);
+	mod->arch.init_unw_table = NULL;
+}
+
+/* Have we already seen one of these relocations?
*/
+/* FIXME: we could look in other sections, too --RR */
+static int
+duplicate_reloc (const Elf64_Rela *rela, unsigned int num)
+{
+	const Elf64_Rela *wanted = &rela[num];
+	const Elf64_Rela *earlier;
+
+	/* compare entry NUM against every entry that precedes it */
+	for (earlier = rela; earlier < wanted; earlier++)
+		if (earlier->r_info == wanted->r_info
+		    && earlier->r_addend == wanted->r_addend)
+			return 1;
+	return 0;
+}
+
+/* Count how many GOT entries we may need */
+static unsigned int
+count_gots (const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int idx, n_gots = 0;
+
+	/* Quadratic because of duplicate_reloc(), but the tables are
+	   usually short and this is not time critical. */
+	for (idx = 0; idx < num; idx++) {
+		switch (ELF64_R_TYPE(rela[idx].r_info)) {
+		      case R_IA64_LTOFF22:
+		      case R_IA64_LTOFF22X:
+		      case R_IA64_LTOFF64I:
+		      case R_IA64_LTOFF_FPTR22:
+		      case R_IA64_LTOFF_FPTR64I:
+		      case R_IA64_LTOFF_FPTR32MSB:
+		      case R_IA64_LTOFF_FPTR32LSB:
+		      case R_IA64_LTOFF_FPTR64MSB:
+		      case R_IA64_LTOFF_FPTR64LSB:
+			break;
+		      default:
+			continue;	/* not a linkage-table relocation */
+		}
+		if (!duplicate_reloc(rela, idx))
+			n_gots++;
+	}
+	return n_gots;
+}
+
+/* Count how many PLT entries we may need */
+static unsigned int
+count_plts (const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int idx, n_plts = 0;
+
+	/* Quadratic because of duplicate_reloc(), but the tables are
+	   usually short and this is not time critical. */
+	for (idx = 0; idx < num; idx++) {
+		switch (ELF64_R_TYPE(rela[idx].r_info)) {
+		      case R_IA64_PCREL21B:
+		      case R_IA64_PLTOFF22:
+		      case R_IA64_PLTOFF64I:
+		      case R_IA64_PLTOFF64MSB:
+		      case R_IA64_PLTOFF64LSB:
+		      case R_IA64_IPLTMSB:
+		      case R_IA64_IPLTLSB:
+			break;
+		      default:
+			continue;	/* relocation never needs a PLT slot */
+		}
+		if (!duplicate_reloc(rela, idx))
+			n_plts++;
+	}
+	return n_plts;
+}
+
+/* We need to create function descriptors for any internal function
+   which is referenced. */
+static unsigned int
+count_fdescs (const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int idx, n_fdescs = 0;
+
+	/* Quadratic because of duplicate_reloc(), but the tables are
+	   usually short and this is not time critical. */
+	for (idx = 0; idx < num; idx++) {
+		switch (ELF64_R_TYPE(rela[idx].r_info)) {
+		      case R_IA64_FPTR64I:
+		      case R_IA64_FPTR32LSB:
+		      case R_IA64_FPTR32MSB:
+		      case R_IA64_FPTR64LSB:
+		      case R_IA64_FPTR64MSB:
+		      case R_IA64_LTOFF_FPTR22:
+		      case R_IA64_LTOFF_FPTR32LSB:
+		      case R_IA64_LTOFF_FPTR32MSB:
+		      case R_IA64_LTOFF_FPTR64I:
+		      case R_IA64_LTOFF_FPTR64LSB:
+		      case R_IA64_LTOFF_FPTR64MSB:
+		      case R_IA64_IPLTMSB:
+		      case R_IA64_IPLTLSB:
+			/*
+			 * Jumps to static functions sometimes go straight to their
+			 * offset.  Of course, that may not be possible if the jump is
+			 * from init -> core or vice versa, so we need to generate an
+			 * FDESC (and PLT etc) for that.
+			 */
+		      case R_IA64_PCREL21B:
+			break;
+		      default:
+			continue;	/* no function descriptor involved */
+		}
+		if (!duplicate_reloc(rela, idx))
+			n_fdescs++;
+	}
+	return n_fdescs;
+}
+
+/*
+ * Locate the arch-specific sections of the module image, count the PLT,
+ * GOT and function-descriptor entries its relocations may need, and size
+ * the .core.plt, .init.plt, .got and .opd sections accordingly.
+ *
+ * Returns 0 on success, -ENOEXEC when one of those four sections is missing.
+ */
+int
+module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings,
+			   struct module *mod)
+{
+	unsigned long core_plts = 0, init_plts = 0, gots = 0, fdescs = 0;
+	Elf64_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
+
+	/*
+	 * To store the PLTs and function-descriptors, we expand the .text section for
+	 * core module-code and the .init.text section for initialization code.
+	 */
+	for (s = sechdrs; s < sechdrs_end; ++s) {
+		const char *name = secstrings + s->sh_name;
+
+		if (strcmp(".core.plt", name) == 0)
+			mod->arch.core_plt = s;
+		else if (strcmp(".init.plt", name) == 0)
+			mod->arch.init_plt = s;
+		else if (strcmp(".got", name) == 0)
+			mod->arch.got = s;
+		else if (strcmp(".opd", name) == 0)
+			mod->arch.opd = s;
+		else if (strcmp(".IA_64.unwind", name) == 0)
+			mod->arch.unwind = s;
+#ifdef CONFIG_PARAVIRT
+		else if (strcmp(".paravirt_bundles", name) == 0)
+			mod->arch.paravirt_bundles = s;
+		else if (strcmp(".paravirt_insts", name) == 0)
+			mod->arch.paravirt_insts = s;
+#endif
+	}
+
+	if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) {
+		printk(KERN_ERR "%s: sections missing\n", mod->name);
+		return -ENOEXEC;
+	}
+
+	/* GOT and PLTs can occur in any relocated section... */
+	for (s = sechdrs + 1; s < sechdrs_end; ++s) {
+		const Elf64_Rela *rels;
+		unsigned long numrels;
+
+		if (s->sh_type != SHT_RELA)
+			continue;
+
+		rels = (void *)ehdr + s->sh_offset;
+		numrels = s->sh_size/sizeof(Elf64_Rela);
+
+		gots += count_gots(rels, numrels);
+		fdescs += count_fdescs(rels, numrels);
+		/* PLT slots are kept separately for init and core code */
+		if (strstr(secstrings + s->sh_name, ".init"))
+			init_plts += count_plts(rels, numrels);
+		else
+			core_plts += count_plts(rels, numrels);
+	}
+
+	/* executable, 16-byte aligned PLT areas */
+	mod->arch.core_plt->sh_type = SHT_NOBITS;
+	mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.core_plt->sh_addralign = 16;
+	mod->arch.core_plt->sh_size = core_plts * sizeof(struct plt_entry);
+
+	mod->arch.init_plt->sh_type = SHT_NOBITS;
+	mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.init_plt->sh_addralign = 16;
+	mod->arch.init_plt->sh_size = init_plts * sizeof(struct plt_entry);
+
+	/* linkage table */
+	mod->arch.got->sh_type = SHT_NOBITS;
+	mod->arch.got->sh_flags = ARCH_SHF_SMALL | SHF_ALLOC;
+	mod->arch.got->sh_addralign = 8;
+	mod->arch.got->sh_size = gots * sizeof(struct got_entry);
+
+	/* function descriptors */
+	mod->arch.opd->sh_type = SHT_NOBITS;
+	mod->arch.opd->sh_flags = SHF_ALLOC;
+	mod->arch.opd->sh_addralign = 8;
+	mod->arch.opd->sh_size = fdescs * sizeof(struct fdesc);
+
+	DEBUGP("%s: core.plt=%lx, init.plt=%lx, got=%lx, fdesc=%lx\n",
+	       __func__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size,
+	       mod->arch.got->sh_size, mod->arch.opd->sh_size);
+	return 0;
+}
+
+/* Non-zero when ADDR falls inside the module's init region. */
+static inline int
+in_init (const struct module *mod, uint64_t addr)
+{
+	const uint64_t offset = addr - (uint64_t) mod->module_init;
+
+	return offset < mod->init_size;
+}
+
+/* Non-zero when ADDR falls inside the module's core region. */
+static inline int
+in_core (const struct module *mod, uint64_t addr)
+{
+	const uint64_t offset = addr - (uint64_t) mod->module_core;
+
+	return offset < mod->core_size;
+}
+
+/* Non-zero when VALUE lies in either region of the module. */
+static inline int
+is_internal (const struct module *mod, uint64_t value)
+{
+	if (in_init(mod, value))
+		return 1;
+	return in_core(mod, value);
+}
+
+/*
+ * Get gp-relative offset for the linkage-table entry of VALUE.  An existing
+ * entry is reused; otherwise the next free one is claimed.  Returns 0
+ * without doing anything when *OKP is already clear.
+ */
+static uint64_t
+get_ltoff (struct module *mod, uint64_t value, int *okp)
+{
+	struct got_entry *first, *used_end, *cur;
+
+	if (!*okp)
+		return 0;
+
+	first = (void *) mod->arch.got->sh_addr;
+	used_end = first + mod->arch.next_got_entry;
+	for (cur = first; cur < used_end; ++cur)
+		if (cur->val == value)
+			return (uint64_t) cur - mod->arch.gp;
+
+	/* Not enough GOT entries? */
+	BUG_ON(cur >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size));
+
+	cur->val = value;
+	++mod->arch.next_got_entry;
+	return (uint64_t) cur - mod->arch.gp;
+}
+
+/* Non-zero when VALUE is within MAX_LTOFF/2 below or above gp. */
+static inline int
+gp_addressable (struct module *mod, uint64_t value)
+{
+	const uint64_t biased = value - mod->arch.gp + MAX_LTOFF/2;
+
+	return biased < MAX_LTOFF;
+}
+
+/* Get PC-relative PLT entry for this value.  Returns 0 on failure.
*/ +static uint64_t +get_plt (struct module *mod, const struct insn *insn, uint64_t value, int *okp) +{ + struct plt_entry *plt, *plt_end; + uint64_t target_ip, target_gp; + + if (!*okp) + return 0; + + if (in_init(mod, (uint64_t) insn)) { + plt = (void *) mod->arch.init_plt->sh_addr; + plt_end = (void *) plt + mod->arch.init_plt->sh_size; + } else { + plt = (void *) mod->arch.core_plt->sh_addr; + plt_end = (void *) plt + mod->arch.core_plt->sh_size; + } + + /* "value" is a pointer to a function-descriptor; fetch the target ip/gp from it: */ + target_ip = ((uint64_t *) value)[0]; + target_gp = ((uint64_t *) value)[1]; + + /* Look for existing PLT entry. */ + while (plt->bundle[0][0]) { + if (plt_target(plt) == target_ip) + goto found; + if (++plt >= plt_end) + BUG(); + } + *plt = ia64_plt_template; + if (!patch_plt(mod, plt, target_ip, target_gp)) { + *okp = 0; + return 0; + } +#if ARCH_MODULE_DEBUG + if (plt_target(plt) != target_ip) { + printk("%s: mistargeted PLT: wanted %lx, got %lx\n", + __func__, target_ip, plt_target(plt)); + *okp = 0; + return 0; + } +#endif + found: + return (uint64_t) plt; +} + +/* Get function descriptor for VALUE. */ +static uint64_t +get_fdesc (struct module *mod, uint64_t value, int *okp) +{ + struct fdesc *fdesc = (void *) mod->arch.opd->sh_addr; + + if (!*okp) + return 0; + + if (!value) { + printk(KERN_ERR "%s: fdesc for zero requested!\n", mod->name); + return 0; + } + + if (!is_internal(mod, value)) + /* + * If it's not a module-local entry-point, "value" already points to a + * function-descriptor. + */ + return value; + + /* Look for existing function descriptor. 
*/ + while (fdesc->ip) { + if (fdesc->ip == value) + return (uint64_t)fdesc; + if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + mod->arch.opd->sh_size) + BUG(); + } + + /* Create new one */ + fdesc->ip = value; + fdesc->gp = mod->arch.gp; + return (uint64_t) fdesc; +} + +static inline int +do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend, + Elf64_Shdr *sec, void *location) +{ + enum reloc_target_format format = (r_type >> FORMAT_SHIFT) & FORMAT_MASK; + enum reloc_value_formula formula = (r_type >> VALUE_SHIFT) & VALUE_MASK; + uint64_t val; + int ok = 1; + + val = sym->st_value + addend; + + switch (formula) { + case RV_SEGREL: /* segment base is arbitrarily chosen to be 0 for kernel modules */ + case RV_DIRECT: + break; + + case RV_GPREL: val -= mod->arch.gp; break; + case RV_LTREL: val = get_ltoff(mod, val, &ok); break; + case RV_PLTREL: val = get_plt(mod, location, val, &ok); break; + case RV_FPTR: val = get_fdesc(mod, val, &ok); break; + case RV_SECREL: val -= sec->sh_addr; break; + case RV_LTREL_FPTR: val = get_ltoff(mod, get_fdesc(mod, val, &ok), &ok); break; + + case RV_PCREL: + switch (r_type) { + case R_IA64_PCREL21B: + if ((in_init(mod, val) && in_core(mod, (uint64_t)location)) || + (in_core(mod, val) && in_init(mod, (uint64_t)location))) { + /* + * Init section may have been allocated far away from core, + * if the branch won't reach, then allocate a plt for it. 
+ */ + uint64_t delta = ((int64_t)val - (int64_t)location) / 16; + if (delta + (1 << 20) >= (1 << 21)) { + val = get_fdesc(mod, val, &ok); + val = get_plt(mod, location, val, &ok); + } + } else if (!is_internal(mod, val)) + val = get_plt(mod, location, val, &ok); + /* FALL THROUGH */ + default: + val -= bundle(location); + break; + + case R_IA64_PCREL32MSB: + case R_IA64_PCREL32LSB: + case R_IA64_PCREL64MSB: + case R_IA64_PCREL64LSB: + val -= (uint64_t) location; + break; + + } + switch (r_type) { + case R_IA64_PCREL60B: format = RF_INSN60; break; + case R_IA64_PCREL21B: format = RF_INSN21B; break; + case R_IA64_PCREL21M: format = RF_INSN21M; break; + case R_IA64_PCREL21F: format = RF_INSN21F; break; + default: break; + } + break; + + case RV_BDREL: + val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core); + break; + + case RV_LTV: + /* can link-time value relocs happen here? */ + BUG(); + break; + + case RV_PCREL2: + if (r_type == R_IA64_PCREL21BI) { + if (!is_internal(mod, val)) { + printk(KERN_ERR "%s: %s reloc against " + "non-local symbol (%lx)\n", __func__, + reloc_name[r_type], (unsigned long)val); + return -ENOEXEC; + } + format = RF_INSN21B; + } + val -= bundle(location); + break; + + case RV_SPECIAL: + switch (r_type) { + case R_IA64_IPLTMSB: + case R_IA64_IPLTLSB: + val = get_fdesc(mod, get_plt(mod, location, val, &ok), &ok); + format = RF_64LSB; + if (r_type == R_IA64_IPLTMSB) + format = RF_64MSB; + break; + + case R_IA64_SUB: + val = addend - sym->st_value; + format = RF_INSN64; + break; + + case R_IA64_LTOFF22X: + if (gp_addressable(mod, val)) + val -= mod->arch.gp; + else + val = get_ltoff(mod, val, &ok); + format = RF_INSN22; + break; + + case R_IA64_LDXMOV: + if (gp_addressable(mod, val)) { + /* turn "ld8" into "mov": */ + DEBUGP("%s: patching ld8 at %p to mov\n", __func__, location); + ia64_patch((u64) location, 0x1fff80fe000UL, 0x10000000000UL); + } + return 0; + + default: + if (reloc_name[r_type]) + printk(KERN_ERR "%s: 
special reloc %s not supported\n", + mod->name, reloc_name[r_type]); + else + printk(KERN_ERR "%s: unknown special reloc %x\n", + mod->name, r_type); + return -ENOEXEC; + } + break; + + case RV_TPREL: + case RV_LTREL_TPREL: + case RV_DTPMOD: + case RV_LTREL_DTPMOD: + case RV_DTPREL: + case RV_LTREL_DTPREL: + printk(KERN_ERR "%s: %s reloc not supported\n", + mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?"); + return -ENOEXEC; + + default: + printk(KERN_ERR "%s: unknown reloc %x\n", mod->name, r_type); + return -ENOEXEC; + } + + if (!ok) /* a helper such as get_plt() cleared ok, so val is not usable */ + return -ENOEXEC; + + DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __func__, location, val, + reloc_name[r_type] ? reloc_name[r_type] : "?", sym->st_value + addend); + + switch (format) { /* ok is reused here for the result of the apply_imm*() patchers */ + case RF_INSN21B: ok = apply_imm21b(mod, location, (int64_t) val / 16); break; + case RF_INSN22: ok = apply_imm22(mod, location, val); break; + case RF_INSN64: ok = apply_imm64(mod, location, val); break; + case RF_INSN60: ok = apply_imm60(mod, location, (int64_t) val / 16); break; + case RF_32LSB: put_unaligned(val, (uint32_t *) location); break; + case RF_64LSB: put_unaligned(val, (uint64_t *) location); break; + case RF_32MSB: /* ia64 Linux is little-endian... */ + case RF_64MSB: /* ia64 Linux is little-endian... */ + case RF_INSN14: /* must be within-module, i.e., resolved by "ld -r" */ + case RF_INSN21M: /* must be within-module, i.e., resolved by "ld -r" */ + case RF_INSN21F: /* must be within-module, i.e., resolved by "ld -r" */ + printk(KERN_ERR "%s: format %u needed by %s reloc is not supported\n", + mod->name, format, reloc_name[r_type] ? reloc_name[r_type] : "?"); + return -ENOEXEC; + + default: + printk(KERN_ERR "%s: relocation %s resulted in unknown format %u\n", + mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?", format); + return -ENOEXEC; + } + return ok ?
0 : -ENOEXEC; +} + +int +apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, + unsigned int relsec, struct module *mod) +{ + unsigned int i, n = sechdrs[relsec].sh_size / sizeof(Elf64_Rela); + Elf64_Rela *rela = (void *) sechdrs[relsec].sh_addr; + Elf64_Shdr *target_sec; + int ret; + + DEBUGP("%s: applying section %u (%u relocs) to %u\n", __func__, + relsec, n, sechdrs[relsec].sh_info); + + target_sec = sechdrs + sechdrs[relsec].sh_info; + + if (target_sec->sh_entsize == ~0UL) + /* + * If target section wasn't allocated, we don't need to relocate it. + * Happens, e.g., for debug sections. + */ + return 0; + + if (!mod->arch.gp) { + /* + * XXX Should have an arch-hook for running this after final section + * addresses have been selected... + */ + uint64_t gp; + if (mod->core_size > MAX_LTOFF) + /* + * This takes advantage of fact that SHF_ARCH_SMALL gets allocated + * at the end of the module. + */ + gp = mod->core_size - MAX_LTOFF / 2; + else + gp = mod->core_size / 2; + gp = (uint64_t) mod->module_core + ((gp + 7) & -8); + mod->arch.gp = gp; + DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp); + } + + for (i = 0; i < n; i++) { + ret = do_reloc(mod, ELF64_R_TYPE(rela[i].r_info), + ((Elf64_Sym *) sechdrs[symindex].sh_addr + + ELF64_R_SYM(rela[i].r_info)), + rela[i].r_addend, target_sec, + (void *) target_sec->sh_addr + rela[i].r_offset); + if (ret < 0) + return ret; + } + return 0; +} + +/* + * Modules contain a single unwind table which covers both the core and the init text + * sections but since the two are not contiguous, we need to split this table up such that + * we can register (and unregister) each "segment" separately. Fortunately, this sounds + * more complicated than it really is. 
+ */ +static void +register_unwind_table (struct module *mod) +{ + struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr; + struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start); + struct unw_table_entry tmp, *e1, *e2, *core, *init; + unsigned long num_init = 0, num_core = 0; + + /* First, count how many init and core unwind-table entries there are. */ + for (e1 = start; e1 < end; ++e1) + if (in_init(mod, e1->start_offset)) + ++num_init; + else + ++num_core; + /* + * Second, sort the table such that all unwind-table entries for the init and core + * text sections are nicely separated. We do this with a stupid bubble sort + * (unwind tables don't get ridiculously huge). + */ + for (e1 = start; e1 < end; ++e1) { + for (e2 = e1 + 1; e2 < end; ++e2) { + if (e2->start_offset < e1->start_offset) { + tmp = *e1; + *e1 = *e2; + *e2 = tmp; + } + } + } + /* + * Third, locate the init and core segments in the unwind table: + */ + if (in_init(mod, start->start_offset)) { + init = start; + core = start + num_init; + } else { + core = start; + init = start + num_core; + } + + DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __func__, + mod->name, mod->arch.gp, num_init, num_core); + + /* + * Fourth, register both tables (if not empty). 
+ */ + if (num_core > 0) { + mod->arch.core_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp, + core, core + num_core); + DEBUGP("%s: core: handle=%p [%p-%p)\n", __func__, + mod->arch.core_unw_table, core, core + num_core); + } + if (num_init > 0) { + mod->arch.init_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp, + init, init + num_init); + DEBUGP("%s: init: handle=%p [%p-%p)\n", __func__, + mod->arch.init_unw_table, init, init + num_init); + } +} + +int +module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) +{ + DEBUGP("%s: init: entry=%p\n", __func__, mod->init); + if (mod->arch.unwind) + register_unwind_table(mod); +#ifdef CONFIG_PARAVIRT + if (mod->arch.paravirt_bundles) { + struct paravirt_patch_site_bundle *start = + (struct paravirt_patch_site_bundle *) + mod->arch.paravirt_bundles->sh_addr; + struct paravirt_patch_site_bundle *end = + (struct paravirt_patch_site_bundle *) + (mod->arch.paravirt_bundles->sh_addr + + mod->arch.paravirt_bundles->sh_size); + + paravirt_patch_apply_bundle(start, end); + } + if (mod->arch.paravirt_insts) { + struct paravirt_patch_site_inst *start = + (struct paravirt_patch_site_inst *) + mod->arch.paravirt_insts->sh_addr; + struct paravirt_patch_site_inst *end = + (struct paravirt_patch_site_inst *) + (mod->arch.paravirt_insts->sh_addr + + mod->arch.paravirt_insts->sh_size); + + paravirt_patch_apply_inst(start, end); + } +#endif + return 0; +} + +void +module_arch_cleanup (struct module *mod) +{ + if (mod->arch.init_unw_table) + unw_remove_unwind_table(mod->arch.init_unw_table); + if (mod->arch.core_unw_table) + unw_remove_unwind_table(mod->arch.core_unw_table); +} diff --git a/kernel/arch/ia64/kernel/msi_ia64.c b/kernel/arch/ia64/kernel/msi_ia64.c new file mode 100644 index 000000000..9dd7464f8 --- /dev/null +++ b/kernel/arch/ia64/kernel/msi_ia64.c @@ -0,0 +1,206 @@ +/* + * MSI hooks for standard x86 apic + */ + +#include +#include +#include +#include +#include +#include + 
+static struct irq_chip ia64_msi_chip; + +#ifdef CONFIG_SMP +static int ia64_set_msi_irq_affinity(struct irq_data *idata, + const cpumask_t *cpu_mask, bool force) +{ + struct msi_msg msg; + u32 addr, data; + int cpu = cpumask_first_and(cpu_mask, cpu_online_mask); + unsigned int irq = idata->irq; + + if (irq_prepare_move(irq, cpu)) + return -1; + + __get_cached_msi_msg(idata->msi_desc, &msg); + + addr = msg.address_lo; + addr &= MSI_ADDR_DEST_ID_MASK; + addr |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu)); + msg.address_lo = addr; + + data = msg.data; + data &= MSI_DATA_VECTOR_MASK; + data |= MSI_DATA_VECTOR(irq_to_vector(irq)); + msg.data = data; + + pci_write_msi_msg(irq, &msg); + cpumask_copy(idata->affinity, cpumask_of(cpu)); + + return 0; +} +#endif /* CONFIG_SMP */ + +int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) +{ + struct msi_msg msg; + unsigned long dest_phys_id; + int irq, vector; + + irq = create_irq(); + if (irq < 0) + return irq; + + irq_set_msi_desc(irq, desc); + dest_phys_id = cpu_physical_id(cpumask_any_and(&(irq_to_domain(irq)), + cpu_online_mask)); + vector = irq_to_vector(irq); + + msg.address_hi = 0; + msg.address_lo = + MSI_ADDR_HEADER | + MSI_ADDR_DEST_MODE_PHYS | + MSI_ADDR_REDIRECTION_CPU | + MSI_ADDR_DEST_ID_CPU(dest_phys_id); + + msg.data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + MSI_DATA_DELIVERY_FIXED | + MSI_DATA_VECTOR(vector); + + pci_write_msi_msg(irq, &msg); + irq_set_chip_and_handler(irq, &ia64_msi_chip, handle_edge_irq); + + return 0; +} + +void ia64_teardown_msi_irq(unsigned int irq) +{ + destroy_irq(irq); +} + +static void ia64_ack_msi_irq(struct irq_data *data) +{ + irq_complete_move(data->irq); + irq_move_irq(data); + ia64_eoi(); +} + +static int ia64_msi_retrigger_irq(struct irq_data *data) +{ + unsigned int vector = irq_to_vector(data->irq); + ia64_resend_irq(vector); + + return 1; +} + +/* + * Generic ops used on most IA64 platforms. 
+ */ +static struct irq_chip ia64_msi_chip = { + .name = "PCI-MSI", + .irq_mask = pci_msi_mask_irq, + .irq_unmask = pci_msi_unmask_irq, + .irq_ack = ia64_ack_msi_irq, +#ifdef CONFIG_SMP + .irq_set_affinity = ia64_set_msi_irq_affinity, +#endif + .irq_retrigger = ia64_msi_retrigger_irq, +}; + + +int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) +{ + if (platform_setup_msi_irq) /* a platform-specific hook, when set, replaces the generic path */ + return platform_setup_msi_irq(pdev, desc); + + return ia64_setup_msi_irq(pdev, desc); +} + +void arch_teardown_msi_irq(unsigned int irq) +{ + if (platform_teardown_msi_irq) /* same dispatch as arch_setup_msi_irq(); no value to return */ + platform_teardown_msi_irq(irq); + else + ia64_teardown_msi_irq(irq); +} + +#ifdef CONFIG_INTEL_IOMMU +#ifdef CONFIG_SMP +static int dmar_msi_set_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + unsigned int irq = data->irq; + struct irq_cfg *cfg = irq_cfg + irq; + struct msi_msg msg; + int cpu = cpumask_first_and(mask, cpu_online_mask); + + if (irq_prepare_move(irq, cpu)) + return -1; + + dmar_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; /* NOTE(review): ia64_set_msi_irq_affinity() in this file applies the same two masks without '~'; both cannot be right - check the mask polarity in the header */ + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu)); + + dmar_msi_write(irq, &msg); + cpumask_copy(data->affinity, mask); + + return 0; +} +#endif /* CONFIG_SMP */ + +static struct irq_chip dmar_msi_type = { + .name = "DMAR_MSI", + .irq_unmask = dmar_msi_unmask, + .irq_mask = dmar_msi_mask, + .irq_ack = ia64_ack_msi_irq, +#ifdef CONFIG_SMP + .irq_set_affinity = dmar_msi_set_affinity, +#endif + .irq_retrigger = ia64_msi_retrigger_irq, +}; + +static int +msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) +{ + struct irq_cfg *cfg = irq_cfg + irq; + unsigned dest; + + dest = cpu_physical_id(cpumask_first_and(&(irq_to_domain(irq)), + cpu_online_mask)); + + msg->address_hi = 0; + msg->address_lo = + MSI_ADDR_HEADER | + MSI_ADDR_DEST_MODE_PHYS | + MSI_ADDR_REDIRECTION_CPU | +
MSI_ADDR_DEST_ID_CPU(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + MSI_DATA_DELIVERY_FIXED | + MSI_DATA_VECTOR(cfg->vector); + return 0; +} + +int arch_setup_dmar_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + dmar_msi_write(irq, &msg); + irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif /* CONFIG_INTEL_IOMMU */ + diff --git a/kernel/arch/ia64/kernel/nr-irqs.c b/kernel/arch/ia64/kernel/nr-irqs.c new file mode 100644 index 000000000..f6769cd54 --- /dev/null +++ b/kernel/arch/ia64/kernel/nr-irqs.c @@ -0,0 +1,21 @@ +/* + * calculate + * NR_IRQS = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, FOO_NR_IRQS...) + * depending on config. + * This must be calculated before processing asm-offset.c. + */ + +#define ASM_OFFSETS_C 1 + +#include +#include +#include + +void foo(void) +{ + union paravirt_nr_irqs_max { + char ia64_native_nr_irqs[IA64_NATIVE_NR_IRQS]; + }; + + DEFINE(NR_IRQS, sizeof (union paravirt_nr_irqs_max)); +} diff --git a/kernel/arch/ia64/kernel/numa.c b/kernel/arch/ia64/kernel/numa.c new file mode 100644 index 000000000..92c376279 --- /dev/null +++ b/kernel/arch/ia64/kernel/numa.c @@ -0,0 +1,85 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ia64 kernel NUMA specific stuff + * + * Copyright (C) 2002 Erich Focht + * Copyright (C) 2004 Silicon Graphics, Inc. + * Jesse Barnes + */ +#include +#include +#include +#include + +u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_to_node_map); + +cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; +EXPORT_SYMBOL(node_to_cpu_mask); + +void map_cpu_to_node(int cpu, int nid) +{ + int oldnid; + if (nid < 0) { /* just initialize by zero */ + cpu_to_node_map[cpu] = 0; + return; + } + /* sanity check first */ + oldnid = cpu_to_node_map[cpu]; + if (cpumask_test_cpu(cpu, &node_to_cpu_mask[oldnid])) { + return; /* nothing to do */ + } + /* we don't have cpu-driven node hot add yet... + In usual case, node is created from SRAT at boot time. */ + if (!node_online(nid)) + nid = first_online_node; + cpu_to_node_map[cpu] = nid; + cpumask_set_cpu(cpu, &node_to_cpu_mask[nid]); + return; +} + +void unmap_cpu_from_node(int cpu, int nid) +{ + WARN_ON(!cpumask_test_cpu(cpu, &node_to_cpu_mask[nid])); + WARN_ON(cpu_to_node_map[cpu] != nid); + cpu_to_node_map[cpu] = 0; + cpumask_clear_cpu(cpu, &node_to_cpu_mask[nid]); +} + + +/** + * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays + * + * Build cpu to node mapping and initialize the per node cpu masks using + * info from the node_cpuid array handed to us by ACPI. 
+ */ +void __init build_cpu_to_node_map(void) +{ + int cpu, i, node; + + for(node=0; node < MAX_NUMNODES; node++) + cpumask_clear(&node_to_cpu_mask[node]); + + for_each_possible_early_cpu(cpu) { + node = -1; + for (i = 0; i < NR_CPUS; ++i) + if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { + node = node_cpuid[i].nid; + break; + } + map_cpu_to_node(cpu, node); + } +} diff --git a/kernel/arch/ia64/kernel/pal.S b/kernel/arch/ia64/kernel/pal.S new file mode 100644 index 000000000..0b533441c --- /dev/null +++ b/kernel/arch/ia64/kernel/pal.S @@ -0,0 +1,298 @@ +/* + * PAL Firmware support + * IA-64 Processor Programmers Reference Vol 2 + * + * Copyright (C) 1999 Don Dugger + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co + * David Mosberger + * Stephane Eranian + * + * 05/22/2000 eranian Added support for stacked register calls + * 05/24/2000 eranian Added support for physical mode static calls + */ + +#include +#include + + .data +pal_entry_point: + data8 ia64_pal_default_handler + .text + +/* + * Set the PAL entry point address. This could be written in C code, but we + * do it here to keep it all in one module (besides, it's so trivial that it's + * not a big deal). + * + * in0 Address of the PAL entry point (text address, NOT a function + * descriptor). + */ +GLOBAL_ENTRY(ia64_pal_handler_init) + alloc r3=ar.pfs,1,0,0,0 + movl r2=pal_entry_point + ;; + st8 [r2]=in0 + br.ret.sptk.many rp +END(ia64_pal_handler_init) + +/* + * Default PAL call handler. This needs to be coded in assembly because it + * uses the static calling convention, i.e., the RSE may not be used and + * calls are done via "br.cond" (not "br.call"). + */ +GLOBAL_ENTRY(ia64_pal_default_handler) + mov r8=-1 + br.cond.sptk.many rp +END(ia64_pal_default_handler) + +/* + * Make a PAL call using the static calling convention. 
+ * + * in0 Index of PAL service + * in1 - in3 Remaining PAL arguments + */ +GLOBAL_ENTRY(ia64_pal_call_static) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) + alloc loc1 = ar.pfs,4,5,0,0 + movl loc2 = pal_entry_point +1: { + mov r28 = in0 + mov r29 = in1 + mov r8 = ip + } + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + adds r8 = 1f-1b,r8 // r8 <- address of the second "1:" label below + mov loc4=ar.rsc // save RSE configuration + ;; + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov loc3 = psr + mov loc0 = rp + .body + mov r30 = in2 + + mov r31 = in3 + mov b7 = loc2 + + rsm psr.i + ;; + mov rp = r8 // return address for the br.cond below + br.cond.sptk.many b7 +1: mov psr.l = loc3 + mov ar.rsc = loc4 // restore RSE configuration + mov ar.pfs = loc1 + mov rp = loc0 + ;; + srlz.d // serialize restoration of psr.l + br.ret.sptk.many b0 +END(ia64_pal_call_static) + +/* + * Make a PAL call using the stacked registers calling convention. + * + * Inputs: + * in0 Index of PAL service + * in1 - in3 Remaining PAL arguments + */ +GLOBAL_ENTRY(ia64_pal_call_stacked) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) + alloc loc1 = ar.pfs,4,4,4,0 + movl loc2 = pal_entry_point + + mov r28 = in0 // Index MUST be copied to r28 + mov out0 = in0 // AND in0 of PAL function + mov loc0 = rp + .body + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov out1 = in1 + mov out2 = in2 + mov out3 = in3 + mov loc3 = psr + ;; + rsm psr.i + mov b7 = loc2 + ;; + br.call.sptk.many rp=b7 // now make the call +.ret0: mov psr.l = loc3 + mov ar.pfs = loc1 + mov rp = loc0 + ;; + srlz.d // serialize restoration of psr.l + br.ret.sptk.many b0 +END(ia64_pal_call_stacked) + +/* + * Make a physical mode PAL call using the static registers calling convention. + * + * Inputs: + * in0 Index of PAL service + * in1 - in3 Remaining PAL arguments + * + * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel. + * So we don't need to clear them.
+ */ +#define PAL_PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_DB | IA64_PSR_RT |\ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) + +#define PAL_PSR_BITS_TO_SET \ + (IA64_PSR_BN) + + +GLOBAL_ENTRY(ia64_pal_call_phys_static) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) + alloc loc1 = ar.pfs,4,7,0,0 + movl loc2 = pal_entry_point +1: { + mov r28 = in0 // copy procedure index + mov r8 = ip // save ip to compute branch + mov loc0 = rp // save rp + } + .body + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov r29 = in1 // first argument + mov r30 = in2 // copy arg2 + mov r31 = in3 // copy arg3 + ;; + mov loc3 = psr // save psr + adds r8 = 1f-1b,r8 // calculate return address for call + ;; + mov loc4=ar.rsc // save RSE configuration + dep.z loc2=loc2,0,61 // convert pal entry point to physical + tpa r8=r8 // convert rp to physical + ;; + mov b7 = loc2 // install target to branch reg + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + movl r16=PAL_PSR_BITS_TO_CLEAR + movl r17=PAL_PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 // add in psr the bits to set + ;; + andcm r16=loc3,r16 // removes bits to clear from psr + br.call.sptk.many rp=ia64_switch_mode_phys + mov rp = r8 // install return address (physical) + mov loc5 = r19 // keep r19; it is put back before ia64_switch_mode_virt + mov loc6 = r20 // keep r20; it is put back before ia64_switch_mode_virt + br.cond.sptk.many b7 +1: + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov r16=loc3 // r16= saved psr with PAL_PSR_BITS_TO_SET or'ed in + mov r19=loc5 + mov r20=loc6 + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode + mov psr.l = loc3 // restore init PSR + + mov ar.pfs = loc1 + mov rp = loc0 + ;; + mov ar.rsc=loc4 // restore RSE configuration + srlz.d // serialize restoration of psr.l + br.ret.sptk.many b0 +END(ia64_pal_call_phys_static) + +/* + * Make a PAL call using the stacked registers in physical mode.
+ * + * Inputs: + * in0 Index of PAL service + * in1 - in3 Remaining PAL arguments + */ +GLOBAL_ENTRY(ia64_pal_call_phys_stacked) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5) + alloc loc1 = ar.pfs,5,7,4,0 + movl loc2 = pal_entry_point +1: { + mov r28 = in0 // copy procedure index + mov loc0 = rp // save rp + } + .body + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov loc3 = psr // save psr + ;; + mov loc4=ar.rsc // save RSE configuration + dep.z loc2=loc2,0,61 // convert pal entry point to physical + ;; + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + movl r16=PAL_PSR_BITS_TO_CLEAR + movl r17=PAL_PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 // add in psr the bits to set + mov b7 = loc2 // install target to branch reg + ;; + andcm r16=loc3,r16 // removes bits to clear from psr + br.call.sptk.many rp=ia64_switch_mode_phys + + mov out0 = in0 // first argument + mov out1 = in1 // copy arg2 + mov out2 = in2 // copy arg3 + mov out3 = in3 // copy arg4 + mov loc5 = r19 // keep r19; it is put back before ia64_switch_mode_virt + mov loc6 = r20 // keep r20; it is put back before ia64_switch_mode_virt + + br.call.sptk.many rp=b7 // now make the call + + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov r16=loc3 // r16= saved psr with PAL_PSR_BITS_TO_SET or'ed in + mov r19=loc5 + mov r20=loc6 + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode + + mov psr.l = loc3 // restore init PSR + mov ar.pfs = loc1 + mov rp = loc0 + ;; + mov ar.rsc=loc4 // restore RSE configuration + srlz.d // serialize restoration of psr.l + br.ret.sptk.many b0 +END(ia64_pal_call_phys_stacked) + +/* + * Save the scratch fp regs which aren't saved in pt_regs already + * (fp10-fp15). + * + * NOTE: We need to do this since firmware (SAL and PAL) may use any of the + * scratch regs fp-low partition.
+ * + * Inputs: + * in0 Address of stack storage for fp regs + */ +GLOBAL_ENTRY(ia64_save_scratch_fpregs) + alloc r3=ar.pfs,1,0,0,0 + add r2=16,in0 + ;; + stf.spill [in0] = f10,32 + stf.spill [r2] = f11,32 + ;; + stf.spill [in0] = f12,32 + stf.spill [r2] = f13,32 + ;; + stf.spill [in0] = f14,32 + stf.spill [r2] = f15,32 + br.ret.sptk.many rp +END(ia64_save_scratch_fpregs) + +/* + * Load scratch fp scratch regs (fp10-fp15) + * + * Inputs: + * in0 Address of stack storage for fp regs + */ +GLOBAL_ENTRY(ia64_load_scratch_fpregs) + alloc r3=ar.pfs,1,0,0,0 + add r2=16,in0 + ;; + ldf.fill f10 = [in0],32 + ldf.fill f11 = [r2],32 + ;; + ldf.fill f12 = [in0],32 + ldf.fill f13 = [r2],32 + ;; + ldf.fill f14 = [in0],32 + ldf.fill f15 = [r2],32 + br.ret.sptk.many rp +END(ia64_load_scratch_fpregs) diff --git a/kernel/arch/ia64/kernel/palinfo.c b/kernel/arch/ia64/kernel/palinfo.c new file mode 100644 index 000000000..c39c3cd3a --- /dev/null +++ b/kernel/arch/ia64/kernel/palinfo.c @@ -0,0 +1,1022 @@ +/* + * palinfo.c + * + * Prints processor specific information reported by PAL. + * This code is based on specification of PAL as of the + * Intel IA-64 Architecture Software Developer's Manual v1.0. 
+ * + * + * Copyright (C) 2000-2001, 2003 Hewlett-Packard Co + * Stephane Eranian + * Copyright (C) 2004 Intel Corporation + * Ashok Raj + * + * 05/26/2000 S.Eranian initial release + * 08/21/2000 S.Eranian updated to July 2000 PAL specs + * 02/05/2001 S.Eranian fixed module support + * 10/23/2001 S.Eranian updated pal_perf_mon_info bug fixes + * 03/24/2004 Ashok Raj updated to work with CPU Hotplug + * 10/26/2006 Russ Anderson updated processor features to rev 2.2 spec + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Stephane Eranian "); +MODULE_DESCRIPTION("/proc interface to IA-64 PAL"); +MODULE_LICENSE("GPL"); + +#define PALINFO_VERSION "0.5" + +typedef int (*palinfo_func_t)(struct seq_file *); + +typedef struct { + const char *name; /* name of the proc entry */ + palinfo_func_t proc_read; /* function to call for reading */ + struct proc_dir_entry *entry; /* registered entry (removal) */ +} palinfo_entry_t; + + +/* + * A bunch of string array to get pretty printing + */ + +static const char *cache_types[] = { + "", /* not used */ + "Instruction", + "Data", + "Data/Instruction" /* unified */ +}; + +static const char *cache_mattrib[]={ + "WriteThrough", + "WriteBack", + "", /* reserved */ + "" /* reserved */ +}; + +static const char *cache_st_hints[]={ + "Temporal, level 1", + "Reserved", + "Reserved", + "Non-temporal, all levels", + "Reserved", + "Reserved", + "Reserved", + "Reserved" +}; + +static const char *cache_ld_hints[]={ + "Temporal, level 1", + "Non-temporal, level 1", + "Reserved", + "Non-temporal, all levels", + "Reserved", + "Reserved", + "Reserved", + "Reserved" +}; + +static const char *rse_hints[]={ + "enforced lazy", + "eager stores", + "eager loads", + "eager loads and stores" +}; + +#define RSE_HINTS_COUNT ARRAY_SIZE(rse_hints) + +static const char *mem_attrib[]={ + "WB", /* 000 */ + "SW", /* 001 */ + "010", /* 
010 */ + "011", /* 011 */ + "UC", /* 100 */ + "UCE", /* 101 */ + "WC", /* 110 */ + "NaTPage" /* 111 */ +}; + +/* + * Take a 64bit vector and print it such that + * if bit n is set then 2^n in clear text is generated. The adjustment + * to the right unit is also done. + * + * Input: + * - the seq_file to print into + * - a 64-bit vector + * Output: + * - none; the text is emitted with seq_printf() + * + */ +static void bitvector_process(struct seq_file *m, u64 vector) +{ + int i,j; + static const char *units[]={ "", "K", "M", "G", "T", "P", "E" }; /* one entry per 10 bits: j reaches 6 for bits 60-63 */ + + for (i=0, j=0; i < 64; i++ , j=i/10) { + if (vector & 0x1) + seq_printf(m, "%d%s ", 1 << (i-j*10), units[j]); + vector >>= 1; + } +} + +/* + * Take an array of 64bit vectors and print it such that + * if bit n is set then register n is present. The function + * takes into account consecutive registers and prints out ranges. + * + * Input: + * - the seq_file to print into + * - the vector words and the number of bits (max) to scan + * Output: + * - none; the text is emitted with seq_printf()/seq_puts() + * + */ +static void bitregister_process(struct seq_file *m, u64 *reg_info, int max) +{ + int i, begin, skip = 0; + u64 value = reg_info[0]; + + value >>= i = begin = ffs(value) - 1; /* NOTE(review): ffs() takes an int, so only the low 32 bits are examined; if they are all zero this shifts by -1 - confirm callers never pass that */ + + for(; i < max; i++ ) { + + if (i != 0 && (i%64) == 0) value = *++reg_info; + + if ((value & 0x1) == 0 && skip == 0) { + if (begin <= i - 2) + seq_printf(m, "%d-%d ", begin, i-1); + else + seq_printf(m, "%d ", i-1); + skip = 1; + begin = -1; + } else if ((value & 0x1) && skip == 1) { + skip = 0; + begin = i; + } + value >>=1; + } + if (begin > -1) { + if (begin < 127) + seq_printf(m, "%d-127", begin); + else + seq_puts(m, "127"); + } +} + +static int power_info(struct seq_file *m) +{ + s64 status; + u64 halt_info_buffer[8]; + pal_power_mgmt_info_u_t *halt_info =(pal_power_mgmt_info_u_t *)halt_info_buffer; + int i; + + status = ia64_pal_halt_info(halt_info); + if (status != 0) return 0; + + for (i=0; i < 8 ; i++ ) { + if (halt_info[i].pal_power_mgmt_info_s.im == 1) { + seq_printf(m,
+ "Power level %d:\n" + "\tentry_latency : %d cycles\n" + "\texit_latency : %d cycles\n" + "\tpower consumption : %d mW\n" + "\tCache+TLB coherency : %s\n", i, + halt_info[i].pal_power_mgmt_info_s.entry_latency, + halt_info[i].pal_power_mgmt_info_s.exit_latency, + halt_info[i].pal_power_mgmt_info_s.power_consumption, + halt_info[i].pal_power_mgmt_info_s.co ? "Yes" : "No"); + } else { + seq_printf(m,"Power level %d: not implemented\n", i); + } + } + return 0; +} + +static int cache_info(struct seq_file *m) +{ + unsigned long i, levels, unique_caches; + pal_cache_config_info_t cci; + int j, k; + long status; + + if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) { + printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status); + return 0; + } + + seq_printf(m, "Cache levels : %ld\nUnique caches : %ld\n\n", + levels, unique_caches); + + for (i=0; i < levels; i++) { + for (j=2; j >0 ; j--) { + /* even without unification some level may not be present */ + if ((status=ia64_pal_cache_config_info(i,j, &cci)) != 0) + continue; + + seq_printf(m, + "%s Cache level %lu:\n" + "\tSize : %u bytes\n" + "\tAttributes : ", + cache_types[j+cci.pcci_unified], i+1, + cci.pcci_cache_size); + + if (cci.pcci_unified) + seq_puts(m, "Unified "); + + seq_printf(m, "%s\n", cache_mattrib[cci.pcci_cache_attr]); + + seq_printf(m, + "\tAssociativity : %d\n" + "\tLine size : %d bytes\n" + "\tStride : %d bytes\n", + cci.pcci_assoc, + 1<>=1; + } + seq_puts(m, "\n\tLoad hints : "); + + for(k=0; k < 8; k++ ) { + if (cci.pcci_ld_hints & 0x1) + seq_printf(m, "[%s]", cache_ld_hints[k]); + cci.pcci_ld_hints >>=1; + } + seq_printf(m, + "\n\tAlias boundary : %d byte(s)\n" + "\tTag LSB : %d\n" + "\tTag MSB : %d\n", + 1<0 ; j--) { + tc_pages = 0; /* just in case */ + + /* even without unification, some levels may not be present */ + if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) + continue; + + seq_printf(m, + "\n%s Translation Cache Level %d:\n" + "\tHash sets : %d\n" + 
"\tAssociativity : %d\n"
+					   "\tNumber of entries : %d\n"
+					   "\tFlags : ",
+					   cache_types[j+tc_info.tc_unified], i+1,
+					   tc_info.tc_num_sets,
+					   tc_info.tc_associativity,
+					   tc_info.tc_num_entries);
+
+				if (tc_info.tc_pf)
+					seq_puts(m, "PreferredPageSizeOptimized ");
+				if (tc_info.tc_unified)
+					seq_puts(m, "Unified ");
+				if (tc_info.tc_reduce_tr)
+					seq_puts(m, "TCReduction");
+
+				seq_puts(m, "\n\tSupported page sizes: ");
+
+				bitvector_process(m, tc_pages);
+
+				/* when unified, data (j=2) is enough */
+				if (tc_info.tc_unified)
+					break;
+			}
+		}
+	}
+
+	seq_putc(m, '\n');
+	return 0;
+}
+
+
+/*
+ * Print the register summary: the four 128-bit register vectors returned by
+ * ia64_pal_register_info() (one per entry of info_type[]), then the RSE
+ * information and the debug register pair counts.
+ *
+ * Always returns 0; a failing PAL call simply ends the report early.
+ */
+static int register_info(struct seq_file *m)
+{
+	u64 reg_info[2];
+	u64 info;
+	unsigned long phys_stacked;
+	pal_hints_u_t hints;
+	unsigned long iregs, dregs;
+	static const char * const info_type[] = {
+		"Implemented AR(s)",
+		"AR(s) with read side-effects",
+		"Implemented CR(s)",
+		"CR(s) with read side-effects",
+	};
+
+	for(info=0; info < 4; info++) {
+		/* the two words together form the 128-bit vector printed below */
+		if (ia64_pal_register_info(info, &reg_info[0], &reg_info[1]) != 0)
+			return 0;
+		seq_printf(m, "%-32s : ", info_type[info]);
+		bitregister_process(m, reg_info, 128);
+		seq_putc(m, '\n');
+	}
+
+	if (ia64_pal_rse_info(&phys_stacked, &hints) == 0)
+		seq_printf(m,
+			   "RSE stacked physical registers : %ld\n"
+			   "RSE load/store hints : %ld (%s)\n",
+			   phys_stacked, hints.ph_data,
+			   hints.ph_data < RSE_HINTS_COUNT ?
rse_hints[hints.ph_data]: "(??)");
+
+	/* a non-zero status ends the report without the debug register lines */
+	if (ia64_pal_debug_info(&iregs, &dregs))
+		return 0;
+
+	seq_printf(m,
+		   "Instruction debug register pairs : %ld\n"
+		   "Data debug register pairs : %ld\n", iregs, dregs);
+
+	return 0;
+}
+
+/*
+ * Names of the processor features of feature set 0, indexed by bit number.
+ * A NULL slot means the bit has no name in this table.
+ */
+static const char *const proc_features_0[]={		/* Feature set 0 */
+	NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+	NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,
+	NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+	NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL,
+	"Unimplemented instruction address fault",
+	"INIT, PMI, and LINT pins",
+	"Simple unimplemented instr addresses",
+	"Variable P-state performance",
+	"Virtual machine features implemented",
+	"XIP,XPSR,XFS implemented",
+	"XR1-XR3 implemented",
+	"Disable dynamic predicate prediction",
+	"Disable processor physical number",
+	"Disable dynamic data cache prefetch",
+	"Disable dynamic inst cache prefetch",
+	"Disable dynamic branch prediction",
+	NULL, NULL, NULL, NULL,
+	"Disable P-states",
+	"Enable MCA on Data Poisoning",
+	"Enable vmsw instruction",
+	"Enable extern environmental notification",
+	"Disable BINIT on processor time-out",
+	"Disable dynamic power management (DPM)",
+	"Disable coherency",
+	"Disable cache",
+	"Enable CMCI promotion",
+	"Enable MCA to BINIT promotion",
+	"Enable MCA promotion",
+	"Enable BERR promotion"
+};
+
+/* Same layout as proc_features_0, for feature set 16. */
+static const char *const proc_features_16[]={		/* Feature set 16 */
+	"Disable ETM",
+	"Enable ETM",
+	"Enable MCA on half-way timer",
+	"Enable snoop WC",
+	NULL,
+	"Enable Fast Deferral",
+	"Disable MCA on memory aliasing",
+	"Enable RSB",
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	"DP system processor",
+	"Low Voltage",
+	"HT supported",
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL
+};
+
+/* Name table of each feature set, indexed by set number; NULL = no names. */
+static const char *const *const proc_features[]={
+	proc_features_0,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL,
+	proc_features_16,
+	NULL, NULL, NULL, NULL,
+};
+
+/*
+ * Print one line per available feature bit of feature set "set": the
+ * feature name (or the raw set/bit numbers when no name is known), whether
+ * it is on, and whether it is controllable.
+ *
+ * avail, status and control are the three 64-bit masks of the set.
+ */
+static void feature_set_info(struct seq_file *m, u64 avail, u64 status, u64 control,
+			     unsigned long set)
+{
+	const char *const *vf = NULL;
+	const char *name, *state, *ctrl;
+	int i;
+
+	/*
+	 * processor_info() keeps counting sets up until PAL reports an error,
+	 * so "set" may exceed the name table; such sets print unnamed bits.
+	 */
+	if (set < ARRAY_SIZE(proc_features))
+		vf = proc_features[set];
+
+	for(i=0; i < 64; i++, avail >>=1, status >>=1, control >>=1) {
+
+		if (!(control))		/* No remaining bits set */
+			break;
+		if (!(avail & 0x1))	/* Print only bits that are available */
+			continue;
+
+		/* avail bit 0 is known to be set from here on */
+		name = vf ? vf[i] : NULL;
+		state = status & 0x1 ? "On " : "Off";
+		ctrl = control & 0x1 ? "Ctrl" : "NoCtrl";
+
+		if (name)
+			seq_printf(m, "%-40s : %s %s\n", name, state, ctrl);
+		else
+			seq_printf(m, "Feature set %2ld bit %2d\t\t\t"
+					" : %s %s\n",
+					set, i, state, ctrl);
+	}
+}
+
+/*
+ * Walk the feature sets starting at 0 and print each one that PAL reports.
+ * A status of 1 skips the set, a negative status ends the walk.
+ * Always returns 0.
+ */
+static int processor_info(struct seq_file *m)
+{
+	u64 avail=1, status=1, control=1, feature_set=0;
+	s64 ret;
+
+	for (;; feature_set++) {
+		ret = ia64_pal_proc_get_features(&avail, &status, &control,
+						feature_set);
+		if (ret < 0)
+			return 0;
+
+		if (ret == 1)
+			continue;
+
+		feature_set_info(m, avail, status, control, feature_set);
+	}
+}
+
+static const char *const bus_features[]={
+	NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+	NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,
+	NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+	NULL,NULL,
+	"Request Bus Parking",
+	"Bus Lock Mask",
+	"Enable Half Transfer",
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL,
+	"Enable Cache Line Repl. Shared",
+	"Enable Cache Line Repl.
Exclusive",
+	"Disable Transaction Queuing",
+	"Disable Response Error Checking",
+	"Disable Bus Error Checking",
+	"Disable Bus Requester Internal Error Signalling",
+	"Disable Bus Requester Error Signalling",
+	"Disable Bus Initialization Event Checking",
+	"Disable Bus Initialization Event Signalling",
+	"Disable Bus Address Error Checking",
+	"Disable Bus Address Error Signalling",
+	"Disable Bus Data Error Checking"
+};
+
+
+/*
+ * Print one line per named entry of bus_features[]: whether the feature is
+ * implemented and, if so, whether it is on and whether it is controllable.
+ * Returns 0 without printing anything when the PAL call fails.
+ */
+static int bus_info(struct seq_file *m)
+{
+	const char *const *v = bus_features;
+	pal_bus_features_u_t av, st, ct;
+	u64 avail, status, control;
+	int i;
+	s64 ret;
+
+	if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0)
+		return 0;
+
+	avail   = av.pal_bus_features_val;
+	status  = st.pal_bus_features_val;
+	control = ct.pal_bus_features_val;
+
+	/* v walks the name table in step with the three masks, bit 0 first */
+	for(i=0; i < 64; i++, v++, avail >>=1, status >>=1, control >>=1) {
+		if ( ! *v )
+			continue;	/* unnamed bit: nothing to print */
+		seq_printf(m, "%-48s : %s%s %s\n", *v,
+			   avail & 0x1 ? "" : "NotImpl",
+			   avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "",
+			   avail & 0x1 ? (control & 0x1 ?
"Ctrl" : "NoCtrl"): "");
+	}
+	return 0;
+}
+
+/*
+ * Print the PAL vendor and the PAL_A / PAL_B model and revision, each
+ * current value followed by the minimum one. Prints nothing if the PAL
+ * call fails; always returns 0.
+ */
+static int version_info(struct seq_file *m)
+{
+	pal_version_u_t min_ver, cur_ver;
+
+	if (ia64_pal_version(&min_ver, &cur_ver))
+		return 0;
+
+	seq_printf(m, "PAL_vendor : 0x%02x (min=0x%02x)\n",
+		   cur_ver.pal_version_s.pv_pal_vendor,
+		   min_ver.pal_version_s.pv_pal_vendor);
+
+	seq_printf(m, "PAL_A : %02x.%02x (min=%02x.%02x)\n",
+		   cur_ver.pal_version_s.pv_pal_a_model,
+		   cur_ver.pal_version_s.pv_pal_a_rev,
+		   min_ver.pal_version_s.pv_pal_a_model,
+		   min_ver.pal_version_s.pv_pal_a_rev);
+
+	seq_printf(m, "PAL_B : %02x.%02x (min=%02x.%02x)\n",
+		   cur_ver.pal_version_s.pv_pal_b_model,
+		   cur_ver.pal_version_s.pv_pal_b_rev,
+		   min_ver.pal_version_s.pv_pal_b_model,
+		   min_ver.pal_version_s.pv_pal_b_rev);
+
+	return 0;
+}
+
+static int perfmon_info(struct seq_file *m)
+{
+	u64 pm_buffer[16];
+	pal_perf_mon_info_u_t pm_info;
+
+	if (ia64_pal_perf_mon_info(pm_buffer, &pm_info) != 0)
+		return 0;
+
+	seq_printf(m,
+		   "PMC/PMD pairs : %d\n"
+		   "Counter width : %d bits\n"
+		   "Cycle event number : %d\n"
+		   "Retired event number : %d\n"
+		   "Implemented PMC : ",
+		   pm_info.pal_perf_mon_info_s.generic,
+		   pm_info.pal_perf_mon_info_s.width,
+		   pm_info.pal_perf_mon_info_s.cycles,
+		   pm_info.pal_perf_mon_info_s.retired);
+
+	bitregister_process(m, pm_buffer, 256);
+	seq_puts(m, "\nImplemented PMD : ");
+	bitregister_process(m, pm_buffer+4, 256);
+	seq_puts(m, "\nCycles count capable : ");
+	bitregister_process(m, pm_buffer+8, 256);
+	seq_puts(m, "\nRetired bundles count capable : ");
+
+#ifdef CONFIG_ITANIUM
+	/*
+	 * PAL_PERF_MON_INFO reports that only PMC4 can be used to count CPU_CYCLES
+	 * which is wrong, both PMC4 and PMD5 support it.
+	 */
+	if (pm_buffer[12] == 0x10)
+		pm_buffer[12]=0x30;
+#endif
+
+	bitregister_process(m, pm_buffer+12, 256);
+	seq_putc(m, '\n');
+	return 0;
+}
+
+/*
+ * Print the output clock (or "not implemented" when the PAL call returns
+ * -1) followed by the processor, bus and ITC clock ratios. The ratios are
+ * left out if their PAL call fails. Always returns 0.
+ */
+static int frequency_info(struct seq_file *m)
+{
+	struct pal_freq_ratio proc, itc, bus;
+	unsigned long base;
+
+	if (ia64_pal_freq_base(&base) != -1)
+		seq_printf(m, "Output clock : %ld ticks/s\n", base);
+	else
+		seq_puts(m, "Output clock : not implemented\n");
+
+	if (ia64_pal_freq_ratios(&proc, &bus, &itc) == 0)
+		seq_printf(m,
+			   "Processor/Clock ratio : %d/%d\n"
+			   "Bus/Clock ratio : %d/%d\n"
+			   "ITC/Clock ratio : %d/%d\n",
+			   proc.num, proc.den, bus.num, bus.den, itc.num, itc.den);
+
+	return 0;
+}
+
+static int tr_info(struct seq_file *m)
+{
+	long status;
+	pal_tr_valid_u_t tr_valid;
+	u64 tr_buffer[4];
+	pal_vm_info_1_u_t vm_info_1;
+	pal_vm_info_2_u_t vm_info_2;
+	unsigned long i, j;
+	unsigned long max[3], pgm;
+	struct ifa_reg {
+		unsigned long valid:1;
+		unsigned long ig:11;
+		unsigned long vpn:52;
+	} *ifa_reg;
+	struct itir_reg {
+		unsigned long rv1:2;
+		unsigned long ps:6;
+		unsigned long key:24;
+		unsigned long rv2:32;
+	} *itir_reg;
+	struct gr_reg {
+		unsigned long p:1;
+		unsigned long rv1:1;
+		unsigned long ma:3;
+		unsigned long a:1;
+		unsigned long d:1;
+		unsigned long pl:2;
+		unsigned long ar:3;
+		unsigned long ppn:38;
+		unsigned long rv2:2;
+		unsigned long ed:1;
+		unsigned long ig:11;
+	} *gr_reg;
+	struct rid_reg {
+		unsigned long ig1:1;
+		unsigned long rv1:1;
+		unsigned long ig2:6;
+		unsigned long rid:24;
+		unsigned long rv2:32;
+	} *rid_reg;
+
+	if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
+		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
+		return 0;
+	}
+	max[0] = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
+	max[1] = vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
+
+	for (i=0; i < 2; i++ ) {
+		for (j=0; j < max[i]; j++) {
+
+			status = ia64_pal_tr_read(j, i, tr_buffer, &tr_valid);
+			if (status != 0) {
+				printk(KERN_ERR "palinfo: pal call failed on
tr[%lu:%lu]=%ld\n",
+				       i, j, status);
+				continue;
+			}
+
+			/* tr_buffer[0..3] are decoded below as gr, itir, ifa and rid */
+			ifa_reg  = (struct ifa_reg *)&tr_buffer[2];
+
+			if (ifa_reg->valid == 0)
+				continue;
+
+			gr_reg   = (struct gr_reg *)tr_buffer;
+			itir_reg = (struct itir_reg *)&tr_buffer[1];
+			rid_reg  = (struct rid_reg *)&tr_buffer[3];
+
+			/*
+			 * Mask that clears the page-number bits lying inside the
+			 * page. Built from an unsigned long: "-1 << n" left-shifts
+			 * a negative int, which is undefined.
+			 */
+			pgm	 = ~0UL << (itir_reg->ps - 12);
+			seq_printf(m,
+				   "%cTR%lu: av=%d pv=%d dv=%d mv=%d\n"
+				   "\tppn : 0x%lx\n"
+				   "\tvpn : 0x%lx\n"
+				   "\tps : ",
+				   "ID"[i], j,
+				   tr_valid.pal_tr_valid_s.access_rights_valid,
+				   tr_valid.pal_tr_valid_s.priv_level_valid,
+				   tr_valid.pal_tr_valid_s.dirty_bit_valid,
+				   tr_valid.pal_tr_valid_s.mem_attr_valid,
+				   (gr_reg->ppn & pgm)<< 12, (ifa_reg->vpn & pgm)<< 12);
+
+			/* ps is a 6-bit field: shift a 64-bit one so large pages fit */
+			bitvector_process(m, 1UL << itir_reg->ps);
+
+			seq_printf(m,
+				   "\n\tpl : %d\n"
+				   "\tar : %d\n"
+				   "\trid : %x\n"
+				   "\tp : %d\n"
+				   "\tma : %d\n"
+				   "\td : %d\n",
+				   gr_reg->pl, gr_reg->ar, rid_reg->rid, gr_reg->p, gr_reg->ma,
+				   gr_reg->d);
+		}
+	}
+	return 0;
+}
+
+
+
+/*
+ * List {name,function} pairs for every entry in /proc/pal/cpu*
+ */
+static const palinfo_entry_t palinfo_entries[]={
+	{ "version_info",	version_info, },
+	{ "vm_info",		vm_info, },
+	{ "cache_info",		cache_info, },
+	{ "power_info",		power_info, },
+	{ "register_info",	register_info, },
+	{ "processor_info",	processor_info, },
+	{ "perfmon_info",	perfmon_info, },
+	{ "frequency_info",	frequency_info, },
+	{ "bus_info",		bus_info },
+	{ "tr_info",		tr_info, }
+};
+
+#define NR_PALINFO_ENTRIES	(int) ARRAY_SIZE(palinfo_entries)
+
+static struct proc_dir_entry *palinfo_dir;
+
+/*
+ * This data structure is used to pass which cpu,function is being requested
+ * It must fit in a 64bit quantity to be passed to the proc callback routine
+ *
+ * In SMP mode, when we get a request for another CPU, we must call that
+ * other CPU using IPI and wait for the result before returning.
+ */
+typedef union {
+	u64 value;
+	struct {
+		unsigned	req_cpu: 32;	/* for which CPU this info is */
+		unsigned	func_id: 32;	/* which function is requested */
+	} pal_func_cpu;
+} pal_func_cpu_u_t;
+
+#define req_cpu	pal_func_cpu.req_cpu
+#define func_id pal_func_cpu.func_id
+
+#ifdef CONFIG_SMP
+
+/*
+ * describes the call to run on the remote CPU and carries its result back
+ */
+typedef struct {
+	palinfo_func_t	func;	/* pointer to function to call */
+	struct seq_file *m;	/* buffer to store results */
+	int		ret;	/* return value from call */
+} palinfo_smp_data_t;
+
+
+/*
+ * palinfo callback routine: invoked through smp_call_function_single(),
+ * it runs the requested function and stores its return value
+ */
+static void
+palinfo_smp_call(void *info)
+{
+	palinfo_smp_data_t *req = info;
+
+	req->ret = req->func(req->m);
+}
+
+/*
+ * run the requested palinfo function on the CPU named in f
+ * Return:
+ *	0 : the cross-CPU call could not be made
+ *	otherwise the return value of the palinfo function
+ */
+static
+int palinfo_handle_smp(struct seq_file *m, pal_func_cpu_u_t *f)
+{
+	palinfo_smp_data_t remote = {
+		.func	= palinfo_entries[f->func_id].proc_read,
+		.m	= m,
+		.ret	= 0,	/* just in case */
+	};
+	int err;
+
+	/* will send IPI to other CPU and wait for completion of remote call */
+	err = smp_call_function_single(f->req_cpu, palinfo_smp_call, &remote, 1);
+	if (err) {
+		printk(KERN_ERR "palinfo: remote CPU call from %d to %d on function %d: "
+		       "error %d\n", smp_processor_id(), f->req_cpu, f->func_id, err);
+		return 0;
+	}
+	return remote.ret;
+}
+#else /* !
CONFIG_SMP */
+static
+int palinfo_handle_smp(struct seq_file *m, pal_func_cpu_u_t *f)
+{
+	printk(KERN_ERR "palinfo: should not be called with non SMP kernel\n");
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * Entry point routine: all calls go through this function
+ */
+static int proc_palinfo_show(struct seq_file *m, void *v)
+{
+	/* m->private holds the packed {req_cpu, func_id} value, not a pointer */
+	pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&m->private;
+
+	/*
+	 * in SMP mode, we may need to call another CPU to get correct
+	 * information. PAL, by definition, is processor specific
+	 */
+	if (f->req_cpu == get_cpu())
+		(*palinfo_entries[f->func_id].proc_read)(m);
+	else
+		palinfo_handle_smp(m, f);
+
+	put_cpu();
+	return 0;
+}
+
+static int proc_palinfo_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, proc_palinfo_show, PDE_DATA(inode));
+}
+
+static const struct file_operations proc_palinfo_fops = {
+	.open		= proc_palinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * Create the "cpu<N>" directory below palinfo_dir together with one file per
+ * entry of palinfo_entries[]. Each file carries the packed {cpu, function
+ * index} pair as its private data. Does nothing if the directory cannot be
+ * created.
+ */
+static void
+create_palinfo_proc_entries(unsigned int cpu)
+{
+	pal_func_cpu_u_t f;
+	struct proc_dir_entry *cpu_dir;
+	int j;
+	char cpustr[3+4+1];	/* cpu numbers are up to 4095 on itanic */
+
+	/* bounded, and %u because cpu is unsigned */
+	snprintf(cpustr, sizeof(cpustr), "cpu%u", cpu);
+
+	cpu_dir = proc_mkdir(cpustr, palinfo_dir);
+	if (!cpu_dir)
+		return;
+
+	f.req_cpu = cpu;
+
+	for (j=0; j < NR_PALINFO_ENTRIES; j++) {
+		f.func_id = j;
+		proc_create_data(palinfo_entries[j].name, 0, cpu_dir,
+				 &proc_palinfo_fops, (void *)f.value);
+	}
+}
+
+/*
+ * Remove the "cpu<N>" directory of the given CPU and everything below it.
+ */
+static void
+remove_palinfo_proc_entries(unsigned int hcpu)
+{
+	char cpustr[3+4+1];	/* cpu numbers are up to 4095 on itanic */
+
+	/* must build the same name as create_palinfo_proc_entries() */
+	snprintf(cpustr, sizeof(cpustr), "cpu%u", hcpu);
+	remove_proc_subtree(cpustr, palinfo_dir);
+}
+
+static int palinfo_cpu_callback(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	unsigned int hotcpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		create_palinfo_proc_entries(hotcpu);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+
remove_palinfo_proc_entries(hotcpu);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+/* hotplug notifier that keeps the per-cpu proc entries in step with the CPUs */
+static struct notifier_block __refdata palinfo_cpu_notifier =
+{
+	.notifier_call = palinfo_cpu_callback,
+	.priority = 0,
+};
+
+/*
+ * Module init: create the "pal" proc directory, populate it for every CPU
+ * that is online and register the hotplug notifier.
+ * Returns -ENOMEM if the directory cannot be created, 0 otherwise.
+ */
+static int __init
+palinfo_init(void)
+{
+	int i = 0;
+
+	printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION);
+	palinfo_dir = proc_mkdir("pal", NULL);
+	if (!palinfo_dir)
+		return -ENOMEM;
+
+	/* the walk and the registration both sit inside the begin/done pair */
+	cpu_notifier_register_begin();
+
+	/* Create palinfo dirs in /proc for all online cpus */
+	for_each_online_cpu(i) {
+		create_palinfo_proc_entries(i);
+	}
+
+	/* Register for future delivery via notify registration */
+	__register_hotcpu_notifier(&palinfo_cpu_notifier);
+
+	cpu_notifier_register_done();
+
+	return 0;
+}
+
+/* Module exit: drop the notifier, then remove the whole "pal" proc subtree. */
+static void __exit
+palinfo_exit(void)
+{
+	unregister_hotcpu_notifier(&palinfo_cpu_notifier);
+	remove_proc_subtree("pal", NULL);
+}
+
+module_init(palinfo_init);
+module_exit(palinfo_exit);
diff --git a/kernel/arch/ia64/kernel/paravirt.c b/kernel/arch/ia64/kernel/paravirt.c
new file mode 100644
index 000000000..1b22f6de2
--- /dev/null
+++ b/kernel/arch/ia64/kernel/paravirt.c
@@ -0,0 +1,902 @@
+/******************************************************************************
+ * arch/ia64/kernel/paravirt.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata
+ *                    VA Linux Systems Japan K.K.
+ *      Yaozu (Eddie) Dong
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/*
+ * NOTE(review): the header names of the #include lines below are missing
+ * from this copy of the patch -- restore them from the upstream file.
+ */
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+/***************************************************************************
+ * general info
+ */
+/* values used when running natively, i.e. without a hypervisor */
+struct pv_info pv_info = {
+	.kernel_rpl = 0,
+	.paravirt_enabled = 0,
+	.name = "bare hardware"
+};
+
+/***************************************************************************
+ * pv_init_ops
+ * initialization hooks.
+ */
+
+/* forward declaration: referenced by pv_init_ops just below */
+static void __init
+ia64_native_patch_branch(unsigned long tag, unsigned long type);
+
+/* native hooks; patch_bundle is only set when ASM_SUPPORTED is defined */
+struct pv_init_ops pv_init_ops =
+{
+#ifdef ASM_SUPPORTED
+	.patch_bundle = ia64_native_patch_bundle,
+#endif
+	.patch_branch = ia64_native_patch_branch,
+};
+
+/***************************************************************************
+ * pv_cpu_ops
+ * intrinsics hooks.
+ */
+
+#ifndef ASM_SUPPORTED
+/* ia64_native_xxx are macros so that we have to make them real functions */
+
+/* wrapper taking one unsigned long and returning nothing */
+#define DEFINE_VOID_FUNC1(name)					\
+	static void						\
+	ia64_native_ ## name ## _func(unsigned long arg)	\
+	{							\
+		ia64_native_ ## name(arg);			\
+	}
+
+/* same, for a pointer argument */
+#define DEFINE_VOID_FUNC1_VOID(name)				\
+	static void						\
+	ia64_native_ ## name ## _func(void *arg)		\
+	{							\
+		ia64_native_ ## name(arg);			\
+	}
+
+/* wrapper taking two unsigned longs and returning nothing */
+#define DEFINE_VOID_FUNC2(name)					\
+	static void						\
+	ia64_native_ ## name ## _func(unsigned long arg0,	\
+				      unsigned long arg1)	\
+	{							\
+		ia64_native_ ## name(arg0, arg1);		\
+	}
+
+/* wrapper taking no argument and returning unsigned long */
+#define DEFINE_FUNC0(name)					\
+	static unsigned long					\
+	ia64_native_ ## name ## _func(void)			\
+	{							\
+		return ia64_native_ ## name();			\
+	}
+
+/* wrapper taking one argument of the given type, returning unsigned long */
+#define DEFINE_FUNC1(name, type)				\
+	static unsigned long					\
+	ia64_native_ ## name ## _func(type arg)			\
+	{							\
+		return ia64_native_ ## name(arg);		\
+	}							\
+
+DEFINE_VOID_FUNC1_VOID(fc);
+DEFINE_VOID_FUNC1(intrin_local_irq_restore);
+
+DEFINE_VOID_FUNC2(ptcga);
+DEFINE_VOID_FUNC2(set_rr);
+
+DEFINE_FUNC0(get_psr_i);
+
+DEFINE_FUNC1(thash, unsigned long);
+DEFINE_FUNC1(get_cpuid, int);
+DEFINE_FUNC1(get_pmd, int);
+DEFINE_FUNC1(get_rr, unsigned long);
+
+/* set the PSR.i bit */
+static void
+ia64_native_ssm_i_func(void)
+{
+	ia64_native_ssm(IA64_PSR_I);
+}
+
+/* reset the PSR.i bit */
+static void
+ia64_native_rsm_i_func(void)
+{
+	ia64_native_rsm(IA64_PSR_I);
+}
+
+static void
+ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
+				unsigned long val2, unsigned long val3,
+				unsigned long val4)
+{
+	ia64_native_set_rr0_to_rr4(val0, val1, val2, val3, val4);
+}
+
+/* one switch case that reads register _IA64_REG_<id> into "res" */
+#define CASE_GET_REG(id)				\
+	case _IA64_REG_ ## id:				\
+	res = ia64_native_getreg(_IA64_REG_ ## id);	\
+	break;
+#define CASE_GET_AR(id) CASE_GET_REG(AR_ ## id)
+#define CASE_GET_CR(id) CASE_GET_REG(CR_ ## id)
+
+/*
+ * Read the register selected by regnum (one of the _IA64_REG_* values).
+ * An unlisted regnum is logged and -1 is returned.
+ */
+unsigned long
+ia64_native_getreg_func(int regnum)
+{
+	unsigned long res = -1;
+	switch (regnum) {
+	CASE_GET_REG(GP);
+	/*CASE_GET_REG(IP);*/ /* returned ip value shouldn't be constant */
+	CASE_GET_REG(PSR);
+
CASE_GET_REG(TP);
+	CASE_GET_REG(SP);
+
+	/* application registers */
+	CASE_GET_AR(KR0);
+	CASE_GET_AR(KR1);
+	CASE_GET_AR(KR2);
+	CASE_GET_AR(KR3);
+	CASE_GET_AR(KR4);
+	CASE_GET_AR(KR5);
+	CASE_GET_AR(KR6);
+	CASE_GET_AR(KR7);
+	CASE_GET_AR(RSC);
+	CASE_GET_AR(BSP);
+	CASE_GET_AR(BSPSTORE);
+	CASE_GET_AR(RNAT);
+	CASE_GET_AR(FCR);
+	CASE_GET_AR(EFLAG);
+	CASE_GET_AR(CSD);
+	CASE_GET_AR(SSD);
+	CASE_GET_AR(CFLAG);
+	CASE_GET_AR(FSR);
+	CASE_GET_AR(FIR);
+	CASE_GET_AR(FDR);
+	CASE_GET_AR(CCV);
+	CASE_GET_AR(UNAT);
+	CASE_GET_AR(FPSR);
+	CASE_GET_AR(ITC);
+	CASE_GET_AR(PFS);
+	CASE_GET_AR(LC);
+	CASE_GET_AR(EC);
+
+	/* control registers */
+	CASE_GET_CR(DCR);
+	CASE_GET_CR(ITM);
+	CASE_GET_CR(IVA);
+	CASE_GET_CR(PTA);
+	CASE_GET_CR(IPSR);
+	CASE_GET_CR(ISR);
+	CASE_GET_CR(IIP);
+	CASE_GET_CR(IFA);
+	CASE_GET_CR(ITIR);
+	CASE_GET_CR(IIPA);
+	CASE_GET_CR(IFS);
+	CASE_GET_CR(IIM);
+	CASE_GET_CR(IHA);
+	CASE_GET_CR(LID);
+	CASE_GET_CR(IVR);
+	CASE_GET_CR(TPR);
+	CASE_GET_CR(EOI);
+	CASE_GET_CR(IRR0);
+	CASE_GET_CR(IRR1);
+	CASE_GET_CR(IRR2);
+	CASE_GET_CR(IRR3);
+	CASE_GET_CR(ITV);
+	CASE_GET_CR(PMV);
+	CASE_GET_CR(CMCV);
+	CASE_GET_CR(LRR0);
+	CASE_GET_CR(LRR1);
+
+	default:
+		/* res keeps its initial value of -1 */
+		printk(KERN_CRIT "wrong_getreg %d\n", regnum);
+		break;
+	}
+	return res;
+}
+
+/* one switch case that writes "val" to register _IA64_REG_<id> */
+#define CASE_SET_REG(id)				\
+	case _IA64_REG_ ## id:				\
+	ia64_native_setreg(_IA64_REG_ ## id, val);	\
+	break;
+#define CASE_SET_AR(id) CASE_SET_REG(AR_ ## id)
+#define CASE_SET_CR(id) CASE_SET_REG(CR_ ## id)
+
+/*
+ * Write val to the register selected by regnum (one of the _IA64_REG_*
+ * values). An unlisted regnum is logged and nothing is written.
+ */
+void
+ia64_native_setreg_func(int regnum, unsigned long val)
+{
+	switch (regnum) {
+	case _IA64_REG_PSR_L:
+		/* the only case that is followed by a data serialization */
+		ia64_native_setreg(_IA64_REG_PSR_L, val);
+		ia64_dv_serialize_data();
+		break;
+	CASE_SET_REG(SP);
+	CASE_SET_REG(GP);
+
+	/* application registers */
+	CASE_SET_AR(KR0);
+	CASE_SET_AR(KR1);
+	CASE_SET_AR(KR2);
+	CASE_SET_AR(KR3);
+	CASE_SET_AR(KR4);
+	CASE_SET_AR(KR5);
+	CASE_SET_AR(KR6);
+	CASE_SET_AR(KR7);
+	CASE_SET_AR(RSC);
+	CASE_SET_AR(BSP);
+	CASE_SET_AR(BSPSTORE);
+	CASE_SET_AR(RNAT);
+	CASE_SET_AR(FCR);
+	CASE_SET_AR(EFLAG);
+	CASE_SET_AR(CSD);
+	CASE_SET_AR(SSD);
+
CASE_SET_AR(CFLAG);
+	CASE_SET_AR(FSR);
+	CASE_SET_AR(FIR);
+	CASE_SET_AR(FDR);
+	CASE_SET_AR(CCV);
+	CASE_SET_AR(UNAT);
+	CASE_SET_AR(FPSR);
+	CASE_SET_AR(ITC);
+	CASE_SET_AR(PFS);
+	CASE_SET_AR(LC);
+	CASE_SET_AR(EC);
+
+	/* control registers */
+	CASE_SET_CR(DCR);
+	CASE_SET_CR(ITM);
+	CASE_SET_CR(IVA);
+	CASE_SET_CR(PTA);
+	CASE_SET_CR(IPSR);
+	CASE_SET_CR(ISR);
+	CASE_SET_CR(IIP);
+	CASE_SET_CR(IFA);
+	CASE_SET_CR(ITIR);
+	CASE_SET_CR(IIPA);
+	CASE_SET_CR(IFS);
+	CASE_SET_CR(IIM);
+	CASE_SET_CR(IHA);
+	CASE_SET_CR(LID);
+	CASE_SET_CR(IVR);
+	CASE_SET_CR(TPR);
+	CASE_SET_CR(EOI);
+	CASE_SET_CR(IRR0);
+	CASE_SET_CR(IRR1);
+	CASE_SET_CR(IRR2);
+	CASE_SET_CR(IRR3);
+	CASE_SET_CR(ITV);
+	CASE_SET_CR(PMV);
+	CASE_SET_CR(CMCV);
+	CASE_SET_CR(LRR0);
+	CASE_SET_CR(LRR1);
+	default:
+		printk(KERN_CRIT "wrong setreg %d\n", regnum);
+		break;
+	}
+}
+#else
+
+/*
+ * Emit the assembly function ia64_native_<name>_func. "code" is bracketed
+ * by the labels ia64_native_<name>_direct_start and _direct_end, and the
+ * function ends with a branch through b6.
+ */
+#define __DEFINE_FUNC(name, code)					\
+	extern const char ia64_native_ ## name ## _direct_start[];	\
+	extern const char ia64_native_ ## name ## _direct_end[];	\
+	asm (".align 32\n"						\
+	     ".proc ia64_native_" #name "_func\n"			\
+	     "ia64_native_" #name "_func:\n"				\
+	     "ia64_native_" #name "_direct_start:\n"			\
+	     code							\
+	     "ia64_native_" #name "_direct_end:\n"			\
+	     "br.cond.sptk.many b6\n"					\
+	     ".endp ia64_native_" #name "_func\n")
+
+/* the wrappers below add the C prototype that matches each asm function */
+#define DEFINE_VOID_FUNC0(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(void);			\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(unsigned long arg);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1_VOID(name, code)			\
+	extern void						\
+	ia64_native_ ## name ## _func(void *arg);		\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC2(name, code)				\
+	extern void						\
+	ia64_native_ ## name ## _func(unsigned long arg0,	\
+				      unsigned long arg1);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC0(name, code)		\
+	extern unsigned long			\
+	ia64_native_ ## name ## _func(void);	\
+	__DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC1(name,
type, code)			\
+	extern unsigned long				\
+	ia64_native_ ## name ## _func(type arg);	\
+	__DEFINE_FUNC(name, code)
+
+/* in the stubs below the first argument is in r8, the second in r9 */
+DEFINE_VOID_FUNC1_VOID(fc,
+		       "fc r8\n");
+/* ssm psr.i (followed by srlz.d) when r8 is non-zero, rsm psr.i otherwise */
+DEFINE_VOID_FUNC1(intrin_local_irq_restore,
+		  ";;\n"
+		  " cmp.ne p6, p7 = r8, r0\n"
+		  ";;\n"
+		  "(p6) ssm psr.i\n"
+		  "(p7) rsm psr.i\n"
+		  ";;\n"
+		  "(p6) srlz.d\n");
+
+DEFINE_VOID_FUNC2(ptcga,
+		  "ptc.ga r8, r9\n");
+DEFINE_VOID_FUNC2(set_rr,
+		  "mov rr[r8] = r9\n");
+
+/* ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I */
+DEFINE_FUNC0(get_psr_i,
+	     "mov r2 = " __stringify(1 << IA64_PSR_I_BIT) "\n"
+	     "mov r8 = psr\n"
+	     ";;\n"
+	     "and r8 = r2, r8\n");
+
+DEFINE_FUNC1(thash, unsigned long,
+	     "thash r8 = r8\n");
+DEFINE_FUNC1(get_cpuid, int,
+	     "mov r8 = cpuid[r8]\n");
+DEFINE_FUNC1(get_pmd, int,
+	     "mov r8 = pmd[r8]\n");
+DEFINE_FUNC1(get_rr, unsigned long,
+	     "mov r8 = rr[r8]\n");
+
+DEFINE_VOID_FUNC0(ssm_i,
+		  "ssm psr.i\n");
+DEFINE_VOID_FUNC0(rsm_i,
+		  "rsm psr.i\n");
+
+/* the five values arrive in r8, r9, r10, r11 and r14 */
+extern void
+ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
+				unsigned long val2, unsigned long val3,
+				unsigned long val4);
+__DEFINE_FUNC(set_rr0_to_rr4,
+	      "mov rr[r0] = r8\n"
+	      "movl r2 = 0x2000000000000000\n"
+	      ";;\n"
+	      "mov rr[r2] = r9\n"
+	      "shl r3 = r2, 1\n"	/* movl r3 = 0x4000000000000000 */
+	      ";;\n"
+	      "add r2 = r2, r3\n"	/* movl r2 = 0x6000000000000000 */
+	      "mov rr[r3] = r10\n"
+	      ";;\n"
+	      "mov rr[r2] = r11\n"
+	      "shl r3 = r3, 1\n"	/* movl r3 = 0x8000000000000000 */
+	      ";;\n"
+	      "mov rr[r3] = r14\n");
+
+extern unsigned long ia64_native_getreg_func(int regnum);
+asm(".global ia64_native_getreg_func\n");
+/*
+ * One step of the getreg chain: if r8 equals _IA64_REG_<id>, load "reg"
+ * into r8 and return through b6; otherwise fall through to the next step.
+ */
+#define __DEFINE_GET_REG(id, reg)			\
+	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"	\
+	";;\n"						\
+	"cmp.eq p6, p0 = r2, r8\n"			\
+	";;\n"						\
+	"(p6) mov r8 = " #reg "\n"			\
+	"(p6) br.cond.sptk.many b6\n"			\
+	";;\n"
+#define __DEFINE_GET_AR(id, reg)	__DEFINE_GET_REG(AR_ ## id, ar.reg)
+#define __DEFINE_GET_CR(id, reg)	__DEFINE_GET_REG(CR_ ## id, cr.reg)
+
+__DEFINE_FUNC(getreg,
+	      __DEFINE_GET_REG(GP, gp)
+
/*__DEFINE_GET_REG(IP, ip)*/ /* returned ip value shouldn't be constant */
+	      __DEFINE_GET_REG(PSR, psr)
+	      __DEFINE_GET_REG(TP, tp)
+	      __DEFINE_GET_REG(SP, sp)
+
+	      /* the kernel registers are named by number (ar0-ar7) here */
+	      __DEFINE_GET_REG(AR_KR0, ar0)
+	      __DEFINE_GET_REG(AR_KR1, ar1)
+	      __DEFINE_GET_REG(AR_KR2, ar2)
+	      __DEFINE_GET_REG(AR_KR3, ar3)
+	      __DEFINE_GET_REG(AR_KR4, ar4)
+	      __DEFINE_GET_REG(AR_KR5, ar5)
+	      __DEFINE_GET_REG(AR_KR6, ar6)
+	      __DEFINE_GET_REG(AR_KR7, ar7)
+	      __DEFINE_GET_AR(RSC, rsc)
+	      __DEFINE_GET_AR(BSP, bsp)
+	      __DEFINE_GET_AR(BSPSTORE, bspstore)
+	      __DEFINE_GET_AR(RNAT, rnat)
+	      __DEFINE_GET_AR(FCR, fcr)
+	      __DEFINE_GET_AR(EFLAG, eflag)
+	      __DEFINE_GET_AR(CSD, csd)
+	      __DEFINE_GET_AR(SSD, ssd)
+	      __DEFINE_GET_REG(AR_CFLAG, ar27)
+	      __DEFINE_GET_AR(FSR, fsr)
+	      __DEFINE_GET_AR(FIR, fir)
+	      __DEFINE_GET_AR(FDR, fdr)
+	      __DEFINE_GET_AR(CCV, ccv)
+	      __DEFINE_GET_AR(UNAT, unat)
+	      __DEFINE_GET_AR(FPSR, fpsr)
+	      __DEFINE_GET_AR(ITC, itc)
+	      __DEFINE_GET_AR(PFS, pfs)
+	      __DEFINE_GET_AR(LC, lc)
+	      __DEFINE_GET_AR(EC, ec)
+
+	      /* control registers */
+	      __DEFINE_GET_CR(DCR, dcr)
+	      __DEFINE_GET_CR(ITM, itm)
+	      __DEFINE_GET_CR(IVA, iva)
+	      __DEFINE_GET_CR(PTA, pta)
+	      __DEFINE_GET_CR(IPSR, ipsr)
+	      __DEFINE_GET_CR(ISR, isr)
+	      __DEFINE_GET_CR(IIP, iip)
+	      __DEFINE_GET_CR(IFA, ifa)
+	      __DEFINE_GET_CR(ITIR, itir)
+	      __DEFINE_GET_CR(IIPA, iipa)
+	      __DEFINE_GET_CR(IFS, ifs)
+	      __DEFINE_GET_CR(IIM, iim)
+	      __DEFINE_GET_CR(IHA, iha)
+	      __DEFINE_GET_CR(LID, lid)
+	      __DEFINE_GET_CR(IVR, ivr)
+	      __DEFINE_GET_CR(TPR, tpr)
+	      __DEFINE_GET_CR(EOI, eoi)
+	      __DEFINE_GET_CR(IRR0, irr0)
+	      __DEFINE_GET_CR(IRR1, irr1)
+	      __DEFINE_GET_CR(IRR2, irr2)
+	      __DEFINE_GET_CR(IRR3, irr3)
+	      __DEFINE_GET_CR(ITV, itv)
+	      __DEFINE_GET_CR(PMV, pmv)
+	      __DEFINE_GET_CR(CMCV, cmcv)
+	      __DEFINE_GET_CR(LRR0, lrr0)
+	      __DEFINE_GET_CR(LRR1, lrr1)
+
+	      "mov r8 = -1\n"	/* unsupported case */
+	);
+
+extern void ia64_native_setreg_func(int regnum, unsigned long val);
+asm(".global ia64_native_setreg_func\n");
+/*
+ * One step of the setreg chain: if r9 equals _IA64_REG_<id>, copy r8 into
+ * "reg" and return through b6; otherwise fall through to the next step.
+ */
+#define __DEFINE_SET_REG(id, reg)			\
+	"mov r2 = " __stringify(_IA64_REG_ ## id) "\n"	\
+	";;\n"						\
+	"cmp.eq p6, p0
= r2, r9\n"			\
+	";;\n"						\
+	"(p6) mov " #reg " = r8\n"			\
+	"(p6) br.cond.sptk.many b6\n"			\
+	";;\n"
+#define __DEFINE_SET_AR(id, reg)	__DEFINE_SET_REG(AR_ ## id, ar.reg)
+#define __DEFINE_SET_CR(id, reg)	__DEFINE_SET_REG(CR_ ## id, cr.reg)
+/*
+ * psr.l is handled first and by hand so that ".serialize.data" can be
+ * emitted when HAVE_SERIALIZE_DIRECTIVE is defined; every other register
+ * goes through __DEFINE_SET_REG.
+ */
+__DEFINE_FUNC(setreg,
+	      "mov r2 = " __stringify(_IA64_REG_PSR_L) "\n"
+	      ";;\n"
+	      "cmp.eq p6, p0 = r2, r9\n"
+	      ";;\n"
+	      "(p6) mov psr.l = r8\n"
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+	      ".serialize.data\n"
+#endif
+	      "(p6) br.cond.sptk.many b6\n"
+	      __DEFINE_SET_REG(GP, gp)
+	      __DEFINE_SET_REG(SP, sp)
+
+	      /* the kernel registers are named by number (ar0-ar7) here */
+	      __DEFINE_SET_REG(AR_KR0, ar0)
+	      __DEFINE_SET_REG(AR_KR1, ar1)
+	      __DEFINE_SET_REG(AR_KR2, ar2)
+	      __DEFINE_SET_REG(AR_KR3, ar3)
+	      __DEFINE_SET_REG(AR_KR4, ar4)
+	      __DEFINE_SET_REG(AR_KR5, ar5)
+	      __DEFINE_SET_REG(AR_KR6, ar6)
+	      __DEFINE_SET_REG(AR_KR7, ar7)
+	      __DEFINE_SET_AR(RSC, rsc)
+	      __DEFINE_SET_AR(BSP, bsp)
+	      __DEFINE_SET_AR(BSPSTORE, bspstore)
+	      __DEFINE_SET_AR(RNAT, rnat)
+	      __DEFINE_SET_AR(FCR, fcr)
+	      __DEFINE_SET_AR(EFLAG, eflag)
+	      __DEFINE_SET_AR(CSD, csd)
+	      __DEFINE_SET_AR(SSD, ssd)
+	      __DEFINE_SET_REG(AR_CFLAG, ar27)
+	      __DEFINE_SET_AR(FSR, fsr)
+	      __DEFINE_SET_AR(FIR, fir)
+	      __DEFINE_SET_AR(FDR, fdr)
+	      __DEFINE_SET_AR(CCV, ccv)
+	      __DEFINE_SET_AR(UNAT, unat)
+	      __DEFINE_SET_AR(FPSR, fpsr)
+	      __DEFINE_SET_AR(ITC, itc)
+	      __DEFINE_SET_AR(PFS, pfs)
+	      __DEFINE_SET_AR(LC, lc)
+	      __DEFINE_SET_AR(EC, ec)
+
+	      /* control registers */
+	      __DEFINE_SET_CR(DCR, dcr)
+	      __DEFINE_SET_CR(ITM, itm)
+	      __DEFINE_SET_CR(IVA, iva)
+	      __DEFINE_SET_CR(PTA, pta)
+	      __DEFINE_SET_CR(IPSR, ipsr)
+	      __DEFINE_SET_CR(ISR, isr)
+	      __DEFINE_SET_CR(IIP, iip)
+	      __DEFINE_SET_CR(IFA, ifa)
+	      __DEFINE_SET_CR(ITIR, itir)
+	      __DEFINE_SET_CR(IIPA, iipa)
+	      __DEFINE_SET_CR(IFS, ifs)
+	      __DEFINE_SET_CR(IIM, iim)
+	      __DEFINE_SET_CR(IHA, iha)
+	      __DEFINE_SET_CR(LID, lid)
+	      __DEFINE_SET_CR(IVR, ivr)
+	      __DEFINE_SET_CR(TPR, tpr)
+	      __DEFINE_SET_CR(EOI, eoi)
+	      __DEFINE_SET_CR(IRR0, irr0)
+	      __DEFINE_SET_CR(IRR1, irr1)
+	      __DEFINE_SET_CR(IRR2, irr2)
+	      __DEFINE_SET_CR(IRR3, irr3)
+	      __DEFINE_SET_CR(ITV, itv)
+	      __DEFINE_SET_CR(PMV,
pmv) + __DEFINE_SET_CR(CMCV, cmcv) + __DEFINE_SET_CR(LRR0, lrr0) + __DEFINE_SET_CR(LRR1, lrr1) + ); +#endif + +struct pv_cpu_ops pv_cpu_ops = { + .fc = ia64_native_fc_func, + .thash = ia64_native_thash_func, + .get_cpuid = ia64_native_get_cpuid_func, + .get_pmd = ia64_native_get_pmd_func, + .ptcga = ia64_native_ptcga_func, + .get_rr = ia64_native_get_rr_func, + .set_rr = ia64_native_set_rr_func, + .set_rr0_to_rr4 = ia64_native_set_rr0_to_rr4_func, + .ssm_i = ia64_native_ssm_i_func, + .getreg = ia64_native_getreg_func, + .setreg = ia64_native_setreg_func, + .rsm_i = ia64_native_rsm_i_func, + .get_psr_i = ia64_native_get_psr_i_func, + .intrin_local_irq_restore + = ia64_native_intrin_local_irq_restore_func, +}; +EXPORT_SYMBOL(pv_cpu_ops); + +/****************************************************************************** + * replacement of hand written assembly codes. + */ + +void +paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch) +{ + extern unsigned long paravirt_switch_to_targ; + extern unsigned long paravirt_leave_syscall_targ; + extern unsigned long paravirt_work_processed_syscall_targ; + extern unsigned long paravirt_leave_kernel_targ; + + paravirt_switch_to_targ = cpu_asm_switch->switch_to; + paravirt_leave_syscall_targ = cpu_asm_switch->leave_syscall; + paravirt_work_processed_syscall_targ = + cpu_asm_switch->work_processed_syscall; + paravirt_leave_kernel_targ = cpu_asm_switch->leave_kernel; +} + +/*************************************************************************** + * pv_iosapic_ops + * iosapic read/write hooks. 
+ */ + +static unsigned int +ia64_native_iosapic_read(char __iomem *iosapic, unsigned int reg) +{ + return __ia64_native_iosapic_read(iosapic, reg); +} + +static void +ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) +{ + __ia64_native_iosapic_write(iosapic, reg, val); +} + +struct pv_iosapic_ops pv_iosapic_ops = { + .pcat_compat_init = ia64_native_iosapic_pcat_compat_init, + .__get_irq_chip = ia64_native_iosapic_get_irq_chip, + + .__read = ia64_native_iosapic_read, + .__write = ia64_native_iosapic_write, +}; + +/*************************************************************************** + * pv_irq_ops + * irq operations + */ + +struct pv_irq_ops pv_irq_ops = { + .register_ipi = ia64_native_register_ipi, + + .assign_irq_vector = ia64_native_assign_irq_vector, + .free_irq_vector = ia64_native_free_irq_vector, + .register_percpu_irq = ia64_native_register_percpu_irq, + + .resend_irq = ia64_native_resend_irq, +}; + +/*************************************************************************** + * pv_time_ops + * time operations + */ +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; + +static int +ia64_native_do_steal_accounting(unsigned long *new_itm) +{ + return 0; +} + +struct pv_time_ops pv_time_ops = { + .do_steal_accounting = ia64_native_do_steal_accounting, + .sched_clock = ia64_native_sched_clock, +}; + +/*************************************************************************** + * binary pacthing + * pv_init_ops.patch_bundle + */ + +#ifdef ASM_SUPPORTED +#define IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg) \ + __DEFINE_FUNC(get_ ## name, \ + ";;\n" \ + "mov r8 = " #reg "\n" \ + ";;\n") + +#define IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg) \ + __DEFINE_FUNC(set_ ## name, \ + ";;\n" \ + "mov " #reg " = r8\n" \ + ";;\n") + +#define IA64_NATIVE_PATCH_DEFINE_REG(name, reg) \ + IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg); \ + IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg) \ + +#define 
IA64_NATIVE_PATCH_DEFINE_AR(name, reg) \ + IA64_NATIVE_PATCH_DEFINE_REG(ar_ ## name, ar.reg) + +#define IA64_NATIVE_PATCH_DEFINE_CR(name, reg) \ + IA64_NATIVE_PATCH_DEFINE_REG(cr_ ## name, cr.reg) + + +IA64_NATIVE_PATCH_DEFINE_GET_REG(psr, psr); +IA64_NATIVE_PATCH_DEFINE_GET_REG(tp, tp); + +/* IA64_NATIVE_PATCH_DEFINE_SET_REG(psr_l, psr.l); */ +__DEFINE_FUNC(set_psr_l, + ";;\n" + "mov psr.l = r8\n" +#ifdef HAVE_SERIALIZE_DIRECTIVE + ".serialize.data\n" +#endif + ";;\n"); + +IA64_NATIVE_PATCH_DEFINE_REG(gp, gp); +IA64_NATIVE_PATCH_DEFINE_REG(sp, sp); + +IA64_NATIVE_PATCH_DEFINE_REG(kr0, ar0); +IA64_NATIVE_PATCH_DEFINE_REG(kr1, ar1); +IA64_NATIVE_PATCH_DEFINE_REG(kr2, ar2); +IA64_NATIVE_PATCH_DEFINE_REG(kr3, ar3); +IA64_NATIVE_PATCH_DEFINE_REG(kr4, ar4); +IA64_NATIVE_PATCH_DEFINE_REG(kr5, ar5); +IA64_NATIVE_PATCH_DEFINE_REG(kr6, ar6); +IA64_NATIVE_PATCH_DEFINE_REG(kr7, ar7); + +IA64_NATIVE_PATCH_DEFINE_AR(rsc, rsc); +IA64_NATIVE_PATCH_DEFINE_AR(bsp, bsp); +IA64_NATIVE_PATCH_DEFINE_AR(bspstore, bspstore); +IA64_NATIVE_PATCH_DEFINE_AR(rnat, rnat); +IA64_NATIVE_PATCH_DEFINE_AR(fcr, fcr); +IA64_NATIVE_PATCH_DEFINE_AR(eflag, eflag); +IA64_NATIVE_PATCH_DEFINE_AR(csd, csd); +IA64_NATIVE_PATCH_DEFINE_AR(ssd, ssd); +IA64_NATIVE_PATCH_DEFINE_REG(ar27, ar27); +IA64_NATIVE_PATCH_DEFINE_AR(fsr, fsr); +IA64_NATIVE_PATCH_DEFINE_AR(fir, fir); +IA64_NATIVE_PATCH_DEFINE_AR(fdr, fdr); +IA64_NATIVE_PATCH_DEFINE_AR(ccv, ccv); +IA64_NATIVE_PATCH_DEFINE_AR(unat, unat); +IA64_NATIVE_PATCH_DEFINE_AR(fpsr, fpsr); +IA64_NATIVE_PATCH_DEFINE_AR(itc, itc); +IA64_NATIVE_PATCH_DEFINE_AR(pfs, pfs); +IA64_NATIVE_PATCH_DEFINE_AR(lc, lc); +IA64_NATIVE_PATCH_DEFINE_AR(ec, ec); + +IA64_NATIVE_PATCH_DEFINE_CR(dcr, dcr); +IA64_NATIVE_PATCH_DEFINE_CR(itm, itm); +IA64_NATIVE_PATCH_DEFINE_CR(iva, iva); +IA64_NATIVE_PATCH_DEFINE_CR(pta, pta); +IA64_NATIVE_PATCH_DEFINE_CR(ipsr, ipsr); +IA64_NATIVE_PATCH_DEFINE_CR(isr, isr); +IA64_NATIVE_PATCH_DEFINE_CR(iip, iip); +IA64_NATIVE_PATCH_DEFINE_CR(ifa, ifa); 
+IA64_NATIVE_PATCH_DEFINE_CR(itir, itir); +IA64_NATIVE_PATCH_DEFINE_CR(iipa, iipa); +IA64_NATIVE_PATCH_DEFINE_CR(ifs, ifs); +IA64_NATIVE_PATCH_DEFINE_CR(iim, iim); +IA64_NATIVE_PATCH_DEFINE_CR(iha, iha); +IA64_NATIVE_PATCH_DEFINE_CR(lid, lid); +IA64_NATIVE_PATCH_DEFINE_CR(ivr, ivr); +IA64_NATIVE_PATCH_DEFINE_CR(tpr, tpr); +IA64_NATIVE_PATCH_DEFINE_CR(eoi, eoi); +IA64_NATIVE_PATCH_DEFINE_CR(irr0, irr0); +IA64_NATIVE_PATCH_DEFINE_CR(irr1, irr1); +IA64_NATIVE_PATCH_DEFINE_CR(irr2, irr2); +IA64_NATIVE_PATCH_DEFINE_CR(irr3, irr3); +IA64_NATIVE_PATCH_DEFINE_CR(itv, itv); +IA64_NATIVE_PATCH_DEFINE_CR(pmv, pmv); +IA64_NATIVE_PATCH_DEFINE_CR(cmcv, cmcv); +IA64_NATIVE_PATCH_DEFINE_CR(lrr0, lrr0); +IA64_NATIVE_PATCH_DEFINE_CR(lrr1, lrr1); + +static const struct paravirt_patch_bundle_elem ia64_native_patch_bundle_elems[] +__initdata_or_module = +{ +#define IA64_NATIVE_PATCH_BUNDLE_ELEM(name, type) \ + { \ + (void*)ia64_native_ ## name ## _direct_start, \ + (void*)ia64_native_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_ ## type, \ + } + + IA64_NATIVE_PATCH_BUNDLE_ELEM(fc, FC), + IA64_NATIVE_PATCH_BUNDLE_ELEM(thash, THASH), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD), + IA64_NATIVE_PATCH_BUNDLE_ELEM(ptcga, PTCGA), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_rr, GET_RR), + IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr, SET_RR), + IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4), + IA64_NATIVE_PATCH_BUNDLE_ELEM(ssm_i, SSM_I), + IA64_NATIVE_PATCH_BUNDLE_ELEM(rsm_i, RSM_I), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I), + IA64_NATIVE_PATCH_BUNDLE_ELEM(intrin_local_irq_restore, + INTRIN_LOCAL_IRQ_RESTORE), + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg) \ + { \ + (void*)ia64_native_get_ ## name ## _direct_start, \ + (void*)ia64_native_get_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg, \ + } + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg) \ + { \ + 
(void*)ia64_native_set_ ## name ## _direct_start, \ + (void*)ia64_native_set_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg, \ + } + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(name, reg) \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg), \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg) \ + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(name, reg) \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar_ ## name, AR_ ## reg) + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(name, reg) \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(cr_ ## name, CR_ ## reg) + + IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(psr, PSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(tp, TP), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(psr_l, PSR_L), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(gp, GP), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(sp, SP), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr0, AR_KR0), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr1, AR_KR1), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr2, AR_KR2), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr3, AR_KR3), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr4, AR_KR4), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr5, AR_KR5), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr6, AR_KR6), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr7, AR_KR7), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rsc, RSC), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bsp, BSP), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bspstore, BSPSTORE), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rnat, RNAT), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fcr, FCR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(eflag, EFLAG), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(csd, CSD), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ssd, SSD), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar27, AR_CFLAG), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fsr, FSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fir, FIR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fdr, FDR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ccv, CCV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(unat, UNAT), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fpsr, FPSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(itc, ITC), + 
IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(pfs, PFS), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(lc, LC), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ec, EC), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(dcr, DCR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itm, ITM), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iva, IVA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pta, PTA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ipsr, IPSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(isr, ISR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iip, IIP), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifa, IFA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itir, ITIR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iipa, IIPA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifs, IFS), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iim, IIM), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iha, IHA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lid, LID), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ivr, IVR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(tpr, TPR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(eoi, EOI), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr0, IRR0), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr1, IRR1), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr2, IRR2), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr3, IRR3), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itv, ITV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pmv, PMV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(cmcv, CMCV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr0, LRR0), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr1, LRR1), +}; + +unsigned long __init_or_module +ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type) +{ + const unsigned long nelems = sizeof(ia64_native_patch_bundle_elems) / + sizeof(ia64_native_patch_bundle_elems[0]); + + return __paravirt_patch_apply_bundle(sbundle, ebundle, type, + ia64_native_patch_bundle_elems, + nelems, NULL); +} +#endif /* ASM_SUPPOTED */ + +extern const char ia64_native_switch_to[]; +extern const char ia64_native_leave_syscall[]; +extern const char ia64_native_work_processed_syscall[]; +extern const char ia64_native_leave_kernel[]; + +const struct paravirt_patch_branch_target 
ia64_native_branch_target[] +__initconst = { +#define PARAVIRT_BR_TARGET(name, type) \ + { \ + ia64_native_ ## name, \ + PARAVIRT_PATCH_TYPE_BR_ ## type, \ + } + PARAVIRT_BR_TARGET(switch_to, SWITCH_TO), + PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL), + PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL), + PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL), +}; + +static void __init +ia64_native_patch_branch(unsigned long tag, unsigned long type) +{ + const unsigned long nelem = + sizeof(ia64_native_branch_target) / + sizeof(ia64_native_branch_target[0]); + __paravirt_patch_apply_branch(tag, type, + ia64_native_branch_target, nelem); +} diff --git a/kernel/arch/ia64/kernel/paravirt_inst.h b/kernel/arch/ia64/kernel/paravirt_inst.h new file mode 100644 index 000000000..1ad7512b5 --- /dev/null +++ b/kernel/arch/ia64/kernel/paravirt_inst.h @@ -0,0 +1,28 @@ +/****************************************************************************** + * linux/arch/ia64/xen/paravirt_inst.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifdef __IA64_ASM_PARAVIRTUALIZED_PVCHECK +#include +#else +#include +#endif + diff --git a/kernel/arch/ia64/kernel/paravirt_patch.c b/kernel/arch/ia64/kernel/paravirt_patch.c new file mode 100644 index 000000000..bfdfef1b1 --- /dev/null +++ b/kernel/arch/ia64/kernel/paravirt_patch.c @@ -0,0 +1,514 @@ +/****************************************************************************** + * linux/arch/ia64/xen/paravirt_patch.c + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include + +typedef union ia64_inst { + struct { + unsigned long long qp : 6; + unsigned long long : 31; + unsigned long long opcode : 4; + unsigned long long reserved : 23; + } generic; + unsigned long long l; +} ia64_inst_t; + +/* + * flush_icache_range() can't be used here. + * we are here before cpu_init() which initializes + * ia64_i_cache_stride_shift. flush_icache_range() uses it. 
+ */ +void __init_or_module +paravirt_flush_i_cache_range(const void *instr, unsigned long size) +{ + extern void paravirt_fc_i(const void *addr); + unsigned long i; + + for (i = 0; i < size; i += sizeof(bundle_t)) + paravirt_fc_i(instr + i); +} + +bundle_t* __init_or_module +paravirt_get_bundle(unsigned long tag) +{ + return (bundle_t *)(tag & ~3UL); +} + +unsigned long __init_or_module +paravirt_get_slot(unsigned long tag) +{ + return tag & 3UL; +} + +unsigned long __init_or_module +paravirt_get_num_inst(unsigned long stag, unsigned long etag) +{ + bundle_t *sbundle = paravirt_get_bundle(stag); + unsigned long sslot = paravirt_get_slot(stag); + bundle_t *ebundle = paravirt_get_bundle(etag); + unsigned long eslot = paravirt_get_slot(etag); + + return (ebundle - sbundle) * 3 + eslot - sslot + 1; +} + +unsigned long __init_or_module +paravirt_get_next_tag(unsigned long tag) +{ + unsigned long slot = paravirt_get_slot(tag); + + switch (slot) { + case 0: + case 1: + return tag + 1; + case 2: { + bundle_t *bundle = paravirt_get_bundle(tag); + return (unsigned long)(bundle + 1); + } + default: + BUG(); + } + /* NOTREACHED */ +} + +ia64_inst_t __init_or_module +paravirt_read_slot0(const bundle_t *bundle) +{ + ia64_inst_t inst; + inst.l = bundle->quad0.slot0; + return inst; +} + +ia64_inst_t __init_or_module +paravirt_read_slot1(const bundle_t *bundle) +{ + ia64_inst_t inst; + inst.l = bundle->quad0.slot1_p0 | + ((unsigned long long)bundle->quad1.slot1_p1 << 18UL); + return inst; +} + +ia64_inst_t __init_or_module +paravirt_read_slot2(const bundle_t *bundle) +{ + ia64_inst_t inst; + inst.l = bundle->quad1.slot2; + return inst; +} + +ia64_inst_t __init_or_module +paravirt_read_inst(unsigned long tag) +{ + bundle_t *bundle = paravirt_get_bundle(tag); + unsigned long slot = paravirt_get_slot(tag); + + switch (slot) { + case 0: + return paravirt_read_slot0(bundle); + case 1: + return paravirt_read_slot1(bundle); + case 2: + return paravirt_read_slot2(bundle); + default: + 
BUG(); + } + /* NOTREACHED */ +} + +void __init_or_module +paravirt_write_slot0(bundle_t *bundle, ia64_inst_t inst) +{ + bundle->quad0.slot0 = inst.l; +} + +void __init_or_module +paravirt_write_slot1(bundle_t *bundle, ia64_inst_t inst) +{ + bundle->quad0.slot1_p0 = inst.l; + bundle->quad1.slot1_p1 = inst.l >> 18UL; +} + +void __init_or_module +paravirt_write_slot2(bundle_t *bundle, ia64_inst_t inst) +{ + bundle->quad1.slot2 = inst.l; +} + +void __init_or_module +paravirt_write_inst(unsigned long tag, ia64_inst_t inst) +{ + bundle_t *bundle = paravirt_get_bundle(tag); + unsigned long slot = paravirt_get_slot(tag); + + switch (slot) { + case 0: + paravirt_write_slot0(bundle, inst); + break; + case 1: + paravirt_write_slot1(bundle, inst); + break; + case 2: + paravirt_write_slot2(bundle, inst); + break; + default: + BUG(); + break; + } + paravirt_flush_i_cache_range(bundle, sizeof(*bundle)); +} + +/* for debug */ +void +paravirt_print_bundle(const bundle_t *bundle) +{ + const unsigned long *quad = (const unsigned long *)bundle; + ia64_inst_t slot0 = paravirt_read_slot0(bundle); + ia64_inst_t slot1 = paravirt_read_slot1(bundle); + ia64_inst_t slot2 = paravirt_read_slot2(bundle); + + printk(KERN_DEBUG + "bundle 0x%p 0x%016lx 0x%016lx\n", bundle, quad[0], quad[1]); + printk(KERN_DEBUG + "bundle template 0x%x\n", + bundle->quad0.template); + printk(KERN_DEBUG + "slot0 0x%lx slot1_p0 0x%lx slot1_p1 0x%lx slot2 0x%lx\n", + (unsigned long)bundle->quad0.slot0, + (unsigned long)bundle->quad0.slot1_p0, + (unsigned long)bundle->quad1.slot1_p1, + (unsigned long)bundle->quad1.slot2); + printk(KERN_DEBUG + "slot0 0x%016llx slot1 0x%016llx slot2 0x%016llx\n", + slot0.l, slot1.l, slot2.l); +} + +static int noreplace_paravirt __init_or_module = 0; + +static int __init setup_noreplace_paravirt(char *str) +{ + noreplace_paravirt = 1; + return 1; +} +__setup("noreplace-paravirt", setup_noreplace_paravirt); + +#ifdef ASM_SUPPORTED +static void __init_or_module +fill_nop_bundle(void 
*sbundle, void *ebundle) +{ + extern const char paravirt_nop_bundle[]; + extern const unsigned long paravirt_nop_bundle_size; + + void *bundle = sbundle; + + BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0); + BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0); + + while (bundle < ebundle) { + memcpy(bundle, paravirt_nop_bundle, paravirt_nop_bundle_size); + + bundle += paravirt_nop_bundle_size; + } +} + +/* helper function */ +unsigned long __init_or_module +__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type, + const struct paravirt_patch_bundle_elem *elems, + unsigned long nelems, + const struct paravirt_patch_bundle_elem **found) +{ + unsigned long used = 0; + unsigned long i; + + BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0); + BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0); + + found = NULL; + for (i = 0; i < nelems; i++) { + const struct paravirt_patch_bundle_elem *p = &elems[i]; + if (p->type == type) { + unsigned long need = p->ebundle - p->sbundle; + unsigned long room = ebundle - sbundle; + + if (found != NULL) + *found = p; + + if (room < need) { + /* no room to replace. skip it */ + printk(KERN_DEBUG + "the space is too small to put " + "bundles. 
type %ld need %ld room %ld\n", + type, need, room); + break; + } + + used = need; + memcpy(sbundle, p->sbundle, used); + break; + } + } + + return used; +} + +void __init_or_module +paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start, + const struct paravirt_patch_site_bundle *end) +{ + const struct paravirt_patch_site_bundle *p; + + if (noreplace_paravirt) + return; + if (pv_init_ops.patch_bundle == NULL) + return; + + for (p = start; p < end; p++) { + unsigned long used; + + used = (*pv_init_ops.patch_bundle)(p->sbundle, p->ebundle, + p->type); + if (used == 0) + continue; + + fill_nop_bundle(p->sbundle + used, p->ebundle); + paravirt_flush_i_cache_range(p->sbundle, + p->ebundle - p->sbundle); + } + ia64_sync_i(); + ia64_srlz_i(); +} + +/* + * nop.i, nop.m, nop.f instruction are same format. + * but nop.b has differennt format. + * This doesn't support nop.b for now. + */ +static void __init_or_module +fill_nop_inst(unsigned long stag, unsigned long etag) +{ + extern const bundle_t paravirt_nop_mfi_inst_bundle[]; + unsigned long tag; + const ia64_inst_t nop_inst = + paravirt_read_slot0(paravirt_nop_mfi_inst_bundle); + + for (tag = stag; tag < etag; tag = paravirt_get_next_tag(tag)) + paravirt_write_inst(tag, nop_inst); +} + +void __init_or_module +paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start, + const struct paravirt_patch_site_inst *end) +{ + const struct paravirt_patch_site_inst *p; + + if (noreplace_paravirt) + return; + if (pv_init_ops.patch_inst == NULL) + return; + + for (p = start; p < end; p++) { + unsigned long tag; + bundle_t *sbundle; + bundle_t *ebundle; + + tag = (*pv_init_ops.patch_inst)(p->stag, p->etag, p->type); + if (tag == p->stag) + continue; + + fill_nop_inst(tag, p->etag); + sbundle = paravirt_get_bundle(p->stag); + ebundle = paravirt_get_bundle(p->etag) + 1; + paravirt_flush_i_cache_range(sbundle, (ebundle - sbundle) * + sizeof(bundle_t)); + } + ia64_sync_i(); + ia64_srlz_i(); +} +#endif /* 
ASM_SUPPOTED */ + +/* brl.cond.sptk.many X3 */ +typedef union inst_x3_op { + ia64_inst_t inst; + struct { + unsigned long qp: 6; + unsigned long btyp: 3; + unsigned long unused: 3; + unsigned long p: 1; + unsigned long imm20b: 20; + unsigned long wh: 2; + unsigned long d: 1; + unsigned long i: 1; + unsigned long opcode: 4; + }; + unsigned long l; +} inst_x3_op_t; + +typedef union inst_x3_imm { + ia64_inst_t inst; + struct { + unsigned long unused: 2; + unsigned long imm39: 39; + }; + unsigned long l; +} inst_x3_imm_t; + +void __init_or_module +paravirt_patch_reloc_brl(unsigned long tag, const void *target) +{ + unsigned long tag_op = paravirt_get_next_tag(tag); + unsigned long tag_imm = tag; + bundle_t *bundle = paravirt_get_bundle(tag); + + ia64_inst_t inst_op = paravirt_read_inst(tag_op); + ia64_inst_t inst_imm = paravirt_read_inst(tag_imm); + + inst_x3_op_t inst_x3_op = { .l = inst_op.l }; + inst_x3_imm_t inst_x3_imm = { .l = inst_imm.l }; + + unsigned long imm60 = + ((unsigned long)target - (unsigned long)bundle) >> 4; + + BUG_ON(paravirt_get_slot(tag) != 1); /* MLX */ + BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0); + + /* imm60[59] 1bit */ + inst_x3_op.i = (imm60 >> 59) & 1; + /* imm60[19:0] 20bit */ + inst_x3_op.imm20b = imm60 & ((1UL << 20) - 1); + /* imm60[58:20] 39bit */ + inst_x3_imm.imm39 = (imm60 >> 20) & ((1UL << 39) - 1); + + inst_op.l = inst_x3_op.l; + inst_imm.l = inst_x3_imm.l; + + paravirt_write_inst(tag_op, inst_op); + paravirt_write_inst(tag_imm, inst_imm); +} + +/* br.cond.sptk.many B1 */ +typedef union inst_b1 { + ia64_inst_t inst; + struct { + unsigned long qp: 6; + unsigned long btype: 3; + unsigned long unused: 3; + unsigned long p: 1; + unsigned long imm20b: 20; + unsigned long wh: 2; + unsigned long d: 1; + unsigned long s: 1; + unsigned long opcode: 4; + }; + unsigned long l; +} inst_b1_t; + +void __init +paravirt_patch_reloc_br(unsigned long tag, const void *target) +{ + bundle_t *bundle = paravirt_get_bundle(tag); + 
ia64_inst_t inst = paravirt_read_inst(tag); + unsigned long target25 = (unsigned long)target - (unsigned long)bundle; + inst_b1_t inst_b1; + + BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0); + + inst_b1.l = inst.l; + if (target25 & (1UL << 63)) + inst_b1.s = 1; + else + inst_b1.s = 0; + + inst_b1.imm20b = target25 >> 4; + inst.l = inst_b1.l; + + paravirt_write_inst(tag, inst); +} + +void __init +__paravirt_patch_apply_branch( + unsigned long tag, unsigned long type, + const struct paravirt_patch_branch_target *entries, + unsigned int nr_entries) +{ + unsigned int i; + for (i = 0; i < nr_entries; i++) { + if (entries[i].type == type) { + paravirt_patch_reloc_br(tag, entries[i].entry); + break; + } + } +} + +static void __init +paravirt_patch_apply_branch(const struct paravirt_patch_site_branch *start, + const struct paravirt_patch_site_branch *end) +{ + const struct paravirt_patch_site_branch *p; + + if (noreplace_paravirt) + return; + if (pv_init_ops.patch_branch == NULL) + return; + + for (p = start; p < end; p++) + (*pv_init_ops.patch_branch)(p->tag, p->type); + + ia64_sync_i(); + ia64_srlz_i(); +} + +void __init +paravirt_patch_apply(void) +{ + extern const char __start_paravirt_bundles[]; + extern const char __stop_paravirt_bundles[]; + extern const char __start_paravirt_insts[]; + extern const char __stop_paravirt_insts[]; + extern const char __start_paravirt_branches[]; + extern const char __stop_paravirt_branches[]; + + paravirt_patch_apply_bundle((const struct paravirt_patch_site_bundle *) + __start_paravirt_bundles, + (const struct paravirt_patch_site_bundle *) + __stop_paravirt_bundles); + paravirt_patch_apply_inst((const struct paravirt_patch_site_inst *) + __start_paravirt_insts, + (const struct paravirt_patch_site_inst *) + __stop_paravirt_insts); + paravirt_patch_apply_branch((const struct paravirt_patch_site_branch *) + __start_paravirt_branches, + (const struct paravirt_patch_site_branch *) + __stop_paravirt_branches); +} + +/* + * 
Local variables: + * mode: C + * c-set-style: "linux" + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + */ diff --git a/kernel/arch/ia64/kernel/paravirt_patchlist.c b/kernel/arch/ia64/kernel/paravirt_patchlist.c new file mode 100644 index 000000000..0a7072066 --- /dev/null +++ b/kernel/arch/ia64/kernel/paravirt_patchlist.c @@ -0,0 +1,81 @@ +/****************************************************************************** + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include + +#define DECLARE(name) \ + extern unsigned long \ + __ia64_native_start_gate_##name##_patchlist[]; \ + extern unsigned long \ + __ia64_native_end_gate_##name##_patchlist[] + +DECLARE(fsyscall); +DECLARE(brl_fsys_bubble_down); +DECLARE(vtop); +DECLARE(mckinley_e9); + +extern unsigned long __start_gate_section[]; + +#define ASSIGN(name) \ + .start_##name##_patchlist = \ + (unsigned long)__ia64_native_start_gate_##name##_patchlist, \ + .end_##name##_patchlist = \ + (unsigned long)__ia64_native_end_gate_##name##_patchlist + +struct pv_patchdata pv_patchdata __initdata = { + ASSIGN(fsyscall), + ASSIGN(brl_fsys_bubble_down), + ASSIGN(vtop), + ASSIGN(mckinley_e9), + + .gate_section = (void*)__start_gate_section, +}; + + +unsigned long __init +paravirt_get_gate_patchlist(enum pv_gate_patchlist type) +{ + +#define CASE(NAME, name) \ + case PV_GATE_START_##NAME: \ + return pv_patchdata.start_##name##_patchlist; \ + case PV_GATE_END_##NAME: \ + return pv_patchdata.end_##name##_patchlist; \ + + switch (type) { + CASE(FSYSCALL, fsyscall); + CASE(BRL_FSYS_BUBBLE_DOWN, brl_fsys_bubble_down); + CASE(VTOP, vtop); + CASE(MCKINLEY_E9, mckinley_e9); + default: + BUG(); + break; + } + return 0; +} + +void * __init +paravirt_get_gate_section(void) +{ + return pv_patchdata.gate_section; +} diff --git a/kernel/arch/ia64/kernel/paravirt_patchlist.h b/kernel/arch/ia64/kernel/paravirt_patchlist.h new file mode 100644 index 000000000..67cffc364 --- /dev/null +++ b/kernel/arch/ia64/kernel/paravirt_patchlist.h @@ -0,0 +1,24 @@ +/****************************************************************************** + * linux/arch/ia64/xen/paravirt_patchlist.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include + diff --git a/kernel/arch/ia64/kernel/paravirtentry.S b/kernel/arch/ia64/kernel/paravirtentry.S new file mode 100644 index 000000000..92d880c4d --- /dev/null +++ b/kernel/arch/ia64/kernel/paravirtentry.S @@ -0,0 +1,121 @@ +/****************************************************************************** + * linux/arch/ia64/xen/paravirtentry.S + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include "entry.h" + +#define DATA8(sym, init_value) \ + .pushsection .data..read_mostly ; \ + .align 8 ; \ + .global sym ; \ + sym: ; \ + data8 init_value ; \ + .popsection + +#define BRANCH(targ, reg, breg, type) \ + PARAVIRT_PATCH_SITE_BR(PARAVIRT_PATCH_TYPE_BR_ ## type) ; \ + ;; \ + movl reg=targ ; \ + ;; \ + ld8 reg=[reg] ; \ + ;; \ + mov breg=reg ; \ + br.cond.sptk.many breg + +#define BRANCH_PROC(sym, reg, breg, type) \ + DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ + GLOBAL_ENTRY(paravirt_ ## sym) ; \ + BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ; \ + END(paravirt_ ## sym) + +#define BRANCH_PROC_UNWINFO(sym, reg, breg, type) \ + DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ + GLOBAL_ENTRY(paravirt_ ## sym) ; \ + PT_REGS_UNWIND_INFO(0) ; \ + BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ; \ + END(paravirt_ ## sym) + + +BRANCH_PROC(switch_to, r22, b7, SWITCH_TO) +BRANCH_PROC_UNWINFO(leave_syscall, r22, b7, LEAVE_SYSCALL) +BRANCH_PROC(work_processed_syscall, r2, b7, WORK_PROCESSED_SYSCALL) +BRANCH_PROC_UNWINFO(leave_kernel, r22, b7, LEAVE_KERNEL) + + +#ifdef CONFIG_MODULES +#define __INIT_OR_MODULE .text +#define __INITDATA_OR_MODULE .data +#else +#define __INIT_OR_MODULE __INIT +#define __INITDATA_OR_MODULE __INITDATA +#endif /* CONFIG_MODULES */ + + __INIT_OR_MODULE + GLOBAL_ENTRY(paravirt_fc_i) + fc.i r32 + br.ret.sptk.many rp + END(paravirt_fc_i) + __FINIT + + __INIT_OR_MODULE + .align 32 + GLOBAL_ENTRY(paravirt_nop_b_inst_bundle) + { + nop.b 0 + nop.b 0 + nop.b 0 + } + END(paravirt_nop_b_inst_bundle) + __FINIT + + /* NOTE: nop.[mfi] has same format */ + __INIT_OR_MODULE + GLOBAL_ENTRY(paravirt_nop_mfi_inst_bundle) + { + nop.m 0 + nop.f 0 + nop.i 0 
+ } + END(paravirt_nop_mfi_inst_bundle) + __FINIT + + __INIT_OR_MODULE + GLOBAL_ENTRY(paravirt_nop_bundle) +paravirt_nop_bundle_start: + { + nop 0 + nop 0 + nop 0 + } +paravirt_nop_bundle_end: + END(paravirt_nop_bundle) + __FINIT + + __INITDATA_OR_MODULE + .align 8 + .global paravirt_nop_bundle_size +paravirt_nop_bundle_size: + data8 paravirt_nop_bundle_end - paravirt_nop_bundle_start diff --git a/kernel/arch/ia64/kernel/patch.c b/kernel/arch/ia64/kernel/patch.c new file mode 100644 index 000000000..1cf091793 --- /dev/null +++ b/kernel/arch/ia64/kernel/patch.c @@ -0,0 +1,256 @@ +/* + * Instruction-patching support. + * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +#include +#include + +#include +#include +#include +#include +#include + +/* + * This was adapted from code written by Tony Luck: + * + * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle + * like this: + * + * 6 6 5 4 3 2 1 + * 3210987654321098765432109876543210987654321098765432109876543210 + * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG + * + * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB + */ +static u64 +get_imm64 (u64 insn_addr) +{ + u64 *p = (u64 *) (insn_addr & -16); /* mask out slot number */ + + return ( (p[1] & 0x0800000000000000UL) << 4) | /*A*/ + ((p[1] & 0x00000000007fffffUL) << 40) | /*B*/ + ((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/ + ((p[1] & 0x0000100000000000UL) >> 23) | /*D*/ + ((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/ + ((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/ + ((p[1] & 0x000007f000000000UL) >> 36); /*G*/ +} + +/* Patch instruction with "val" where "mask" has 1 bits. 
*/ +void +ia64_patch (u64 insn_addr, u64 mask, u64 val) +{ + u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16); +# define insn_mask ((1UL << 41) - 1) + unsigned long shift; + + b0 = b[0]; b1 = b[1]; + shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */ + if (shift >= 64) { + m1 = mask << (shift - 64); + v1 = val << (shift - 64); + } else { + m0 = mask << shift; m1 = mask >> (64 - shift); + v0 = val << shift; v1 = val >> (64 - shift); + b[0] = (b0 & ~m0) | (v0 & m0); + } + b[1] = (b1 & ~m1) | (v1 & m1); +} + +void +ia64_patch_imm64 (u64 insn_addr, u64 val) +{ + /* The assembler may generate offset pointing to either slot 1 + or slot 2 for a long (2-slot) instruction, occupying slots 1 + and 2. */ + insn_addr &= -16UL; + ia64_patch(insn_addr + 2, + 0x01fffefe000UL, ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */ + | ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */ + | ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */ + | ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */ + | ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */)); + ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22); +} + +void +ia64_patch_imm60 (u64 insn_addr, u64 val) +{ + /* The assembler may generate offset pointing to either slot 1 + or slot 2 for a long (2-slot) instruction, occupying slots 1 + and 2. */ + insn_addr &= -16UL; + ia64_patch(insn_addr + 2, + 0x011ffffe000UL, ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */ + | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */)); + ia64_patch(insn_addr + 1, 0x1fffffffffcUL, val >> 18); +} + +/* + * We sometimes need to load the physical address of a kernel + * object. Often we can convert the virtual address to physical + * at execution time, but sometimes (either for performance reasons + * or during error recovery) we cannot do this. Patch the marked + * bundles to load the physical address.
+ */ +void __init +ia64_patch_vtop (unsigned long start, unsigned long end) +{ + s32 *offp = (s32 *) start; + u64 ip; + + while (offp < (s32 *) end) { + ip = (u64) offp + *offp; + + /* replace virtual address with corresponding physical address: */ + ia64_patch_imm64(ip, ia64_tpa(get_imm64(ip))); + ia64_fc((void *) ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +/* + * Disable the RSE workaround by turning the conditional branch + * that we tagged in each place the workaround was used into an + * unconditional branch. + */ +void __init +ia64_patch_rse (unsigned long start, unsigned long end) +{ + s32 *offp = (s32 *) start; + u64 ip, *b; + + while (offp < (s32 *) end) { + ip = (u64) offp + *offp; + + b = (u64 *)(ip & -16); + b[1] &= ~0xf800000L; + ia64_fc((void *) ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +void __init +ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) +{ + static int first_time = 1; + int need_workaround; + s32 *offp = (s32 *) start; + u64 *wp; + + need_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0); + + if (first_time) { + first_time = 0; + if (need_workaround) + printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n"); + } + if (need_workaround) + return; + + while (offp < (s32 *) end) { + wp = (u64 *) ia64_imva((char *) offp + *offp); + wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */ + wp[1] = 0x0084006880000200UL; + wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */ + wp[3] = 0x0004000000000200UL; + ia64_fc(wp); ia64_fc(wp + 2); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +extern unsigned long ia64_native_fsyscall_table[NR_syscalls]; +extern char ia64_native_fsys_bubble_down[]; +struct pv_fsys_data pv_fsys_data __initdata = { + .fsyscall_table = (unsigned long *)ia64_native_fsyscall_table, + .fsys_bubble_down = (void *)ia64_native_fsys_bubble_down, +}; + +unsigned long * __init +paravirt_get_fsyscall_table(void) +{ + 
return pv_fsys_data.fsyscall_table; +} + +char * __init +paravirt_get_fsys_bubble_down(void) +{ + return pv_fsys_data.fsys_bubble_down; +} + +static void __init +patch_fsyscall_table (unsigned long start, unsigned long end) +{ + u64 fsyscall_table = (u64)paravirt_get_fsyscall_table(); + s32 *offp = (s32 *) start; + u64 ip; + + while (offp < (s32 *) end) { + ip = (u64) ia64_imva((char *) offp + *offp); + ia64_patch_imm64(ip, fsyscall_table); + ia64_fc((void *) ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +static void __init +patch_brl_fsys_bubble_down (unsigned long start, unsigned long end) +{ + u64 fsys_bubble_down = (u64)paravirt_get_fsys_bubble_down(); + s32 *offp = (s32 *) start; + u64 ip; + + while (offp < (s32 *) end) { + ip = (u64) offp + *offp; + ia64_patch_imm60((u64) ia64_imva((void *) ip), + (u64) (fsys_bubble_down - (ip & -16)) / 16); + ia64_fc((void *) ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +void __init +ia64_patch_gate (void) +{ +# define START(name) paravirt_get_gate_patchlist(PV_GATE_START_##name) +# define END(name) paravirt_get_gate_patchlist(PV_GATE_END_##name) + + patch_fsyscall_table(START(FSYSCALL), END(FSYSCALL)); + patch_brl_fsys_bubble_down(START(BRL_FSYS_BUBBLE_DOWN), END(BRL_FSYS_BUBBLE_DOWN)); + ia64_patch_vtop(START(VTOP), END(VTOP)); + ia64_patch_mckinley_e9(START(MCKINLEY_E9), END(MCKINLEY_E9)); +} + +void ia64_patch_phys_stack_reg(unsigned long val) +{ + s32 * offp = (s32 *) __start___phys_stack_reg_patchlist; + s32 * end = (s32 *) __end___phys_stack_reg_patchlist; + u64 ip, mask, imm; + + /* see instruction format A4: adds r1 = imm13, r3 */ + mask = (0x3fUL << 27) | (0x7f << 13); + imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13; + + while (offp < end) { + ip = (u64) offp + *offp; + ia64_patch(ip, mask, imm); + ia64_fc((void *)ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} diff --git a/kernel/arch/ia64/kernel/pci-dma.c b/kernel/arch/ia64/kernel/pci-dma.c new file mode 100644 index 
000000000..992c1098c --- /dev/null +++ b/kernel/arch/ia64/kernel/pci-dma.c @@ -0,0 +1,110 @@ +/* + * Dynamic DMA mapping support. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifdef CONFIG_INTEL_IOMMU + +#include + +#include + +dma_addr_t bad_dma_address __read_mostly; +EXPORT_SYMBOL(bad_dma_address); + +static int iommu_sac_force __read_mostly; + +int no_iommu __read_mostly; +#ifdef CONFIG_IOMMU_DEBUG +int force_iommu __read_mostly = 1; +#else +int force_iommu __read_mostly; +#endif + +int iommu_pass_through; + +extern struct dma_map_ops intel_dma_ops; + +static int __init pci_iommu_init(void) +{ + if (iommu_detected) + intel_iommu_init(); + + return 0; +} + +/* Must execute after PCI subsystem */ +fs_initcall(pci_iommu_init); + +void pci_iommu_shutdown(void) +{ + return; +} + +void __init +iommu_dma_init(void) +{ + return; +} + +int iommu_dma_supported(struct device *dev, u64 mask) +{ + /* Copied from i386. Doesn't make much sense, because it will + only work for pci_alloc_coherent. + The caller just has to use GFP_DMA in this case. */ + if (mask < DMA_BIT_MASK(24)) + return 0; + + /* Tell the device to use SAC when IOMMU force is on. This + allows the driver to use cheaper accesses in some cases. + + Problem with this is that if we overflow the IOMMU area and + return DAC as fallback address the device may not handle it + correctly. + + As a special case some controllers have a 39bit address + mode that is as efficient as 32bit (aic79xx). Don't force + SAC for these. Assume all masks <= 40 bits are of this + type. Normally this doesn't make any difference, but gives + more gentle handling of IOMMU overflow. 
*/ + if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) { + dev_info(dev, "Force SAC with mask %llx\n", mask); + return 0; + } + + return 1; +} +EXPORT_SYMBOL(iommu_dma_supported); + +void __init pci_iommu_alloc(void) +{ + dma_ops = &intel_dma_ops; + + dma_ops->sync_single_for_cpu = machvec_dma_sync_single; + dma_ops->sync_sg_for_cpu = machvec_dma_sync_sg; + dma_ops->sync_single_for_device = machvec_dma_sync_single; + dma_ops->sync_sg_for_device = machvec_dma_sync_sg; + dma_ops->dma_supported = iommu_dma_supported; + + /* + * The order of these functions is important for + * fall-back/fail-over reasons + */ + detect_intel_iommu(); + +#ifdef CONFIG_SWIOTLB + pci_swiotlb_init(); +#endif +} + +#endif diff --git a/kernel/arch/ia64/kernel/pci-swiotlb.c b/kernel/arch/ia64/kernel/pci-swiotlb.c new file mode 100644 index 000000000..939260aea --- /dev/null +++ b/kernel/arch/ia64/kernel/pci-swiotlb.c @@ -0,0 +1,67 @@ +/* Glue code to lib/swiotlb.c */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +int swiotlb __read_mostly; +EXPORT_SYMBOL(swiotlb); + +static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + struct dma_attrs *attrs) +{ + if (dev->coherent_dma_mask != DMA_BIT_MASK(64)) + gfp |= GFP_DMA; + return swiotlb_alloc_coherent(dev, size, dma_handle, gfp); +} + +static void ia64_swiotlb_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_addr, + struct dma_attrs *attrs) +{ + swiotlb_free_coherent(dev, size, vaddr, dma_addr); +} + +struct dma_map_ops swiotlb_dma_ops = { + .alloc = ia64_swiotlb_alloc_coherent, + .free = ia64_swiotlb_free_coherent, + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + 
.sync_sg_for_device = swiotlb_sync_sg_for_device, + .dma_supported = swiotlb_dma_supported, + .mapping_error = swiotlb_dma_mapping_error, +}; + +void __init swiotlb_dma_init(void) +{ + dma_ops = &swiotlb_dma_ops; + swiotlb_init(1); +} + +void __init pci_swiotlb_init(void) +{ + if (!iommu_detected) { +#ifdef CONFIG_IA64_GENERIC + swiotlb = 1; + printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); + machvec_init("dig"); + swiotlb_init(1); + dma_ops = &swiotlb_dma_ops; +#else + panic("Unable to find Intel IOMMU"); +#endif + } +} diff --git a/kernel/arch/ia64/kernel/perfmon.c b/kernel/arch/ia64/kernel/perfmon.c new file mode 100644 index 000000000..60e02f774 --- /dev/null +++ b/kernel/arch/ia64/kernel/perfmon.c @@ -0,0 +1,6782 @@ +/* + * This file implements the perfmon-2 subsystem which is used + * to program the IA-64 Performance Monitoring Unit (PMU). + * + * The initial version of perfmon.c was written by + * Ganesh Venkitachalam, IBM Corp. + * + * Then it was modified for perfmon-1.x by Stephane Eranian and + * David Mosberger, Hewlett Packard Co. + * + * Version Perfmon-2.x is a rewrite of perfmon-1.x + * by Stephane Eranian, Hewlett Packard Co. 
+ * + * Copyright (C) 1999-2005 Hewlett Packard Co + * Stephane Eranian + * David Mosberger-Tang + * + * More information about perfmon available at: + * http://www.hpl.hp.com/research/linux/perfmon + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PERFMON +/* + * perfmon context state + */ +#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */ +#define PFM_CTX_LOADED 2 /* context is loaded onto a task */ +#define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */ +#define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */ + +#define PFM_INVALID_ACTIVATION (~0UL) + +#define PFM_NUM_PMC_REGS 64 /* PMC save area for ctxsw */ +#define PFM_NUM_PMD_REGS 64 /* PMD save area for ctxsw */ + +/* + * depth of message queue + */ +#define PFM_MAX_MSGS 32 +#define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail) + +/* + * type of a PMU register (bitmask). 
+ * bitmask structure: + * bit0 : register implemented + * bit1 : end marker + * bit2-3 : reserved + * bit4 : pmc has pmc.pm + * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter + * bit6-7 : register type + * bit8-31: reserved + */ +#define PFM_REG_NOTIMPL 0x0 /* not implemented at all */ +#define PFM_REG_IMPL 0x1 /* register implemented */ +#define PFM_REG_END 0x2 /* end marker */ +#define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */ +#define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */ +#define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */ +#define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */ +#define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */ + +#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END) +#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END) + +#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY) + +/* i assumed unsigned */ +#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL)) +#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL)) + +/* XXX: these assume that register i is implemented */ +#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) +#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) +#define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR) +#define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL) + +#define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value +#define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask +#define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0] +#define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0] + +#define PFM_NUM_IBRS IA64_NUM_DBG_REGS +#define PFM_NUM_DBRS IA64_NUM_DBG_REGS + 
+#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0) +#define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling) +#define PFM_CTX_TASK(h) (h)->ctx_task + +#define PMU_PMC_OI 5 /* position of pmc.oi bit */ + +/* XXX: does not support more than 64 PMDs */ +#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask) +#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL) + +#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask) + +#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64) +#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64) +#define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1) +#define PFM_CODE_RR 0 /* requesting code range restriction */ +#define PFM_DATA_RR 1 /* requesting data range restriction */ + +#define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v) +#define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v) +#define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info) + +#define RDEP(x) (1UL<<(x)) + +/* + * context protection macros + * in SMP: + * - we need to protect against CPU concurrency (spin_lock) + * - we need to protect against PMU overflow interrupts (local_irq_disable) + * in UP: + * - we need to protect against PMU overflow interrupts (local_irq_disable) + * + * spin_lock_irqsave()/spin_unlock_irqrestore(): + * in SMP: local_irq_disable + spin_lock + * in UP : local_irq_disable + * + * spin_lock()/spin_unlock(): + * in UP : removed automatically + * in SMP: protect against context accesses from other CPU. interrupts + * are not masked. This is useful for the PMU interrupt handler + * because we know we will not get PMU concurrency in that code.
+ */ +#define PROTECT_CTX(c, f) \ + do { \ + DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \ + spin_lock_irqsave(&(c)->ctx_lock, f); \ + DPRINT(("spinlocked ctx %p by [%d]\n", c, task_pid_nr(current))); \ + } while(0) + +#define UNPROTECT_CTX(c, f) \ + do { \ + DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \ + spin_unlock_irqrestore(&(c)->ctx_lock, f); \ + } while(0) + +#define PROTECT_CTX_NOPRINT(c, f) \ + do { \ + spin_lock_irqsave(&(c)->ctx_lock, f); \ + } while(0) + + +#define UNPROTECT_CTX_NOPRINT(c, f) \ + do { \ + spin_unlock_irqrestore(&(c)->ctx_lock, f); \ + } while(0) + + +#define PROTECT_CTX_NOIRQ(c) \ + do { \ + spin_lock(&(c)->ctx_lock); \ + } while(0) + +#define UNPROTECT_CTX_NOIRQ(c) \ + do { \ + spin_unlock(&(c)->ctx_lock); \ + } while(0) + + +#ifdef CONFIG_SMP + +#define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number) +#define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++ +#define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION() + +#else /* !CONFIG_SMP */ +#define SET_ACTIVATION(t) do {} while(0) +#define GET_ACTIVATION(t) do {} while(0) +#define INC_ACTIVATION(t) do {} while(0) +#endif /* CONFIG_SMP */ + +#define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0) +#define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner) +#define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx) + +#define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g) +#define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g) + +#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0) + +/* + * cmp0 must be the value of pmc0 + */ +#define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL) + +#define PFMFS_MAGIC 0xa0b4d889 + +/* + * debugging + */ +#define PFM_DEBUGGING 1 +#ifdef PFM_DEBUGGING +#define DPRINT(a) \ + do { \ + if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __func__, 
__LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \ + } while (0) + +#define DPRINT_ovfl(a) \ + do { \ + if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \ + } while (0) +#endif + +/* + * 64-bit software counter structure + * + * the next_reset_type is applied to the next call to pfm_reset_regs() + */ +typedef struct { + unsigned long val; /* virtual 64bit counter value */ + unsigned long lval; /* last reset value */ + unsigned long long_reset; /* reset value on sampling overflow */ + unsigned long short_reset; /* reset value on overflow */ + unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */ + unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflows */ + unsigned long seed; /* seed for random-number generator */ + unsigned long mask; /* mask for random-number generator */ + unsigned int flags; /* notify/do not notify */ + unsigned long eventid; /* overflow event identifier */ +} pfm_counter_t; + +/* + * context flags + */ +typedef struct { + unsigned int block:1; /* when 1, task will be blocked on user notifications */ + unsigned int system:1; /* do system wide monitoring */ + unsigned int using_dbreg:1; /* using range restrictions (debug registers) */ + unsigned int is_sampling:1; /* true if using a custom format */ + unsigned int excl_idle:1; /* exclude idle task in system wide session */ + unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */ + unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */ + unsigned int no_msg:1; /* no message sent on overflow */ + unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */ + unsigned int reserved:22; +} pfm_context_flags_t; + +#define PFM_TRAP_REASON_NONE 0x0 /* default value */ +#define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */ +#define PFM_TRAP_REASON_RESET 0x2 /* we
need to reset PMDs */ + + +/* + * perfmon context: encapsulates all the state of a monitoring session + */ + +typedef struct pfm_context { + spinlock_t ctx_lock; /* context protection */ + + pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */ + unsigned int ctx_state; /* state: active/inactive (no bitfield) */ + + struct task_struct *ctx_task; /* task to which context is attached */ + + unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */ + + struct completion ctx_restart_done; /* use for blocking notification mode */ + + unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */ + unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */ + unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */ + + unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */ + unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */ + unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */ + + unsigned long ctx_pmcs[PFM_NUM_PMC_REGS]; /* saved copies of PMC values */ + + unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */ + unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */ + unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */ + unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */ + + pfm_counter_t ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */ + + unsigned long th_pmcs[PFM_NUM_PMC_REGS]; /* PMC thread save state */ + unsigned long th_pmds[PFM_NUM_PMD_REGS]; /* PMD thread save state */ + + unsigned long ctx_saved_psr_up; /* only contains psr.up value */ + + unsigned long ctx_last_activation; /* context last activation number for last_cpu */ + unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */ + unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */ + + int ctx_fd; /* file descriptor 
used by this context */ + pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */ + + pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */ + void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */ + unsigned long ctx_smpl_size; /* size of sampling buffer */ + void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */ + + wait_queue_head_t ctx_msgq_wait; + pfm_msg_t ctx_msgq[PFM_MAX_MSGS]; + int ctx_msgq_head; + int ctx_msgq_tail; + struct fasync_struct *ctx_async_queue; + + wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */ +} pfm_context_t; + +/* + * check used to verify that a struct file really is + * a perfmon context file (its f_op is pfm_file_ops) + */ +#define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops) + +#define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context) + +#ifdef CONFIG_SMP +#define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v) +#define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu +#else +#define SET_LAST_CPU(ctx, v) do {} while(0) +#define GET_LAST_CPU(ctx) do {} while(0) +#endif + + +#define ctx_fl_block ctx_flags.block +#define ctx_fl_system ctx_flags.system +#define ctx_fl_using_dbreg ctx_flags.using_dbreg +#define ctx_fl_is_sampling ctx_flags.is_sampling +#define ctx_fl_excl_idle ctx_flags.excl_idle +#define ctx_fl_going_zombie ctx_flags.going_zombie +#define ctx_fl_trap_reason ctx_flags.trap_reason +#define ctx_fl_no_msg ctx_flags.no_msg +#define ctx_fl_can_restart ctx_flags.can_restart + +#define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = v; } while(0); +#define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking + +/* + * global information about all sessions + * mostly used to synchronize between system wide and per-process + */ +typedef struct { + spinlock_t pfs_lock; /* lock the structure */ + + unsigned int pfs_task_sessions; /* number of per task sessions */ + unsigned int pfs_sys_sessions; /* number of per system wide sessions */ + unsigned int pfs_sys_use_dbregs;
/* incremented when a system wide session uses debug regs */ + unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */ + struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */ +} pfm_session_t; + +/* + * information about a PMC or PMD. + * dep_pmd[]: a bitmask of dependent PMD registers + * dep_pmc[]: a bitmask of dependent PMC registers + */ +typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); +typedef struct { + unsigned int type; + int pm_pos; + unsigned long default_value; /* power-on default value */ + unsigned long reserved_mask; /* bitmask of reserved bits */ + pfm_reg_check_t read_check; + pfm_reg_check_t write_check; + unsigned long dep_pmd[4]; + unsigned long dep_pmc[4]; +} pfm_reg_desc_t; + +/* assume cnum is a valid monitor */ +#define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1) + +/* + * This structure is initialized at boot time and contains + * a description of the PMU main characteristics. 
+ * + * If the probe function is defined, detection is based + * on its return value: + * - 0 means recognized PMU + * - anything else means not supported + * When the probe function is not defined, then the pmu_family field + * is used and it must match the host CPU family such that: + * - cpu->family & config->pmu_family != 0 + */ +typedef struct { + unsigned long ovfl_val; /* overflow value for counters */ + + pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */ + pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */ + + unsigned int num_pmcs; /* number of PMCS: computed at init time */ + unsigned int num_pmds; /* number of PMDS: computed at init time */ + unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */ + unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */ + + char *pmu_name; /* PMU family name */ + unsigned int pmu_family; /* cpuid family pattern used to identify pmu */ + unsigned int flags; /* pmu specific flags */ + unsigned int num_ibrs; /* number of IBRS: computed at init time */ + unsigned int num_dbrs; /* number of DBRS: computed at init time */ + unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */ + int (*probe)(void); /* customized probe routine */ + unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */ +} pmu_config_t; +/* + * PMU specific flags + */ +#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */ + +/* + * debug register related type definitions + */ +typedef struct { + unsigned long ibr_mask:56; + unsigned long ibr_plm:4; + unsigned long ibr_ig:3; + unsigned long ibr_x:1; +} ibr_mask_reg_t; + +typedef struct { + unsigned long dbr_mask:56; + unsigned long dbr_plm:4; + unsigned long dbr_ig:2; + unsigned long dbr_w:1; + unsigned long dbr_r:1; +} dbr_mask_reg_t; + +typedef union { + unsigned long val; + ibr_mask_reg_t ibr; + dbr_mask_reg_t dbr; +} dbreg_t; + + +/* + * perfmon command descriptions + */ 
+typedef struct { + int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); + char *cmd_name; + int cmd_flags; + unsigned int cmd_narg; + size_t cmd_argsize; + int (*cmd_getsize)(void *arg, size_t *sz); +} pfm_cmd_desc_t; + +#define PFM_CMD_FD 0x01 /* command requires a file descriptor */ +#define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */ +#define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */ +#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */ + + +#define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name +#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ) +#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW) +#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD) +#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP) + +#define PFM_CMD_ARG_MANY -1 /* cannot be zero */ + +typedef struct { + unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */ + unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */ + unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */ + unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */ + unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing ovfl interrupts */ + unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */ + unsigned long pfm_smpl_handler_calls; + unsigned long pfm_smpl_handler_cycles; + char pad[SMP_CACHE_BYTES] ____cacheline_aligned; +} pfm_stats_t; + +/* + * perfmon internal variables + */ +static pfm_stats_t pfm_stats[NR_CPUS]; +static pfm_session_t pfm_sessions; /* global sessions information */ + +static DEFINE_SPINLOCK(pfm_alt_install_check); +static pfm_intr_handler_desc_t *pfm_alt_intr_handler; + +static struct proc_dir_entry *perfmon_dir; +static pfm_uuid_t pfm_null_uuid = {0,}; + +static spinlock_t 
pfm_buffer_fmt_lock; +static LIST_HEAD(pfm_buffer_fmt_list); + +static pmu_config_t *pmu_conf; + +/* sysctl() controls */ +pfm_sysctl_t pfm_sysctl; +EXPORT_SYMBOL(pfm_sysctl); + +static struct ctl_table pfm_ctl_table[] = { + { + .procname = "debug", + .data = &pfm_sysctl.debug, + .maxlen = sizeof(int), + .mode = 0666, + .proc_handler = proc_dointvec, + }, + { + .procname = "debug_ovfl", + .data = &pfm_sysctl.debug_ovfl, + .maxlen = sizeof(int), + .mode = 0666, + .proc_handler = proc_dointvec, + }, + { + .procname = "fastctxsw", + .data = &pfm_sysctl.fastctxsw, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec, + }, + { + .procname = "expert_mode", + .data = &pfm_sysctl.expert_mode, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec, + }, + {} +}; +static struct ctl_table pfm_sysctl_dir[] = { + { + .procname = "perfmon", + .mode = 0555, + .child = pfm_ctl_table, + }, + {} +}; +static struct ctl_table pfm_sysctl_root[] = { + { + .procname = "kernel", + .mode = 0555, + .child = pfm_sysctl_dir, + }, + {} +}; +static struct ctl_table_header *pfm_sysctl_header; + +static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); + +#define pfm_get_cpu_var(v) __ia64_per_cpu_var(v) +#define pfm_get_cpu_data(a,b) per_cpu(a, b) + +static inline void +pfm_put_task(struct task_struct *task) +{ + if (task != current) put_task_struct(task); +} + +static inline void +pfm_reserve_page(unsigned long a) +{ + SetPageReserved(vmalloc_to_page((void *)a)); +} +static inline void +pfm_unreserve_page(unsigned long a) +{ + ClearPageReserved(vmalloc_to_page((void*)a)); +} + +static inline unsigned long +pfm_protect_ctx_ctxsw(pfm_context_t *x) +{ + spin_lock(&(x)->ctx_lock); + return 0UL; +} + +static inline void +pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) +{ + spin_unlock(&(x)->ctx_lock); +} + +/* forward declaration */ +static const struct dentry_operations pfmfs_dentry_operations; + +static struct 
dentry * +pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) +{ + return mount_pseudo(fs_type, "pfm:", NULL, &pfmfs_dentry_operations, + PFMFS_MAGIC); +} + +static struct file_system_type pfm_fs_type = { + .name = "pfmfs", + .mount = pfmfs_mount, + .kill_sb = kill_anon_super, +}; +MODULE_ALIAS_FS("pfmfs"); + +DEFINE_PER_CPU(unsigned long, pfm_syst_info); +DEFINE_PER_CPU(struct task_struct *, pmu_owner); +DEFINE_PER_CPU(pfm_context_t *, pmu_ctx); +DEFINE_PER_CPU(unsigned long, pmu_activation_number); +EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info); + + +/* forward declaration */ +static const struct file_operations pfm_file_ops; + +/* + * forward declarations + */ +#ifndef CONFIG_SMP +static void pfm_lazy_save_regs (struct task_struct *ta); +#endif + +void dump_pmu_state(const char *); +static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); + +#include "perfmon_itanium.h" +#include "perfmon_mckinley.h" +#include "perfmon_montecito.h" +#include "perfmon_generic.h" + +static pmu_config_t *pmu_confs[]={ + &pmu_conf_mont, + &pmu_conf_mck, + &pmu_conf_ita, + &pmu_conf_gen, /* must be last */ + NULL +}; + + +static int pfm_end_notify_user(pfm_context_t *ctx); + +static inline void +pfm_clear_psr_pp(void) +{ + ia64_rsm(IA64_PSR_PP); + ia64_srlz_i(); +} + +static inline void +pfm_set_psr_pp(void) +{ + ia64_ssm(IA64_PSR_PP); + ia64_srlz_i(); +} + +static inline void +pfm_clear_psr_up(void) +{ + ia64_rsm(IA64_PSR_UP); + ia64_srlz_i(); +} + +static inline void +pfm_set_psr_up(void) +{ + ia64_ssm(IA64_PSR_UP); + ia64_srlz_i(); +} + +static inline unsigned long +pfm_get_psr(void) +{ + unsigned long tmp; + tmp = ia64_getreg(_IA64_REG_PSR); + ia64_srlz_i(); + return tmp; +} + +static inline void +pfm_set_psr_l(unsigned long val) +{ + ia64_setreg(_IA64_REG_PSR_L, val); + ia64_srlz_i(); +} + +static inline void +pfm_freeze_pmu(void) +{ + ia64_set_pmc(0,1UL); + ia64_srlz_d(); +} + +static inline void 
+pfm_unfreeze_pmu(void) +{ + ia64_set_pmc(0,0UL); + ia64_srlz_d(); +} + +/* + * write ibrs[0..nibrs-1] into IBR[0..nibrs-1], serializing after each write. + * the index is unsigned to match the type of the register count. + */ +static inline void +pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs) +{ + unsigned int i; + + for (i=0; i < nibrs; i++) { + ia64_set_ibr(i, ibrs[i]); + ia64_dv_serialize_instruction(); + } + ia64_srlz_i(); +} + +/* + * write dbrs[0..ndbrs-1] into DBR[0..ndbrs-1], serializing after each write. + * the index is unsigned to match the type of the register count. + */ +static inline void +pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs) +{ + unsigned int i; + + for (i=0; i < ndbrs; i++) { + ia64_set_dbr(i, dbrs[i]); + ia64_dv_serialize_data(); + } + ia64_srlz_d(); +} + +/* + * PMD[i] must be a counter. no check is made + */ +static inline unsigned long +pfm_read_soft_counter(pfm_context_t *ctx, int i) +{ + return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val); +} + +/* + * PMD[i] must be a counter. no check is made + */ +static inline void +pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val) +{ + unsigned long ovfl_val = pmu_conf->ovfl_val; + + ctx->ctx_pmds[i].val = val & ~ovfl_val; + /* + * writing to unimplemented part is ignored, so we do not need to + * mask off top part + */ + ia64_set_pmd(i, val & ovfl_val); +} + +/* + * reserve the slot at the tail of the context message queue. + * returns NULL when advancing the tail would reach the head, i.e., + * when the queue is full. + */ +static pfm_msg_t * +pfm_get_new_msg(pfm_context_t *ctx) +{ + int idx, next; + + next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS; + + DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); + if (next == ctx->ctx_msgq_head) return NULL; + + idx = ctx->ctx_msgq_tail; + ctx->ctx_msgq_tail = next; + + DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx)); + + return ctx->ctx_msgq+idx; +} + +/* + * remove and return the message at the head of the context message + * queue, or NULL when PFM_CTXQ_EMPTY() reports nothing is queued. + */ +static pfm_msg_t * +pfm_get_next_msg(pfm_context_t *ctx) +{ + pfm_msg_t *msg; + + DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); + + if (PFM_CTXQ_EMPTY(ctx)) return NULL; + + /* + * get oldest message + */ + msg = ctx->ctx_msgq+ctx->ctx_msgq_head; + + /* + * and move forward + */ + ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS; + + DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head,
ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type)); + + return msg; +} + +static void +pfm_reset_msgq(pfm_context_t *ctx) +{ + ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; + DPRINT(("ctx=%p msgq reset\n", ctx)); +} + +static void * +pfm_rvmalloc(unsigned long size) +{ + void *mem; + unsigned long addr; + + size = PAGE_ALIGN(size); + mem = vzalloc(size); + if (mem) { + //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem); + addr = (unsigned long)mem; + while (size > 0) { + pfm_reserve_page(addr); + addr+=PAGE_SIZE; + size-=PAGE_SIZE; + } + } + return mem; +} + +static void +pfm_rvfree(void *mem, unsigned long size) +{ + unsigned long addr; + + if (mem) { + DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size)); + addr = (unsigned long) mem; + while ((long) size > 0) { + pfm_unreserve_page(addr); + addr+=PAGE_SIZE; + size-=PAGE_SIZE; + } + vfree(mem); + } + return; +} + +static pfm_context_t * +pfm_context_alloc(int ctx_flags) +{ + pfm_context_t *ctx; + + /* + * allocate context descriptor + * must be able to free with interrupts disabled + */ + ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL); + if (ctx) { + DPRINT(("alloc ctx @%p\n", ctx)); + + /* + * init context protection lock + */ + spin_lock_init(&ctx->ctx_lock); + + /* + * context is unloaded + */ + ctx->ctx_state = PFM_CTX_UNLOADED; + + /* + * initialization of context's flags + */ + ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; + ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; + ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0; + /* + * will move to set properties + * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 
1: 0; + */ + + /* + * init restart semaphore to locked + */ + init_completion(&ctx->ctx_restart_done); + + /* + * activation is used in SMP only + */ + ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; + SET_LAST_CPU(ctx, -1); + + /* + * initialize notification message queue + */ + ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; + init_waitqueue_head(&ctx->ctx_msgq_wait); + init_waitqueue_head(&ctx->ctx_zombieq); + + } + return ctx; +} + +static void +pfm_context_free(pfm_context_t *ctx) +{ + if (ctx) { + DPRINT(("free ctx @%p\n", ctx)); + kfree(ctx); + } +} + +static void +pfm_mask_monitoring(struct task_struct *task) +{ + pfm_context_t *ctx = PFM_GET_CTX(task); + unsigned long mask, val, ovfl_mask; + int i; + + DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task))); + + ovfl_mask = pmu_conf->ovfl_val; + /* + * monitoring can only be masked as a result of a valid + * counter overflow. In UP, it means that the PMU still + * has an owner. Note that the owner can be different + * from the current task. However the PMU state belongs + * to the owner. + * In SMP, a valid overflow only happens when task is + * current. Therefore if we come here, we know that + * the PMU state belongs to the current task, therefore + * we can access the live registers. + * + * So in both cases, the live register contains the owner's + * state. We can ONLY touch the PMU registers and NOT the PSR. + * + * As a consequence to this call, the ctx->th_pmds[] array + * contains stale information which must be ignored + * when context is reloaded AND monitoring is active (see + * pfm_restart). 
+ */ + mask = ctx->ctx_used_pmds[0]; + for (i = 0; mask; i++, mask>>=1) { + /* skip non used pmds */ + if ((mask & 0x1) == 0) continue; + val = ia64_get_pmd(i); + + if (PMD_IS_COUNTING(i)) { + /* + * we rebuild the full 64 bit value of the counter + */ + ctx->ctx_pmds[i].val += (val & ovfl_mask); + } else { + ctx->ctx_pmds[i].val = val; + } + DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", + i, + ctx->ctx_pmds[i].val, + val & ovfl_mask)); + } + /* + * mask monitoring by setting the privilege level to 0 + * we cannot use psr.pp/psr.up for this, it is controlled by + * the user + * + * if task is current, modify actual registers, otherwise modify + * thread save state, i.e., what will be restored in pfm_load_regs() + */ + mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; + for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { + if ((mask & 0x1) == 0UL) continue; + ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL); + ctx->th_pmcs[i] &= ~0xfUL; + DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); + } + /* + * make all of this visible + */ + ia64_srlz_d(); +} + +/* + * must always be done with task == current + * + * context must be in MASKED state when calling + */ +static void +pfm_restore_monitoring(struct task_struct *task) +{ + pfm_context_t *ctx = PFM_GET_CTX(task); + unsigned long mask, ovfl_mask; + unsigned long psr, val; + int i, is_system; + + is_system = ctx->ctx_fl_system; + ovfl_mask = pmu_conf->ovfl_val; + + if (task != current) { + printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task_pid_nr(task), task_pid_nr(current)); + return; + } + if (ctx->ctx_state != PFM_CTX_MASKED) { + printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__, + task_pid_nr(task), task_pid_nr(current), ctx->ctx_state); + return; + } + psr = pfm_get_psr(); + /* + * monitoring is masked via the PMC. + * As we restore their value, we do not want each counter to + * restart right away. 
We stop monitoring using the PSR, + * restore the PMC (and PMD) and then re-establish the psr + * as it was. Note that there can be no pending overflow at + * this point, because monitoring was MASKED. + * + * system-wide session are pinned and self-monitoring + */ + if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { + /* disable dcr pp */ + ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); + pfm_clear_psr_pp(); + } else { + pfm_clear_psr_up(); + } + /* + * first, we restore the PMD + */ + mask = ctx->ctx_used_pmds[0]; + for (i = 0; mask; i++, mask>>=1) { + /* skip non used pmds */ + if ((mask & 0x1) == 0) continue; + + if (PMD_IS_COUNTING(i)) { + /* + * we split the 64bit value according to + * counter width + */ + val = ctx->ctx_pmds[i].val & ovfl_mask; + ctx->ctx_pmds[i].val &= ~ovfl_mask; + } else { + val = ctx->ctx_pmds[i].val; + } + ia64_set_pmd(i, val); + + DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", + i, + ctx->ctx_pmds[i].val, + val)); + } + /* + * restore the PMCs + */ + mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; + for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { + if ((mask & 0x1) == 0UL) continue; + ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; + ia64_set_pmc(i, ctx->th_pmcs[i]); + DPRINT(("[%d] pmc[%d]=0x%lx\n", + task_pid_nr(task), i, ctx->th_pmcs[i])); + } + ia64_srlz_d(); + + /* + * must restore DBR/IBR because could be modified while masked + * XXX: need to optimize + */ + if (ctx->ctx_fl_using_dbreg) { + pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); + pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); + } + + /* + * now restore PSR + */ + if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { + /* enable dcr pp */ + ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); + ia64_srlz_i(); + } + pfm_set_psr_l(psr); +} + +static inline void +pfm_save_pmds(unsigned long *pmds, unsigned long mask) +{ + int i; + + ia64_srlz_d(); + + for (i=0; mask; i++, mask>>=1) { + if (mask & 0x1) 
pmds[i] = ia64_get_pmd(i); + } +} + +/* + * reload from thread state (used for ctxw only) + */ +static inline void +pfm_restore_pmds(unsigned long *pmds, unsigned long mask) +{ + int i; + unsigned long val, ovfl_val = pmu_conf->ovfl_val; + + for (i=0; mask; i++, mask>>=1) { + if ((mask & 0x1) == 0) continue; + val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i]; + ia64_set_pmd(i, val); + } + ia64_srlz_d(); +} + +/* + * propagate PMD from context to thread-state + */ +static inline void +pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) +{ + unsigned long ovfl_val = pmu_conf->ovfl_val; + unsigned long mask = ctx->ctx_all_pmds[0]; + unsigned long val; + int i; + + DPRINT(("mask=0x%lx\n", mask)); + + for (i=0; mask; i++, mask>>=1) { + + val = ctx->ctx_pmds[i].val; + + /* + * We break up the 64 bit value into 2 pieces + * the lower bits go to the machine state in the + * thread (will be reloaded on ctxsw in). + * The upper part stays in the soft-counter. + */ + if (PMD_IS_COUNTING(i)) { + ctx->ctx_pmds[i].val = val & ~ovfl_val; + val &= ovfl_val; + } + ctx->th_pmds[i] = val; + + DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", + i, + ctx->th_pmds[i], + ctx->ctx_pmds[i].val)); + } +} + +/* + * propagate PMC from context to thread-state + */ +static inline void +pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) +{ + unsigned long mask = ctx->ctx_all_pmcs[0]; + int i; + + DPRINT(("mask=0x%lx\n", mask)); + + for (i=0; mask; i++, mask>>=1) { + /* masking 0 with ovfl_val yields 0 */ + ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; + DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); + } +} + + + +static inline void +pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask) +{ + int i; + + for (i=0; mask; i++, mask>>=1) { + if ((mask & 0x1) == 0) continue; + ia64_set_pmc(i, pmcs[i]); + } + ia64_srlz_d(); +} + +static inline int +pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b) +{ + return memcmp(a, b, sizeof(pfm_uuid_t)); +} + +static inline int +pfm_buf_fmt_exit(pfm_buffer_fmt_t 
*fmt, struct task_struct *task, void *buf, struct pt_regs *regs) +{ + int ret = 0; + if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs); + return ret; +} + +static inline int +pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size) +{ + int ret = 0; + if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size); + return ret; +} + + +static inline int +pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, + int cpu, void *arg) +{ + int ret = 0; + if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg); + return ret; +} + +static inline int +pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags, + int cpu, void *arg) +{ + int ret = 0; + if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg); + return ret; +} + +static inline int +pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) +{ + int ret = 0; + if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs); + return ret; +} + +static inline int +pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) +{ + int ret = 0; + if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs); + return ret; +} + +static pfm_buffer_fmt_t * +__pfm_find_buffer_fmt(pfm_uuid_t uuid) +{ + struct list_head * pos; + pfm_buffer_fmt_t * entry; + + list_for_each(pos, &pfm_buffer_fmt_list) { + entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); + if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0) + return entry; + } + return NULL; +} + +/* + * find a buffer format based on its uuid + */ +static pfm_buffer_fmt_t * +pfm_find_buffer_fmt(pfm_uuid_t uuid) +{ + pfm_buffer_fmt_t * fmt; + spin_lock(&pfm_buffer_fmt_lock); + fmt = __pfm_find_buffer_fmt(uuid); + 
spin_unlock(&pfm_buffer_fmt_lock); + return fmt; +} + +int +pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt) +{ + int ret = 0; + + /* some sanity checks */ + if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL; + + /* we need at least a handler */ + if (fmt->fmt_handler == NULL) return -EINVAL; + + /* + * XXX: need check validity of fmt_arg_size + */ + + spin_lock(&pfm_buffer_fmt_lock); + + if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) { + printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name); + ret = -EBUSY; + goto out; + } + list_add(&fmt->fmt_list, &pfm_buffer_fmt_list); + printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name); + +out: + spin_unlock(&pfm_buffer_fmt_lock); + return ret; +} +EXPORT_SYMBOL(pfm_register_buffer_fmt); + +int +pfm_unregister_buffer_fmt(pfm_uuid_t uuid) +{ + pfm_buffer_fmt_t *fmt; + int ret = 0; + + spin_lock(&pfm_buffer_fmt_lock); + + fmt = __pfm_find_buffer_fmt(uuid); + if (!fmt) { + printk(KERN_ERR "perfmon: cannot unregister format, not found\n"); + ret = -EINVAL; + goto out; + } + list_del_init(&fmt->fmt_list); + printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name); + +out: + spin_unlock(&pfm_buffer_fmt_lock); + return ret; + +} +EXPORT_SYMBOL(pfm_unregister_buffer_fmt); + +static int +pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) +{ + unsigned long flags; + /* + * validity checks on cpu_mask have been done upstream + */ + LOCK_PFS(flags); + + DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_use_dbregs, + is_syswide, + cpu)); + + if (is_syswide) { + /* + * cannot mix system wide and per-task sessions + */ + if (pfm_sessions.pfs_task_sessions > 0UL) { + DPRINT(("system wide not possible, %u conflicting task_sessions\n", + pfm_sessions.pfs_task_sessions)); + goto abort; + } + + if (pfm_sessions.pfs_sys_session[cpu]) goto 
error_conflict; + + DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id())); + + pfm_sessions.pfs_sys_session[cpu] = task; + + pfm_sessions.pfs_sys_sessions++ ; + + } else { + if (pfm_sessions.pfs_sys_sessions) goto abort; + pfm_sessions.pfs_task_sessions++; + } + + DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_use_dbregs, + is_syswide, + cpu)); + + /* + * Force idle() into poll mode + */ + cpu_idle_poll_ctrl(true); + + UNLOCK_PFS(flags); + + return 0; + +error_conflict: + DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n", + task_pid_nr(pfm_sessions.pfs_sys_session[cpu]), + cpu)); +abort: + UNLOCK_PFS(flags); + + return -EBUSY; + +} + +static int +pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu) +{ + unsigned long flags; + /* + * validity checks on cpu_mask have been done upstream + */ + LOCK_PFS(flags); + + DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_use_dbregs, + is_syswide, + cpu)); + + + if (is_syswide) { + pfm_sessions.pfs_sys_session[cpu] = NULL; + /* + * would not work with perfmon+more than one bit in cpu_mask + */ + if (ctx && ctx->ctx_fl_using_dbreg) { + if (pfm_sessions.pfs_sys_use_dbregs == 0) { + printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx); + } else { + pfm_sessions.pfs_sys_use_dbregs--; + } + } + pfm_sessions.pfs_sys_sessions--; + } else { + pfm_sessions.pfs_task_sessions--; + } + DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_use_dbregs, + is_syswide, + cpu)); + + /* Undo forced polling. 
Last session reenables pal_halt */ + cpu_idle_poll_ctrl(false); + + UNLOCK_PFS(flags); + + return 0; +} + +/* + * removes virtual mapping of the sampling buffer. + * IMPORTANT: cannot be called with interrupts disabled, e.g. inside + * a PROTECT_CTX() section. + * + * returns -EINVAL when there is nothing to unmap (no mm, NULL vaddr or + * zero size), otherwise the result of vm_munmap() (0 on success). + */ +static int +pfm_remove_smpl_mapping(void *vaddr, unsigned long size) +{ + struct task_struct *task = current; + int r; + + /* sanity checks */ + if (task->mm == NULL || size == 0UL || vaddr == NULL) { + printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task_pid_nr(task), task->mm); + return -EINVAL; + } + + DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size)); + + /* + * does the actual unmapping + */ + r = vm_munmap((unsigned long)vaddr, size); + + if (r !=0) { + printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size); + } + + DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r)); + + /* + * report an unmap failure to the caller instead of hiding it + */ + return r; +} + +/* + * free actual physical storage used by sampling buffer + */ +#if 0 +static int +pfm_free_smpl_buffer(pfm_context_t *ctx) +{ + pfm_buffer_fmt_t *fmt; + + if (ctx->ctx_smpl_hdr == NULL) goto invalid_free; + + /* + * we won't use the buffer format anymore + */ + fmt = ctx->ctx_buf_fmt; + + DPRINT(("sampling buffer @%p size %lu vaddr=%p\n", + ctx->ctx_smpl_hdr, + ctx->ctx_smpl_size, + ctx->ctx_smpl_vaddr)); + + pfm_buf_fmt_exit(fmt, current, NULL, NULL); + + /* + * free the buffer + */ + pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size); + + ctx->ctx_smpl_hdr = NULL; + ctx->ctx_smpl_size = 0UL; + + return 0; + +invalid_free: + printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", task_pid_nr(current)); + return -EINVAL; +} +#endif + +static inline void +pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt) +{ + if (fmt == NULL) return; + + pfm_buf_fmt_exit(fmt, current, NULL, NULL); + +} + +/* + * pfmfs should _never_ be mounted by userland - too much of security hassle, + * no real gain from having the whole whorehouse
mounted. So we don't need + * any operations on the root directory. However, we need a non-trivial + * d_name - pfm: will go nicely and kill the special-casing in procfs. + */ +static struct vfsmount *pfmfs_mnt __read_mostly; + +static int __init +init_pfm_fs(void) +{ + int err = register_filesystem(&pfm_fs_type); + if (!err) { + pfmfs_mnt = kern_mount(&pfm_fs_type); + err = PTR_ERR(pfmfs_mnt); + if (IS_ERR(pfmfs_mnt)) + unregister_filesystem(&pfm_fs_type); + else + err = 0; + } + return err; +} + +/* + * pfm_read: hand one notification message to user space. + * + * Sleeps interruptibly until the context message queue is non-empty, + * unless the file is O_NONBLOCK. + * + * returns sizeof(pfm_msg_t) on success, -EINVAL for a non-perfmon file, + * a NULL context or a buffer smaller than one message, -EAGAIN for an + * empty queue with O_NONBLOCK, -EINTR when a signal is pending and + * -EFAULT when the copy to user space fails. + */ +static ssize_t +pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos) +{ + pfm_context_t *ctx; + pfm_msg_t *msg; + pfm_msg_t kmsg; + ssize_t ret; + unsigned long flags; + DECLARE_WAITQUEUE(wait, current); + if (PFM_IS_FILE(filp) == 0) { + printk(KERN_ERR "perfmon: pfm_read: bad magic [%d]\n", task_pid_nr(current)); + return -EINVAL; + } + + ctx = filp->private_data; + if (ctx == NULL) { + printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current)); + return -EINVAL; + } + + /* + * check even when there is no message + */ + if (size < sizeof(pfm_msg_t)) { + DPRINT(("message is too small ctx=%p (>=%zu)\n", ctx, sizeof(pfm_msg_t))); + return -EINVAL; + } + + PROTECT_CTX(ctx, flags); + + /* + * put ourselves on the wait queue + */ + add_wait_queue(&ctx->ctx_msgq_wait, &wait); + + + for(;;) { + /* + * check wait queue + */ + + set_current_state(TASK_INTERRUPTIBLE); + + DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); + + ret = 0; + if(PFM_CTXQ_EMPTY(ctx) == 0) break; + + UNPROTECT_CTX(ctx, flags); + + /* + * check non-blocking read + */ + ret = -EAGAIN; + if(filp->f_flags & O_NONBLOCK) break; + + /* + * check pending signals + */ + if(signal_pending(current)) { + ret = -EINTR; + break; + } + /* + * no message, so wait + */ + schedule(); + + PROTECT_CTX(ctx, flags); + } + DPRINT(("[%d] back to running ret=%zd\n", task_pid_nr(current), ret)); + set_current_state(TASK_RUNNING); + remove_wait_queue(&ctx->ctx_msgq_wait, &wait); + + /* + * a negative ret means the loop was left after UNPROTECT_CTX(), + * so the context must not be unprotected a second time + */ + if (ret <
0) goto abort; + + ret = -EINVAL; + msg = pfm_get_next_msg(ctx); + if (msg == NULL) { + printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, task_pid_nr(current)); + goto abort_locked; + } + + DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type)); + + /* + * snapshot the message and leave the protected section before + * touching user memory: copy_to_user() may fault and sleep, which + * is not allowed inside a PROTECT_CTX() section. + */ + memcpy(&kmsg, msg, sizeof(pfm_msg_t)); + UNPROTECT_CTX(ctx, flags); + + ret = -EFAULT; + if(copy_to_user(buf, &kmsg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t); + goto abort; + +abort_locked: + UNPROTECT_CTX(ctx, flags); +abort: + return ret; +} + +static ssize_t +pfm_write(struct file *file, const char __user *ubuf, + size_t size, loff_t *ppos) +{ + DPRINT(("pfm_write called\n")); + return -EINVAL; +} + +static unsigned int +pfm_poll(struct file *filp, poll_table * wait) +{ + pfm_context_t *ctx; + unsigned long flags; + unsigned int mask = 0; + + if (PFM_IS_FILE(filp) == 0) { + printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current)); + return 0; + } + + ctx = filp->private_data; + if (ctx == NULL) { + printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current)); + return 0; + } + + + DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd)); + + poll_wait(filp, &ctx->ctx_msgq_wait, wait); + + PROTECT_CTX(ctx, flags); + + if (PFM_CTXQ_EMPTY(ctx) == 0) + mask = POLLIN | POLLRDNORM; + + UNPROTECT_CTX(ctx, flags); + + DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask)); + + return mask; +} + +static long +pfm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + DPRINT(("pfm_ioctl called\n")); + return -EINVAL; +} + +/* + * interrupt cannot be masked when coming here + */ +static inline int +pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on) +{ + int ret; + + ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue); + + DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n", + task_pid_nr(current), + fd, + on, + ctx->ctx_async_queue, ret)); + + return ret; +} + +static int +pfm_fasync(int fd, struct file *filp, int on) +{ + pfm_context_t *ctx; + int
ret; + + if (PFM_IS_FILE(filp) == 0) { + printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", task_pid_nr(current)); + return -EBADF; + } + + ctx = filp->private_data; + if (ctx == NULL) { + printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current)); + return -EBADF; + } + /* + * we cannot mask interrupts during this call because this + * may go to sleep if memory is not readily available. + * + * We are protected from the context disappearing by the get_fd()/put_fd() + * done in caller. Serialization of this function is ensured by caller. + */ + ret = pfm_do_fasync(fd, filp, ctx, on); + + + DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n", + fd, + on, + ctx->ctx_async_queue, ret)); + + return ret; +} + +#ifdef CONFIG_SMP +/* + * this function is exclusively called from pfm_close(). + * The context is not protected at that time, nor are interrupts + * on the remote CPU. That's necessary to avoid deadlocks. + * + * NOTE(review): in this file it is reached through + * pfm_syswide_cleanup_other_cpu(), which pfm_flush() invokes; the + * pfm_close() reference above may be stale - confirm. + */ +static void +pfm_syswide_force_stop(void *info) +{ + pfm_context_t *ctx = (pfm_context_t *)info; + struct pt_regs *regs = task_pt_regs(current); + struct task_struct *owner; + unsigned long flags; + int ret; + + if (ctx->ctx_cpu != smp_processor_id()) { + printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n", + ctx->ctx_cpu, + smp_processor_id()); + return; + } + owner = GET_PMU_OWNER(); + if (owner != ctx->ctx_task) { + printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n", + smp_processor_id(), + task_pid_nr(owner), task_pid_nr(ctx->ctx_task)); + return; + } + if (GET_PMU_CTX() != ctx) { + printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n", + smp_processor_id(), + GET_PMU_CTX(), ctx); + return; + } + + DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), task_pid_nr(ctx->ctx_task))); + /* + * the context is already protected in pfm_close(), we simply + * need to mask interrupts to avoid
a PMU interrupt race on + * this CPU + */ + local_irq_save(flags); + + ret = pfm_context_unload(ctx, NULL, 0, regs); + if (ret) { + DPRINT(("context_unload returned %d\n", ret)); + } + + /* + * unmask interrupts, PMU interrupts are now spurious here + */ + local_irq_restore(flags); +} + +static void +pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx) +{ + int ret; + + DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu)); + ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 1); + DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret)); +} +#endif /* CONFIG_SMP */ + +/* + * called for each close(). Partially free resources. + * When caller is self-monitoring, the context is unloaded. + */ +static int +pfm_flush(struct file *filp, fl_owner_t id) +{ + pfm_context_t *ctx; + struct task_struct *task; + struct pt_regs *regs; + unsigned long flags; + unsigned long smpl_buf_size = 0UL; + void *smpl_buf_vaddr = NULL; + int state, is_system; + + if (PFM_IS_FILE(filp) == 0) { + DPRINT(("bad magic for\n")); + return -EBADF; + } + + ctx = filp->private_data; + if (ctx == NULL) { + printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current)); + return -EBADF; + } + + /* + * remove our file from the async queue, if we use this mode. + * This can be done without the context being protected. We come + * here when the context has become unreachable by other tasks. + * + * We may still have active monitoring at this point and we may + * end up in pfm_overflow_handler(). However, fasync_helper() + * operates with interrupts disabled and it cleans up the + * queue. If the PMU handler is called prior to entering + * fasync_helper() then it will send a signal. If it is + * invoked after, it will find an empty queue and no + * signal will be sent. 
In both case, we are safe + */ + PROTECT_CTX(ctx, flags); + + state = ctx->ctx_state; + is_system = ctx->ctx_fl_system; + + task = PFM_CTX_TASK(ctx); + regs = task_pt_regs(task); + + DPRINT(("ctx_state=%d is_current=%d\n", + state, + task == current ? 1 : 0)); + + /* + * if state == UNLOADED, then task is NULL + */ + + /* + * we must stop and unload because we are losing access to the context. + */ + if (task == current) { +#ifdef CONFIG_SMP + /* + * the task IS the owner but it migrated to another CPU: that's bad + * but we must handle this cleanly. Unfortunately, the kernel does + * not provide a mechanism to block migration (while the context is loaded). + * + * We need to release the resource on the ORIGINAL cpu. + */ + if (is_system && ctx->ctx_cpu != smp_processor_id()) { + + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + /* + * keep context protected but unmask interrupt for IPI + */ + local_irq_restore(flags); + + pfm_syswide_cleanup_other_cpu(ctx); + + /* + * restore interrupt masking + */ + local_irq_save(flags); + + /* + * context is unloaded at this point + */ + } else +#endif /* CONFIG_SMP */ + { + + DPRINT(("forcing unload\n")); + /* + * stop and unload, returning with state UNLOADED + * and session unreserved. + */ + pfm_context_unload(ctx, NULL, 0, regs); + + DPRINT(("ctx_state=%d\n", ctx->ctx_state)); + } + } + + /* + * remove virtual mapping, if any, for the calling task. + * cannot reset ctx field until last user is calling close(). 
/*
 * pfm_close - ->release handler of the perfmon file.
 *
 * Called either on explicit close() or from exit_files().
 * Only the LAST user of the file gets to this point, i.e., it is
 * called only ONCE.
 *
 * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero
 * (fput()), i.e., last task to access the file. Nobody else can access the
 * file at this point.
 *
 * When called from exit_files(), the VMA has been freed because exit_mm()
 * is executed before exit_files().
 *
 * When called from exit_files(), the current task is not yet ZOMBIE but we
 * flush the PMU state to the context.
 *
 * Returns 0 on success, -EBADF if the file is not a perfmon file or has no
 * context attached.
 */
static int
pfm_close(struct inode *inode, struct file *filp)
{
	pfm_context_t *ctx;
	struct task_struct *task;
	struct pt_regs *regs;
	DECLARE_WAITQUEUE(wait, current);
	unsigned long flags;
	unsigned long smpl_buf_size = 0UL;
	void *smpl_buf_addr = NULL;
	int free_possible = 1;
	int state, is_system;

	DPRINT(("pfm_close called private=%p\n", filp->private_data));

	if (PFM_IS_FILE(filp) == 0) {
		DPRINT(("bad magic\n"));
		return -EBADF;
	}

	ctx = filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current));
		return -EBADF;
	}

	PROTECT_CTX(ctx, flags);

	state = ctx->ctx_state;
	/* NOTE(review): is_system is assigned here but never read below. */
	is_system = ctx->ctx_fl_system;

	task = PFM_CTX_TASK(ctx);
	regs = task_pt_regs(task);

	DPRINT(("ctx_state=%d is_current=%d\n",
		state,
		task == current ? 1 : 0));

	/*
	 * if task == current, then pfm_flush() unloaded the context
	 */
	if (state == PFM_CTX_UNLOADED) goto doit;

	/*
	 * context is loaded/masked and task != current, we need to
	 * either force an unload or go zombie
	 */

	/*
	 * The task is currently blocked or will block after an overflow.
	 * We must force it to wake up to get out of the
	 * MASKED state and transition to the unloaded state by itself.
	 *
	 * This situation is only possible for per-task mode
	 */
	if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) {

		/*
		 * set a "partial" zombie state to be checked
		 * upon return from down() in pfm_handle_work().
		 *
		 * We cannot use the ZOMBIE state, because it is checked
		 * by pfm_load_regs() which is called upon wakeup from down().
		 * In such case, it would free the context and then we would
		 * return to pfm_handle_work() which would access the
		 * stale context. Instead, we set a flag invisible to pfm_load_regs()
		 * but visible to pfm_handle_work().
		 *
		 * For some window of time, we have a zombie context with
		 * ctx_state = MASKED and not ZOMBIE
		 */
		ctx->ctx_fl_going_zombie = 1;

		/*
		 * force task to wake up from MASKED state
		 */
		complete(&ctx->ctx_restart_done);

		DPRINT(("waking up ctx_state=%d\n", state));

		/*
		 * put ourselves to sleep waiting for the other
		 * task to report completion
		 *
		 * the context is still protected (PROTECT_CTX above) while we
		 * enqueue, therefore there is no risk of being notified of
		 * completion before being actually on the waitq.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&ctx->ctx_zombieq, &wait);

		UNPROTECT_CTX(ctx, flags);

		/*
		 * XXX: check for signals :
		 * 	- ok for explicit close
		 * 	- not ok when coming from exit_files()
		 *
		 * NOTE(review): this is a single schedule() in
		 * TASK_INTERRUPTIBLE with no condition re-check loop, so a
		 * signal could presumably resume us before the monitored task
		 * has reported completion - confirm.
		 */
		schedule();


		PROTECT_CTX(ctx, flags);


		remove_wait_queue(&ctx->ctx_zombieq, &wait);
		set_current_state(TASK_RUNNING);

		/*
		 * context is unloaded at this point
		 */
		DPRINT(("after zombie wakeup ctx_state=%d for\n", state));
	}
	else if (task != current) {
#ifdef CONFIG_SMP
		/*
		 * switch context to zombie state
		 */
		ctx->ctx_state = PFM_CTX_ZOMBIE;

		DPRINT(("zombie ctx for [%d]\n", task_pid_nr(task)));
		/*
		 * cannot free the context on the spot. deferred until
		 * the task notices the ZOMBIE state
		 */
		free_possible = 0;
#else
		pfm_context_unload(ctx, NULL, 0, regs);
#endif
	}

doit:
	/* reload state, may have changed while the critical section was open */
	state = ctx->ctx_state;

	/*
	 * the context is still attached to a task (possibly current)
	 * we cannot destroy it right now
	 */

	/*
	 * we must free the sampling buffer right here because
	 * we cannot rely on it being cleaned up later by the
	 * monitored task. It is not possible to free vmalloc'ed
	 * memory in pfm_load_regs(). Instead, we remove the buffer
	 * now. Should there be subsequent PMU overflows originally
	 * meant for sampling, they will be converted to spurious
	 * and that's fine because the monitoring tool is gone anyway.
	 */
	if (ctx->ctx_smpl_hdr) {
		smpl_buf_addr = ctx->ctx_smpl_hdr;
		smpl_buf_size = ctx->ctx_smpl_size;
		/* no more sampling */
		ctx->ctx_smpl_hdr = NULL;
		ctx->ctx_fl_is_sampling = 0;
	}

	DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n",
		state,
		free_possible,
		smpl_buf_addr,
		smpl_buf_size));

	if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt);

	/*
	 * UNLOADED means that the session has already been unreserved;
	 * only the ZOMBIE state still has to unreserve it here.
	 */
	if (state == PFM_CTX_ZOMBIE) {
		pfm_unreserve_session(ctx, ctx->ctx_fl_system , ctx->ctx_cpu);
	}

	/*
	 * disconnecting the file descriptor from the context must be done
	 * before we unlock.
	 */
	filp->private_data = NULL;

	/*
	 * if we free on the spot, the context is now completely unreachable
	 * from the caller's side. The monitored task side is also cut, so we
	 * can freely cut.
	 *
	 * If we have a deferred free, only the caller side is disconnected.
	 */
	UNPROTECT_CTX(ctx, flags);

	/*
	 * All memory free operations (especially for vmalloc'ed memory)
	 * MUST be done with interrupts ENABLED.
	 */
	if (smpl_buf_addr) pfm_rvfree(smpl_buf_addr, smpl_buf_size);

	/*
	 * return the memory used by the context
	 */
	if (free_possible) pfm_context_free(ctx);

	return 0;
}
PAGE_SIZE; + buf += PAGE_SIZE; + size -= PAGE_SIZE; + } + return 0; +} + +/* + * allocate a sampling buffer and remaps it into the user address space of the task + */ +static int +pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma = NULL; + unsigned long size; + void *smpl_buf; + + + /* + * the fixed header + requested size and align to page boundary + */ + size = PAGE_ALIGN(rsize); + + DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size)); + + /* + * check requested size to avoid Denial-of-service attacks + * XXX: may have to refine this test + * Check against address space limit. + * + * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur) + * return -ENOMEM; + */ + if (size > task_rlimit(task, RLIMIT_MEMLOCK)) + return -ENOMEM; + + /* + * We do the easy to undo allocations first. + * + * pfm_rvmalloc(), clears the buffer, so there is no leak + */ + smpl_buf = pfm_rvmalloc(size); + if (smpl_buf == NULL) { + DPRINT(("Can't allocate sampling buffer\n")); + return -ENOMEM; + } + + DPRINT(("smpl_buf @%p\n", smpl_buf)); + + /* allocate vma */ + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + if (!vma) { + DPRINT(("Cannot allocate vma\n")); + goto error_kmem; + } + INIT_LIST_HEAD(&vma->anon_vma_chain); + + /* + * partially initialize the vma for the sampling buffer + */ + vma->vm_mm = mm; + vma->vm_file = get_file(filp); + vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP; + vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ + + /* + * Now we have everything we need and we can initialize + * and connect all the data structures + */ + + ctx->ctx_smpl_hdr = smpl_buf; + ctx->ctx_smpl_size = size; /* aligned size */ + + /* + * Let's do the difficult operations next. + * + * now we atomically find some area in the address space and + * remap the buffer in it. 
+ */ + down_write(&task->mm->mmap_sem); + + /* find some free area in address space, must have mmap sem held */ + vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS); + if (IS_ERR_VALUE(vma->vm_start)) { + DPRINT(("Cannot find unmapped area for size %ld\n", size)); + up_write(&task->mm->mmap_sem); + goto error; + } + vma->vm_end = vma->vm_start + size; + vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; + + DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start)); + + /* can only be applied to current task, need to have the mm semaphore held when called */ + if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) { + DPRINT(("Can't remap buffer\n")); + up_write(&task->mm->mmap_sem); + goto error; + } + + /* + * now insert the vma in the vm list for the process, must be + * done with mmap lock held + */ + insert_vm_struct(mm, vma); + + vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, + vma_pages(vma)); + up_write(&task->mm->mmap_sem); + + /* + * keep track of user level virtual address + */ + ctx->ctx_smpl_vaddr = (void *)vma->vm_start; + *(unsigned long *)user_vaddr = vma->vm_start; + + return 0; + +error: + kmem_cache_free(vm_area_cachep, vma); +error_kmem: + pfm_rvfree(smpl_buf, size); + + return -ENOMEM; +} + +/* + * XXX: do something better here + */ +static int +pfm_bad_permissions(struct task_struct *task) +{ + const struct cred *tcred; + kuid_t uid = current_uid(); + kgid_t gid = current_gid(); + int ret; + + rcu_read_lock(); + tcred = __task_cred(task); + + /* inspired by ptrace_attach() */ + DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", + from_kuid(&init_user_ns, uid), + from_kgid(&init_user_ns, gid), + from_kuid(&init_user_ns, tcred->euid), + from_kuid(&init_user_ns, tcred->suid), + from_kuid(&init_user_ns, tcred->uid), + from_kgid(&init_user_ns, tcred->egid), + from_kgid(&init_user_ns, tcred->sgid))); + + ret = ((!uid_eq(uid, tcred->euid)) + 
|| (!uid_eq(uid, tcred->suid)) + || (!uid_eq(uid, tcred->uid)) + || (!gid_eq(gid, tcred->egid)) + || (!gid_eq(gid, tcred->sgid)) + || (!gid_eq(gid, tcred->gid))) && !capable(CAP_SYS_PTRACE); + + rcu_read_unlock(); + return ret; +} + +static int +pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx) +{ + int ctx_flags; + + /* valid signal */ + + ctx_flags = pfx->ctx_flags; + + if (ctx_flags & PFM_FL_SYSTEM_WIDE) { + + /* + * cannot block in this mode + */ + if (ctx_flags & PFM_FL_NOTIFY_BLOCK) { + DPRINT(("cannot use blocking mode when in system wide monitoring\n")); + return -EINVAL; + } + } else { + } + /* probably more to add here */ + + return 0; +} + +static int +pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned int ctx_flags, + unsigned int cpu, pfarg_context_t *arg) +{ + pfm_buffer_fmt_t *fmt = NULL; + unsigned long size = 0UL; + void *uaddr = NULL; + void *fmt_arg = NULL; + int ret = 0; +#define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1) + + /* invoke and lock buffer format, if found */ + fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id); + if (fmt == NULL) { + DPRINT(("[%d] cannot find buffer format\n", task_pid_nr(task))); + return -EINVAL; + } + + /* + * buffer argument MUST be contiguous to pfarg_context_t + */ + if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg); + + ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg); + + DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task_pid_nr(task), ctx_flags, cpu, fmt_arg, ret)); + + if (ret) goto error; + + /* link buffer format and context */ + ctx->ctx_buf_fmt = fmt; + ctx->ctx_fl_is_sampling = 1; /* assume record() is defined */ + + /* + * check if buffer format wants to use perfmon buffer allocation/mapping service + */ + ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size); + if (ret) goto error; + + if (size) { + /* + * buffer is always remapped into the caller's address space + */ + ret = 
pfm_smpl_buffer_alloc(current, filp, ctx, size, &uaddr); + if (ret) goto error; + + /* keep track of user address of buffer */ + arg->ctx_smpl_vaddr = uaddr; + } + ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg); + +error: + return ret; +} + +static void +pfm_reset_pmu_state(pfm_context_t *ctx) +{ + int i; + + /* + * install reset values for PMC. + */ + for (i=1; PMC_IS_LAST(i) == 0; i++) { + if (PMC_IS_IMPL(i) == 0) continue; + ctx->ctx_pmcs[i] = PMC_DFL_VAL(i); + DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i])); + } + /* + * PMD registers are set to 0UL when the context in memset() + */ + + /* + * On context switched restore, we must restore ALL pmc and ALL pmd even + * when they are not actively used by the task. In UP, the incoming process + * may otherwise pick up left over PMC, PMD state from the previous process. + * As opposed to PMD, stale PMC can cause harm to the incoming + * process because they may change what is being measured. + * Therefore, we must systematically reinstall the entire + * PMC state. In SMP, the same thing is possible on the + * same CPU but also on between 2 CPUs. + * + * The problem with PMD is information leaking especially + * to user level when psr.sp=0 + * + * There is unfortunately no easy way to avoid this problem + * on either UP or SMP. This definitively slows down the + * pfm_load_regs() function. + */ + + /* + * bitmask of all PMCs accessible to this context + * + * PMC0 is treated differently. 
+ */ + ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1; + + /* + * bitmask of all PMDs that are accessible to this context + */ + ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0]; + + DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0])); + + /* + * useful in case of re-enable after disable + */ + ctx->ctx_used_ibrs[0] = 0UL; + ctx->ctx_used_dbrs[0] = 0UL; +} + +static int +pfm_ctx_getsize(void *arg, size_t *sz) +{ + pfarg_context_t *req = (pfarg_context_t *)arg; + pfm_buffer_fmt_t *fmt; + + *sz = 0; + + if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0; + + fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id); + if (fmt == NULL) { + DPRINT(("cannot find buffer format\n")); + return -EINVAL; + } + /* get just enough to copy in user parameters */ + *sz = fmt->fmt_arg_size; + DPRINT(("arg_size=%lu\n", *sz)); + + return 0; +} + + + +/* + * cannot attach if : + * - kernel task + * - task not owned by caller + * - task incompatible with context mode + */ +static int +pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task) +{ + /* + * no kernel task or task not owner by caller + */ + if (task->mm == NULL) { + DPRINT(("task [%d] has not memory context (kernel thread)\n", task_pid_nr(task))); + return -EPERM; + } + if (pfm_bad_permissions(task)) { + DPRINT(("no permission to attach to [%d]\n", task_pid_nr(task))); + return -EPERM; + } + /* + * cannot block in self-monitoring mode + */ + if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) { + DPRINT(("cannot load a blocking context on self for [%d]\n", task_pid_nr(task))); + return -EINVAL; + } + + if (task->exit_state == EXIT_ZOMBIE) { + DPRINT(("cannot attach to zombie task [%d]\n", task_pid_nr(task))); + return -EBUSY; + } + + /* + * always ok for self + */ + if (task == current) return 0; + + if (!task_is_stopped_or_traced(task)) { + DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state)); + return -EBUSY; + 
} + /* + * make sure the task is off any CPU + */ + wait_task_inactive(task, 0); + + /* more to come... */ + + return 0; +} + +static int +pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task) +{ + struct task_struct *p = current; + int ret; + + /* XXX: need to add more checks here */ + if (pid < 2) return -EPERM; + + if (pid != task_pid_vnr(current)) { + + read_lock(&tasklist_lock); + + p = find_task_by_vpid(pid); + + /* make sure task cannot go away while we operate on it */ + if (p) get_task_struct(p); + + read_unlock(&tasklist_lock); + + if (p == NULL) return -ESRCH; + } + + ret = pfm_task_incompatible(ctx, p); + if (ret == 0) { + *task = p; + } else if (p != current) { + pfm_put_task(p); + } + return ret; +} + + + +static int +pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + pfarg_context_t *req = (pfarg_context_t *)arg; + struct file *filp; + struct path path; + int ctx_flags; + int fd; + int ret; + + /* let's check the arguments first */ + ret = pfarg_is_sane(current, req); + if (ret < 0) + return ret; + + ctx_flags = req->ctx_flags; + + ret = -ENOMEM; + + fd = get_unused_fd_flags(0); + if (fd < 0) + return fd; + + ctx = pfm_context_alloc(ctx_flags); + if (!ctx) + goto error; + + filp = pfm_alloc_file(ctx); + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + goto error_file; + } + + req->ctx_fd = ctx->ctx_fd = fd; + + /* + * does the user want to sample? 
+ */ + if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) { + ret = pfm_setup_buffer_fmt(current, filp, ctx, ctx_flags, 0, req); + if (ret) + goto buffer_error; + } + + DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d\n", + ctx, + ctx_flags, + ctx->ctx_fl_system, + ctx->ctx_fl_block, + ctx->ctx_fl_excl_idle, + ctx->ctx_fl_no_msg, + ctx->ctx_fd)); + + /* + * initialize soft PMU state + */ + pfm_reset_pmu_state(ctx); + + fd_install(fd, filp); + + return 0; + +buffer_error: + path = filp->f_path; + put_filp(filp); + path_put(&path); + + if (ctx->ctx_buf_fmt) { + pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs); + } +error_file: + pfm_context_free(ctx); + +error: + put_unused_fd(fd); + return ret; +} + +static inline unsigned long +pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset) +{ + unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset; + unsigned long new_seed, old_seed = reg->seed, mask = reg->mask; + extern unsigned long carta_random32 (unsigned long seed); + + if (reg->flags & PFM_REGFL_RANDOM) { + new_seed = carta_random32(old_seed); + val -= (old_seed & mask); /* counter values are negative numbers! */ + if ((mask >> 32) != 0) + /* construct a full 64-bit random value: */ + new_seed |= carta_random32(old_seed >> 32) << 32; + reg->seed = new_seed; + } + reg->lval = val; + return val; +} + +static void +pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) +{ + unsigned long mask = ovfl_regs[0]; + unsigned long reset_others = 0UL; + unsigned long val; + int i; + + /* + * now restore reset value on sampling overflowed counters + */ + mask >>= PMU_FIRST_COUNTER; + for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { + + if ((mask & 0x1UL) == 0UL) continue; + + ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); + reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; + + DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? 
"long" : "short", i, val)); + } + + /* + * Now take care of resetting the other registers + */ + for(i = 0; reset_others; i++, reset_others >>= 1) { + + if ((reset_others & 0x1) == 0) continue; + + ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); + + DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", + is_long_reset ? "long" : "short", i, val)); + } +} + +static void +pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) +{ + unsigned long mask = ovfl_regs[0]; + unsigned long reset_others = 0UL; + unsigned long val; + int i; + + DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset)); + + if (ctx->ctx_state == PFM_CTX_MASKED) { + pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset); + return; + } + + /* + * now restore reset value on sampling overflowed counters + */ + mask >>= PMU_FIRST_COUNTER; + for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { + + if ((mask & 0x1UL) == 0UL) continue; + + val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); + reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; + + DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); + + pfm_write_soft_counter(ctx, i, val); + } + + /* + * Now take care of resetting the other registers + */ + for(i = 0; reset_others; i++, reset_others >>= 1) { + + if ((reset_others & 0x1) == 0) continue; + + val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); + + if (PMD_IS_COUNTING(i)) { + pfm_write_soft_counter(ctx, i, val); + } else { + ia64_set_pmd(i, val); + } + DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", + is_long_reset ? 
"long" : "short", i, val)); + } + ia64_srlz_d(); +} + +static int +pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct task_struct *task; + pfarg_reg_t *req = (pfarg_reg_t *)arg; + unsigned long value, pmc_pm; + unsigned long smpl_pmds, reset_pmds, impl_pmds; + unsigned int cnum, reg_flags, flags, pmc_type; + int i, can_access_pmu = 0, is_loaded, is_system, expert_mode; + int is_monitor, is_counting, state; + int ret = -EINVAL; + pfm_reg_check_t wr_func; +#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z)) + + state = ctx->ctx_state; + is_loaded = state == PFM_CTX_LOADED ? 1 : 0; + is_system = ctx->ctx_fl_system; + task = ctx->ctx_task; + impl_pmds = pmu_conf->impl_pmds[0]; + + if (state == PFM_CTX_ZOMBIE) return -EINVAL; + + if (is_loaded) { + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (is_system && ctx->ctx_cpu != smp_processor_id()) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; + } + expert_mode = pfm_sysctl.expert_mode; + + for (i = 0; i < count; i++, req++) { + + cnum = req->reg_num; + reg_flags = req->reg_flags; + value = req->reg_value; + smpl_pmds = req->reg_smpl_pmds[0]; + reset_pmds = req->reg_reset_pmds[0]; + flags = 0; + + + if (cnum >= PMU_MAX_PMCS) { + DPRINT(("pmc%u is invalid\n", cnum)); + goto error; + } + + pmc_type = pmu_conf->pmc_desc[cnum].type; + pmc_pm = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1; + is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0; + is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 
1 : 0; + + /* + * we reject all non implemented PMC as well + * as attempts to modify PMC[0-3] which are used + * as status registers by the PMU + */ + if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) { + DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type)); + goto error; + } + wr_func = pmu_conf->pmc_desc[cnum].write_check; + /* + * If the PMC is a monitor, then if the value is not the default: + * - system-wide session: PMCx.pm=1 (privileged monitor) + * - per-task : PMCx.pm=0 (user monitor) + */ + if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) { + DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n", + cnum, + pmc_pm, + is_system)); + goto error; + } + + if (is_counting) { + /* + * enforce generation of overflow interrupt. Necessary on all + * CPUs. + */ + value |= 1 << PMU_PMC_OI; + + if (reg_flags & PFM_REGFL_OVFL_NOTIFY) { + flags |= PFM_REGFL_OVFL_NOTIFY; + } + + if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM; + + /* verify validity of smpl_pmds */ + if ((smpl_pmds & impl_pmds) != smpl_pmds) { + DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum)); + goto error; + } + + /* verify validity of reset_pmds */ + if ((reset_pmds & impl_pmds) != reset_pmds) { + DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum)); + goto error; + } + } else { + if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { + DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum)); + goto error; + } + /* eventid on non-counting monitors are ignored */ + } + + /* + * execute write checker, if any + */ + if (likely(expert_mode == 0 && wr_func)) { + ret = (*wr_func)(task, ctx, cnum, &value, regs); + if (ret) goto error; + ret = -EINVAL; + } + + /* + * no error on this register + */ + PFM_REG_RETFLAG_SET(req->reg_flags, 0); + + /* + * Now we commit the changes to the software state + */ + + /* + * update overflow information + */ + if (is_counting) { + /* + * full flag 
update each time a register is programmed + */ + ctx->ctx_pmds[cnum].flags = flags; + + ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds; + ctx->ctx_pmds[cnum].smpl_pmds[0] = smpl_pmds; + ctx->ctx_pmds[cnum].eventid = req->reg_smpl_eventid; + + /* + * Mark all PMDS to be accessed as used. + * + * We do not keep track of PMC because we have to + * systematically restore ALL of them. + * + * We do not update the used_monitors mask, because + * if we have not programmed them, then will be in + * a quiescent state, therefore we will not need to + * mask/restore then when context is MASKED. + */ + CTX_USED_PMD(ctx, reset_pmds); + CTX_USED_PMD(ctx, smpl_pmds); + /* + * make sure we do not try to reset on + * restart because we have established new values + */ + if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; + } + /* + * Needed in case the user does not initialize the equivalent + * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no + * possible leak here. + */ + CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]); + + /* + * keep track of the monitor PMC that we are using. + * we save the value of the pmc in ctx_pmcs[] and if + * the monitoring is not stopped for the context we also + * place it in the saved state area so that it will be + * picked up later by the context switch code. + * + * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). + * + * The value in th_pmcs[] may be modified on overflow, i.e., when + * monitoring needs to be stopped. 
+ */ + if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); + + /* + * update context state + */ + ctx->ctx_pmcs[cnum] = value; + + if (is_loaded) { + /* + * write thread state + */ + if (is_system == 0) ctx->th_pmcs[cnum] = value; + + /* + * write hardware register if we can + */ + if (can_access_pmu) { + ia64_set_pmc(cnum, value); + } +#ifdef CONFIG_SMP + else { + /* + * per-task SMP only here + * + * we are guaranteed that the task is not running on the other CPU, + * we indicate that this PMD will need to be reloaded if the task + * is rescheduled on the CPU it ran last on. + */ + ctx->ctx_reload_pmcs[0] |= 1UL << cnum; + } +#endif + } + + DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n", + cnum, + value, + is_loaded, + can_access_pmu, + flags, + ctx->ctx_all_pmcs[0], + ctx->ctx_used_pmds[0], + ctx->ctx_pmds[cnum].eventid, + smpl_pmds, + reset_pmds, + ctx->ctx_reload_pmcs[0], + ctx->ctx_used_monitors[0], + ctx->ctx_ovfl_regs[0])); + } + + /* + * make sure the changes are visible + */ + if (can_access_pmu) ia64_srlz_d(); + + return 0; +error: + PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); + return ret; +} + +static int +pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct task_struct *task; + pfarg_reg_t *req = (pfarg_reg_t *)arg; + unsigned long value, hw_value, ovfl_mask; + unsigned int cnum; + int i, can_access_pmu = 0, state; + int is_counting, is_loaded, is_system, expert_mode; + int ret = -EINVAL; + pfm_reg_check_t wr_func; + + + state = ctx->ctx_state; + is_loaded = state == PFM_CTX_LOADED ? 1 : 0; + is_system = ctx->ctx_fl_system; + ovfl_mask = pmu_conf->ovfl_val; + task = ctx->ctx_task; + + if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL; + + /* + * on both UP and SMP, we can only write to the PMC when the task is + * the owner of the local PMU. 
+ */ + if (likely(is_loaded)) { + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; + } + expert_mode = pfm_sysctl.expert_mode; + + for (i = 0; i < count; i++, req++) { + + cnum = req->reg_num; + value = req->reg_value; + + if (!PMD_IS_IMPL(cnum)) { + DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum)); + goto abort_mission; + } + is_counting = PMD_IS_COUNTING(cnum); + wr_func = pmu_conf->pmd_desc[cnum].write_check; + + /* + * execute write checker, if any + */ + if (unlikely(expert_mode == 0 && wr_func)) { + unsigned long v = value; + + ret = (*wr_func)(task, ctx, cnum, &v, regs); + if (ret) goto abort_mission; + + value = v; + ret = -EINVAL; + } + + /* + * no error on this register + */ + PFM_REG_RETFLAG_SET(req->reg_flags, 0); + + /* + * now commit changes to software state + */ + hw_value = value; + + /* + * update virtualized (64bits) counter + */ + if (is_counting) { + /* + * write context state + */ + ctx->ctx_pmds[cnum].lval = value; + + /* + * when context is load we use the split value + */ + if (is_loaded) { + hw_value = value & ovfl_mask; + value = value & ~ovfl_mask; + } + } + /* + * update reset values (not just for counters) + */ + ctx->ctx_pmds[cnum].long_reset = req->reg_long_reset; + ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset; + + /* + * update randomization parameters (not just for counters) + */ + ctx->ctx_pmds[cnum].seed = req->reg_random_seed; + ctx->ctx_pmds[cnum].mask = req->reg_random_mask; + + /* + * update context value + */ + ctx->ctx_pmds[cnum].val = value; + + /* + * Keep track of what we use + * + * We do not keep track of PMC because we 
have to + * systematically restore ALL of them. + */ + CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum)); + + /* + * mark this PMD register used as well + */ + CTX_USED_PMD(ctx, RDEP(cnum)); + + /* + * make sure we do not try to reset on + * restart because we have established new values + */ + if (is_counting && state == PFM_CTX_MASKED) { + ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; + } + + if (is_loaded) { + /* + * write thread state + */ + if (is_system == 0) ctx->th_pmds[cnum] = hw_value; + + /* + * write hardware register if we can + */ + if (can_access_pmu) { + ia64_set_pmd(cnum, hw_value); + } else { +#ifdef CONFIG_SMP + /* + * we are guaranteed that the task is not running on the other CPU, + * we indicate that this PMD will need to be reloaded if the task + * is rescheduled on the CPU it ran last on. + */ + ctx->ctx_reload_pmds[0] |= 1UL << cnum; +#endif + } + } + + DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx " + "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", + cnum, + value, + is_loaded, + can_access_pmu, + hw_value, + ctx->ctx_pmds[cnum].val, + ctx->ctx_pmds[cnum].short_reset, + ctx->ctx_pmds[cnum].long_reset, + PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N', + ctx->ctx_pmds[cnum].seed, + ctx->ctx_pmds[cnum].mask, + ctx->ctx_used_pmds[0], + ctx->ctx_pmds[cnum].reset_pmds[0], + ctx->ctx_reload_pmds[0], + ctx->ctx_all_pmds[0], + ctx->ctx_ovfl_regs[0])); + } + + /* + * make changes visible + */ + if (can_access_pmu) ia64_srlz_d(); + + return 0; + +abort_mission: + /* + * for now, we have only one possibility for error + */ + PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); + return ret; +} + +/* + * By the way of PROTECT_CONTEXT(), interrupts are masked while we are in this function. + * Therefore we know, we do not have to worry about the PMU overflow interrupt. 
If an + * interrupt is delivered during the call, it will be kept pending until we leave, making + * it appears as if it had been generated at the UNPROTECT_CONTEXT(). At least we are + * guaranteed to return consistent data to the user, it may simply be old. It is not + * trivial to treat the overflow while inside the call because you may end up in + * some module sampling buffer code causing deadlocks. + */ +static int +pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct task_struct *task; + unsigned long val = 0UL, lval, ovfl_mask, sval; + pfarg_reg_t *req = (pfarg_reg_t *)arg; + unsigned int cnum, reg_flags = 0; + int i, can_access_pmu = 0, state; + int is_loaded, is_system, is_counting, expert_mode; + int ret = -EINVAL; + pfm_reg_check_t rd_func; + + /* + * access is possible when loaded only for + * self-monitoring tasks or in UP mode + */ + + state = ctx->ctx_state; + is_loaded = state == PFM_CTX_LOADED ? 1 : 0; + is_system = ctx->ctx_fl_system; + ovfl_mask = pmu_conf->ovfl_val; + task = ctx->ctx_task; + + if (state == PFM_CTX_ZOMBIE) return -EINVAL; + + if (likely(is_loaded)) { + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + /* + * this can be true when not self-monitoring only in UP + */ + can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; + + if (can_access_pmu) ia64_srlz_d(); + } + expert_mode = pfm_sysctl.expert_mode; + + DPRINT(("ld=%d apmu=%d ctx_state=%d\n", + is_loaded, + can_access_pmu, + state)); + + /* + * on both UP and SMP, we can only read the PMD from the hardware register when + * the task is the owner of the local PMU. 
+ */ + + for (i = 0; i < count; i++, req++) { + + cnum = req->reg_num; + reg_flags = req->reg_flags; + + if (unlikely(!PMD_IS_IMPL(cnum))) goto error; + /* + * we can only read the register that we use. That includes + * the one we explicitly initialize AND the one we want included + * in the sampling buffer (smpl_regs). + * + * Having this restriction allows optimization in the ctxsw routine + * without compromising security (leaks) + */ + if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error; + + sval = ctx->ctx_pmds[cnum].val; + lval = ctx->ctx_pmds[cnum].lval; + is_counting = PMD_IS_COUNTING(cnum); + + /* + * If the task is not the current one, then we check if the + * PMU state is still in the local live register due to lazy ctxsw. + * If true, then we read directly from the registers. + */ + if (can_access_pmu){ + val = ia64_get_pmd(cnum); + } else { + /* + * context has been saved + * if context is zombie, then task does not exist anymore. + * In this case, we use the full value saved in the context (pfm_flush_regs()). + */ + val = is_loaded ? ctx->th_pmds[cnum] : 0UL; + } + rd_func = pmu_conf->pmd_desc[cnum].read_check; + + if (is_counting) { + /* + * XXX: need to check for overflow when loaded + */ + val &= ovfl_mask; + val += sval; + } + + /* + * execute read checker, if any + */ + if (unlikely(expert_mode == 0 && rd_func)) { + unsigned long v = val; + ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs); + if (ret) goto error; + val = v; + ret = -EINVAL; + } + + PFM_REG_RETFLAG_SET(reg_flags, 0); + + DPRINT(("pmd[%u]=0x%lx\n", cnum, val)); + + /* + * update register return value, abort all if problem during copy. + * we only modify the reg_flags field. no check mode is fine because + * access has been verified upfront in sys_perfmonctl(). 
+ */ + req->reg_value = val; + req->reg_flags = reg_flags; + req->reg_last_reset_val = lval; + } + + return 0; + +error: + PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); + return ret; +} + +int +pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) +{ + pfm_context_t *ctx; + + if (req == NULL) return -EINVAL; + + ctx = GET_PMU_CTX(); + + if (ctx == NULL) return -EINVAL; + + /* + * for now limit to current task, which is enough when calling + * from overflow handler + */ + if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; + + return pfm_write_pmcs(ctx, req, nreq, regs); +} +EXPORT_SYMBOL(pfm_mod_write_pmcs); + +int +pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) +{ + pfm_context_t *ctx; + + if (req == NULL) return -EINVAL; + + ctx = GET_PMU_CTX(); + + if (ctx == NULL) return -EINVAL; + + /* + * for now limit to current task, which is enough when calling + * from overflow handler + */ + if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; + + return pfm_read_pmds(ctx, req, nreq, regs); +} +EXPORT_SYMBOL(pfm_mod_read_pmds); + +/* + * Only call this function when a process it trying to + * write the debug registers (reading is always allowed) + */ +int +pfm_use_debug_registers(struct task_struct *task) +{ + pfm_context_t *ctx = task->thread.pfm_context; + unsigned long flags; + int ret = 0; + + if (pmu_conf->use_rr_dbregs == 0) return 0; + + DPRINT(("called for [%d]\n", task_pid_nr(task))); + + /* + * do it only once + */ + if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0; + + /* + * Even on SMP, we do not need to use an atomic here because + * the only way in is via ptrace() and this is possible only when the + * process is stopped. Even in the case where the ctxsw out is not totally + * completed by the time we come here, there is no way the 'stopped' process + * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine. 
+ * So this is always safe. + */ + if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1; + + LOCK_PFS(flags); + + /* + * We cannot allow setting breakpoints when system wide monitoring + * sessions are using the debug registers. + */ + if (pfm_sessions.pfs_sys_use_dbregs> 0) + ret = -1; + else + pfm_sessions.pfs_ptrace_use_dbregs++; + + DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", + pfm_sessions.pfs_ptrace_use_dbregs, + pfm_sessions.pfs_sys_use_dbregs, + task_pid_nr(task), ret)); + + UNLOCK_PFS(flags); + + return ret; +} + +/* + * This function is called for every task that exits with the + * IA64_THREAD_DBG_VALID set. This indicates a task which was + * able to use the debug registers for debugging purposes via + * ptrace(). Therefore we know it was not using them for + * performance monitoring, so we only decrement the number + * of "ptraced" debug register users to keep the count up to date + */ +int +pfm_release_debug_registers(struct task_struct *task) +{ + unsigned long flags; + int ret; + + if (pmu_conf->use_rr_dbregs == 0) return 0; + + LOCK_PFS(flags); + if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { + printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task_pid_nr(task)); + ret = -1; + } else { + pfm_sessions.pfs_ptrace_use_dbregs--; + ret = 0; + } + UNLOCK_PFS(flags); + + return ret; +} + +static int +pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct task_struct *task; + pfm_buffer_fmt_t *fmt; + pfm_ovfl_ctrl_t rst_ctrl; + int state, is_system; + int ret = 0; + + state = ctx->ctx_state; + fmt = ctx->ctx_buf_fmt; + is_system = ctx->ctx_fl_system; + task = PFM_CTX_TASK(ctx); + + switch(state) { + case PFM_CTX_MASKED: + break; + case PFM_CTX_LOADED: + if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break; + /* fall through */ + case PFM_CTX_UNLOADED: + case PFM_CTX_ZOMBIE: + DPRINT(("invalid state=%d\n", state)); + return -EBUSY; + default: + DPRINT(("state=%d, cannot 
operate (no active_restart handler)\n", state)); + return -EINVAL; + } + + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (is_system && ctx->ctx_cpu != smp_processor_id()) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + + /* sanity check */ + if (unlikely(task == NULL)) { + printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", task_pid_nr(current)); + return -EINVAL; + } + + if (task == current || is_system) { + + fmt = ctx->ctx_buf_fmt; + + DPRINT(("restarting self %d ovfl=0x%lx\n", + task_pid_nr(task), + ctx->ctx_ovfl_regs[0])); + + if (CTX_HAS_SMPL(ctx)) { + + prefetch(ctx->ctx_smpl_hdr); + + rst_ctrl.bits.mask_monitoring = 0; + rst_ctrl.bits.reset_ovfl_pmds = 0; + + if (state == PFM_CTX_LOADED) + ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); + else + ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); + } else { + rst_ctrl.bits.mask_monitoring = 0; + rst_ctrl.bits.reset_ovfl_pmds = 1; + } + + if (ret == 0) { + if (rst_ctrl.bits.reset_ovfl_pmds) + pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); + + if (rst_ctrl.bits.mask_monitoring == 0) { + DPRINT(("resuming monitoring for [%d]\n", task_pid_nr(task))); + + if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task); + } else { + DPRINT(("keeping monitoring stopped for [%d]\n", task_pid_nr(task))); + + // cannot use pfm_stop_monitoring(task, regs); + } + } + /* + * clear overflowed PMD mask to remove any stale information + */ + ctx->ctx_ovfl_regs[0] = 0UL; + + /* + * back to LOADED state + */ + ctx->ctx_state = PFM_CTX_LOADED; + + /* + * XXX: not really useful for self monitoring + */ + ctx->ctx_fl_can_restart = 0; + + return 0; + } + + /* + * restart another task + */ + + /* + * When PFM_CTX_MASKED, we cannot issue a restart 
before the previous + * one is seen by the task. + */ + if (state == PFM_CTX_MASKED) { + if (ctx->ctx_fl_can_restart == 0) return -EINVAL; + /* + * will prevent subsequent restart before this one is + * seen by other task + */ + ctx->ctx_fl_can_restart = 0; + } + + /* + * if blocking, then post the semaphore is PFM_CTX_MASKED, i.e. + * the task is blocked or on its way to block. That's the normal + * restart path. If the monitoring is not masked, then the task + * can be actively monitoring and we cannot directly intervene. + * Therefore we use the trap mechanism to catch the task and + * force it to reset the buffer/reset PMDs. + * + * if non-blocking, then we ensure that the task will go into + * pfm_handle_work() before returning to user mode. + * + * We cannot explicitly reset another task, it MUST always + * be done by the task itself. This works for system wide because + * the tool that is controlling the session is logically doing + * "self-monitoring". + */ + if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) { + DPRINT(("unblocking [%d]\n", task_pid_nr(task))); + complete(&ctx->ctx_restart_done); + } else { + DPRINT(("[%d] armed exit trap\n", task_pid_nr(task))); + + ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET; + + PFM_SET_WORK_PENDING(task, 1); + + set_notify_resume(task); + + /* + * XXX: send reschedule if task runs on another CPU + */ + } + return 0; +} + +static int +pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + unsigned int m = *(unsigned int *)arg; + + pfm_sysctl.debug = m == 0 ? 0 : 1; + + printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? 
"on" : "off"); + + if (m == 0) { + memset(pfm_stats, 0, sizeof(pfm_stats)); + for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL; + } + return 0; +} + +/* + * arg can be NULL and count can be zero for this function + */ +static int +pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct thread_struct *thread = NULL; + struct task_struct *task; + pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg; + unsigned long flags; + dbreg_t dbreg; + unsigned int rnum; + int first_time; + int ret = 0, state; + int i, can_access_pmu = 0; + int is_system, is_loaded; + + if (pmu_conf->use_rr_dbregs == 0) return -EINVAL; + + state = ctx->ctx_state; + is_loaded = state == PFM_CTX_LOADED ? 1 : 0; + is_system = ctx->ctx_fl_system; + task = ctx->ctx_task; + + if (state == PFM_CTX_ZOMBIE) return -EINVAL; + + /* + * on both UP and SMP, we can only write to the PMC when the task is + * the owner of the local PMU. + */ + if (is_loaded) { + thread = &task->thread; + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. + */ + if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; + } + + /* + * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w + * ensuring that no real breakpoint can be installed via this call. 
+ * + * IMPORTANT: regs can be NULL in this function + */ + + first_time = ctx->ctx_fl_using_dbreg == 0; + + /* + * don't bother if we are loaded and task is being debugged + */ + if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { + DPRINT(("debug registers already in use for [%d]\n", task_pid_nr(task))); + return -EBUSY; + } + + /* + * check for debug registers in system wide mode + * + * If though a check is done in pfm_context_load(), + * we must repeat it here, in case the registers are + * written after the context is loaded + */ + if (is_loaded) { + LOCK_PFS(flags); + + if (first_time && is_system) { + if (pfm_sessions.pfs_ptrace_use_dbregs) + ret = -EBUSY; + else + pfm_sessions.pfs_sys_use_dbregs++; + } + UNLOCK_PFS(flags); + } + + if (ret != 0) return ret; + + /* + * mark ourself as user of the debug registers for + * perfmon purposes. + */ + ctx->ctx_fl_using_dbreg = 1; + + /* + * clear hardware registers to make sure we don't + * pick up stale state. + * + * for a system wide session, we do not use + * thread.dbr, thread.ibr because this process + * never leaves the current CPU and the state + * is shared by all processes running on it + */ + if (first_time && can_access_pmu) { + DPRINT(("[%d] clearing ibrs, dbrs\n", task_pid_nr(task))); + for (i=0; i < pmu_conf->num_ibrs; i++) { + ia64_set_ibr(i, 0UL); + ia64_dv_serialize_instruction(); + } + ia64_srlz_i(); + for (i=0; i < pmu_conf->num_dbrs; i++) { + ia64_set_dbr(i, 0UL); + ia64_dv_serialize_data(); + } + ia64_srlz_d(); + } + + /* + * Now install the values into the registers + */ + for (i = 0; i < count; i++, req++) { + + rnum = req->dbreg_num; + dbreg.val = req->dbreg_value; + + ret = -EINVAL; + + if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) { + DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n", + rnum, dbreg.val, mode, i, count)); + + goto abort_mission; + } + + /* + * make sure we do not install enabled breakpoint + 
*/ + if (rnum & 0x1) { + if (mode == PFM_CODE_RR) + dbreg.ibr.ibr_x = 0; + else + dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0; + } + + PFM_REG_RETFLAG_SET(req->dbreg_flags, 0); + + /* + * Debug registers, just like PMC, can only be modified + * by a kernel call. Moreover, perfmon() access to those + * registers are centralized in this routine. The hardware + * does not modify the value of these registers, therefore, + * if we save them as they are written, we can avoid having + * to save them on context switch out. This is made possible + * by the fact that when perfmon uses debug registers, ptrace() + * won't be able to modify them concurrently. + */ + if (mode == PFM_CODE_RR) { + CTX_USED_IBR(ctx, rnum); + + if (can_access_pmu) { + ia64_set_ibr(rnum, dbreg.val); + ia64_dv_serialize_instruction(); + } + + ctx->ctx_ibrs[rnum] = dbreg.val; + + DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n", + rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu)); + } else { + CTX_USED_DBR(ctx, rnum); + + if (can_access_pmu) { + ia64_set_dbr(rnum, dbreg.val); + ia64_dv_serialize_data(); + } + ctx->ctx_dbrs[rnum] = dbreg.val; + + DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n", + rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu)); + } + } + + return 0; + +abort_mission: + /* + * in case it was our first attempt, we undo the global modifications + */ + if (first_time) { + LOCK_PFS(flags); + if (ctx->ctx_fl_system) { + pfm_sessions.pfs_sys_use_dbregs--; + } + UNLOCK_PFS(flags); + ctx->ctx_fl_using_dbreg = 0; + } + /* + * install error return flag + */ + PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL); + + return ret; +} + +static int +pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs); +} + +static int +pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, 
count, regs); +} + +int +pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) +{ + pfm_context_t *ctx; + + if (req == NULL) return -EINVAL; + + ctx = GET_PMU_CTX(); + + if (ctx == NULL) return -EINVAL; + + /* + * for now limit to current task, which is enough when calling + * from overflow handler + */ + if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; + + return pfm_write_ibrs(ctx, req, nreq, regs); +} +EXPORT_SYMBOL(pfm_mod_write_ibrs); + +int +pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) +{ + pfm_context_t *ctx; + + if (req == NULL) return -EINVAL; + + ctx = GET_PMU_CTX(); + + if (ctx == NULL) return -EINVAL; + + /* + * for now limit to current task, which is enough when calling + * from overflow handler + */ + if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; + + return pfm_write_dbrs(ctx, req, nreq, regs); +} +EXPORT_SYMBOL(pfm_mod_write_dbrs); + + +static int +pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + pfarg_features_t *req = (pfarg_features_t *)arg; + + req->ft_version = PFM_VERSION; + return 0; +} + +static int +pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct pt_regs *tregs; + struct task_struct *task = PFM_CTX_TASK(ctx); + int state, is_system; + + state = ctx->ctx_state; + is_system = ctx->ctx_fl_system; + + /* + * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE) + */ + if (state == PFM_CTX_UNLOADED) return -EINVAL; + + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. 
+ */ + if (is_system && ctx->ctx_cpu != smp_processor_id()) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + DPRINT(("task [%d] ctx_state=%d is_system=%d\n", + task_pid_nr(PFM_CTX_TASK(ctx)), + state, + is_system)); + /* + * in system mode, we need to update the PMU directly + * and the user level state of the caller, which may not + * necessarily be the creator of the context. + */ + if (is_system) { + /* + * Update local PMU first + * + * disable dcr pp + */ + ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); + ia64_srlz_i(); + + /* + * update local cpuinfo + */ + PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); + + /* + * stop monitoring, does srlz.i + */ + pfm_clear_psr_pp(); + + /* + * stop monitoring in the caller + */ + ia64_psr(regs)->pp = 0; + + return 0; + } + /* + * per-task mode + */ + + if (task == current) { + /* stop monitoring at kernel level */ + pfm_clear_psr_up(); + + /* + * stop monitoring at the user level + */ + ia64_psr(regs)->up = 0; + } else { + tregs = task_pt_regs(task); + + /* + * stop monitoring at the user level + */ + ia64_psr(tregs)->up = 0; + + /* + * monitoring disabled in kernel at next reschedule + */ + ctx->ctx_saved_psr_up = 0; + DPRINT(("task=[%d]\n", task_pid_nr(task))); + } + return 0; +} + + +static int +pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct pt_regs *tregs; + int state, is_system; + + state = ctx->ctx_state; + is_system = ctx->ctx_fl_system; + + if (state != PFM_CTX_LOADED) return -EINVAL; + + /* + * In system wide and when the context is loaded, access can only happen + * when the caller is running on the CPU being monitored by the session. + * It does not have to be the owner (ctx_task) of the context per se. 
+ */ + if (is_system && ctx->ctx_cpu != smp_processor_id()) { + DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); + return -EBUSY; + } + + /* + * in system mode, we need to update the PMU directly + * and the user level state of the caller, which may not + * necessarily be the creator of the context. + */ + if (is_system) { + + /* + * set user level psr.pp for the caller + */ + ia64_psr(regs)->pp = 1; + + /* + * now update the local PMU and cpuinfo + */ + PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP); + + /* + * start monitoring at kernel level + */ + pfm_set_psr_pp(); + + /* enable dcr pp */ + ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); + ia64_srlz_i(); + + return 0; + } + + /* + * per-process mode + */ + + if (ctx->ctx_task == current) { + + /* start monitoring at kernel level */ + pfm_set_psr_up(); + + /* + * activate monitoring at user level + */ + ia64_psr(regs)->up = 1; + + } else { + tregs = task_pt_regs(ctx->ctx_task); + + /* + * start monitoring at the kernel level the next + * time the task is scheduled + */ + ctx->ctx_saved_psr_up = IA64_PSR_UP; + + /* + * activate monitoring at user level + */ + ia64_psr(tregs)->up = 1; + } + return 0; +} + +static int +pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + pfarg_reg_t *req = (pfarg_reg_t *)arg; + unsigned int cnum; + int i; + int ret = -EINVAL; + + for (i = 0; i < count; i++, req++) { + + cnum = req->reg_num; + + if (!PMC_IS_IMPL(cnum)) goto abort_mission; + + req->reg_value = PMC_DFL_VAL(cnum); + + PFM_REG_RETFLAG_SET(req->reg_flags, 0); + + DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value)); + } + return 0; + +abort_mission: + PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); + return ret; +} + +static int +pfm_check_task_exist(pfm_context_t *ctx) +{ + struct task_struct *g, *t; + int ret = -ESRCH; + + read_lock(&tasklist_lock); + + do_each_thread (g, t) { + if (t->thread.pfm_context == ctx) { + ret = 0; + goto out; + } 
+ } while_each_thread (g, t); +out: + read_unlock(&tasklist_lock); + + DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx)); + + return ret; +} + +static int +pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct task_struct *task; + struct thread_struct *thread; + struct pfm_context_t *old; + unsigned long flags; +#ifndef CONFIG_SMP + struct task_struct *owner_task = NULL; +#endif + pfarg_load_t *req = (pfarg_load_t *)arg; + unsigned long *pmcs_source, *pmds_source; + int the_cpu; + int ret = 0; + int state, is_system, set_dbregs = 0; + + state = ctx->ctx_state; + is_system = ctx->ctx_fl_system; + /* + * can only load from unloaded or terminated state + */ + if (state != PFM_CTX_UNLOADED) { + DPRINT(("cannot load to [%d], invalid ctx_state=%d\n", + req->load_pid, + ctx->ctx_state)); + return -EBUSY; + } + + DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg)); + + if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) { + DPRINT(("cannot use blocking mode on self\n")); + return -EINVAL; + } + + ret = pfm_get_task(ctx, req->load_pid, &task); + if (ret) { + DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret)); + return ret; + } + + ret = -EINVAL; + + /* + * system wide is self monitoring only + */ + if (is_system && task != current) { + DPRINT(("system wide is self monitoring only load_pid=%d\n", + req->load_pid)); + goto error; + } + + thread = &task->thread; + + ret = 0; + /* + * cannot load a context which is using range restrictions, + * into a task that is being debugged. 
+ */ + if (ctx->ctx_fl_using_dbreg) { + if (thread->flags & IA64_THREAD_DBG_VALID) { + ret = -EBUSY; + DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid)); + goto error; + } + LOCK_PFS(flags); + + if (is_system) { + if (pfm_sessions.pfs_ptrace_use_dbregs) { + DPRINT(("cannot load [%d] dbregs in use\n", + task_pid_nr(task))); + ret = -EBUSY; + } else { + pfm_sessions.pfs_sys_use_dbregs++; + DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task_pid_nr(task), pfm_sessions.pfs_sys_use_dbregs)); + set_dbregs = 1; + } + } + + UNLOCK_PFS(flags); + + if (ret) goto error; + } + + /* + * SMP system-wide monitoring implies self-monitoring. + * + * The programming model expects the task to + * be pinned on a CPU throughout the session. + * Here we take note of the current CPU at the + * time the context is loaded. No call from + * another CPU will be allowed. + * + * The pinning via shed_setaffinity() + * must be done by the calling task prior + * to this call. + * + * systemwide: keep track of CPU this session is supposed to run on + */ + the_cpu = ctx->ctx_cpu = smp_processor_id(); + + ret = -EBUSY; + /* + * now reserve the session + */ + ret = pfm_reserve_session(current, is_system, the_cpu); + if (ret) goto error; + + /* + * task is necessarily stopped at this point. + * + * If the previous context was zombie, then it got removed in + * pfm_save_regs(). Therefore we should not see it here. 
+ * If we see a context, then this is an active context + * + * XXX: needs to be atomic + */ + DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n", + thread->pfm_context, ctx)); + + ret = -EBUSY; + old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *)); + if (old != NULL) { + DPRINT(("load_pid [%d] already has a context\n", req->load_pid)); + goto error_unres; + } + + pfm_reset_msgq(ctx); + + ctx->ctx_state = PFM_CTX_LOADED; + + /* + * link context to task + */ + ctx->ctx_task = task; + + if (is_system) { + /* + * we load as stopped + */ + PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE); + PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); + + if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE); + } else { + thread->flags |= IA64_THREAD_PM_VALID; + } + + /* + * propagate into thread-state + */ + pfm_copy_pmds(task, ctx); + pfm_copy_pmcs(task, ctx); + + pmcs_source = ctx->th_pmcs; + pmds_source = ctx->th_pmds; + + /* + * always the case for system-wide + */ + if (task == current) { + + if (is_system == 0) { + + /* allow user level control */ + ia64_psr(regs)->sp = 0; + DPRINT(("clearing psr.sp for [%d]\n", task_pid_nr(task))); + + SET_LAST_CPU(ctx, smp_processor_id()); + INC_ACTIVATION(); + SET_ACTIVATION(ctx); +#ifndef CONFIG_SMP + /* + * push the other task out, if any + */ + owner_task = GET_PMU_OWNER(); + if (owner_task) pfm_lazy_save_regs(owner_task); +#endif + } + /* + * load all PMD from ctx to PMU (as opposed to thread state) + * restore all PMC from ctx to PMU + */ + pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]); + pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]); + + ctx->ctx_reload_pmcs[0] = 0UL; + ctx->ctx_reload_pmds[0] = 0UL; + + /* + * guaranteed safe by earlier check against DBG_VALID + */ + if (ctx->ctx_fl_using_dbreg) { + pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); + pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); + } + /* + * set new ownership + */ + SET_PMU_OWNER(task, ctx); + + DPRINT(("context loaded on 
PMU for [%d]\n", task_pid_nr(task))); + } else { + /* + * when not current, task MUST be stopped, so this is safe + */ + regs = task_pt_regs(task); + + /* force a full reload */ + ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; + SET_LAST_CPU(ctx, -1); + + /* initial saved psr (stopped) */ + ctx->ctx_saved_psr_up = 0UL; + ia64_psr(regs)->up = ia64_psr(regs)->pp = 0; + } + + ret = 0; + +error_unres: + if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu); +error: + /* + * we must undo the dbregs setting (for system-wide) + */ + if (ret && set_dbregs) { + LOCK_PFS(flags); + pfm_sessions.pfs_sys_use_dbregs--; + UNLOCK_PFS(flags); + } + /* + * release task, there is now a link with the context + */ + if (is_system == 0 && task != current) { + pfm_put_task(task); + + if (ret == 0) { + ret = pfm_check_task_exist(ctx); + if (ret) { + ctx->ctx_state = PFM_CTX_UNLOADED; + ctx->ctx_task = NULL; + } + } + } + return ret; +} + +/* + * in this function, we do not need to increase the use count + * for the task via get_task_struct(), because we hold the + * context lock. If the task were to disappear while having + * a context attached, it would go through pfm_exit_thread() + * which also grabs the context lock and would therefore be blocked + * until we are here. + */ +static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx); + +static int +pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) +{ + struct task_struct *task = PFM_CTX_TASK(ctx); + struct pt_regs *tregs; + int prev_state, is_system; + int ret; + + DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? 
task_pid_nr(task) : -1)); + + prev_state = ctx->ctx_state; + is_system = ctx->ctx_fl_system; + + /* + * unload only when necessary + */ + if (prev_state == PFM_CTX_UNLOADED) { + DPRINT(("ctx_state=%d, nothing to do\n", prev_state)); + return 0; + } + + /* + * clear psr and dcr bits + */ + ret = pfm_stop(ctx, NULL, 0, regs); + if (ret) return ret; + + ctx->ctx_state = PFM_CTX_UNLOADED; + + /* + * in system mode, we need to update the PMU directly + * and the user level state of the caller, which may not + * necessarily be the creator of the context. + */ + if (is_system) { + + /* + * Update cpuinfo + * + * local PMU is taken care of in pfm_stop() + */ + PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE); + PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE); + + /* + * save PMDs in context + * release ownership + */ + pfm_flush_pmds(current, ctx); + + /* + * at this point we are done with the PMU + * so we can unreserve the resource. + */ + if (prev_state != PFM_CTX_ZOMBIE) + pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu); + + /* + * disconnect context from task + */ + task->thread.pfm_context = NULL; + /* + * disconnect task from context + */ + ctx->ctx_task = NULL; + + /* + * There is nothing more to cleanup here. + */ + return 0; + } + + /* + * per-task mode + */ + tregs = task == current ? regs : task_pt_regs(task); + + if (task == current) { + /* + * cancel user level control + */ + ia64_psr(regs)->sp = 1; + + DPRINT(("setting psr.sp for [%d]\n", task_pid_nr(task))); + } + /* + * save PMDs to context + * release ownership + */ + pfm_flush_pmds(task, ctx); + + /* + * at this point we are done with the PMU + * so we can unreserve the resource. + * + * when state was ZOMBIE, we have already unreserved. 
+ */ + if (prev_state != PFM_CTX_ZOMBIE) + pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu); + + /* + * reset activation counter and psr + */ + ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; + SET_LAST_CPU(ctx, -1); + + /* + * PMU state will not be restored + */ + task->thread.flags &= ~IA64_THREAD_PM_VALID; + + /* + * break links between context and task + */ + task->thread.pfm_context = NULL; + ctx->ctx_task = NULL; + + PFM_SET_WORK_PENDING(task, 0); + + ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; + ctx->ctx_fl_can_restart = 0; + ctx->ctx_fl_going_zombie = 0; + + DPRINT(("disconnected [%d] from context\n", task_pid_nr(task))); + + return 0; +} + + +/* + * called only from exit_thread(): task == current + * we come here only if current has a context attached (loaded or masked) + */ +void +pfm_exit_thread(struct task_struct *task) +{ + pfm_context_t *ctx; + unsigned long flags; + struct pt_regs *regs = task_pt_regs(task); + int ret, state; + int free_ok = 0; + + ctx = PFM_GET_CTX(task); + + PROTECT_CTX(ctx, flags); + + DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task_pid_nr(task))); + + state = ctx->ctx_state; + switch(state) { + case PFM_CTX_UNLOADED: + /* + * only comes to this function if pfm_context is not NULL, i.e., cannot + * be in unloaded state + */ + printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task_pid_nr(task)); + break; + case PFM_CTX_LOADED: + case PFM_CTX_MASKED: + ret = pfm_context_unload(ctx, NULL, 0, regs); + if (ret) { + printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret); + } + DPRINT(("ctx unloaded for current state was %d\n", state)); + + pfm_end_notify_user(ctx); + break; + case PFM_CTX_ZOMBIE: + ret = pfm_context_unload(ctx, NULL, 0, regs); + if (ret) { + printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret); + } + free_ok = 1; + break; + default: + printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected 
state=%d\n", task_pid_nr(task), state); + break; + } + UNPROTECT_CTX(ctx, flags); + + { u64 psr = pfm_get_psr(); + BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); + BUG_ON(GET_PMU_OWNER()); + BUG_ON(ia64_psr(regs)->up); + BUG_ON(ia64_psr(regs)->pp); + } + + /* + * All memory free operations (especially for vmalloc'ed memory) + * MUST be done with interrupts ENABLED. + */ + if (free_ok) pfm_context_free(ctx); +} + +/* + * functions MUST be listed in the increasing order of their index (see permfon.h) + */ +#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz } +#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL } +#define PFM_CMD_PCLRWS (PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP) +#define PFM_CMD_PCLRW (PFM_CMD_FD|PFM_CMD_ARG_RW) +#define PFM_CMD_NONE { NULL, "no-cmd", 0, 0, 0, NULL} + +static pfm_cmd_desc_t pfm_cmd_tab[]={ +/* 0 */PFM_CMD_NONE, +/* 1 */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), +/* 2 */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), +/* 3 */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), +/* 4 */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS), +/* 5 */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS), +/* 6 */PFM_CMD_NONE, +/* 7 */PFM_CMD_NONE, +/* 8 */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize), +/* 9 */PFM_CMD_NONE, +/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW), +/* 11 */PFM_CMD_NONE, +/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL), +/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL), +/* 14 */PFM_CMD_NONE, +/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), +/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL), +/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS), +/* 18 */PFM_CMD_NONE, +/* 19 */PFM_CMD_NONE, +/* 20 */PFM_CMD_NONE, +/* 21 */PFM_CMD_NONE, +/* 22 */PFM_CMD_NONE, +/* 23 
*/PFM_CMD_NONE, +/* 24 */PFM_CMD_NONE, +/* 25 */PFM_CMD_NONE, +/* 26 */PFM_CMD_NONE, +/* 27 */PFM_CMD_NONE, +/* 28 */PFM_CMD_NONE, +/* 29 */PFM_CMD_NONE, +/* 30 */PFM_CMD_NONE, +/* 31 */PFM_CMD_NONE, +/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL), +/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL) +}; +#define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t)) + +static int +pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags) +{ + struct task_struct *task; + int state, old_state; + +recheck: + state = ctx->ctx_state; + task = ctx->ctx_task; + + if (task == NULL) { + DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state)); + return 0; + } + + DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n", + ctx->ctx_fd, + state, + task_pid_nr(task), + task->state, PFM_CMD_STOPPED(cmd))); + + /* + * self-monitoring always ok. + * + * for system-wide the caller can either be the creator of the + * context (to one to which the context is attached to) OR + * a task running on the same CPU as the session. + */ + if (task == current || ctx->ctx_fl_system) return 0; + + /* + * we are monitoring another thread + */ + switch(state) { + case PFM_CTX_UNLOADED: + /* + * if context is UNLOADED we are safe to go + */ + return 0; + case PFM_CTX_ZOMBIE: + /* + * no command can operate on a zombie context + */ + DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); + return -EINVAL; + case PFM_CTX_MASKED: + /* + * PMU state has been saved to software even though + * the thread may still be running. + */ + if (cmd != PFM_UNLOAD_CONTEXT) return 0; + } + + /* + * context is LOADED or MASKED. Some commands may need to have + * the task stopped. + * + * We could lift this restriction for UP but it would mean that + * the user has no guarantee the task would not run between + * two successive calls to perfmonctl(). That's probably OK. 
+ * If this user wants to ensure the task does not run, then + * the task must be stopped. + */ + if (PFM_CMD_STOPPED(cmd)) { + if (!task_is_stopped_or_traced(task)) { + DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task))); + return -EBUSY; + } + /* + * task is now stopped, wait for ctxsw out + * + * This is an interesting point in the code. + * We need to unprotect the context because + * the pfm_save_regs() routines needs to grab + * the same lock. There are danger in doing + * this because it leaves a window open for + * another task to get access to the context + * and possibly change its state. The one thing + * that is not possible is for the context to disappear + * because we are protected by the VFS layer, i.e., + * get_fd()/put_fd(). + */ + old_state = state; + + UNPROTECT_CTX(ctx, flags); + + wait_task_inactive(task, 0); + + PROTECT_CTX(ctx, flags); + + /* + * we must recheck to verify if state has changed + */ + if (ctx->ctx_state != old_state) { + DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state)); + goto recheck; + } + } + return 0; +} + +/* + * system-call entry point (must return long) + */ +asmlinkage long +sys_perfmonctl (int fd, int cmd, void __user *arg, int count) +{ + struct fd f = {NULL, 0}; + pfm_context_t *ctx = NULL; + unsigned long flags = 0UL; + void *args_k = NULL; + long ret; /* will expand int return types */ + size_t base_sz, sz, xtra_sz = 0; + int narg, completed_args = 0, call_made = 0, cmd_flags; + int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); + int (*getsize)(void *arg, size_t *sz); +#define PFM_MAX_ARGSIZE 4096 + + /* + * reject any call if perfmon was disabled at initialization + */ + if (unlikely(pmu_conf == NULL)) return -ENOSYS; + + if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) { + DPRINT(("invalid cmd=%d\n", cmd)); + return -EINVAL; + } + + func = pfm_cmd_tab[cmd].cmd_func; + narg = pfm_cmd_tab[cmd].cmd_narg; + base_sz = pfm_cmd_tab[cmd].cmd_argsize; + getsize = 
pfm_cmd_tab[cmd].cmd_getsize; + cmd_flags = pfm_cmd_tab[cmd].cmd_flags; + + if (unlikely(func == NULL)) { + DPRINT(("invalid cmd=%d\n", cmd)); + return -EINVAL; + } + + DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n", + PFM_CMD_NAME(cmd), + cmd, + narg, + base_sz, + count)); + + /* + * check if number of arguments matches what the command expects + */ + if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count))) + return -EINVAL; + +restart_args: + sz = xtra_sz + base_sz*count; + /* + * limit abuse to min page size + */ + if (unlikely(sz > PFM_MAX_ARGSIZE)) { + printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", task_pid_nr(current), sz); + return -E2BIG; + } + + /* + * allocate default-sized argument buffer + */ + if (likely(count && args_k == NULL)) { + args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL); + if (args_k == NULL) return -ENOMEM; + } + + ret = -EFAULT; + + /* + * copy arguments + * + * assume sz = 0 for command without parameters + */ + if (sz && copy_from_user(args_k, arg, sz)) { + DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg)); + goto error_args; + } + + /* + * check if command supports extra parameters + */ + if (completed_args == 0 && getsize) { + /* + * get extra parameters size (based on main argument) + */ + ret = (*getsize)(args_k, &xtra_sz); + if (ret) goto error_args; + + completed_args = 1; + + DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz)); + + /* retry if necessary */ + if (likely(xtra_sz)) goto restart_args; + } + + if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd; + + ret = -EBADF; + + f = fdget(fd); + if (unlikely(f.file == NULL)) { + DPRINT(("invalid fd %d\n", fd)); + goto error_args; + } + if (unlikely(PFM_IS_FILE(f.file) == 0)) { + DPRINT(("fd %d not related to perfmon\n", fd)); + goto error_args; + } + + ctx = f.file->private_data; + if (unlikely(ctx == NULL)) { + DPRINT(("no context for fd %d\n", fd)); + goto error_args; + } + prefetch(&ctx->ctx_state); + + 
PROTECT_CTX(ctx, flags); + + /* + * check task is stopped + */ + ret = pfm_check_task_state(ctx, cmd, flags); + if (unlikely(ret)) goto abort_locked; + +skip_fd: + ret = (*func)(ctx, args_k, count, task_pt_regs(current)); + + call_made = 1; + +abort_locked: + if (likely(ctx)) { + DPRINT(("context unlocked\n")); + UNPROTECT_CTX(ctx, flags); + } + + /* copy argument back to user, if needed */ + if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; + +error_args: + if (f.file) + fdput(f); + + kfree(args_k); + + DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret)); + + return ret; +} + +static void +pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs) +{ + pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt; + pfm_ovfl_ctrl_t rst_ctrl; + int state; + int ret = 0; + + state = ctx->ctx_state; + /* + * Unlock sampling buffer and reset index atomically + * XXX: not really needed when blocking + */ + if (CTX_HAS_SMPL(ctx)) { + + rst_ctrl.bits.mask_monitoring = 0; + rst_ctrl.bits.reset_ovfl_pmds = 0; + + if (state == PFM_CTX_LOADED) + ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); + else + ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); + } else { + rst_ctrl.bits.mask_monitoring = 0; + rst_ctrl.bits.reset_ovfl_pmds = 1; + } + + if (ret == 0) { + if (rst_ctrl.bits.reset_ovfl_pmds) { + pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET); + } + if (rst_ctrl.bits.mask_monitoring == 0) { + DPRINT(("resuming monitoring\n")); + if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current); + } else { + DPRINT(("stopping monitoring\n")); + //pfm_stop_monitoring(current, regs); + } + ctx->ctx_state = PFM_CTX_LOADED; + } +} + +/* + * context MUST BE LOCKED when calling + * can only be called for current + */ +static void +pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs) +{ + int ret; + + DPRINT(("entering for [%d]\n", 
task_pid_nr(current))); + + ret = pfm_context_unload(ctx, NULL, 0, regs); + if (ret) { + printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", task_pid_nr(current), ret); + } + + /* + * and wakeup controlling task, indicating we are now disconnected + */ + wake_up_interruptible(&ctx->ctx_zombieq); + + /* + * given that context is still locked, the controlling + * task will only get access when we return from + * pfm_handle_work(). + */ +} + +static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds); + + /* + * pfm_handle_work() can be called with interrupts enabled + * (TIF_NEED_RESCHED) or disabled. The down_interruptible + * call may sleep, therefore we must re-enable interrupts + * to avoid deadlocks. It is safe to do so because this function + * is called ONLY when returning to user level (pUStk=1), in which case + * there is no risk of kernel stack overflow due to deep + * interrupt nesting. + */ +void +pfm_handle_work(void) +{ + pfm_context_t *ctx; + struct pt_regs *regs; + unsigned long flags, dummy_flags; + unsigned long ovfl_regs; + unsigned int reason; + int ret; + + ctx = PFM_GET_CTX(current); + if (ctx == NULL) { + printk(KERN_ERR "perfmon: [%d] has no PFM context\n", + task_pid_nr(current)); + return; + } + + PROTECT_CTX(ctx, flags); + + PFM_SET_WORK_PENDING(current, 0); + + regs = task_pt_regs(current); + + /* + * extract reason for being here and clear + */ + reason = ctx->ctx_fl_trap_reason; + ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; + ovfl_regs = ctx->ctx_ovfl_regs[0]; + + DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state)); + + /* + * must be done before we check for simple-reset mode + */ + if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) + goto do_zombie; + + //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking; + if (reason == PFM_TRAP_REASON_RESET) + goto skip_blocking; + + /* + * restore interrupt mask to what it was on entry. + * Could be enabled/diasbled. 
+ */ + UNPROTECT_CTX(ctx, flags); + + /* + * force interrupt enable because of down_interruptible() + */ + local_irq_enable(); + + DPRINT(("before block sleeping\n")); + + /* + * may go through without blocking on SMP systems + * if restart has been received already by the time we call down() + */ + ret = wait_for_completion_interruptible(&ctx->ctx_restart_done); + + DPRINT(("after block sleeping ret=%d\n", ret)); + + /* + * lock context and mask interrupts again + * We save flags into a dummy because we may have + * altered interrupts mask compared to entry in this + * function. + */ + PROTECT_CTX(ctx, dummy_flags); + + /* + * we need to read the ovfl_regs only after wake-up + * because we may have had pfm_write_pmds() in between + * and that can changed PMD values and therefore + * ovfl_regs is reset for these new PMD values. + */ + ovfl_regs = ctx->ctx_ovfl_regs[0]; + + if (ctx->ctx_fl_going_zombie) { +do_zombie: + DPRINT(("context is zombie, bailing out\n")); + pfm_context_force_terminate(ctx, regs); + goto nothing_to_do; + } + /* + * in case of interruption of down() we don't restart anything + */ + if (ret < 0) + goto nothing_to_do; + +skip_blocking: + pfm_resume_after_ovfl(ctx, ovfl_regs, regs); + ctx->ctx_ovfl_regs[0] = 0UL; + +nothing_to_do: + /* + * restore flags as they were upon entry + */ + UNPROTECT_CTX(ctx, flags); +} + +static int +pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg) +{ + if (ctx->ctx_state == PFM_CTX_ZOMBIE) { + DPRINT(("ignoring overflow notification, owner is zombie\n")); + return 0; + } + + DPRINT(("waking up somebody\n")); + + if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait); + + /* + * safe, we are not in intr handler, nor in ctxsw when + * we come here + */ + kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN); + + return 0; +} + +static int +pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds) +{ + pfm_msg_t *msg = NULL; + + if (ctx->ctx_fl_no_msg == 0) { + msg = pfm_get_new_msg(ctx); + if (msg == NULL) { + 
printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n"); + return -1; + } + + msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL; + msg->pfm_ovfl_msg.msg_ctx_fd = ctx->ctx_fd; + msg->pfm_ovfl_msg.msg_active_set = 0; + msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds; + msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL; + msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL; + msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL; + msg->pfm_ovfl_msg.msg_tstamp = 0UL; + } + + DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n", + msg, + ctx->ctx_fl_no_msg, + ctx->ctx_fd, + ovfl_pmds)); + + return pfm_notify_user(ctx, msg); +} + +static int +pfm_end_notify_user(pfm_context_t *ctx) +{ + pfm_msg_t *msg; + + msg = pfm_get_new_msg(ctx); + if (msg == NULL) { + printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n"); + return -1; + } + /* no leak */ + memset(msg, 0, sizeof(*msg)); + + msg->pfm_end_msg.msg_type = PFM_MSG_END; + msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd; + msg->pfm_ovfl_msg.msg_tstamp = 0UL; + + DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n", + msg, + ctx->ctx_fl_no_msg, + ctx->ctx_fd)); + + return pfm_notify_user(ctx, msg); +} + +/* + * main overflow processing routine. + * it can be called from the interrupt path or explicitly during the context switch code + */ +static void pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, + unsigned long pmc0, struct pt_regs *regs) +{ + pfm_ovfl_arg_t *ovfl_arg; + unsigned long mask; + unsigned long old_val, ovfl_val, new_val; + unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds; + unsigned long tstamp; + pfm_ovfl_ctrl_t ovfl_ctrl; + unsigned int i, has_smpl; + int must_notify = 0; + + if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring; + + /* + * sanity test. 
Should never happen + */ + if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check; + + tstamp = ia64_get_itc(); + mask = pmc0 >> PMU_FIRST_COUNTER; + ovfl_val = pmu_conf->ovfl_val; + has_smpl = CTX_HAS_SMPL(ctx); + + DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s " + "used_pmds=0x%lx\n", + pmc0, + task ? task_pid_nr(task): -1, + (regs ? regs->cr_iip : 0), + CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking", + ctx->ctx_used_pmds[0])); + + + /* + * first we update the virtual counters + * assume there was a prior ia64_srlz_d() issued + */ + for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) { + + /* skip pmd which did not overflow */ + if ((mask & 0x1) == 0) continue; + + /* + * Note that the pmd is not necessarily 0 at this point as qualified events + * may have happened before the PMU was frozen. The residual count is not + * taken into consideration here but will be with any read of the pmd via + * pfm_read_pmds(). + */ + old_val = new_val = ctx->ctx_pmds[i].val; + new_val += 1 + ovfl_val; + ctx->ctx_pmds[i].val = new_val; + + /* + * check for overflow condition + */ + if (likely(old_val > new_val)) { + ovfl_pmds |= 1UL << i; + if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i; + } + + DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", + i, + new_val, + old_val, + ia64_get_pmd(i) & ovfl_val, + ovfl_pmds, + ovfl_notify)); + } + + /* + * there was no 64-bit overflow, nothing else to do + */ + if (ovfl_pmds == 0UL) return; + + /* + * reset all control bits + */ + ovfl_ctrl.val = 0; + reset_pmds = 0UL; + + /* + * if a sampling format module exists, then we "cache" the overflow by + * calling the module's handler() routine. 
+ */ + if (has_smpl) { + unsigned long start_cycles, end_cycles; + unsigned long pmd_mask; + int j, k, ret = 0; + int this_cpu = smp_processor_id(); + + pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER; + ovfl_arg = &ctx->ctx_ovfl_arg; + + prefetch(ctx->ctx_smpl_hdr); + + for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) { + + mask = 1UL << i; + + if ((pmd_mask & 0x1) == 0) continue; + + ovfl_arg->ovfl_pmd = (unsigned char )i; + ovfl_arg->ovfl_notify = ovfl_notify & mask ? 1 : 0; + ovfl_arg->active_set = 0; + ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */ + ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0]; + + ovfl_arg->pmd_value = ctx->ctx_pmds[i].val; + ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval; + ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid; + + /* + * copy values of pmds of interest. Sampling format may copy them + * into sampling buffer. + */ + if (smpl_pmds) { + for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) { + if ((smpl_pmds & 0x1) == 0) continue; + ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j); + DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1])); + } + } + + pfm_stats[this_cpu].pfm_smpl_handler_calls++; + + start_cycles = ia64_get_itc(); + + /* + * call custom buffer format record (handler) routine + */ + ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp); + + end_cycles = ia64_get_itc(); + + /* + * For those controls, we take the union because they have + * an all or nothing behavior. 
+ */ + ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user; + ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task; + ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring; + /* + * build the bitmask of pmds to reset now + */ + if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask; + + pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles; + } + /* + * when the module cannot handle the rest of the overflows, we abort right here + */ + if (ret && pmd_mask) { + DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n", + pmd_mask<ctx_ovfl_regs[0] = ovfl_pmds; + + /* + * check for blocking context + */ + if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) { + + ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK; + + /* + * set the perfmon specific checking pending work for the task + */ + PFM_SET_WORK_PENDING(task, 1); + + /* + * when coming from ctxsw, current still points to the + * previous task, therefore we must work with task and not current. + */ + set_notify_resume(task); + } + /* + * defer until state is changed (shorten spin window). the context is locked + * anyway, so the signal receiver would come spin for nothing. + */ + must_notify = 1; + } + + DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n", + GET_PMU_OWNER() ? task_pid_nr(GET_PMU_OWNER()) : -1, + PFM_GET_WORK_PENDING(task), + ctx->ctx_fl_trap_reason, + ovfl_pmds, + ovfl_notify, + ovfl_ctrl.bits.mask_monitoring ? 1 : 0)); + /* + * in case monitoring must be stopped, we toggle the psr bits + */ + if (ovfl_ctrl.bits.mask_monitoring) { + pfm_mask_monitoring(task); + ctx->ctx_state = PFM_CTX_MASKED; + ctx->ctx_fl_can_restart = 1; + } + + /* + * send notification now + */ + if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify); + + return; + +sanity_check: + printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n", + smp_processor_id(), + task ? 
task_pid_nr(task) : -1, + pmc0); + return; + +stop_monitoring: + /* + * in SMP, zombie context is never restored but reclaimed in pfm_load_regs(). + * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can + * come here as zombie only if the task is the current task. In which case, we + * can access the PMU hardware directly. + * + * Note that zombies do have PM_VALID set. So here we do the minimal. + * + * In case the context was zombified it could not be reclaimed at the time + * the monitoring program exited. At this point, the PMU reservation has been + * returned, the sampiing buffer has been freed. We must convert this call + * into a spurious interrupt. However, we must also avoid infinite overflows + * by stopping monitoring for this task. We can only come here for a per-task + * context. All we need to do is to stop monitoring using the psr bits which + * are always task private. By re-enabling secure montioring, we ensure that + * the monitored task will not be able to re-activate monitoring. + * The task will eventually be context switched out, at which point the context + * will be reclaimed (that includes releasing ownership of the PMU). + * + * So there might be a window of time where the number of per-task session is zero + * yet one PMU might have a owner and get at most one overflow interrupt for a zombie + * context. This is safe because if a per-task session comes in, it will push this one + * out and by the virtue on pfm_save_regs(), this one will disappear. If a system wide + * session is force on that CPU, given that we use task pinning, pfm_save_regs() will + * also push our zombie context out. + * + * Overall pretty hairy stuff.... + */ + DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? 
task_pid_nr(task): -1)); + pfm_clear_psr_up(); + ia64_psr(regs)->up = 0; + ia64_psr(regs)->sp = 1; + return; +} + +static int +pfm_do_interrupt_handler(void *arg, struct pt_regs *regs) +{ + struct task_struct *task; + pfm_context_t *ctx; + unsigned long flags; + u64 pmc0; + int this_cpu = smp_processor_id(); + int retval = 0; + + pfm_stats[this_cpu].pfm_ovfl_intr_count++; + + /* + * srlz.d done before arriving here + */ + pmc0 = ia64_get_pmc(0); + + task = GET_PMU_OWNER(); + ctx = GET_PMU_CTX(); + + /* + * if we have some pending bits set + * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1 + */ + if (PMC0_HAS_OVFL(pmc0) && task) { + /* + * we assume that pmc0.fr is always set here + */ + + /* sanity check */ + if (!ctx) goto report_spurious1; + + if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) + goto report_spurious2; + + PROTECT_CTX_NOPRINT(ctx, flags); + + pfm_overflow_handler(task, ctx, pmc0, regs); + + UNPROTECT_CTX_NOPRINT(ctx, flags); + + } else { + pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++; + retval = -1; + } + /* + * keep it unfrozen at all times + */ + pfm_unfreeze_pmu(); + + return retval; + +report_spurious1: + printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n", + this_cpu, task_pid_nr(task)); + pfm_unfreeze_pmu(); + return -1; +report_spurious2: + printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", + this_cpu, + task_pid_nr(task)); + pfm_unfreeze_pmu(); + return -1; +} + +static irqreturn_t +pfm_interrupt_handler(int irq, void *arg) +{ + unsigned long start_cycles, total_cycles; + unsigned long min, max; + int this_cpu; + int ret; + struct pt_regs *regs = get_irq_regs(); + + this_cpu = get_cpu(); + if (likely(!pfm_alt_intr_handler)) { + min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; + max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; + + start_cycles = ia64_get_itc(); + + ret = pfm_do_interrupt_handler(arg, 
regs); + + total_cycles = ia64_get_itc(); + + /* + * don't measure spurious interrupts + */ + if (likely(ret == 0)) { + total_cycles -= start_cycles; + + if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; + if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; + + pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; + } + } + else { + (*pfm_alt_intr_handler->handler)(irq, arg, regs); + } + + put_cpu(); + return IRQ_HANDLED; +} + +/* + * /proc/perfmon interface, for debug only + */ + +#define PFM_PROC_SHOW_HEADER ((void *)(long)nr_cpu_ids+1) + +static void * +pfm_proc_start(struct seq_file *m, loff_t *pos) +{ + if (*pos == 0) { + return PFM_PROC_SHOW_HEADER; + } + + while (*pos <= nr_cpu_ids) { + if (cpu_online(*pos - 1)) { + return (void *)*pos; + } + ++*pos; + } + return NULL; +} + +static void * +pfm_proc_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return pfm_proc_start(m, pos); +} + +static void +pfm_proc_stop(struct seq_file *m, void *v) +{ +} + +static void +pfm_proc_show_header(struct seq_file *m) +{ + struct list_head * pos; + pfm_buffer_fmt_t * entry; + unsigned long flags; + + seq_printf(m, + "perfmon version : %u.%u\n" + "model : %s\n" + "fastctxsw : %s\n" + "expert mode : %s\n" + "ovfl_mask : 0x%lx\n" + "PMU flags : 0x%x\n", + PFM_VERSION_MAJ, PFM_VERSION_MIN, + pmu_conf->pmu_name, + pfm_sysctl.fastctxsw > 0 ? "Yes": "No", + pfm_sysctl.expert_mode > 0 ? 
"Yes": "No", + pmu_conf->ovfl_val, + pmu_conf->flags); + + LOCK_PFS(flags); + + seq_printf(m, + "proc_sessions : %u\n" + "sys_sessions : %u\n" + "sys_use_dbregs : %u\n" + "ptrace_use_dbregs : %u\n", + pfm_sessions.pfs_task_sessions, + pfm_sessions.pfs_sys_sessions, + pfm_sessions.pfs_sys_use_dbregs, + pfm_sessions.pfs_ptrace_use_dbregs); + + UNLOCK_PFS(flags); + + spin_lock(&pfm_buffer_fmt_lock); + + list_for_each(pos, &pfm_buffer_fmt_list) { + entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); + seq_printf(m, "format : %16phD %s\n", + entry->fmt_uuid, entry->fmt_name); + } + spin_unlock(&pfm_buffer_fmt_lock); + +} + +static int +pfm_proc_show(struct seq_file *m, void *v) +{ + unsigned long psr; + unsigned int i; + int cpu; + + if (v == PFM_PROC_SHOW_HEADER) { + pfm_proc_show_header(m); + return 0; + } + + /* show info for CPU (v - 1) */ + + cpu = (long)v - 1; + seq_printf(m, + "CPU%-2d overflow intrs : %lu\n" + "CPU%-2d overflow cycles : %lu\n" + "CPU%-2d overflow min : %lu\n" + "CPU%-2d overflow max : %lu\n" + "CPU%-2d smpl handler calls : %lu\n" + "CPU%-2d smpl handler cycles : %lu\n" + "CPU%-2d spurious intrs : %lu\n" + "CPU%-2d replay intrs : %lu\n" + "CPU%-2d syst_wide : %d\n" + "CPU%-2d dcr_pp : %d\n" + "CPU%-2d exclude idle : %d\n" + "CPU%-2d owner : %d\n" + "CPU%-2d context : %p\n" + "CPU%-2d activations : %lu\n", + cpu, pfm_stats[cpu].pfm_ovfl_intr_count, + cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles, + cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min, + cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max, + cpu, pfm_stats[cpu].pfm_smpl_handler_calls, + cpu, pfm_stats[cpu].pfm_smpl_handler_cycles, + cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count, + cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count, + cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0, + cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0, + cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0, + cpu, pfm_get_cpu_data(pmu_owner, cpu) ? 
pfm_get_cpu_data(pmu_owner, cpu)->pid: -1, + cpu, pfm_get_cpu_data(pmu_ctx, cpu), + cpu, pfm_get_cpu_data(pmu_activation_number, cpu)); + + if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) { + + psr = pfm_get_psr(); + + ia64_srlz_d(); + + seq_printf(m, + "CPU%-2d psr : 0x%lx\n" + "CPU%-2d pmc0 : 0x%lx\n", + cpu, psr, + cpu, ia64_get_pmc(0)); + + for (i=0; PMC_IS_LAST(i) == 0; i++) { + if (PMC_IS_COUNTING(i) == 0) continue; + seq_printf(m, + "CPU%-2d pmc%u : 0x%lx\n" + "CPU%-2d pmd%u : 0x%lx\n", + cpu, i, ia64_get_pmc(i), + cpu, i, ia64_get_pmd(i)); + } + } + return 0; +} + +const struct seq_operations pfm_seq_ops = { + .start = pfm_proc_start, + .next = pfm_proc_next, + .stop = pfm_proc_stop, + .show = pfm_proc_show +}; + +static int +pfm_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &pfm_seq_ops); +} + + +/* + * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens + * during pfm_enable() hence before pfm_start(). We cannot assume monitoring + * is active or inactive based on mode. We must rely on the value in + * local_cpu_data->pfm_syst_info + */ +void +pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin) +{ + struct pt_regs *regs; + unsigned long dcr; + unsigned long dcr_pp; + + dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0; + + /* + * pid 0 is guaranteed to be the idle task. There is one such task with pid 0 + * on every CPU, so we can rely on the pid to identify the idle task. + */ + if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) { + regs = task_pt_regs(task); + ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0; + return; + } + /* + * if monitoring has started + */ + if (dcr_pp) { + dcr = ia64_getreg(_IA64_REG_CR_DCR); + /* + * context switching in? 
+ */ + if (is_ctxswin) { + /* mask monitoring for the idle task */ + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); + pfm_clear_psr_pp(); + ia64_srlz_i(); + return; + } + /* + * context switching out + * restore monitoring for next task + * + * Due to inlining this odd if-then-else construction generates + * better code. + */ + ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP); + pfm_set_psr_pp(); + ia64_srlz_i(); + } +} + +#ifdef CONFIG_SMP + +static void +pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs) +{ + struct task_struct *task = ctx->ctx_task; + + ia64_psr(regs)->up = 0; + ia64_psr(regs)->sp = 1; + + if (GET_PMU_OWNER() == task) { + DPRINT(("cleared ownership for [%d]\n", + task_pid_nr(ctx->ctx_task))); + SET_PMU_OWNER(NULL, NULL); + } + + /* + * disconnect the task from the context and vice-versa + */ + PFM_SET_WORK_PENDING(task, 0); + + task->thread.pfm_context = NULL; + task->thread.flags &= ~IA64_THREAD_PM_VALID; + + DPRINT(("force cleanup for [%d]\n", task_pid_nr(task))); +} + + +/* + * in 2.6, interrupts are masked when we come here and the runqueue lock is held + */ +void +pfm_save_regs(struct task_struct *task) +{ + pfm_context_t *ctx; + unsigned long flags; + u64 psr; + + + ctx = PFM_GET_CTX(task); + if (ctx == NULL) return; + + /* + * we always come here with interrupts ALREADY disabled by + * the scheduler. So we simply need to protect against concurrent + * access, not CPU concurrency. 
+ */ + flags = pfm_protect_ctx_ctxsw(ctx); + + if (ctx->ctx_state == PFM_CTX_ZOMBIE) { + struct pt_regs *regs = task_pt_regs(task); + + pfm_clear_psr_up(); + + pfm_force_cleanup(ctx, regs); + + BUG_ON(ctx->ctx_smpl_hdr); + + pfm_unprotect_ctx_ctxsw(ctx, flags); + + pfm_context_free(ctx); + return; + } + + /* + * save current PSR: needed because we modify it + */ + ia64_srlz_d(); + psr = pfm_get_psr(); + + BUG_ON(psr & (IA64_PSR_I)); + + /* + * stop monitoring: + * This is the last instruction which may generate an overflow + * + * We do not need to set psr.sp because, it is irrelevant in kernel. + * It will be restored from ipsr when going back to user level + */ + pfm_clear_psr_up(); + + /* + * keep a copy of psr.up (for reload) + */ + ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; + + /* + * release ownership of this PMU. + * PM interrupts are masked, so nothing + * can happen. + */ + SET_PMU_OWNER(NULL, NULL); + + /* + * we systematically save the PMD as we have no + * guarantee we will be schedule at that same + * CPU again. + */ + pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); + + /* + * save pmc0 ia64_srlz_d() done in pfm_save_pmds() + * we will need it on the restore path to check + * for pending overflow. + */ + ctx->th_pmcs[0] = ia64_get_pmc(0); + + /* + * unfreeze PMU if had pending overflows + */ + if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); + + /* + * finally, allow context access. + * interrupts will still be masked after this call. + */ + pfm_unprotect_ctx_ctxsw(ctx, flags); +} + +#else /* !CONFIG_SMP */ +void +pfm_save_regs(struct task_struct *task) +{ + pfm_context_t *ctx; + u64 psr; + + ctx = PFM_GET_CTX(task); + if (ctx == NULL) return; + + /* + * save current PSR: needed because we modify it + */ + psr = pfm_get_psr(); + + BUG_ON(psr & (IA64_PSR_I)); + + /* + * stop monitoring: + * This is the last instruction which may generate an overflow + * + * We do not need to set psr.sp because, it is irrelevant in kernel. 
+ * It will be restored from ipsr when going back to user level + */ + pfm_clear_psr_up(); + + /* + * keep a copy of psr.up (for reload) + */ + ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; +} + +static void +pfm_lazy_save_regs (struct task_struct *task) +{ + pfm_context_t *ctx; + unsigned long flags; + + { u64 psr = pfm_get_psr(); + BUG_ON(psr & IA64_PSR_UP); + } + + ctx = PFM_GET_CTX(task); + + /* + * we need to mask PMU overflow here to + * make sure that we maintain pmc0 until + * we save it. overflow interrupts are + * treated as spurious if there is no + * owner. + * + * XXX: I don't think this is necessary + */ + PROTECT_CTX(ctx,flags); + + /* + * release ownership of this PMU. + * must be done before we save the registers. + * + * after this call any PMU interrupt is treated + * as spurious. + */ + SET_PMU_OWNER(NULL, NULL); + + /* + * save all the pmds we use + */ + pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); + + /* + * save pmc0 ia64_srlz_d() done in pfm_save_pmds() + * it is needed to check for pended overflow + * on the restore path + */ + ctx->th_pmcs[0] = ia64_get_pmc(0); + + /* + * unfreeze PMU if had pending overflows + */ + if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); + + /* + * now get can unmask PMU interrupts, they will + * be treated as purely spurious and we will not + * lose any information + */ + UNPROTECT_CTX(ctx,flags); +} +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_SMP +/* + * in 2.6, interrupts are masked when we come here and the runqueue lock is held + */ +void +pfm_load_regs (struct task_struct *task) +{ + pfm_context_t *ctx; + unsigned long pmc_mask = 0UL, pmd_mask = 0UL; + unsigned long flags; + u64 psr, psr_up; + int need_irq_resend; + + ctx = PFM_GET_CTX(task); + if (unlikely(ctx == NULL)) return; + + BUG_ON(GET_PMU_OWNER()); + + /* + * possible on unload + */ + if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return; + + /* + * we always come here with interrupts ALREADY disabled by + * the scheduler. 
So we simply need to protect against concurrent + * access, not CPU concurrency. + */ + flags = pfm_protect_ctx_ctxsw(ctx); + psr = pfm_get_psr(); + + need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; + + BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); + BUG_ON(psr & IA64_PSR_I); + + if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) { + struct pt_regs *regs = task_pt_regs(task); + + BUG_ON(ctx->ctx_smpl_hdr); + + pfm_force_cleanup(ctx, regs); + + pfm_unprotect_ctx_ctxsw(ctx, flags); + + /* + * this one (kmalloc'ed) is fine with interrupts disabled + */ + pfm_context_free(ctx); + + return; + } + + /* + * we restore ALL the debug registers to avoid picking up + * stale state. + */ + if (ctx->ctx_fl_using_dbreg) { + pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); + pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); + } + /* + * retrieve saved psr.up + */ + psr_up = ctx->ctx_saved_psr_up; + + /* + * if we were the last user of the PMU on that CPU, + * then nothing to do except restore psr + */ + if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) { + + /* + * retrieve partial reload masks (due to user modifications) + */ + pmc_mask = ctx->ctx_reload_pmcs[0]; + pmd_mask = ctx->ctx_reload_pmds[0]; + + } else { + /* + * To avoid leaking information to the user level when psr.sp=0, + * we must reload ALL implemented pmds (even the ones we don't use). + * In the kernel we only allow PFM_READ_PMDS on registers which + * we initialized or requested (sampling) so there is no risk there. + */ + pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; + + /* + * ALL accessible PMCs are systematically reloaded, unused registers + * get their default (from pfm_reset_pmu_state()) values to avoid picking + * up stale configuration. + * + * PMC0 is never in the mask. It is always restored separately. 
+ */ + pmc_mask = ctx->ctx_all_pmcs[0]; + } + /* + * when context is MASKED, we will restore PMC with plm=0 + * and PMD with stale information, but that's ok, nothing + * will be captured. + * + * XXX: optimize here + */ + if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask); + if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); + + /* + * check for pending overflow at the time the state + * was saved. + */ + if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { + /* + * reload pmc0 with the overflow information + * On McKinley PMU, this will trigger a PMU interrupt + */ + ia64_set_pmc(0, ctx->th_pmcs[0]); + ia64_srlz_d(); + ctx->th_pmcs[0] = 0UL; + + /* + * will replay the PMU interrupt + */ + if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); + + pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; + } + + /* + * we just did a reload, so we reset the partial reload fields + */ + ctx->ctx_reload_pmcs[0] = 0UL; + ctx->ctx_reload_pmds[0] = 0UL; + + SET_LAST_CPU(ctx, smp_processor_id()); + + /* + * dump activation value for this PMU + */ + INC_ACTIVATION(); + /* + * record current activation for this context + */ + SET_ACTIVATION(ctx); + + /* + * establish new ownership. + */ + SET_PMU_OWNER(task, ctx); + + /* + * restore the psr.up bit. measurement + * is active again. + * no PMU interrupt can happen at this point + * because we still have interrupts disabled. 
+ */ + if (likely(psr_up)) pfm_set_psr_up(); + + /* + * allow concurrent access to context + */ + pfm_unprotect_ctx_ctxsw(ctx, flags); +} +#else /* !CONFIG_SMP */ +/* + * reload PMU state for UP kernels + * in 2.5 we come here with interrupts disabled + */ +void +pfm_load_regs (struct task_struct *task) +{ + pfm_context_t *ctx; + struct task_struct *owner; + unsigned long pmd_mask, pmc_mask; + u64 psr, psr_up; + int need_irq_resend; + + owner = GET_PMU_OWNER(); + ctx = PFM_GET_CTX(task); + psr = pfm_get_psr(); + + BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); + BUG_ON(psr & IA64_PSR_I); + + /* + * we restore ALL the debug registers to avoid picking up + * stale state. + * + * This must be done even when the task is still the owner + * as the registers may have been modified via ptrace() + * (not perfmon) by the previous task. + */ + if (ctx->ctx_fl_using_dbreg) { + pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); + pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); + } + + /* + * retrieved saved psr.up + */ + psr_up = ctx->ctx_saved_psr_up; + need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; + + /* + * short path, our state is still there, just + * need to restore psr and we go + * + * we do not touch either PMC nor PMD. the psr is not touched + * by the overflow_handler. So we are safe w.r.t. to interrupt + * concurrency even without interrupt masking. + */ + if (likely(owner == task)) { + if (likely(psr_up)) pfm_set_psr_up(); + return; + } + + /* + * someone else is still using the PMU, first push it out and + * then we'll be able to install our stuff ! + * + * Upon return, there will be no owner for the current PMU + */ + if (owner) pfm_lazy_save_regs(owner); + + /* + * To avoid leaking information to the user level when psr.sp=0, + * we must reload ALL implemented pmds (even the ones we don't use). + * In the kernel we only allow PFM_READ_PMDS on registers which + * we initialized or requested (sampling) so there is no risk there. 
+ */ + pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; + + /* + * ALL accessible PMCs are systematically reloaded, unused registers + * get their default (from pfm_reset_pmu_state()) values to avoid picking + * up stale configuration. + * + * PMC0 is never in the mask. It is always restored separately + */ + pmc_mask = ctx->ctx_all_pmcs[0]; + + pfm_restore_pmds(ctx->th_pmds, pmd_mask); + pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); + + /* + * check for pending overflow at the time the state + * was saved. + */ + if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { + /* + * reload pmc0 with the overflow information + * On McKinley PMU, this will trigger a PMU interrupt + */ + ia64_set_pmc(0, ctx->th_pmcs[0]); + ia64_srlz_d(); + + ctx->th_pmcs[0] = 0UL; + + /* + * will replay the PMU interrupt + */ + if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); + + pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; + } + + /* + * establish new ownership. + */ + SET_PMU_OWNER(task, ctx); + + /* + * restore the psr.up bit. measurement + * is active again. + * no PMU interrupt can happen at this point + * because we still have interrupts disabled. + */ + if (likely(psr_up)) pfm_set_psr_up(); +} +#endif /* CONFIG_SMP */ + +/* + * this function assumes monitoring is stopped + */ +static void +pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) +{ + u64 pmc0; + unsigned long mask2, val, pmd_val, ovfl_val; + int i, can_access_pmu = 0; + int is_self; + + /* + * is the caller the task being monitored (or which initiated the + * session for system wide measurements) + */ + is_self = ctx->ctx_task == task ? 1 : 0; + + /* + * can access PMU is task is the owner of the PMU state on the current CPU + * or if we are running on the CPU bound to the context in system-wide mode + * (that is not necessarily the task the context is attached to in this mode). 
+ * In system-wide we always have can_access_pmu true because a task running on an + * invalid processor is flagged earlier in the call stack (see pfm_stop). + */ + can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id()); + if (can_access_pmu) { + /* + * Mark the PMU as not owned + * This will cause the interrupt handler to do nothing in case an overflow + * interrupt was in-flight + * This also guarantees that pmc0 will contain the final state + * It virtually gives us full control on overflow processing from that point + * on. + */ + SET_PMU_OWNER(NULL, NULL); + DPRINT(("releasing ownership\n")); + + /* + * read current overflow status: + * + * we are guaranteed to read the final stable state + */ + ia64_srlz_d(); + pmc0 = ia64_get_pmc(0); /* slow */ + + /* + * reset freeze bit, overflow status information destroyed + */ + pfm_unfreeze_pmu(); + } else { + pmc0 = ctx->th_pmcs[0]; + /* + * clear whatever overflow status bits there were + */ + ctx->th_pmcs[0] = 0; + } + ovfl_val = pmu_conf->ovfl_val; + /* + * we save all the used pmds + * we take care of overflows for counting PMDs + * + * XXX: sampling situation is not taken into account here + */ + mask2 = ctx->ctx_used_pmds[0]; + + DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2)); + + for (i = 0; mask2; i++, mask2>>=1) { + + /* skip non used pmds */ + if ((mask2 & 0x1) == 0) continue; + + /* + * can access PMU always true in system wide mode + */ + val = pmd_val = can_access_pmu ? 
ia64_get_pmd(i) : ctx->th_pmds[i]; + + if (PMD_IS_COUNTING(i)) { + DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n", + task_pid_nr(task), + i, + ctx->ctx_pmds[i].val, + val & ovfl_val)); + + /* + * we rebuild the full 64 bit value of the counter + */ + val = ctx->ctx_pmds[i].val + (val & ovfl_val); + + /* + * now everything is in ctx_pmds[] and we need + * to clear the saved context from save_regs() such that + * pfm_read_pmds() gets the correct value + */ + pmd_val = 0UL; + + /* + * take care of overflow inline + */ + if (pmc0 & (1UL << i)) { + val += 1 + ovfl_val; + DPRINT(("[%d] pmd[%d] overflowed\n", task_pid_nr(task), i)); + } + } + + DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task_pid_nr(task), i, val, pmd_val)); + + if (is_self) ctx->th_pmds[i] = pmd_val; + + ctx->ctx_pmds[i].val = val; + } +} + +static struct irqaction perfmon_irqaction = { + .handler = pfm_interrupt_handler, + .name = "perfmon" +}; + +static void +pfm_alt_save_pmu_state(void *data) +{ + struct pt_regs *regs; + + regs = task_pt_regs(current); + + DPRINT(("called\n")); + + /* + * should not be necessary but + * let's take not risk + */ + pfm_clear_psr_up(); + pfm_clear_psr_pp(); + ia64_psr(regs)->pp = 0; + + /* + * This call is required + * May cause a spurious interrupt on some processors + */ + pfm_freeze_pmu(); + + ia64_srlz_d(); +} + +void +pfm_alt_restore_pmu_state(void *data) +{ + struct pt_regs *regs; + + regs = task_pt_regs(current); + + DPRINT(("called\n")); + + /* + * put PMU back in state expected + * by perfmon + */ + pfm_clear_psr_up(); + pfm_clear_psr_pp(); + ia64_psr(regs)->pp = 0; + + /* + * perfmon runs with PMU unfrozen at all times + */ + pfm_unfreeze_pmu(); + + ia64_srlz_d(); +} + +int +pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) +{ + int ret, i; + int reserve_cpu; + + /* some sanity checks */ + if (hdl == NULL || hdl->handler == NULL) return -EINVAL; + + /* do the easy test first */ + if (pfm_alt_intr_handler) return -EBUSY; + + /* one at a time 
in the install or remove, just fail the others */ + if (!spin_trylock(&pfm_alt_install_check)) { + return -EBUSY; + } + + /* reserve our session */ + for_each_online_cpu(reserve_cpu) { + ret = pfm_reserve_session(NULL, 1, reserve_cpu); + if (ret) goto cleanup_reserve; + } + + /* save the current system wide pmu states */ + ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1); + if (ret) { + DPRINT(("on_each_cpu() failed: %d\n", ret)); + goto cleanup_reserve; + } + + /* officially change to the alternate interrupt handler */ + pfm_alt_intr_handler = hdl; + + spin_unlock(&pfm_alt_install_check); + + return 0; + +cleanup_reserve: + for_each_online_cpu(i) { + /* don't unreserve more than we reserved */ + if (i >= reserve_cpu) break; + + pfm_unreserve_session(NULL, 1, i); + } + + spin_unlock(&pfm_alt_install_check); + + return ret; +} +EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt); + +int +pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) +{ + int i; + int ret; + + if (hdl == NULL) return -EINVAL; + + /* cannot remove someone else's handler! 
*/ + if (pfm_alt_intr_handler != hdl) return -EINVAL; + + /* one at a time in the install or remove, just fail the others */ + if (!spin_trylock(&pfm_alt_install_check)) { + return -EBUSY; + } + + pfm_alt_intr_handler = NULL; + + ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1); + if (ret) { + DPRINT(("on_each_cpu() failed: %d\n", ret)); + } + + for_each_online_cpu(i) { + pfm_unreserve_session(NULL, 1, i); + } + + spin_unlock(&pfm_alt_install_check); + + return 0; +} +EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt); + +/* + * perfmon initialization routine, called from the initcall() table + */ +static int init_pfm_fs(void); + +static int __init +pfm_probe_pmu(void) +{ + pmu_config_t **p; + int family; + + family = local_cpu_data->family; + p = pmu_confs; + + while(*p) { + if ((*p)->probe) { + if ((*p)->probe() == 0) goto found; + } else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) { + goto found; + } + p++; + } + return -1; +found: + pmu_conf = *p; + return 0; +} + +static const struct file_operations pfm_proc_fops = { + .open = pfm_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +int __init +pfm_init(void) +{ + unsigned int n, n_counters, i; + + printk("perfmon: version %u.%u IRQ %u\n", + PFM_VERSION_MAJ, + PFM_VERSION_MIN, + IA64_PERFMON_VECTOR); + + if (pfm_probe_pmu()) { + printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n", + local_cpu_data->family); + return -ENODEV; + } + + /* + * compute the number of implemented PMD/PMC from the + * description tables + */ + n = 0; + for (i=0; PMC_IS_LAST(i) == 0; i++) { + if (PMC_IS_IMPL(i) == 0) continue; + pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63); + n++; + } + pmu_conf->num_pmcs = n; + + n = 0; n_counters = 0; + for (i=0; PMD_IS_LAST(i) == 0; i++) { + if (PMD_IS_IMPL(i) == 0) continue; + pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63); + n++; + if (PMD_IS_COUNTING(i)) n_counters++; + } + pmu_conf->num_pmds = n; + 
pmu_conf->num_counters = n_counters; + + /* + * sanity checks on the number of debug registers + */ + if (pmu_conf->use_rr_dbregs) { + if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) { + printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs); + pmu_conf = NULL; + return -1; + } + if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) { + printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_dbrs); + pmu_conf = NULL; + return -1; + } + } + + printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n", + pmu_conf->pmu_name, + pmu_conf->num_pmcs, + pmu_conf->num_pmds, + pmu_conf->num_counters, + ffz(pmu_conf->ovfl_val)); + + /* sanity check */ + if (pmu_conf->num_pmds >= PFM_NUM_PMD_REGS || pmu_conf->num_pmcs >= PFM_NUM_PMC_REGS) { + printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n"); + pmu_conf = NULL; + return -1; + } + + /* + * create /proc/perfmon (mostly for debugging purposes) + */ + perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops); + if (perfmon_dir == NULL) { + printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n"); + pmu_conf = NULL; + return -1; + } + + /* + * create /proc/sys/kernel/perfmon (for debugging purposes) + */ + pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root); + + /* + * initialize all our spinlocks + */ + spin_lock_init(&pfm_sessions.pfs_lock); + spin_lock_init(&pfm_buffer_fmt_lock); + + init_pfm_fs(); + + for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL; + + return 0; +} + +__initcall(pfm_init); + +/* + * this function is called before pfm_init() + */ +void +pfm_init_percpu (void) +{ + static int first_time=1; + /* + * make sure no measurement is active + * (may inherit programmed PMCs from EFI). 
+ */ + pfm_clear_psr_pp(); + pfm_clear_psr_up(); + + /* + * we run with the PMU not frozen at all times + */ + pfm_unfreeze_pmu(); + + if (first_time) { + register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); + first_time=0; + } + + ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); + ia64_srlz_d(); +} + +/* + * used for debug purposes only + */ +void +dump_pmu_state(const char *from) +{ + struct task_struct *task; + struct pt_regs *regs; + pfm_context_t *ctx; + unsigned long psr, dcr, info, flags; + int i, this_cpu; + + local_irq_save(flags); + + this_cpu = smp_processor_id(); + regs = task_pt_regs(current); + info = PFM_CPUINFO_GET(); + dcr = ia64_getreg(_IA64_REG_CR_DCR); + + if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) { + local_irq_restore(flags); + return; + } + + printk("CPU%d from %s() current [%d] iip=0x%lx %s\n", + this_cpu, + from, + task_pid_nr(current), + regs->cr_iip, + current->comm); + + task = GET_PMU_OWNER(); + ctx = GET_PMU_CTX(); + + printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task_pid_nr(task) : -1, ctx); + + psr = pfm_get_psr(); + + printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n", + this_cpu, + ia64_get_pmc(0), + psr & IA64_PSR_PP ? 1 : 0, + psr & IA64_PSR_UP ? 1 : 0, + dcr & IA64_DCR_PP ? 
1 : 0, + info, + ia64_psr(regs)->up, + ia64_psr(regs)->pp); + + ia64_psr(regs)->up = 0; + ia64_psr(regs)->pp = 0; + + for (i=1; PMC_IS_LAST(i) == 0; i++) { + if (PMC_IS_IMPL(i) == 0) continue; + printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, ctx ? ctx->th_pmcs[i] : 0UL); /* ctx is NULL-checked below, so guard it here too */ + } + + for (i=1; PMD_IS_LAST(i) == 0; i++) { + if (PMD_IS_IMPL(i) == 0) continue; + printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, ctx ? ctx->th_pmds[i] : 0UL); /* same guard: print 0 when there is no context */ + } + + if (ctx) { + printk("->CPU%d ctx_state=%d vaddr=%p addr=%p fd=%d ctx_task=[%d] saved_psr_up=0x%lx\n", + this_cpu, + ctx->ctx_state, + ctx->ctx_smpl_vaddr, + ctx->ctx_smpl_hdr, + ctx->ctx_msgq_head, + ctx->ctx_msgq_tail, + ctx->ctx_saved_psr_up); + } + local_irq_restore(flags); +} + +/* + * called from process.c:copy_thread(). task is new child. + */ +void +pfm_inherit(struct task_struct *task, struct pt_regs *regs) +{ + struct thread_struct *thread; + + DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task_pid_nr(task))); + + thread = &task->thread; + + /* + * cut links inherited from parent (current) + */ + thread->pfm_context = NULL; + + PFM_SET_WORK_PENDING(task, 0); + + /* + * the psr bits are already set properly in copy_threads() + */ +} +#else /* !CONFIG_PERFMON */ +asmlinkage long +sys_perfmonctl (int fd, int cmd, void *arg, int count) +{ + return -ENOSYS; +} +#endif /* CONFIG_PERFMON */ diff --git a/kernel/arch/ia64/kernel/perfmon_default_smpl.c b/kernel/arch/ia64/kernel/perfmon_default_smpl.c new file mode 100644 index 000000000..30c644ea4 --- /dev/null +++ b/kernel/arch/ia64/kernel/perfmon_default_smpl.c @@ -0,0 +1,296 @@ +/* + * Copyright (C) 2002-2003 Hewlett-Packard Co + * Stephane Eranian + * + * This file implements the default sampling buffer format + * for the Linux/ia64 perfmon-2 subsystem. 
+ */ +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Stephane Eranian "); +MODULE_DESCRIPTION("perfmon default sampling format"); +MODULE_LICENSE("GPL"); + +#define DEFAULT_DEBUG 1 + +#ifdef DEFAULT_DEBUG +#define DPRINT(a) \ + do { \ + if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \ + } while (0) + +#define DPRINT_ovfl(a) \ + do { \ + if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \ + } while (0) + +#else +#define DPRINT(a) +#define DPRINT_ovfl(a) +#endif + +static int +default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data) +{ + pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data; + int ret = 0; + + if (data == NULL) { + DPRINT(("[%d] no argument passed\n", task_pid_nr(task))); + return -EINVAL; + } + + DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu)); + + /* + * must hold at least the buffer header + one minimally sized entry + */ + if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL; + + DPRINT(("buf_size=%lu\n", arg->buf_size)); + + return ret; +} + +static int +default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size) +{ + pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data; + + /* + * size has been validated in default_validate + */ + *size = arg->buf_size; + + return 0; +} + +static int +default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data) +{ + pfm_default_smpl_hdr_t *hdr; + pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data; + + hdr = (pfm_default_smpl_hdr_t *)buf; + + hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION; + hdr->hdr_buf_size = arg->buf_size; + hdr->hdr_cur_offs = sizeof(*hdr); + hdr->hdr_overflows = 0UL; + hdr->hdr_count = 0UL; + + DPRINT(("[%d] buffer=%p 
buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n", + task_pid_nr(task), + buf, + hdr->hdr_buf_size, + sizeof(*hdr), + hdr->hdr_version, + hdr->hdr_cur_offs)); + + return 0; +} + +static int +default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp) +{ + pfm_default_smpl_hdr_t *hdr; + pfm_default_smpl_entry_t *ent; + void *cur, *last; + unsigned long *e, entry_size; + unsigned int npmds, i; + unsigned char ovfl_pmd; + unsigned char ovfl_notify; + + if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) { + DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task ? task->pid : -1, buf, arg)); /* task itself may be the NULL argument */ + return -EINVAL; + } + + hdr = (pfm_default_smpl_hdr_t *)buf; + cur = buf+hdr->hdr_cur_offs; + last = buf+hdr->hdr_buf_size; + ovfl_pmd = arg->ovfl_pmd; + ovfl_notify = arg->ovfl_notify; + + /* + * precheck for sanity + */ + if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full; + + npmds = hweight64(arg->smpl_pmds[0]); + + ent = (pfm_default_smpl_entry_t *)cur; + + prefetch(arg->smpl_pmds_values); + + entry_size = sizeof(*ent) + (npmds << 3); + + /* position for first pmd */ + e = (unsigned long *)(ent+1); + + hdr->hdr_count++; + + DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n", + task->pid, + hdr->hdr_count, + cur, last, + last-cur, + ovfl_pmd, + ovfl_notify, npmds)); + + /* + * current = task running at the time of the overflow. + * + * per-task mode: + * - this is usually the task being monitored. 
+ * Under certain conditions, it might be a different task + * + * system-wide: + * - this is not necessarily the task controlling the session + */ + ent->pid = current->pid; + ent->ovfl_pmd = ovfl_pmd; + ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val; + + /* + * where did the fault happen (includes slot number) + */ + ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3); + + ent->tstamp = stamp; + ent->cpu = smp_processor_id(); + ent->set = arg->active_set; + ent->tgid = current->tgid; + + /* + * selectively store PMDs in increasing index number + */ + if (npmds) { + unsigned long *val = arg->smpl_pmds_values; + for(i=0; i < npmds; i++) { + *e++ = *val++; + } + } + + /* + * update position for next entry + */ + hdr->hdr_cur_offs += entry_size; + cur += entry_size; + + /* + * post check to avoid losing the last sample + */ + if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full; + + /* + * keep same ovfl_pmds, ovfl_notify + */ + arg->ovfl_ctrl.bits.notify_user = 0; + arg->ovfl_ctrl.bits.block_task = 0; + arg->ovfl_ctrl.bits.mask_monitoring = 0; + arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */ + + return 0; +full: + DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify)); + + /* + * increment number of buffer overflow. + * important to detect duplicate set of samples. 
+ */ + hdr->hdr_overflows++; + + /* + * if no notification requested, then we saturate the buffer + */ + if (ovfl_notify == 0) { + arg->ovfl_ctrl.bits.notify_user = 0; + arg->ovfl_ctrl.bits.block_task = 0; + arg->ovfl_ctrl.bits.mask_monitoring = 1; + arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; + } else { + arg->ovfl_ctrl.bits.notify_user = 1; + arg->ovfl_ctrl.bits.block_task = 1; /* ignored for non-blocking context */ + arg->ovfl_ctrl.bits.mask_monitoring = 1; + arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */ + } + return -1; /* we are full, sorry */ +} + +static int +default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) +{ + pfm_default_smpl_hdr_t *hdr; + + hdr = (pfm_default_smpl_hdr_t *)buf; + + hdr->hdr_count = 0UL; + hdr->hdr_cur_offs = sizeof(*hdr); + + ctrl->bits.mask_monitoring = 0; + ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */ + + return 0; +} + +static int +default_exit(struct task_struct *task, void *buf, struct pt_regs *regs) +{ + DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf)); + return 0; +} + +static pfm_buffer_fmt_t default_fmt={ + .fmt_name = "default_format", + .fmt_uuid = PFM_DEFAULT_SMPL_UUID, + .fmt_arg_size = sizeof(pfm_default_smpl_arg_t), + .fmt_validate = default_validate, + .fmt_getsize = default_get_size, + .fmt_init = default_init, + .fmt_handler = default_handler, + .fmt_restart = default_restart, + .fmt_restart_active = default_restart, + .fmt_exit = default_exit, +}; + +static int __init +pfm_default_smpl_init_module(void) +{ + int ret; + + ret = pfm_register_buffer_fmt(&default_fmt); + if (ret == 0) { + printk("perfmon_default_smpl: %s v%u.%u registered\n", + default_fmt.fmt_name, + PFM_DEFAULT_SMPL_VERSION_MAJ, + PFM_DEFAULT_SMPL_VERSION_MIN); + } else { + printk("perfmon_default_smpl: %s cannot register ret=%d\n", + default_fmt.fmt_name, + ret); + } + + return ret; +} + +static void __exit +pfm_default_smpl_cleanup_module(void) +{ + int ret; + ret = 
pfm_unregister_buffer_fmt(default_fmt.fmt_uuid); + + printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret); +} + +module_init(pfm_default_smpl_init_module); +module_exit(pfm_default_smpl_cleanup_module); + diff --git a/kernel/arch/ia64/kernel/perfmon_generic.h b/kernel/arch/ia64/kernel/perfmon_generic.h new file mode 100644 index 000000000..674894780 --- /dev/null +++ b/kernel/arch/ia64/kernel/perfmon_generic.h @@ -0,0 +1,45 @@ +/* + * This file contains the generic PMU register description tables + * and pmc checker used by perfmon.c. + * + * Copyright (C) 2002-2003 Hewlett Packard Co + * Stephane Eranian + */ + +static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={ +/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={ +/* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, +/* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, +/* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, +/* pmd3 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, +/* pmd4 */ { 
PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, +/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, +/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, +/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! + */ +static pmu_config_t pmu_conf_gen={ + .pmu_name = "Generic", + .pmu_family = 0xff, /* any */ + .ovfl_val = (1UL << 32) - 1, + .num_ibrs = 0, /* does not use */ + .num_dbrs = 0, /* does not use */ + .pmd_desc = pfm_gen_pmd_desc, + .pmc_desc = pfm_gen_pmc_desc +}; + diff --git a/kernel/arch/ia64/kernel/perfmon_itanium.h b/kernel/arch/ia64/kernel/perfmon_itanium.h new file mode 100644 index 000000000..d1d508a0f --- /dev/null +++ b/kernel/arch/ia64/kernel/perfmon_itanium.h @@ -0,0 +1,115 @@ +/* + * This file contains the Itanium PMU register description tables + * and pmc checker used by perfmon.c. 
+ * + * Copyright (C) 2002-2003 Hewlett Packard Co + * Stephane Eranian + */ +static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); + +static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={ +/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker 
*/ +}; + +static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={ +/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, +/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, +/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, +/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, +/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd14 */ { 
PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static int +pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) +{ + int ret; + int is_loaded; + + /* sanity check */ + if (ctx == NULL) return -EINVAL; + + is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; + + /* + * we must clear the (instruction) debug registers if pmc13.ta bit is cleared + * before they are written (fl_using_dbreg==0) to avoid picking up stale information. + */ + if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val)); + + /* don't mix debug with perfmon */ + if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. NOTE(review): the mode argument is a bare 1 here (and 0 for the dbr case below); confirm against the PFM_CODE_RR/PFM_DATA_RR constants the McKinley checker passes. + */ + ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs); + if (ret) return ret; + } + + /* + * we must clear the (data) debug registers if pmc11.pt bit is cleared + * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
+ */ + if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val)); + + /* don't mix debug with perfmon */ + if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. + */ + ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs); + if (ret) return ret; + } + return 0; +} + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! + */ +static pmu_config_t pmu_conf_ita={ + .pmu_name = "Itanium", + .pmu_family = 0x7, + .ovfl_val = (1UL << 32) - 1, + .pmd_desc = pfm_ita_pmd_desc, + .pmc_desc = pfm_ita_pmc_desc, + .num_ibrs = 8, + .num_dbrs = 8, + .use_rr_dbregs = 1, /* debug registers are used for range restrictions */ +}; + + diff --git a/kernel/arch/ia64/kernel/perfmon_mckinley.h b/kernel/arch/ia64/kernel/perfmon_mckinley.h new file mode 100644 index 000000000..c4bec7a9d --- /dev/null +++ b/kernel/arch/ia64/kernel/perfmon_mckinley.h @@ -0,0 +1,187 @@ +/* + * This file contains the McKinley PMU register description tables + * and pmc checker used by perfmon.c.
+ * + * Copyright (C) 2002-2003 Hewlett Packard Co + * Stephane Eranian + */ +static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); + +static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={ +/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc13 */ { 
PFM_REG_CONFIG , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc14 */ { PFM_REG_CONFIG , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc15 */ { PFM_REG_CONFIG , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={ +/* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd2 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd3 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, +/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, +/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, +/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, +/* pmd8 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd9 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd10 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd11 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, 
NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd12 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd13 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd14 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd15 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd16 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd17 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +/* + * PMC reserved fields must have their power-up values preserved; *val is rewritten in place and 0 is always returned + */ +static int +pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs) +{ + unsigned long tmp1, tmp2, ival = *val; + + /* keep only the user bits selected by PMC_RSVD_MASK, dropping the reserved areas */ + tmp1 = ival & PMC_RSVD_MASK(cnum); + + /* take the reserved field values from the register's default value */ + tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum); + + *val = tmp1 | tmp2; + + DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n", + cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val)); + return 0; +} + +/* + * task can be NULL if the context is unloaded; returns 0 when the value is accepted, -EINVAL or the non-zero result of pfm_write_ibr_dbr() otherwise + */ +static int +pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) +{ + int ret = 0, check_case1 = 0; + unsigned long val8 = 
0, val14 = 0, val13 = 0; + int is_loaded; + + /* first preserve the reserved fields */ + pfm_mck_reserved(cnum, val, regs); + + /* sanity check */ + if (ctx == NULL) return -EINVAL; + + is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; + + /* + * we must clear the debug registers if pmc13 has a value which enables + * memory pipeline event constraints. In this case we need to clear the + * debug registers if they have not yet been accessed. This is required + * to avoid picking up stale state. + * PMC13 is "active" if: + * one of the pmc13.cfg_dbrpXX fields is different from 0x3 + * AND + * the corresponding pmc13.ena_dbrpXX is set. + */ + DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded)); + + if (cnum == 13 && is_loaded + && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val)); + + /* don't mix debug with perfmon */ + if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. + */ + ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs); + if (ret) return ret; + } + /* + * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled + * before they are written (fl_using_dbreg==0) to avoid picking up stale information. + */ + if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val)); + + /* don't mix debug with perfmon */ + if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared.
+ */ + ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs); + if (ret) return ret; + + } + + switch(cnum) { + case 4: *val |= 1UL << 23; /* force power enable bit */ + break; + case 8: val8 = *val; + val13 = ctx->ctx_pmcs[13]; + val14 = ctx->ctx_pmcs[14]; + check_case1 = 1; + break; + case 13: val8 = ctx->ctx_pmcs[8]; + val13 = *val; + val14 = ctx->ctx_pmcs[14]; + check_case1 = 1; + break; + case 14: val8 = ctx->ctx_pmcs[8]; + val13 = ctx->ctx_pmcs[13]; + val14 = *val; + check_case1 = 1; + break; + } + /* check for an illegal configuration which can produce inconsistencies in tagging + * i-side events in L1D and L2 caches + */ + if (check_case1) { + ret = ((val13 >> 45) & 0xf) == 0 + && ((val8 & 0x1) == 0) + && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0) + ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0)); + + if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n")); + } + + return ret ? -EINVAL : 0; +} + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! + */ +static pmu_config_t pmu_conf_mck={ + .pmu_name = "Itanium 2", + .pmu_family = 0x1f, + .flags = PFM_PMU_IRQ_RESEND, + .ovfl_val = (1UL << 47) - 1, + .pmd_desc = pfm_mck_pmd_desc, + .pmc_desc = pfm_mck_pmc_desc, + .num_ibrs = 8, + .num_dbrs = 8, + .use_rr_dbregs = 1 /* debug registers are used for range restrictions */ +}; + + diff --git a/kernel/arch/ia64/kernel/perfmon_montecito.h b/kernel/arch/ia64/kernel/perfmon_montecito.h new file mode 100644 index 000000000..7f8da4c7c --- /dev/null +++ b/kernel/arch/ia64/kernel/perfmon_montecito.h @@ -0,0 +1,269 @@ +/* + * This file contains the Montecito PMU register description tables + * and pmc checker used by perfmon.c. + * + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian + */ +static int pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); + +#define RDEP_MONT_ETB (RDEP(38)|RDEP(39)|RDEP(48)|RDEP(49)|RDEP(50)|RDEP(51)|RDEP(52)|RDEP(53)|RDEP(54)|\ + RDEP(55)|RDEP(56)|RDEP(57)|RDEP(58)|RDEP(59)|RDEP(60)|RDEP(61)|RDEP(62)|RDEP(63)) +#define RDEP_MONT_DEAR (RDEP(32)|RDEP(33)|RDEP(36)) +#define RDEP_MONT_IEAR (RDEP(34)|RDEP(35)) + +static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={ +/* pmc0 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc4 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(4),0, 0, 0}, {0,0, 0, 0}}, +/* pmc5 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(5),0, 0, 0}, {0,0, 0, 0}}, +/* pmc6 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(6),0, 0, 0}, {0,0, 0, 0}}, +/* pmc7 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(7),0, 0, 0}, {0,0, 0, 0}}, +/* pmc8 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(8),0, 0, 0}, {0,0, 0, 0}}, +/* pmc9 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(9),0, 0, 0}, {0,0, 0, 0}}, +/* pmc10 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(10),0, 0, 0}, {0,0, 0, 0}}, +/* pmc11 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(11),0, 0, 0}, {0,0, 0, 0}}, +/* pmc12 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(12),0, 0, 0}, {0,0, 0, 0}}, +/* pmc13 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, 
pfm_mont_pmc_check, {RDEP(13),0, 0, 0}, {0,0, 0, 0}}, +/* pmc14 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(14),0, 0, 0}, {0,0, 0, 0}}, +/* pmc15 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(15),0, 0, 0}, {0,0, 0, 0}}, +/* pmc16 */ { PFM_REG_NOTIMPL, }, +/* pmc17 */ { PFM_REG_NOTIMPL, }, +/* pmc18 */ { PFM_REG_NOTIMPL, }, +/* pmc19 */ { PFM_REG_NOTIMPL, }, +/* pmc20 */ { PFM_REG_NOTIMPL, }, +/* pmc21 */ { PFM_REG_NOTIMPL, }, +/* pmc22 */ { PFM_REG_NOTIMPL, }, +/* pmc23 */ { PFM_REG_NOTIMPL, }, +/* pmc24 */ { PFM_REG_NOTIMPL, }, +/* pmc25 */ { PFM_REG_NOTIMPL, }, +/* pmc26 */ { PFM_REG_NOTIMPL, }, +/* pmc27 */ { PFM_REG_NOTIMPL, }, +/* pmc28 */ { PFM_REG_NOTIMPL, }, +/* pmc29 */ { PFM_REG_NOTIMPL, }, +/* pmc30 */ { PFM_REG_NOTIMPL, }, +/* pmc31 */ { PFM_REG_NOTIMPL, }, +/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffffUL, 0x30f01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffffUL, 0xf01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc36 */ { PFM_REG_CONFIG, 0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}}, +/* pmc38 */ { PFM_REG_CONFIG, 0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, +/* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}}, +/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 
0, 0}}, +/* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, + { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static pfm_reg_desc_t pfm_mont_pmd_desc[PMU_MAX_PMDS]={ +/* pmd0 */ { PFM_REG_NOTIMPL, }, +/* pmd1 */ { PFM_REG_NOTIMPL, }, +/* pmd2 */ { PFM_REG_NOTIMPL, }, +/* pmd3 */ { PFM_REG_NOTIMPL, }, +/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(4),0, 0, 0}}, +/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(5),0, 0, 0}}, +/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(6),0, 0, 0}}, +/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(7),0, 0, 0}}, +/* pmd8 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(8),0, 0, 0}}, +/* pmd9 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(9),0, 0, 0}}, +/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(10),0, 0, 0}}, +/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(11),0, 0, 0}}, +/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(12),0, 0, 0}}, +/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(13),0, 0, 0}}, +/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(14),0, 0, 0}}, +/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(15),0, 0, 0}}, +/* pmd16 */ { PFM_REG_NOTIMPL, }, +/* pmd17 */ { PFM_REG_NOTIMPL, }, +/* pmd18 */ { PFM_REG_NOTIMPL, }, +/* pmd19 */ { PFM_REG_NOTIMPL, }, +/* pmd20 */ { PFM_REG_NOTIMPL, }, +/* pmd21 */ { PFM_REG_NOTIMPL, }, +/* pmd22 */ { PFM_REG_NOTIMPL, }, +/* pmd23 */ { PFM_REG_NOTIMPL, }, +/* pmd24 */ { PFM_REG_NOTIMPL, }, +/* pmd25 */ { PFM_REG_NOTIMPL, }, +/* pmd26 */ { PFM_REG_NOTIMPL, }, +/* pmd27 */ { PFM_REG_NOTIMPL, }, +/* pmd28 */ { PFM_REG_NOTIMPL, }, +/* pmd29 */ { PFM_REG_NOTIMPL, }, +/* pmd30 */ { PFM_REG_NOTIMPL, }, 
+/* pmd31 */ { PFM_REG_NOTIMPL, }, +/* pmd32 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(33)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}}, +/* pmd33 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}}, +/* pmd34 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(35),0, 0, 0}, {RDEP(37),0, 0, 0}}, +/* pmd35 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(34),0, 0, 0}, {RDEP(37),0, 0, 0}}, +/* pmd36 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(33),0, 0, 0}, {RDEP(40),0, 0, 0}}, +/* pmd37 */ { PFM_REG_NOTIMPL, }, +/* pmd38 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd39 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd40 */ { PFM_REG_NOTIMPL, }, +/* pmd41 */ { PFM_REG_NOTIMPL, }, +/* pmd42 */ { PFM_REG_NOTIMPL, }, +/* pmd43 */ { PFM_REG_NOTIMPL, }, +/* pmd44 */ { PFM_REG_NOTIMPL, }, +/* pmd45 */ { PFM_REG_NOTIMPL, }, +/* pmd46 */ { PFM_REG_NOTIMPL, }, +/* pmd47 */ { PFM_REG_NOTIMPL, }, +/* pmd48 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd49 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd50 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd51 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd52 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd53 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd54 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd55 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd56 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd57 */ { PFM_REG_BUFFER, 
0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd58 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd59 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd60 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd61 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd62 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, +/* pmd63 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, + { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +/* + * PMC reserved fields must have their power-up values preserved; *val is rewritten in place and 0 is always returned + */ +static int +pfm_mont_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs) +{ + unsigned long tmp1, tmp2, ival = *val; + + /* keep only the user bits selected by PMC_RSVD_MASK, dropping the reserved areas */ + tmp1 = ival & PMC_RSVD_MASK(cnum); + + /* take the reserved field values from the register's default value */ + tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum); + + *val = tmp1 | tmp2; + + DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n", + cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val)); + return 0; +} + +/* + * task can be NULL if the context is unloaded; returns 0 when the value is accepted, -EINVAL or the non-zero result of pfm_write_ibr_dbr() otherwise + */ +static int +pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) +{ + int ret = 0; + unsigned long val32 = 0, val38 = 0, val41 = 0; + unsigned long tmpval; + int check_case1 = 0; + int is_loaded; + + /* first preserve the reserved fields */ + pfm_mont_reserved(cnum, val, regs); + + tmpval = *val; + + /* sanity check */ + if (ctx == NULL) return -EINVAL; + + is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; + + /* + * we must clear the debug registers if pmc41 has a value which enables + * memory pipeline event constraints.
In this case we need to clear the + * debug registers if they have not yet been accessed. This is required + * to avoid picking up stale state. + * PMC41 is "active" if: + * one of the pmc41.cfg_dtagXX fields is different from 0x3 + * AND + * the corresponding pmc41.en_dbrpXX is set. + * AND + * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used) + */ + DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded)); + + if (cnum == 41 && is_loaded + && (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval)); + + /* don't mix debug with perfmon */ + if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. + */ + ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs); + if (ret) return ret; + } + /* + * we must clear the (instruction) debug registers if: + * pmc38.ig_ibrpX is 0 (enabled) + * AND + * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used) + */ + if (cnum == 38 && is_loaded && ((tmpval & 0x492UL) != 0x492UL) && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc38=0x%lx has active pmc38 settings, clearing ibr\n", tmpval)); + + /* don't mix debug with perfmon */ + if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared.
+ */ + ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs); + if (ret) return ret; + + } + switch(cnum) { + case 32: val32 = *val; + val38 = ctx->ctx_pmcs[38]; + val41 = ctx->ctx_pmcs[41]; + check_case1 = 1; + break; + case 38: val38 = *val; + val32 = ctx->ctx_pmcs[32]; + val41 = ctx->ctx_pmcs[41]; + check_case1 = 1; + break; + case 41: val41 = *val; + val32 = ctx->ctx_pmcs[32]; + val38 = ctx->ctx_pmcs[38]; + check_case1 = 1; + break; + } + /* check for an illegal configuration which can produce inconsistencies in tagging + * i-side events in L1D and L2 caches + */ + if (check_case1) { + ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0) + && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0) + || (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0)); + if (ret) { + DPRINT(("invalid config pmc38=0x%lx pmc41=0x%lx pmc32=0x%lx\n", val38, val41, val32)); + return -EINVAL; + } + } + *val = tmpval; + return 0; +} + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! + */ +static pmu_config_t pmu_conf_mont={ + .pmu_name = "Montecito", + .pmu_family = 0x20, + .flags = PFM_PMU_IRQ_RESEND, + .ovfl_val = (1UL << 47) - 1, + .pmd_desc = pfm_mont_pmd_desc, + .pmc_desc = pfm_mont_pmc_desc, + .num_ibrs = 8, + .num_dbrs = 8, + .use_rr_dbregs = 1 /* debug registers are used for range restrictions */ +}; diff --git a/kernel/arch/ia64/kernel/process.c b/kernel/arch/ia64/kernel/process.c new file mode 100644 index 000000000..b51514957 --- /dev/null +++ b/kernel/arch/ia64/kernel/process.c @@ -0,0 +1,682 @@ +/* + * Architecture-specific setup. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * 04/11/17 Ashok Raj Added CPU Hotplug Support + * + * 2005-10-07 Keith Owens + * Add notify_die() hooks.
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "entry.h" + +#ifdef CONFIG_PERFMON +# include +#endif + +#include "sigframe.h" + +void (*ia64_mark_idle)(int); + +unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; +EXPORT_SYMBOL(boot_option_idle_override); +void (*pm_power_off) (void); +EXPORT_SYMBOL(pm_power_off); + +void +ia64_do_show_stack (struct unw_frame_info *info, void *arg) +{ + unsigned long ip, sp, bsp; + char buf[128]; /* don't make it so big that it overflows the stack! */ + + printk("\nCall Trace:\n"); + do { + unw_get_ip(info, &ip); + if (ip == 0) + break; + + unw_get_sp(info, &sp); + unw_get_bsp(info, &bsp); + snprintf(buf, sizeof(buf), + " [<%016lx>] %%s\n" + " sp=%016lx bsp=%016lx\n", + ip, sp, bsp); + print_symbol(buf, ip); + } while (unw_unwind(info) >= 0); +} + +void +show_stack (struct task_struct *task, unsigned long *sp) +{ + if (!task) + unw_init_running(ia64_do_show_stack, NULL); + else { + struct unw_frame_info info; + + unw_init_from_blocked_task(&info, task); + ia64_do_show_stack(&info, NULL); + } +} + +void +show_regs (struct pt_regs *regs) +{ + unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; + + print_modules(); + printk("\n"); + show_regs_print_info(KERN_DEFAULT); + printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s (%s)\n", + regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(), + init_utsname()->release); + print_symbol("ip is at %s\n", ip); + printk("unat: %016lx pfs : %016lx rsc : %016lx\n", + regs->ar_unat, regs->ar_pfs, regs->ar_rsc); + printk("rnat: %016lx bsps: %016lx pr : %016lx\n", + regs->ar_rnat, regs->ar_bspstore, regs->pr); + printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n", + regs->loadrs, 
regs->ar_ccv, regs->ar_fpsr); + printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd); + printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6, regs->b7); + printk("f6 : %05lx%016lx f7 : %05lx%016lx\n", + regs->f6.u.bits[1], regs->f6.u.bits[0], + regs->f7.u.bits[1], regs->f7.u.bits[0]); + printk("f8 : %05lx%016lx f9 : %05lx%016lx\n", + regs->f8.u.bits[1], regs->f8.u.bits[0], + regs->f9.u.bits[1], regs->f9.u.bits[0]); + printk("f10 : %05lx%016lx f11 : %05lx%016lx\n", + regs->f10.u.bits[1], regs->f10.u.bits[0], + regs->f11.u.bits[1], regs->f11.u.bits[0]); + + printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2, regs->r3); + printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10); + printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13); + printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16); + printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19); + printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22); + printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25); + printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, regs->r28); + printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31); + + if (user_mode(regs)) { + /* print the stacked registers */ + unsigned long val, *bsp, ndirty; + int i, sof, is_nat = 0; + + sof = regs->cr_ifs & 0x7f; /* size of frame */ + ndirty = (regs->loadrs >> 19); + bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty); + for (i = 0; i < sof; ++i) { + get_user(val, (unsigned long __user *) ia64_rse_skip_regs(bsp, i)); + printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val, + ((i == sof - 1) || (i % 3) == 2) ? 
"\n" : " "); + } + } else + show_stack(NULL, NULL); +} + +/* local support for deprecated console_print */ +void +console_print(const char *s) +{ + printk(KERN_EMERG "%s", s); +} + +void +do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) +{ + if (fsys_mode(current, &scr->pt)) { + /* + * defer signal-handling etc. until we return to + * privilege-level 0. + */ + if (!ia64_psr(&scr->pt)->lp) + ia64_psr(&scr->pt)->lp = 1; + return; + } + +#ifdef CONFIG_PERFMON + if (current->thread.pfm_needs_checking) + /* + * Note: pfm_handle_work() allows us to call it with interrupts + * disabled, and may enable interrupts within the function. + */ + pfm_handle_work(); +#endif + + /* deal with pending signal delivery */ + if (test_thread_flag(TIF_SIGPENDING)) { + local_irq_enable(); /* force interrupt enable */ + ia64_do_signal(scr, in_syscall); + } + + if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) { + local_irq_enable(); /* force interrupt enable */ + tracehook_notify_resume(&scr->pt); + } + + /* copy user rbs to kernel rbs */ + if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) { + local_irq_enable(); /* force interrupt enable */ + ia64_sync_krbs(); + } + + local_irq_disable(); /* force interrupt disable */ +} + +static int __init nohalt_setup(char * str) +{ + cpu_idle_poll_ctrl(true); + return 1; +} +__setup("nohalt", nohalt_setup); + +#ifdef CONFIG_HOTPLUG_CPU +/* We don't actually take CPU down, just spin without interrupts. */ +static inline void play_dead(void) +{ + unsigned int this_cpu = smp_processor_id(); + + /* Ack it */ + __this_cpu_write(cpu_state, CPU_DEAD); + + max_xtp(); + local_irq_disable(); + idle_task_exit(); + ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]); + /* + * The above is a point of no-return, the processor is + * expected to be in SAL loop now.
+ */ + BUG(); +} +#else +static inline void play_dead(void) +{ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +void arch_cpu_idle_dead(void) +{ + play_dead(); +} + +void arch_cpu_idle(void) +{ + void (*mark_idle)(int) = ia64_mark_idle; + +#ifdef CONFIG_SMP + min_xtp(); +#endif + rmb(); + if (mark_idle) + (*mark_idle)(1); + + safe_halt(); + + if (mark_idle) + (*mark_idle)(0); +#ifdef CONFIG_SMP + normal_xtp(); +#endif +} + +void +ia64_save_extra (struct task_struct *task) +{ +#ifdef CONFIG_PERFMON + unsigned long info; +#endif + + if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) + ia64_save_debug_regs(&task->thread.dbr[0]); + +#ifdef CONFIG_PERFMON + if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) + pfm_save_regs(task); + + info = __this_cpu_read(pfm_syst_info); + if (info & PFM_CPUINFO_SYST_WIDE) + pfm_syst_wide_update_task(task, info, 0); +#endif +} + +void +ia64_load_extra (struct task_struct *task) +{ +#ifdef CONFIG_PERFMON + unsigned long info; +#endif + + if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) + ia64_load_debug_regs(&task->thread.dbr[0]); + +#ifdef CONFIG_PERFMON + if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) + pfm_load_regs(task); + + info = __this_cpu_read(pfm_syst_info); + if (info & PFM_CPUINFO_SYST_WIDE) + pfm_syst_wide_update_task(task, info, 1); +#endif +} + +/* + * Copy the state of an ia-64 thread. + * + * We get here through the following call chain: + * + * from user-level: from kernel: + * + * + * sys_clone : + * do_fork do_fork + * copy_thread copy_thread + * + * This means that the stack layout is as follows: + * + * +---------------------+ (highest addr) + * | struct pt_regs | + * +---------------------+ + * | struct switch_stack | + * +---------------------+ + * | | + * | memory stack | + * | | <-- sp (lowest addr) + * +---------------------+ + * + * Observe that we copy the unat values that are in pt_regs and switch_stack. 
Spilling an + * integer to address X causes bit N in ar.unat to be set to the NaT bit of the register, + * with N=(X & 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY if the + * pt_regs structure in the parent is congruent to that of the child, modulo 512. Since + * the stack is page aligned and the page size is at least 4KB, this is always the case, + * so there is nothing to worry about. + */ +int +copy_thread(unsigned long clone_flags, + unsigned long user_stack_base, unsigned long user_stack_size, + struct task_struct *p) +{ + extern char ia64_ret_from_clone; + struct switch_stack *child_stack, *stack; + unsigned long rbs, child_rbs, rbs_size; + struct pt_regs *child_ptregs; + struct pt_regs *regs = current_pt_regs(); + int retval = 0; /* never reassigned below, so every path returns 0 */ + + child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1; /* pt_regs is the topmost structure below p + IA64_STK_OFFSET */ + child_stack = (struct switch_stack *) child_ptregs - 1; /* switch_stack lies directly below pt_regs */ + + rbs = (unsigned long) current + IA64_RBS_OFFSET; + child_rbs = (unsigned long) p + IA64_RBS_OFFSET; + + /* copy parts of thread_struct: */ + p->thread.ksp = (unsigned long) child_stack - 16; + + /* + * NOTE: The calling convention considers all floating point + * registers in the high partition (fph) to be scratch. Since + * the only way to get to this point is through a system call, + * we know that the values in fph are all dead. Hence, there + * is no need to inherit the fph state from the parent to the + * child and all we have to do is to make sure that + * IA64_THREAD_FPH_VALID is cleared in the child. + * + * XXX We could push this optimization a bit further by + * clearing IA64_THREAD_FPH_VALID on ANY system call. + * However, it's not clear this is worth doing. Also, it + * would be a slight deviation from the normal Linux system + * call behavior where scratch registers are preserved across + * system calls (unless used by the system call itself).
+ */ +# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \ + | IA64_THREAD_PM_VALID) +# define THREAD_FLAGS_TO_SET 0 + p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR) + | THREAD_FLAGS_TO_SET); + + ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */ + + if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(!user_stack_base)) { + /* fork_idle() called us */ + return 0; + } + memset(child_stack, 0, sizeof(*child_ptregs) + sizeof(*child_stack)); /* clears switch_stack and the pt_regs that directly follows it */ + child_stack->r4 = user_stack_base; /* payload */ + child_stack->r5 = user_stack_size; /* argument */ + /* + * Preserve PSR bits, except for bits 32-34 and 37-45, + * which we can't read. + */ + child_ptregs->cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN; + /* mark as valid, empty frame */ + child_ptregs->cr_ifs = 1UL << 63; + child_stack->ar_fpsr = child_ptregs->ar_fpsr + = ia64_getreg(_IA64_REG_AR_FPSR); + child_stack->pr = (1 << PRED_KERNEL_STACK); + child_stack->ar_bspstore = child_rbs; + child_stack->b0 = (unsigned long) &ia64_ret_from_clone; + + /* stop some PSR bits from being inherited. + * the psr.up/psr.pp bits must be cleared on fork but inherited on execve() + * therefore we must specify them explicitly here and not include them in + * IA64_PSR_BITS_TO_CLEAR.
+ */ + child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET) + & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP)); + + return 0; + } + stack = ((struct switch_stack *) regs) - 1; + /* copy parent's switch_stack & pt_regs to child: */ + memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack)); + + /* copy the parent's register backing store to the child: */ + rbs_size = stack->ar_bspstore - rbs; + memcpy((void *) child_rbs, (void *) rbs, rbs_size); + if (clone_flags & CLONE_SETTLS) + child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */ + if (user_stack_base) { + child_ptregs->r12 = user_stack_base + user_stack_size - 16; /* r12 starts 16 bytes below the top of the supplied area */ + child_ptregs->ar_bspstore = user_stack_base; /* user backing store starts at the base of the supplied area */ + child_ptregs->ar_rnat = 0; + child_ptregs->loadrs = 0; + } + child_stack->ar_bspstore = child_rbs + rbs_size; + child_stack->b0 = (unsigned long) &ia64_ret_from_clone; + + /* stop some PSR bits from being inherited. + * the psr.up/psr.pp bits must be cleared on fork but inherited on execve() + * therefore we must specify them explicitly here and not include them in + * IA64_PSR_BITS_TO_CLEAR.
+ */ + child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET) + & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP)); + +#ifdef CONFIG_PERFMON + if (current->thread.pfm_context) + pfm_inherit(p, child_ptregs); +#endif + return retval; +} +/* + * Unwind callback that fills the elf_gregset_t pointed to by ARG with the + * user-level register state of TASK, using the coredump layout listed in + * the body. The buffer is zeroed first; if unwinding to the user frame or + * syncing the user RBS fails, the function returns early and the buffer + * is left zeroed. + */ +static void +do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg) +{ + unsigned long mask, sp, nat_bits = 0, ar_rnat, urbs_end, cfm; + unsigned long uninitialized_var(ip); /* GCC be quiet */ + elf_greg_t *dst = arg; + struct pt_regs *pt; + char nat; + int i; + + memset(dst, 0, sizeof(elf_gregset_t)); /* don't leak any kernel bits to user-level */ + + if (unw_unwind_to_user(info) < 0) + return; + + unw_get_sp(info, &sp); + pt = (struct pt_regs *) (sp + 16); + + urbs_end = ia64_get_user_rbs_end(task, pt, &cfm); + + if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0) + return; + + ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end), + &ar_rnat); + + /* + * coredump format: + * r0-r31 + * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) + * predicate registers (p0-p63) + * b0-b7 + * ip cfm user-mask + * ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec + * ar.csd ar.ssd (slots 55 and 56, written at the end of this function) + */ + + /* r0 is zero */ + for (i = 1, mask = (1UL << i); i < 32; ++i) { + unw_get_gr(info, i, &dst[i], &nat); + if (nat) + nat_bits |= mask; + mask <<= 1; + } + dst[32] = nat_bits; + unw_get_pr(info, &dst[33]); + + for (i = 0; i < 8; ++i) + unw_get_br(info, i, &dst[34 + i]); + + unw_get_rp(info, &ip); + dst[42] = ip + ia64_psr(pt)->ri; + dst[43] = cfm; + dst[44] = pt->cr_ipsr & IA64_PSR_UM; + + unw_get_ar(info, UNW_AR_RSC, &dst[45]); + /* + * For bsp and bspstore, unw_get_ar() would return the kernel + * addresses, but we need the user-level addresses instead: + */ + dst[46] = urbs_end; /* note: by convention PT_AR_BSP points to the end of the urbs!
*/ + dst[47] = pt->ar_bspstore; + dst[48] = ar_rnat; + unw_get_ar(info, UNW_AR_CCV, &dst[49]); + unw_get_ar(info, UNW_AR_UNAT, &dst[50]); + unw_get_ar(info, UNW_AR_FPSR, &dst[51]); + dst[52] = pt->ar_pfs; /* UNW_AR_PFS is == to pt->cr_ifs for interrupt frames */ + unw_get_ar(info, UNW_AR_LC, &dst[53]); + unw_get_ar(info, UNW_AR_EC, &dst[54]); + unw_get_ar(info, UNW_AR_CSD, &dst[55]); + unw_get_ar(info, UNW_AR_SSD, &dst[56]); +} +/* + * Unwind callback that fills the elf_fpregset_t pointed to by ARG with the + * floating-point state of TASK. The buffer is zeroed first and is left + * zeroed if unwinding to the user frame fails. Slots 2-31 come from the + * unwinder; slots 32 and up are copied from task->thread.fph only when + * IA64_THREAD_FPH_VALID is set after ia64_flush_fph(). + */ +void +do_dump_task_fpu (struct task_struct *task, struct unw_frame_info *info, void *arg) +{ + elf_fpreg_t *dst = arg; + int i; + + memset(dst, 0, sizeof(elf_fpregset_t)); /* don't leak any "random" bits */ + + if (unw_unwind_to_user(info) < 0) + return; + + /* f0 is 0.0, f1 is 1.0; nothing is stored for them, so slots 0 and 1 keep the zero fill from above */ + + for (i = 2; i < 32; ++i) + unw_get_fr(info, i, dst + i); + + ia64_flush_fph(task); + if ((task->thread.flags & IA64_THREAD_FPH_VALID) != 0) + memcpy(dst + 32, task->thread.fph, 96*16); /* 96 entries of 16 bytes each, placed after the first 32 slots */ +} +/* do_copy_task_regs() applied to the current task. */ +void +do_copy_regs (struct unw_frame_info *info, void *arg) +{ + do_copy_task_regs(current, info, arg); +} +/* do_dump_task_fpu() applied to the current task. */ +void +do_dump_fpu (struct unw_frame_info *info, void *arg) +{ + do_dump_task_fpu(current, info, arg); +} +/* + * Fill DST with the general registers of the running task by unwinding it + * through do_copy_regs(). PT is not used. + */ +void +ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst) +{ + unw_init_running(do_copy_regs, dst); +} +/* + * Fill DST with the floating-point registers of the running task by + * unwinding it through do_dump_fpu(). PT is not used. + */ +int +dump_fpu (struct pt_regs *pt, elf_fpregset_t dst) +{ + unw_init_running(do_dump_fpu, dst); + return 1; /* f0-f31 are always valid so we always return 1 */ +} + +/* + * Flush thread state. This is called when a thread does an execve(). + */ +void +flush_thread (void) +{ + /* drop floating-point and debug-register state if it exists: */ + current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID); + ia64_drop_fpu(current); +} + +/* + * Clean up state associated with current thread. This is called when + * the thread calls exit().
+ */ +void +exit_thread (void) +{ + + ia64_drop_fpu(current); +#ifdef CONFIG_PERFMON + /* if needed, stop monitoring and flush state to perfmon context */ + if (current->thread.pfm_context) + pfm_exit_thread(current); + + /* free debug register resources */ + if (current->thread.flags & IA64_THREAD_DBG_VALID) + pfm_release_debug_registers(current); +#endif +} +/* + * Return the first instruction pointer found outside the scheduler + * functions while unwinding the kernel stack of P. Returns 0 when P is + * NULL, is the current task, is (or becomes) TASK_RUNNING, cannot be + * unwound, or when no such address shows up within 17 unwind steps. + */ +unsigned long +get_wchan (struct task_struct *p) +{ + struct unw_frame_info info; + unsigned long ip; + int count = 0; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + /* + * Note: p may not be a blocked task (it could be current or + * another process running on some other CPU). Rather than + * trying to determine if p is really blocked, we just assume + * it's blocked and rely on the unwind routines to fail + * gracefully if the process wasn't really blocked after all. + * --davidm 99/12/15 + */ + unw_init_from_blocked_task(&info, p); + do { + if (p->state == TASK_RUNNING) + return 0; + if (unw_unwind(&info) < 0) + return 0; + unw_get_ip(&info, &ip); + if (!in_sched_functions(ip)) + return ip; + } while (count++ < 16); + return 0; +} +/* + * Ask PAL for the halt-state table and then halt forever in the state with + * the lowest reported power consumption. State 0 is the starting candidate; + * states 1-7 are considered only when their "im" field is set. Returns only + * if ia64_pal_halt_info() fails. + */ +void +cpu_halt (void) +{ + pal_power_mgmt_info_u_t power_info[8]; + unsigned long min_power; + int i, min_power_state; + + if (ia64_pal_halt_info(power_info) != 0) + return; + + min_power_state = 0; + min_power = power_info[0].pal_power_mgmt_info_s.power_consumption; + for (i = 1; i < 8; ++i) + if (power_info[i].pal_power_mgmt_info_s.im + && power_info[i].pal_power_mgmt_info_s.power_consumption < min_power) { + min_power = power_info[i].pal_power_mgmt_info_s.power_consumption; + min_power_state = i; + } + + while (1) + ia64_pal_halt(min_power_state); +} +/* + * With CONFIG_HOTPLUG_CPU, take down every online CPU except the one running + * this code; with CONFIG_KEXEC, also call kexec_disable_iosapic(). + */ +void machine_shutdown(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + cpu_down(cpu); + } +#endif +#ifdef CONFIG_KEXEC + kexec_disable_iosapic(); +#endif +} + +void +machine_restart (char *restart_cmd) +{ + (void)
notify_die(DIE_MACHINE_RESTART, restart_cmd, NULL, 0, 0, 0); + efi_reboot(REBOOT_WARM, NULL); +} + +void +machine_halt (void) +{ + (void) notify_die(DIE_MACHINE_HALT, "", NULL, 0, 0, 0); + cpu_halt(); +} +/* + * Call the pm_power_off hook when one is set; if there is none, or if it + * returns, fall back to machine_halt(). + */ +void +machine_power_off (void) +{ + if (pm_power_off) + pm_power_off(); + machine_halt(); +} + diff --git a/kernel/arch/ia64/kernel/ptrace.c b/kernel/arch/ia64/kernel/ptrace.c new file mode 100644 index 000000000..6f54d511c --- /dev/null +++ b/kernel/arch/ia64/kernel/ptrace.c @@ -0,0 +1,2194 @@ +/* + * Kernel support for the ptrace() and syscall tracing interfaces. + * + * Copyright (C) 1999-2005 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 2006 Intel Co + * 2006-08-12 - IA64 Native Utrace implementation support added by + * Anil S Keshavamurthy + * + * Derived from the x86 and Alpha versions. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PERFMON +#include +#endif + +#include "entry.h" + +/* + * Bits in the PSR that we allow ptrace() to change: + * be, up, ac, mfl, mfh (the user mask; five bits total) + * db (debug breakpoint fault; one bit) + * id (instruction debug fault disable; one bit) + * dd (data debug fault disable; one bit) + * ri (restart instruction; two bits) + * is (instruction set; one bit) + */ +#define IPSR_MASK (IA64_PSR_UM | IA64_PSR_DB | IA64_PSR_IS \ + | IA64_PSR_ID | IA64_PSR_DD | IA64_PSR_RI) + +#define MASK(nbits) ((1UL << (nbits)) - 1) /* mask with the low NBITS bits set; NBITS must be smaller than the width of unsigned long */ +#define PFM_MASK MASK(38) + +#define PTRACE_DEBUG 0 + +#if PTRACE_DEBUG +# define dprintk(format...) printk(format) +# define inline +#else +# define dprintk(format...) +#endif + +/* Return TRUE if PT was created due to kernel-entry via a system-call.
*/ + +static inline int +in_syscall (struct pt_regs *pt) +{ + return (long) pt->cr_ifs >= 0; /* i.e. bit 63 of cr_ifs (the valid bit) is clear */ +} + +/* + * Collect the NaT bits for the scratch registers saved in PT (r1-r3 and + * r8-r31; r4-r7 are not handled here) from scratch_unat and return a NaT + * bitset where bit i is set iff the NaT bit of register i is set. + */ +unsigned long +ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat) +{ +# define GET_BITS(first, last, unat) \ + ({ \ + unsigned long bit = ia64_unat_pos(&pt->r##first); \ + unsigned long nbits = (last - first + 1); \ + unsigned long mask = MASK(nbits) << first; \ + unsigned long dist; \ + if (bit < first) \ + dist = 64 + bit - first; \ + else \ + dist = bit - first; \ + ia64_rotr(unat, dist) & mask; \ + }) + unsigned long val; + + /* + * Registers that are stored consecutively in struct pt_regs + * can be handled in parallel. If the register order in + * struct pt_regs changes, this code MUST be updated. + */ + val = GET_BITS( 1, 1, scratch_unat); + val |= GET_BITS( 2, 3, scratch_unat); + val |= GET_BITS(12, 13, scratch_unat); + val |= GET_BITS(14, 14, scratch_unat); + val |= GET_BITS(15, 15, scratch_unat); + val |= GET_BITS( 8, 11, scratch_unat); + val |= GET_BITS(16, 31, scratch_unat); + return val; + +# undef GET_BITS +} + +/* + * Set the NaT bits for the scratch registers according to NAT and + * return the resulting unat (assuming the scratch registers are + * stored in PT). This is the inverse of ia64_get_scratch_nat_bits() + * and covers the same register groups. + */ +unsigned long +ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat) +{ +# define PUT_BITS(first, last, nat) \ + ({ \ + unsigned long bit = ia64_unat_pos(&pt->r##first); \ + unsigned long nbits = (last - first + 1); \ + unsigned long mask = MASK(nbits) << first; \ + long dist; \ + if (bit < first) \ + dist = 64 + bit - first; \ + else \ + dist = bit - first; \ + ia64_rotl(nat & mask, dist); \ + }) + unsigned long scratch_unat; + + /* + * Registers that are stored consecutively in struct pt_regs + * can be handled in parallel. If the register order in + * struct pt_regs changes, this code MUST be updated.
+ */ + scratch_unat = PUT_BITS( 1, 1, nat); + scratch_unat |= PUT_BITS( 2, 3, nat); + scratch_unat |= PUT_BITS(12, 13, nat); + scratch_unat |= PUT_BITS(14, 14, nat); + scratch_unat |= PUT_BITS(15, 15, nat); + scratch_unat |= PUT_BITS( 8, 11, nat); + scratch_unat |= PUT_BITS(16, 31, nat); + + return scratch_unat; + +# undef PUT_BITS +} + +#define IA64_MLX_TEMPLATE 0x2 +#define IA64_MOVL_OPCODE 6 +/* + * Advance the instruction pointer in REGS by one slot. psr.ri is + * incremented; when it would pass slot 2, or would land on slot 2 of a + * bundle whose template is IA64_MLX_TEMPLATE, cr_iip moves forward by one + * 16-byte bundle and ri restarts at 0. The result of get_user() is not + * checked. + */ +void +ia64_increment_ip (struct pt_regs *regs) +{ + unsigned long w0, ri = ia64_psr(regs)->ri + 1; + + if (ri > 2) { + ri = 0; + regs->cr_iip += 16; + } else if (ri == 2) { + get_user(w0, (char __user *) regs->cr_iip + 0); + if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) { + /* + * rfi'ing to slot 2 of an MLX bundle causes + * an illegal operation fault. We don't want + * that to happen... + */ + ri = 0; + regs->cr_iip += 16; + } + } + ia64_psr(regs)->ri = ri; +} +/* + * Step the instruction pointer in REGS back by one slot. When psr.ri is 0, + * cr_iip moves back by one 16-byte bundle and ri becomes 2, or 1 if that + * bundle's template is IA64_MLX_TEMPLATE; otherwise ri is simply + * decremented. The result of get_user() is not checked. + */ +void +ia64_decrement_ip (struct pt_regs *regs) +{ + unsigned long w0, ri = ia64_psr(regs)->ri - 1; + + if (ia64_psr(regs)->ri == 0) { + regs->cr_iip -= 16; + ri = 2; + get_user(w0, (char __user *) regs->cr_iip + 0); + if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) { + /* + * rfi'ing to slot 2 of an MLX bundle causes + * an illegal operation fault. We don't want + * that to happen... + */ + ri = 1; + } + } + ia64_psr(regs)->ri = ri; +} + +/* + * This routine is used to read an rnat bits that are stored on the + * kernel backing store. Since, in general, the alignment of the user + * and kernel are different, this is not completely trivial. In + * essence, we need to construct the user RNAT based on up to two + * kernel RNAT values and/or the RNAT value saved in the child's + * pt_regs.
+ * + * user rbs + * + * +--------+ <-- lowest address + * | slot62 | + * +--------+ + * | rnat | 0x....1f8 + * +--------+ + * | slot00 | \ + * +--------+ | + * | slot01 | > child_regs->ar_rnat + * +--------+ | + * | slot02 | / kernel rbs + * +--------+ +--------+ + * <- child_regs->ar_bspstore | slot61 | <-- krbs + * +- - - - + +--------+ + * | slot62 | + * +- - - - + +--------+ + * | rnat | + * +- - - - + +--------+ + * vrnat | slot00 | + * +- - - - + +--------+ + * = = + * +--------+ + * | slot00 | \ + * +--------+ | + * | slot01 | > child_stack->ar_rnat + * +--------+ | + * | slot02 | / + * +--------+ + * <--- child_stack->ar_bspstore + * + * The way to think of this code is as follows: bit 0 in the user rnat + * corresponds to some bit N (0 <= N <= 62) in one of the kernel rnat + * values. The kernel rnat value holding this bit is stored in + * variable rnat0. rnat1 is loaded with the kernel rnat value that + * forms the upper bits of the user rnat value. + * + * Boundary cases: + * + * o when reading the rnat "below" the first rnat slot on the kernel + * backing store, rnat0/rnat1 are set to 0 and the low order bits are + * merged in from pt->ar_rnat. + * + * o when reading the rnat "above" the last rnat slot on the kernel + * backing store, rnat0/rnat1 get their value from sw->ar_rnat.
+ */ +static unsigned long +get_rnat (struct task_struct *task, struct switch_stack *sw, + unsigned long *krbs, unsigned long *urnat_addr, + unsigned long *urbs_end) +{ + unsigned long rnat0 = 0, rnat1 = 0, urnat = 0, *slot0_kaddr; + unsigned long umask = 0, mask, m; + unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; + long num_regs, nbits; + struct pt_regs *pt; + + pt = task_pt_regs(task); + kbsp = (unsigned long *) sw->ar_bspstore; + ubspstore = (unsigned long *) pt->ar_bspstore; + + if (urbs_end < urnat_addr) + nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_end); /* user rbs ends inside this group: only the slots below urbs_end count */ + else + nbits = 63; + mask = MASK(nbits); + /* + * First, figure out which bit number slot 0 in user-land maps + * to in the kernel rnat. Do this by figuring out how many + * register slots we're beyond the user's backingstore and + * then computing the equivalent address in kernel space. + */ + num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1); + slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); + shift = ia64_rse_slot_num(slot0_kaddr); + rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); + rnat0_kaddr = rnat1_kaddr - 64; /* the collection word 64 words below rnat1_kaddr */ + + if (ubspstore + 63 > urnat_addr) { + /* some bits need to be merged in from pt->ar_rnat */ + umask = MASK(ia64_rse_slot_num(ubspstore)) & mask; + urnat = (pt->ar_rnat & umask); + mask &= ~umask; + if (!mask) + return urnat; /* every requested bit came from pt->ar_rnat */ + } + + m = mask << shift; + if (rnat0_kaddr >= kbsp) + rnat0 = sw->ar_rnat; /* at or above the kernel bspstore: take the value saved in the switch_stack */ + else if (rnat0_kaddr > krbs) + rnat0 = *rnat0_kaddr; /* otherwise rnat0 keeps its initial 0 */ + urnat |= (rnat0 & m) >> shift; + + m = mask >> (63 - shift); + if (rnat1_kaddr >= kbsp) + rnat1 = sw->ar_rnat; /* same selection as for rnat0 */ + else if (rnat1_kaddr > krbs) + rnat1 = *rnat1_kaddr; + urnat |= (rnat1 & m) << (63 - shift); + return urnat; +} + +/* + * The reverse of get_rnat.
+ */ +static void +put_rnat (struct task_struct *task, struct switch_stack *sw, + unsigned long *krbs, unsigned long *urnat_addr, unsigned long urnat, + unsigned long *urbs_end) +{ + unsigned long rnat0 = 0, rnat1 = 0, *slot0_kaddr, umask = 0, mask, m; + unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; + long num_regs, nbits; + struct pt_regs *pt; + unsigned long cfm, *urbs_kargs; + + pt = task_pt_regs(task); + kbsp = (unsigned long *) sw->ar_bspstore; + ubspstore = (unsigned long *) pt->ar_bspstore; + + urbs_kargs = urbs_end; + if (in_syscall(pt)) { + /* + * If entered via syscall, don't allow user to set rnat bits + * for syscall args. + */ + cfm = pt->cr_ifs; + urbs_kargs = ia64_rse_skip_regs(urbs_end, -(cfm & 0x7f)); + } + + if (urbs_kargs >= urnat_addr) + nbits = 63; + else { + if ((urnat_addr - 63) >= urbs_kargs) + return; /* every slot covered by this word lies at or beyond urbs_kargs: nothing may be written */ + nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_kargs); + } + mask = MASK(nbits); + + /* + * First, figure out which bit number slot 0 in user-land maps + * to in the kernel rnat. Do this by figuring out how many + * register slots we're beyond the user's backingstore and + * then computing the equivalent address in kernel space. + */ + num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1); + slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); + shift = ia64_rse_slot_num(slot0_kaddr); + rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); + rnat0_kaddr = rnat1_kaddr - 64; + + if (ubspstore + 63 > urnat_addr) { + /* some bits need to be placed in pt->ar_rnat: */ + umask = MASK(ia64_rse_slot_num(ubspstore)) & mask; + pt->ar_rnat = (pt->ar_rnat & ~umask) | (urnat & umask); + mask &= ~umask; + if (!mask) + return; + } + /* + * Note: Section 11.1 of the EAS guarantees that bit 63 of an + * rnat slot is ignored, so we don't have to clear it here.
+ */ + rnat0 = (urnat << shift); + m = mask << shift; + if (rnat0_kaddr >= kbsp) + sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat0 & m); + else if (rnat0_kaddr > krbs) + *rnat0_kaddr = ((*rnat0_kaddr & ~m) | (rnat0 & m)); + + rnat1 = (urnat >> (63 - shift)); + m = mask >> (63 - shift); + if (rnat1_kaddr >= kbsp) + sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat1 & m); + else if (rnat1_kaddr > krbs) + *rnat1_kaddr = ((*rnat1_kaddr & ~m) | (rnat1 & m)); +} +/* + * Return non-zero when user address ADDR lies in [BSPSTORE, R], both ends + * inclusive, where R is the RNaT slot address that ia64_rse_rnat_addr() + * derives from URBS_END. + */ +static inline int +on_kernel_rbs (unsigned long addr, unsigned long bspstore, + unsigned long urbs_end) +{ + unsigned long *rnat_addr = ia64_rse_rnat_addr((unsigned long *) + urbs_end); + return (addr >= bspstore && addr <= (unsigned long) rnat_addr); +} + +/* + * Read a word from the user-level backing store of task CHILD. ADDR + * is the user-level address to read the word from, VAL a pointer to + * the return value, and USER_RBS_END gives the end of the user-level + * backing store (i.e., it's the address that would be in ar.bsp after + * the user executed a "cover" instruction). + * + * This routine takes care of accessing the kernel register backing + * store for those registers that got spilled there. It also takes + * care of calculating the appropriate RNaT collection words. + * + * Returns 0 on success, or -EIO when the word has to come from the + * child's memory and access_process_vm() does not deliver all of it. + */ +long +ia64_peek (struct task_struct *child, struct switch_stack *child_stack, + unsigned long user_rbs_end, unsigned long addr, long *val) +{ + unsigned long *bspstore, *krbs, regnum, *laddr, *urbs_end, *rnat_addr; + struct pt_regs *child_regs; + size_t copied; + long ret; + + urbs_end = (long *) user_rbs_end; + laddr = (unsigned long *) addr; + child_regs = task_pt_regs(child); + bspstore = (unsigned long *) child_regs->ar_bspstore; + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + if (on_kernel_rbs(addr, (unsigned long) bspstore, + (unsigned long) urbs_end)) + { + /* + * Attempt to read the RBS in an area that's actually + * on the kernel RBS => read the corresponding bits in + * the kernel RBS.
+ */ + rnat_addr = ia64_rse_rnat_addr(laddr); + ret = get_rnat(child, child_stack, krbs, rnat_addr, urbs_end); + + if (laddr == rnat_addr) { + /* return NaT collection word itself */ + *val = ret; + return 0; + } + + if (((1UL << ia64_rse_slot_num(laddr)) & ret) != 0) { + /* + * It is implementation dependent whether the + * data portion of a NaT value gets saved on a + * st8.spill or RSE spill (e.g., see EAS 2.6, + * 4.4.4.6 Register Spill and Fill). To get + * consistent behavior across all possible + * IA-64 implementations, we return zero in + * this case. + */ + *val = 0; + return 0; + } + + if (laddr < urbs_end) { + /* + * The desired word is on the kernel RBS and + * is not a NaT. + */ + regnum = ia64_rse_num_regs(bspstore, laddr); + *val = *ia64_rse_skip_regs(krbs, regnum); + return 0; + } + } + copied = access_process_vm(child, addr, &ret, sizeof(ret), 0); /* not served from the kernel RBS: read the child's memory instead */ + if (copied != sizeof(ret)) + return -EIO; + *val = ret; + return 0; +} +/* + * Write VAL to the user-level backing store address ADDR of task CHILD; + * the counterpart of ia64_peek(). Addresses accepted by on_kernel_rbs() + * are applied to the kernel side: RNaT slots go through put_rnat(), other + * slots below USER_RBS_END are stored in the kernel RBS, and other slots + * at or beyond USER_RBS_END are silently ignored. All remaining addresses + * are written to the child's memory. Returns 0, or -EIO when that memory + * write is incomplete. + */ +long +ia64_poke (struct task_struct *child, struct switch_stack *child_stack, + unsigned long user_rbs_end, unsigned long addr, long val) +{ + unsigned long *bspstore, *krbs, regnum, *laddr; + unsigned long *urbs_end = (long *) user_rbs_end; + struct pt_regs *child_regs; + + laddr = (unsigned long *) addr; + child_regs = task_pt_regs(child); + bspstore = (unsigned long *) child_regs->ar_bspstore; + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + if (on_kernel_rbs(addr, (unsigned long) bspstore, + (unsigned long) urbs_end)) + { + /* + * Attempt to write the RBS in an area that's actually + * on the kernel RBS => write the corresponding bits + * in the kernel RBS.
+ */ + if (ia64_rse_is_rnat_slot(laddr)) + put_rnat(child, child_stack, krbs, laddr, val, + urbs_end); + else { + if (laddr < urbs_end) { + regnum = ia64_rse_num_regs(bspstore, laddr); + *ia64_rse_skip_regs(krbs, regnum) = val; + } + } + } else if (access_process_vm(child, addr, &val, sizeof(val), 1) + != sizeof(val)) + return -EIO; + return 0; +} + +/* + * Calculate the address of the end of the user-level register backing + * store. This is the address that would have been stored in ar.bsp + * if the user had executed a "cover" instruction right before + * entering the kernel. If CFMP is not NULL, it is used to return the + * "current frame mask" that was active at the time the kernel was + * entered. + */ +unsigned long +ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt, + unsigned long *cfmp) +{ + unsigned long *krbs, *bspstore, cfm = pt->cr_ifs; + long ndirty; + + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + bspstore = (unsigned long *) pt->ar_bspstore; + ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); /* loadrs >> 19 is used here as a count of 8-byte words above krbs */ + + if (in_syscall(pt)) + ndirty += (cfm & 0x7f); /* low 7 bits of cr_ifs; presumably the size of the current frame -- TODO confirm against the CFM layout */ + else + cfm &= ~(1UL << 63); /* clear valid bit */ + + if (cfmp) + *cfmp = cfm; + return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty); +} + +/* + * Synchronize (i.e, write) the RSE backing store living in kernel + * space to the VM of the CHILD task. SW and PT are the pointers to + * the switch_stack and pt_regs structures, respectively. + * USER_RBS_END is the user-level address at which the backing store + * ends.
+ */ +long +ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw, + unsigned long user_rbs_start, unsigned long user_rbs_end) +{ + unsigned long addr, val; + long ret; + + /* now copy word for word from kernel rbs to user rbs: */ + for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) { + ret = ia64_peek(child, sw, user_rbs_end, addr, &val); + if (ret < 0) + return ret; /* stop at the first word that cannot be read */ + if (access_process_vm(child, addr, &val, sizeof(val), 1) + != sizeof(val)) + return -EIO; + } + return 0; +} +/* + * Counterpart of ia64_sync_user_rbs(): copy [USER_RBS_START, USER_RBS_END) + * word by word from the memory of CHILD into the kernel register backing + * store via ia64_poke(). Returns 0 on success, -EIO when a word cannot be + * read in full, or the negative value returned by ia64_poke(). + */ +static long +ia64_sync_kernel_rbs (struct task_struct *child, struct switch_stack *sw, + unsigned long user_rbs_start, unsigned long user_rbs_end) +{ + unsigned long addr, val; + long ret; + + /* now copy word for word from user rbs to kernel rbs: */ + for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) { + if (access_process_vm(child, addr, &val, sizeof(val), 0) + != sizeof(val)) + return -EIO; + + ret = ia64_poke(child, sw, user_rbs_end, addr, val); + if (ret < 0) + return ret; + } + return 0; +} +/* Signature shared by ia64_sync_user_rbs() and ia64_sync_kernel_rbs(). */ +typedef long (*syncfunc_t)(struct task_struct *, struct switch_stack *, + unsigned long, unsigned long); +/* + * Unwind callback: ARG is a syncfunc_t, which is invoked for info->task on + * the range from pt->ar_bspstore up to the end of the user backing store. + * Nothing is done if unwinding to the user frame fails. The value returned + * by the sync function is discarded, so a failed sync is not reported. + */ +static void do_sync_rbs(struct unw_frame_info *info, void *arg) +{ + struct pt_regs *pt; + unsigned long urbs_end; + syncfunc_t fn = arg; + + if (unw_unwind_to_user(info) < 0) + return; + pt = task_pt_regs(info->task); + urbs_end = ia64_get_user_rbs_end(info->task, pt, NULL); + + fn(info->task, info->sw, pt->ar_bspstore, urbs_end); +} + +/* + * when a thread is stopped (ptraced), debugger might change thread's user + * stack (change memory directly), and we must avoid the RSE stored in kernel + * to override user stack (user space's RSE is newer than kernel's in the + * case). To workaround the issue, we copy kernel RSE to user RSE before the + * task is stopped, so user RSE has updated data. we then copy user RSE to + * kernel after the task is resummed from traced stop and kernel will use the + * newer RSE to return to user.
TIF_RESTORE_RSE is the flag to indicate we need + * synchronize user RSE to kernel. + */ +void ia64_ptrace_stop(void) +{ + if (test_and_set_tsk_thread_flag(current, TIF_RESTORE_RSE)) + return; /* flag was already set, so the kernel-to-user copy is not repeated */ + set_notify_resume(current); + unw_init_running(do_sync_rbs, ia64_sync_user_rbs); +} + +/* + * This is called to read back the register backing store: it clears + * TIF_RESTORE_RSE on the current task and copies the user-level backing + * store into the kernel RBS through ia64_sync_kernel_rbs(). + */ +void ia64_sync_krbs(void) +{ + clear_tsk_thread_flag(current, TIF_RESTORE_RSE); + + unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs); +} + +/* + * After PTRACE_ATTACH, a thread's register backing store area in user + * space is assumed to contain correct data whenever the thread is + * stopped. arch_ptrace_stop takes care of this on tracing stops. + * But if the child was already stopped for job control when we attach + * to it, then it might not ever get into ptrace_stop by the time we + * want to examine the user memory containing the RBS. + */ +void +ptrace_attach_sync_user_rbs (struct task_struct *child) +{ + int stopped = 0; + struct unw_frame_info info; + + /* + * If the child is in TASK_STOPPED, we need to change that to + * TASK_TRACED momentarily while we operate on it. This ensures + * that the child won't be woken up and return to user mode while + * we are doing the sync. (It can only be woken up for SIGKILL.) + */ + + read_lock(&tasklist_lock); + if (child->sighand) { + spin_lock_irq(&child->sighand->siglock); + if (child->state == TASK_STOPPED && + !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) { + set_notify_resume(child); + + child->state = TASK_TRACED; + stopped = 1; + } + spin_unlock_irq(&child->sighand->siglock); + } + read_unlock(&tasklist_lock); + + if (!stopped) + return; /* child was not in TASK_STOPPED, or TIF_RESTORE_RSE was already set */ + + unw_init_from_blocked_task(&info, child); + do_sync_rbs(&info, ia64_sync_user_rbs); + + /* + * Now move the child back into TASK_STOPPED if it should be in a + * job control stop, so that SIGCONT can be used to wake it up.
+ */ + read_lock(&tasklist_lock); + if (child->sighand) { + spin_lock_irq(&child->sighand->siglock); + if (child->state == TASK_TRACED && + (child->signal->flags & SIGNAL_STOP_STOPPED)) { + child->state = TASK_STOPPED; + } + spin_unlock_irq(&child->sighand->siglock); + } + read_unlock(&tasklist_lock); +} + +/* + * Write f32-f127 back to task->thread.fph if it has been modified, + * i.e. if TASK owns the local FPU and psr.mfh is set. In that case + * psr.mfh is cleared and IA64_THREAD_FPH_VALID is set as well. + */ +inline void +ia64_flush_fph (struct task_struct *task) +{ + struct ia64_psr *psr = ia64_psr(task_pt_regs(task)); + + /* + * Prevent migrating this task while + * we're fiddling with the FPU state + */ + preempt_disable(); + if (ia64_is_local_fpu_owner(task) && psr->mfh) { + psr->mfh = 0; + task->thread.flags |= IA64_THREAD_FPH_VALID; + ia64_save_fpu(&task->thread.fph[0]); + } + preempt_enable(); +} + +/* + * Sync the fph state of the task so that it can be manipulated + * through thread.fph. If necessary, f32-f127 are written back to + * thread.fph or, if the fph state hasn't been used before, thread.fph + * is cleared to zeroes. Also, access to f32-f127 is disabled to + * ensure that the task picks up the state from thread.fph when it + * executes again. + */ +void +ia64_sync_fph (struct task_struct *task) +{ + struct ia64_psr *psr = ia64_psr(task_pt_regs(task)); + + ia64_flush_fph(task); + if (!(task->thread.flags & IA64_THREAD_FPH_VALID)) { + task->thread.flags |= IA64_THREAD_FPH_VALID; + memset(&task->thread.fph, 0, sizeof(task->thread.fph)); + } + ia64_drop_fpu(task); + psr->dfh = 1; /* the "access to f32-f127 is disabled" step described above */ +} + +/* + * Change the machine-state of CHILD such that it will return via the normal + * kernel exit-path, rather than the syscall-exit path.
+ */ +static void +convert_to_non_syscall (struct task_struct *child, struct pt_regs *pt, + unsigned long cfm) +{ + struct unw_frame_info info, prev_info; + unsigned long ip, sp, pr; + + unw_init_from_blocked_task(&info, child); + while (1) { + prev_info = info; + if (unw_unwind(&info) < 0) + return; + + unw_get_sp(&info, &sp); + if ((long)((unsigned long)child + IA64_STK_OFFSET - sp) + < IA64_PT_REGS_SIZE) { + dprintk("ptrace.%s: ran off the top of the kernel " + "stack\n", __func__); + return; + } + if (unw_get_pr (&prev_info, &pr) < 0) { + unw_get_rp(&prev_info, &ip); + dprintk("ptrace.%s: failed to read " + "predicate register (ip=0x%lx)\n", + __func__, ip); + return; + } + if (unw_is_intr_frame(&info) + && (pr & (1UL << PRED_USER_STACK))) + break; + } + + /* + * Note: at the time of this call, the target task is blocked + * in notify_resume_user() and by clearling PRED_LEAVE_SYSCALL + * (aka, "pLvSys") we redirect execution from + * .work_pending_syscall_end to .work_processed_kernel. + */ + unw_get_pr(&prev_info, &pr); + pr &= ~((1UL << PRED_SYSCALL) | (1UL << PRED_LEAVE_SYSCALL)); + pr |= (1UL << PRED_NON_SYSCALL); + unw_set_pr(&prev_info, pr); + + pt->cr_ifs = (1UL << 63) | cfm; + /* + * Clear the memory that is NOT written on syscall-entry to + * ensure we do not leak kernel-state to user when execution + * resumes. 
+ */ + pt->r2 = 0; + pt->r3 = 0; + pt->r14 = 0; + memset(&pt->r16, 0, 16*8); /* clear r16-r31 */ + memset(&pt->f6, 0, 6*16); /* clear f6-f11 */ + pt->b7 = 0; + pt->ar_ccv = 0; + pt->ar_csd = 0; + pt->ar_ssd = 0; +} + +static int +access_nat_bits (struct task_struct *child, struct pt_regs *pt, + struct unw_frame_info *info, + unsigned long *data, int write_access) +{ + unsigned long regnum, nat_bits, scratch_unat, dummy = 0; + char nat = 0; + + if (write_access) { + nat_bits = *data; + scratch_unat = ia64_put_scratch_nat_bits(pt, nat_bits); + if (unw_set_ar(info, UNW_AR_UNAT, scratch_unat) < 0) { + dprintk("ptrace: failed to set ar.unat\n"); + return -1; + } + for (regnum = 4; regnum <= 7; ++regnum) { + unw_get_gr(info, regnum, &dummy, &nat); + unw_set_gr(info, regnum, dummy, + (nat_bits >> regnum) & 1); + } + } else { + if (unw_get_ar(info, UNW_AR_UNAT, &scratch_unat) < 0) { + dprintk("ptrace: failed to read ar.unat\n"); + return -1; + } + nat_bits = ia64_get_scratch_nat_bits(pt, scratch_unat); + for (regnum = 4; regnum <= 7; ++regnum) { + unw_get_gr(info, regnum, &dummy, &nat); + nat_bits |= (nat != 0) << regnum; + } + *data = nat_bits; + } + return 0; +} + +static int +access_uarea (struct task_struct *child, unsigned long addr, + unsigned long *data, int write_access); + +static long +ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) +{ + unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val; + struct unw_frame_info info; + struct ia64_fpreg fpval; + struct switch_stack *sw; + struct pt_regs *pt; + long ret, retval = 0; + char nat = 0; + int i; + + if (!access_ok(VERIFY_WRITE, ppr, sizeof(struct pt_all_user_regs))) + return -EIO; + + pt = task_pt_regs(child); + sw = (struct switch_stack *) (child->thread.ksp + 16); + unw_init_from_blocked_task(&info, child); + if (unw_unwind_to_user(&info) < 0) { + return -EIO; + } + + if (((unsigned long) ppr & 0x7) != 0) { + dprintk("ptrace:unaligned register address %p\n", ppr); + return -EIO; 
+ } + + if (access_uarea(child, PT_CR_IPSR, &psr, 0) < 0 + || access_uarea(child, PT_AR_EC, &ec, 0) < 0 + || access_uarea(child, PT_AR_LC, &lc, 0) < 0 + || access_uarea(child, PT_AR_RNAT, &rnat, 0) < 0 + || access_uarea(child, PT_AR_BSP, &bsp, 0) < 0 + || access_uarea(child, PT_CFM, &cfm, 0) + || access_uarea(child, PT_NAT_BITS, &nat_bits, 0)) + return -EIO; + + /* control regs */ + + retval |= __put_user(pt->cr_iip, &ppr->cr_iip); + retval |= __put_user(psr, &ppr->cr_ipsr); + + /* app regs */ + + retval |= __put_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]); + retval |= __put_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]); + retval |= __put_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]); + retval |= __put_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]); + retval |= __put_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]); + retval |= __put_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]); + + retval |= __put_user(ec, &ppr->ar[PT_AUR_EC]); + retval |= __put_user(lc, &ppr->ar[PT_AUR_LC]); + retval |= __put_user(rnat, &ppr->ar[PT_AUR_RNAT]); + retval |= __put_user(bsp, &ppr->ar[PT_AUR_BSP]); + retval |= __put_user(cfm, &ppr->cfm); + + /* gr1-gr3 */ + + retval |= __copy_to_user(&ppr->gr[1], &pt->r1, sizeof(long)); + retval |= __copy_to_user(&ppr->gr[2], &pt->r2, sizeof(long) *2); + + /* gr4-gr7 */ + + for (i = 4; i < 8; i++) { + if (unw_access_gr(&info, i, &val, &nat, 0) < 0) + return -EIO; + retval |= __put_user(val, &ppr->gr[i]); + } + + /* gr8-gr11 */ + + retval |= __copy_to_user(&ppr->gr[8], &pt->r8, sizeof(long) * 4); + + /* gr12-gr15 */ + + retval |= __copy_to_user(&ppr->gr[12], &pt->r12, sizeof(long) * 2); + retval |= __copy_to_user(&ppr->gr[14], &pt->r14, sizeof(long)); + retval |= __copy_to_user(&ppr->gr[15], &pt->r15, sizeof(long)); + + /* gr16-gr31 */ + + retval |= __copy_to_user(&ppr->gr[16], &pt->r16, sizeof(long) * 16); + + /* b0 */ + + retval |= __put_user(pt->b0, &ppr->br[0]); + + /* b1-b5 */ + + for (i = 1; i < 6; i++) { + if (unw_access_br(&info, i, &val, 0) < 0) + return -EIO; + 
__put_user(val, &ppr->br[i]); + } + + /* b6-b7 */ + + retval |= __put_user(pt->b6, &ppr->br[6]); + retval |= __put_user(pt->b7, &ppr->br[7]); + + /* fr2-fr5 */ + + for (i = 2; i < 6; i++) { + if (unw_get_fr(&info, i, &fpval) < 0) + return -EIO; + retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval)); + } + + /* fr6-fr11 */ + + retval |= __copy_to_user(&ppr->fr[6], &pt->f6, + sizeof(struct ia64_fpreg) * 6); + + /* fp scratch regs(12-15) */ + + retval |= __copy_to_user(&ppr->fr[12], &sw->f12, + sizeof(struct ia64_fpreg) * 4); + + /* fr16-fr31 */ + + for (i = 16; i < 32; i++) { + if (unw_get_fr(&info, i, &fpval) < 0) + return -EIO; + retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval)); + } + + /* fph */ + + ia64_flush_fph(child); + retval |= __copy_to_user(&ppr->fr[32], &child->thread.fph, + sizeof(ppr->fr[32]) * 96); + + /* preds */ + + retval |= __put_user(pt->pr, &ppr->pr); + + /* nat bits */ + + retval |= __put_user(nat_bits, &ppr->nat); + + ret = retval ? -EIO : 0; + return ret; +} + +static long +ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) +{ + unsigned long psr, rsc, ec, lc, rnat, bsp, cfm, nat_bits, val = 0; + struct unw_frame_info info; + struct switch_stack *sw; + struct ia64_fpreg fpval; + struct pt_regs *pt; + long ret, retval = 0; + int i; + + memset(&fpval, 0, sizeof(fpval)); + + if (!access_ok(VERIFY_READ, ppr, sizeof(struct pt_all_user_regs))) + return -EIO; + + pt = task_pt_regs(child); + sw = (struct switch_stack *) (child->thread.ksp + 16); + unw_init_from_blocked_task(&info, child); + if (unw_unwind_to_user(&info) < 0) { + return -EIO; + } + + if (((unsigned long) ppr & 0x7) != 0) { + dprintk("ptrace:unaligned register address %p\n", ppr); + return -EIO; + } + + /* control regs */ + + retval |= __get_user(pt->cr_iip, &ppr->cr_iip); + retval |= __get_user(psr, &ppr->cr_ipsr); + + /* app regs */ + + retval |= __get_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]); + retval |= __get_user(rsc, 
&ppr->ar[PT_AUR_RSC]); + retval |= __get_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]); + retval |= __get_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]); + retval |= __get_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]); + retval |= __get_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]); + + retval |= __get_user(ec, &ppr->ar[PT_AUR_EC]); + retval |= __get_user(lc, &ppr->ar[PT_AUR_LC]); + retval |= __get_user(rnat, &ppr->ar[PT_AUR_RNAT]); + retval |= __get_user(bsp, &ppr->ar[PT_AUR_BSP]); + retval |= __get_user(cfm, &ppr->cfm); + + /* gr1-gr3 */ + + retval |= __copy_from_user(&pt->r1, &ppr->gr[1], sizeof(long)); + retval |= __copy_from_user(&pt->r2, &ppr->gr[2], sizeof(long) * 2); + + /* gr4-gr7 */ + + for (i = 4; i < 8; i++) { + retval |= __get_user(val, &ppr->gr[i]); + /* NaT bit will be set via PT_NAT_BITS: */ + if (unw_set_gr(&info, i, val, 0) < 0) + return -EIO; + } + + /* gr8-gr11 */ + + retval |= __copy_from_user(&pt->r8, &ppr->gr[8], sizeof(long) * 4); + + /* gr12-gr15 */ + + retval |= __copy_from_user(&pt->r12, &ppr->gr[12], sizeof(long) * 2); + retval |= __copy_from_user(&pt->r14, &ppr->gr[14], sizeof(long)); + retval |= __copy_from_user(&pt->r15, &ppr->gr[15], sizeof(long)); + + /* gr16-gr31 */ + + retval |= __copy_from_user(&pt->r16, &ppr->gr[16], sizeof(long) * 16); + + /* b0 */ + + retval |= __get_user(pt->b0, &ppr->br[0]); + + /* b1-b5 */ + + for (i = 1; i < 6; i++) { + retval |= __get_user(val, &ppr->br[i]); + retval |= unw_set_br(&info, i, val); /* any non-zero result becomes -EIO at the end, so an unwinder failure is no longer dropped */ + } + + /* b6-b7 */ + + retval |= __get_user(pt->b6, &ppr->br[6]); + retval |= __get_user(pt->b7, &ppr->br[7]); + + /* fr2-fr5 */ + + for (i = 2; i < 6; i++) { + retval |= __copy_from_user(&fpval, &ppr->fr[i], sizeof(fpval)); + if (unw_set_fr(&info, i, fpval) < 0) + return -EIO; + } + + /* fr6-fr11 */ + + retval |= __copy_from_user(&pt->f6, &ppr->fr[6], + sizeof(ppr->fr[6]) * 6); + + /* fp scratch regs(12-15) */ + + retval |= __copy_from_user(&sw->f12, &ppr->fr[12], + sizeof(ppr->fr[12]) * 4); + + /* fr16-fr31 */ + + for (i = 16; i < 32;
i++) { + retval |= __copy_from_user(&fpval, &ppr->fr[i], + sizeof(fpval)); + if (unw_set_fr(&info, i, fpval) < 0) + return -EIO; + } + + /* fph */ + + ia64_sync_fph(child); + retval |= __copy_from_user(&child->thread.fph, &ppr->fr[32], + sizeof(ppr->fr[32]) * 96); + + /* preds */ + + retval |= __get_user(pt->pr, &ppr->pr); + + /* nat bits */ + + retval |= __get_user(nat_bits, &ppr->nat); + + retval |= access_uarea(child, PT_CR_IPSR, &psr, 1); + retval |= access_uarea(child, PT_AR_RSC, &rsc, 1); + retval |= access_uarea(child, PT_AR_EC, &ec, 1); + retval |= access_uarea(child, PT_AR_LC, &lc, 1); + retval |= access_uarea(child, PT_AR_RNAT, &rnat, 1); + retval |= access_uarea(child, PT_AR_BSP, &bsp, 1); + retval |= access_uarea(child, PT_CFM, &cfm, 1); + retval |= access_uarea(child, PT_NAT_BITS, &nat_bits, 1); + + ret = retval ? -EIO : 0; + return ret; +} + +void +user_enable_single_step (struct task_struct *child) +{ + struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child)); + + set_tsk_thread_flag(child, TIF_SINGLESTEP); + child_psr->ss = 1; +} + +void +user_enable_block_step (struct task_struct *child) +{ + struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child)); + + set_tsk_thread_flag(child, TIF_SINGLESTEP); + child_psr->tb = 1; +} + +void +user_disable_single_step (struct task_struct *child) +{ + struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child)); + + /* make sure the single step/taken-branch trap bits are not set: */ + clear_tsk_thread_flag(child, TIF_SINGLESTEP); + child_psr->ss = 0; + child_psr->tb = 0; +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure the single step bit is not set. 
+ */ +void +ptrace_disable (struct task_struct *child) +{ + user_disable_single_step(child); +} + +long +arch_ptrace (struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + switch (request) { + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + /* read word at location addr */ + if (access_process_vm(child, addr, &data, sizeof(data), 0) + != sizeof(data)) + return -EIO; + /* ensure return value is not mistaken for error code */ + force_successful_syscall_return(); + return data; + + /* PTRACE_POKETEXT and PTRACE_POKEDATA is handled + * by the generic ptrace_request(). + */ + + case PTRACE_PEEKUSR: + /* read the word at addr in the USER area */ + if (access_uarea(child, addr, &data, 0) < 0) + return -EIO; + /* ensure return value is not mistaken for error code */ + force_successful_syscall_return(); + return data; + + case PTRACE_POKEUSR: + /* write the word at addr in the USER area */ + if (access_uarea(child, addr, &data, 1) < 0) + return -EIO; + return 0; + + case PTRACE_OLD_GETSIGINFO: + /* for backwards-compatibility */ + return ptrace_request(child, PTRACE_GETSIGINFO, addr, data); + + case PTRACE_OLD_SETSIGINFO: + /* for backwards-compatibility */ + return ptrace_request(child, PTRACE_SETSIGINFO, addr, data); + + case PTRACE_GETREGS: + return ptrace_getregs(child, + (struct pt_all_user_regs __user *) data); + + case PTRACE_SETREGS: + return ptrace_setregs(child, + (struct pt_all_user_regs __user *) data); + + default: + return ptrace_request(child, request, addr, data); + } +} + + +/* "asmlinkage" so the input arguments are preserved... 
*/ + +asmlinkage long +syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, + struct pt_regs regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (tracehook_report_syscall_entry(&regs)) + return -ENOSYS; + + /* copy user rbs to kernel rbs */ + if (test_thread_flag(TIF_RESTORE_RSE)) + ia64_sync_krbs(); + + + audit_syscall_entry(regs.r15, arg0, arg1, arg2, arg3); + + return 0; +} + +/* "asmlinkage" so the input arguments are preserved... */ + +asmlinkage void +syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, + struct pt_regs regs) +{ + int step; + + audit_syscall_exit(&regs); + + step = test_thread_flag(TIF_SINGLESTEP); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(&regs, step); + + /* copy user rbs to kernel rbs */ + if (test_thread_flag(TIF_RESTORE_RSE)) + ia64_sync_krbs(); +} + +/* Utrace implementation starts here */ +struct regset_get { + void *kbuf; + void __user *ubuf; +}; + +struct regset_set { + const void *kbuf; + const void __user *ubuf; +}; + +struct regset_getset { + struct task_struct *target; + const struct user_regset *regset; + union { + struct regset_get get; + struct regset_set set; + } u; + unsigned int pos; + unsigned int count; + int ret; +}; + +static int +access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + struct pt_regs *pt; + unsigned long *ptr = NULL; + int ret; + char nat = 0; + + pt = task_pt_regs(target); + switch (addr) { + case ELF_GR_OFFSET(1): + ptr = &pt->r1; + break; + case ELF_GR_OFFSET(2): + case ELF_GR_OFFSET(3): + ptr = (void *)&pt->r2 + (addr - ELF_GR_OFFSET(2)); + break; + case ELF_GR_OFFSET(4) ...
ELF_GR_OFFSET(7): + if (write_access) { + /* read NaT bit first: */ + unsigned long dummy; + + ret = unw_get_gr(info, addr/8, &dummy, &nat); + if (ret < 0) + return ret; + } + return unw_access_gr(info, addr/8, data, &nat, write_access); + case ELF_GR_OFFSET(8) ... ELF_GR_OFFSET(11): + ptr = (void *)&pt->r8 + addr - ELF_GR_OFFSET(8); + break; + case ELF_GR_OFFSET(12): + case ELF_GR_OFFSET(13): + ptr = (void *)&pt->r12 + addr - ELF_GR_OFFSET(12); + break; + case ELF_GR_OFFSET(14): + ptr = &pt->r14; + break; + case ELF_GR_OFFSET(15): + ptr = &pt->r15; + } + if (write_access) + *ptr = *data; + else + *data = *ptr; + return 0; +} + +static int +access_elf_breg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + struct pt_regs *pt; + unsigned long *ptr = NULL; + + pt = task_pt_regs(target); + switch (addr) { + case ELF_BR_OFFSET(0): + ptr = &pt->b0; + break; + case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5): + return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8, + data, write_access); + case ELF_BR_OFFSET(6): + ptr = &pt->b6; + break; + case ELF_BR_OFFSET(7): + ptr = &pt->b7; + } + if (write_access) + *ptr = *data; + else + *data = *ptr; + return 0; +} + +static int +access_elf_areg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + struct pt_regs *pt; + unsigned long cfm, urbs_end; + unsigned long *ptr = NULL; + + pt = task_pt_regs(target); + if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) { + switch (addr) { + case ELF_AR_RSC_OFFSET: + /* force PL3 */ + if (write_access) + pt->ar_rsc = *data | (3 << 2); + else + *data = pt->ar_rsc; + return 0; + case ELF_AR_BSP_OFFSET: + /* + * By convention, we use PT_AR_BSP to refer to + * the end of the user-level backing store. + * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof) + * to get the real value of ar.bsp at the time + * the kernel was entered. 
+ * + * Furthermore, when changing the contents of + * PT_AR_BSP (or PT_CFM) while the task is + * blocked in a system call, convert the state + * so that the non-system-call exit + * path is used. This ensures that the proper + * state will be picked up when resuming + * execution. However, it *also* means that + * once we write PT_AR_BSP/PT_CFM, it won't be + * possible to modify the syscall arguments of + * the pending system call any longer. This + * shouldn't be an issue because modifying + * PT_AR_BSP/PT_CFM generally implies that + * we're either abandoning the pending system + * call or that we defer it's re-execution + * (e.g., due to GDB doing an inferior + * function call). + */ + urbs_end = ia64_get_user_rbs_end(target, pt, &cfm); + if (write_access) { + if (*data != urbs_end) { + if (in_syscall(pt)) + convert_to_non_syscall(target, + pt, + cfm); + /* + * Simulate user-level write + * of ar.bsp: + */ + pt->loadrs = 0; + pt->ar_bspstore = *data; + } + } else + *data = urbs_end; + return 0; + case ELF_AR_BSPSTORE_OFFSET: + ptr = &pt->ar_bspstore; + break; + case ELF_AR_RNAT_OFFSET: + ptr = &pt->ar_rnat; + break; + case ELF_AR_CCV_OFFSET: + ptr = &pt->ar_ccv; + break; + case ELF_AR_UNAT_OFFSET: + ptr = &pt->ar_unat; + break; + case ELF_AR_FPSR_OFFSET: + ptr = &pt->ar_fpsr; + break; + case ELF_AR_PFS_OFFSET: + ptr = &pt->ar_pfs; + break; + case ELF_AR_LC_OFFSET: + return unw_access_ar(info, UNW_AR_LC, data, + write_access); + case ELF_AR_EC_OFFSET: + return unw_access_ar(info, UNW_AR_EC, data, + write_access); + case ELF_AR_CSD_OFFSET: + ptr = &pt->ar_csd; + break; + case ELF_AR_SSD_OFFSET: + ptr = &pt->ar_ssd; + } + } else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) { + switch (addr) { + case ELF_CR_IIP_OFFSET: + ptr = &pt->cr_iip; + break; + case ELF_CFM_OFFSET: + urbs_end = ia64_get_user_rbs_end(target, pt, &cfm); + if (write_access) { + if (((cfm ^ *data) & PFM_MASK) != 0) { + if (in_syscall(pt)) + convert_to_non_syscall(target, + pt, + 
cfm); + pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK) + | (*data & PFM_MASK)); + } + } else + *data = cfm; + return 0; + case ELF_CR_IPSR_OFFSET: + if (write_access) { + unsigned long tmp = *data; + /* psr.ri==3 is a reserved value: SDM 2:25 */ + if ((tmp & IA64_PSR_RI) == IA64_PSR_RI) + tmp &= ~IA64_PSR_RI; + pt->cr_ipsr = ((tmp & IPSR_MASK) + | (pt->cr_ipsr & ~IPSR_MASK)); + } else + *data = (pt->cr_ipsr & IPSR_MASK); + return 0; + } + } else if (addr == ELF_NAT_OFFSET) + return access_nat_bits(target, pt, info, + data, write_access); + else if (addr == ELF_PR_OFFSET) + ptr = &pt->pr; + else + return -1; + + if (write_access) + *ptr = *data; + else + *data = *ptr; + + return 0; +} + +static int +access_elf_reg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(15)) + return access_elf_gpreg(target, info, addr, data, write_access); + else if (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7)) + return access_elf_breg(target, info, addr, data, write_access); + else + return access_elf_areg(target, info, addr, data, write_access); +} + +void do_gpregs_get(struct unw_frame_info *info, void *arg) +{ + struct pt_regs *pt; + struct regset_getset *dst = arg; + elf_greg_t tmp[16]; + unsigned int i, index, min_copy; + + if (unw_unwind_to_user(info) < 0) + return; + + /* + * coredump format: + * r0-r31 + * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) + * predicate registers (p0-p63) + * b0-b7 + * ip cfm user-mask + * ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec + */ + + + /* Skip r0 */ + if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) { + dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count, + &dst->u.get.kbuf, + &dst->u.get.ubuf, + 0, ELF_GR_OFFSET(1)); + if (dst->ret || dst->count == 0) + return; + } + + /* gr1 - gr15 */ + if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) { + index = (dst->pos - 
ELF_GR_OFFSET(1)) / sizeof(elf_greg_t); + min_copy = ELF_GR_OFFSET(16) > (dst->pos + dst->count) ? + (dst->pos + dst->count) : ELF_GR_OFFSET(16); + for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t), + index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 0) < 0) { + dst->ret = -EIO; + return; + } + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, + ELF_GR_OFFSET(1), ELF_GR_OFFSET(16)); + if (dst->ret || dst->count == 0) + return; + } + + /* r16-r31 */ + if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) { + pt = task_pt_regs(dst->target); + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, &pt->r16, + ELF_GR_OFFSET(16), ELF_NAT_OFFSET); + if (dst->ret || dst->count == 0) + return; + } + + /* nat, pr, b0 - b7 */ + if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) { + index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t); + min_copy = ELF_CR_IIP_OFFSET > (dst->pos + dst->count) ? + (dst->pos + dst->count) : ELF_CR_IIP_OFFSET; + for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t), + index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 0) < 0) { + dst->ret = -EIO; + return; + } + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, + ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET); + if (dst->ret || dst->count == 0) + return; + } + + /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd + */ + if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) { + index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t); + min_copy = ELF_AR_END_OFFSET > (dst->pos + dst->count) ? 
+ (dst->pos + dst->count) : ELF_AR_END_OFFSET; + for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t), + index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 0) < 0) { + dst->ret = -EIO; + return; + } + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, + ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET); + } +} + +void do_gpregs_set(struct unw_frame_info *info, void *arg) +{ + struct pt_regs *pt; + struct regset_getset *dst = arg; + elf_greg_t tmp[16]; + unsigned int i, index; + + if (unw_unwind_to_user(info) < 0) + return; + + /* Skip r0 */ + if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) { + dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count, + &dst->u.set.kbuf, + &dst->u.set.ubuf, + 0, ELF_GR_OFFSET(1)); + if (dst->ret || dst->count == 0) + return; + } + + /* gr1-gr15 */ + if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) { + i = dst->pos; + index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, + ELF_GR_OFFSET(1), ELF_GR_OFFSET(16)); + if (dst->ret) + return; + for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 1) < 0) { + dst->ret = -EIO; + return; + } + if (dst->count == 0) + return; + } + + /* gr16-gr31 */ + if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) { + pt = task_pt_regs(dst->target); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, &pt->r16, + ELF_GR_OFFSET(16), ELF_NAT_OFFSET); + if (dst->ret || dst->count == 0) + return; + } + + /* nat, pr, b0 - b7 */ + if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) { + i = dst->pos; + index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, + ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET); + if (dst->ret) + return; + for (; i < dst->pos; i += 
sizeof(elf_greg_t), index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 1) < 0) { + dst->ret = -EIO; + return; + } + if (dst->count == 0) + return; + } + + /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd + */ + if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) { + i = dst->pos; + index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, + ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET); + if (dst->ret) + return; + for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++) + if (access_elf_reg(dst->target, info, i, + &tmp[index], 1) < 0) { + dst->ret = -EIO; + return; + } + } +} + +#define ELF_FP_OFFSET(i) (i * sizeof(elf_fpreg_t)) + +void do_fpregs_get(struct unw_frame_info *info, void *arg) +{ + struct regset_getset *dst = arg; + struct task_struct *task = dst->target; + elf_fpreg_t tmp[30]; + int index, min_copy, i; + + if (unw_unwind_to_user(info) < 0) + return; + + /* Skip pos 0 and 1 */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) { + dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count, + &dst->u.get.kbuf, + &dst->u.get.ubuf, + 0, ELF_FP_OFFSET(2)); + if (dst->count == 0 || dst->ret) + return; + } + + /* fr2-fr31 */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) { + index = (dst->pos - ELF_FP_OFFSET(2)) / sizeof(elf_fpreg_t); + + min_copy = min(((unsigned int)ELF_FP_OFFSET(32)), + dst->pos + dst->count); + for (i = dst->pos; i < min_copy; i += sizeof(elf_fpreg_t), + index++) + if (unw_get_fr(info, i / sizeof(elf_fpreg_t), + &tmp[index])) { + dst->ret = -EIO; + return; + } + dst->ret = user_regset_copyout(&dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, tmp, + ELF_FP_OFFSET(2), ELF_FP_OFFSET(32)); + if (dst->count == 0 || dst->ret) + return; + } + + /* fph */ + if (dst->count > 0) { + ia64_flush_fph(dst->target); + if (task->thread.flags & IA64_THREAD_FPH_VALID) 
+ dst->ret = user_regset_copyout( + &dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, + &dst->target->thread.fph, + ELF_FP_OFFSET(32), -1); + else + /* Zero fill instead. */ + dst->ret = user_regset_copyout_zero( + &dst->pos, &dst->count, + &dst->u.get.kbuf, &dst->u.get.ubuf, + ELF_FP_OFFSET(32), -1); + } +} + +void do_fpregs_set(struct unw_frame_info *info, void *arg) +{ + struct regset_getset *dst = arg; + elf_fpreg_t fpreg, tmp[30]; + int index, start, end; + + if (unw_unwind_to_user(info) < 0) + return; + + /* Skip pos 0 and 1 */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) { + dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count, + &dst->u.set.kbuf, + &dst->u.set.ubuf, + 0, ELF_FP_OFFSET(2)); + if (dst->count == 0 || dst->ret) + return; + } + + /* fr2-fr31 */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) { + start = dst->pos; + end = min(((unsigned int)ELF_FP_OFFSET(32)), + dst->pos + dst->count); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, + ELF_FP_OFFSET(2), ELF_FP_OFFSET(32)); + if (dst->ret) + return; + + if (start & 0xF) { /* only write high part */ + if (unw_get_fr(info, start / sizeof(elf_fpreg_t), + &fpreg)) { + dst->ret = -EIO; + return; + } + tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0] + = fpreg.u.bits[0]; + start &= ~0xFUL; + } + if (end & 0xF) { /* only write low part */ + if (unw_get_fr(info, end / sizeof(elf_fpreg_t), + &fpreg)) { + dst->ret = -EIO; + return; + } + tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1] + = fpreg.u.bits[1]; + end = (end + 0xF) & ~0xFUL; + } + + for ( ; start < end ; start += sizeof(elf_fpreg_t)) { + index = start / sizeof(elf_fpreg_t); + if (unw_set_fr(info, index, tmp[index - 2])) { + dst->ret = -EIO; + return; + } + } + if (dst->ret || dst->count == 0) + return; + } + + /* fph */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) { + ia64_sync_fph(dst->target); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + 
&dst->u.set.kbuf, + &dst->u.set.ubuf, + &dst->target->thread.fph, + ELF_FP_OFFSET(32), -1); + } +} + +static int +do_regset_call(void (*call)(struct unw_frame_info *, void *), + struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct regset_getset info = { .target = target, .regset = regset, + .pos = pos, .count = count, + .u.set = { .kbuf = kbuf, .ubuf = ubuf }, + .ret = 0 }; + + if (target == current) + unw_init_running(call, &info); + else { + struct unw_frame_info ufi; + memset(&ufi, 0, sizeof(ufi)); + unw_init_from_blocked_task(&ufi, target); + (*call)(&ufi, &info); + } + + return info.ret; +} + +static int +gpregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return do_regset_call(do_gpregs_get, target, regset, pos, count, + kbuf, ubuf); +} + +static int gpregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return do_regset_call(do_gpregs_set, target, regset, pos, count, + kbuf, ubuf); +} + +static void do_gpregs_writeback(struct unw_frame_info *info, void *arg) +{ + do_sync_rbs(info, ia64_sync_user_rbs); +} + +/* + * This is called to write back the register backing store. + * ptrace does this before it stops, so that a tracer reading the user + * memory after the thread stops will get the current register data. 
+ */ +static int +gpregs_writeback(struct task_struct *target, + const struct user_regset *regset, + int now) +{ + if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE)) + return 0; + set_notify_resume(target); + return do_regset_call(do_gpregs_writeback, target, regset, 0, 0, + NULL, NULL); +} + +static int +fpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + return (target->thread.flags & IA64_THREAD_FPH_VALID) ? 128 : 32; +} + +static int fpregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return do_regset_call(do_fpregs_get, target, regset, pos, count, + kbuf, ubuf); +} + +static int fpregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return do_regset_call(do_fpregs_set, target, regset, pos, count, + kbuf, ubuf); +} + +static int +access_uarea(struct task_struct *child, unsigned long addr, + unsigned long *data, int write_access) +{ + unsigned int pos = -1; /* an invalid value */ + int ret; + unsigned long *ptr, regnum; + + if ((addr & 0x7) != 0) { + dprintk("ptrace: unaligned register address 0x%lx\n", addr); + return -1; + } + if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) || + (addr >= PT_R7 + 8 && addr < PT_B1) || + (addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) || + (addr >= PT_AR_SSD + 8 && addr < PT_DBR)) { + dprintk("ptrace: rejecting access to register " + "address 0x%lx\n", addr); + return -1; + } + + switch (addr) { + case PT_F32 ... (PT_F127 + 15): + pos = addr - PT_F32 + ELF_FP_OFFSET(32); + break; + case PT_F2 ... (PT_F5 + 15): + pos = addr - PT_F2 + ELF_FP_OFFSET(2); + break; + case PT_F10 ... (PT_F31 + 15): + pos = addr - PT_F10 + ELF_FP_OFFSET(10); + break; + case PT_F6 ... 
(PT_F9 + 15): + pos = addr - PT_F6 + ELF_FP_OFFSET(6); + break; + } + + if (pos != -1) { + if (write_access) + ret = fpregs_set(child, NULL, pos, + sizeof(unsigned long), data, NULL); + else + ret = fpregs_get(child, NULL, pos, + sizeof(unsigned long), data, NULL); + if (ret != 0) + return -1; + return 0; + } + + switch (addr) { + case PT_NAT_BITS: + pos = ELF_NAT_OFFSET; + break; + case PT_R4 ... PT_R7: + pos = addr - PT_R4 + ELF_GR_OFFSET(4); + break; + case PT_B1 ... PT_B5: + pos = addr - PT_B1 + ELF_BR_OFFSET(1); + break; + case PT_AR_EC: + pos = ELF_AR_EC_OFFSET; + break; + case PT_AR_LC: + pos = ELF_AR_LC_OFFSET; + break; + case PT_CR_IPSR: + pos = ELF_CR_IPSR_OFFSET; + break; + case PT_CR_IIP: + pos = ELF_CR_IIP_OFFSET; + break; + case PT_CFM: + pos = ELF_CFM_OFFSET; + break; + case PT_AR_UNAT: + pos = ELF_AR_UNAT_OFFSET; + break; + case PT_AR_PFS: + pos = ELF_AR_PFS_OFFSET; + break; + case PT_AR_RSC: + pos = ELF_AR_RSC_OFFSET; + break; + case PT_AR_RNAT: + pos = ELF_AR_RNAT_OFFSET; + break; + case PT_AR_BSPSTORE: + pos = ELF_AR_BSPSTORE_OFFSET; + break; + case PT_PR: + pos = ELF_PR_OFFSET; + break; + case PT_B6: + pos = ELF_BR_OFFSET(6); + break; + case PT_AR_BSP: + pos = ELF_AR_BSP_OFFSET; + break; + case PT_R1 ... PT_R3: + pos = addr - PT_R1 + ELF_GR_OFFSET(1); + break; + case PT_R12 ... PT_R15: + pos = addr - PT_R12 + ELF_GR_OFFSET(12); + break; + case PT_R8 ... PT_R11: + pos = addr - PT_R8 + ELF_GR_OFFSET(8); + break; + case PT_R16 ... 
PT_R31: + pos = addr - PT_R16 + ELF_GR_OFFSET(16); + break; + case PT_AR_CCV: + pos = ELF_AR_CCV_OFFSET; + break; + case PT_AR_FPSR: + pos = ELF_AR_FPSR_OFFSET; + break; + case PT_B0: + pos = ELF_BR_OFFSET(0); + break; + case PT_B7: + pos = ELF_BR_OFFSET(7); + break; + case PT_AR_CSD: + pos = ELF_AR_CSD_OFFSET; + break; + case PT_AR_SSD: + pos = ELF_AR_SSD_OFFSET; + break; + } + + if (pos != -1) { + if (write_access) + ret = gpregs_set(child, NULL, pos, + sizeof(unsigned long), data, NULL); + else + ret = gpregs_get(child, NULL, pos, + sizeof(unsigned long), data, NULL); + if (ret != 0) + return -1; + return 0; + } + + /* access debug registers */ + if (addr >= PT_IBR) { + regnum = (addr - PT_IBR) >> 3; + ptr = &child->thread.ibr[0]; + } else { + regnum = (addr - PT_DBR) >> 3; + ptr = &child->thread.dbr[0]; + } + + if (regnum >= 8) { + dprintk("ptrace: rejecting access to register " + "address 0x%lx\n", addr); + return -1; + } +#ifdef CONFIG_PERFMON + /* + * Check if debug registers are used by perfmon. This + * test must be done once we know that we can do the + * operation, i.e. the arguments are all valid, but + * before we start modifying the state. + * + * Perfmon needs to keep a count of how many processes + * are trying to modify the debug registers for system + * wide monitoring sessions. + * + * We also include read access here, because they may + * cause the PMU-installed debug register state + * (dbr[], ibr[]) to be reset. The two arrays are also + * used by perfmon, but we do not use + * IA64_THREAD_DBG_VALID. The registers are restored + * by the PMU context switch code. 
+ */ + if (pfm_use_debug_registers(child)) + return -1; +#endif + + if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) { + child->thread.flags |= IA64_THREAD_DBG_VALID; + memset(child->thread.dbr, 0, + sizeof(child->thread.dbr)); + memset(child->thread.ibr, 0, + sizeof(child->thread.ibr)); + } + + ptr += regnum; + + if ((regnum & 1) && write_access) { + /* don't let the user set kernel-level breakpoints: */ + *ptr = *data & ~(7UL << 56); + return 0; + } + if (write_access) + *ptr = *data; + else + *data = *ptr; + return 0; +} + +static const struct user_regset native_regsets[] = { + { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t), + .get = gpregs_get, .set = gpregs_set, + .writeback = gpregs_writeback + }, + { + .core_note_type = NT_PRFPREG, + .n = ELF_NFPREG, + .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t), + .get = fpregs_get, .set = fpregs_set, .active = fpregs_active + }, +}; + +static const struct user_regset_view user_ia64_view = { + .name = "ia64", + .e_machine = EM_IA_64, + .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *tsk) +{ + return &user_ia64_view; +} + +struct syscall_get_set_args { + unsigned int i; + unsigned int n; + unsigned long *args; + struct pt_regs *regs; + int rw; +}; + +static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data) +{ + struct syscall_get_set_args *args = data; + struct pt_regs *pt = args->regs; + unsigned long *krbs, cfm, ndirty; + int i, count; + + if (unw_unwind_to_user(info) < 0) + return; + + cfm = pt->cr_ifs; + krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8; + ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); + + count = 0; + if (in_syscall(pt)) + count = min_t(int, args->n, cfm & 0x7f); + + for (i = 0; i < count; i++) { + if (args->rw) + *ia64_rse_skip_regs(krbs, ndirty + i + args->i) = + args->args[i]; + else + 
args->args[i] = *ia64_rse_skip_regs(krbs, + ndirty + i + args->i); + } + + if (!args->rw) { + while (i < args->n) { + args->args[i] = 0; + i++; + } + } +} + +void ia64_syscall_get_set_arguments(struct task_struct *task, + struct pt_regs *regs, unsigned int i, unsigned int n, + unsigned long *args, int rw) +{ + struct syscall_get_set_args data = { + .i = i, + .n = n, + .args = args, + .regs = regs, + .rw = rw, + }; + + if (task == current) + unw_init_running(syscall_get_set_args_cb, &data); + else { + struct unw_frame_info ufi; + memset(&ufi, 0, sizeof(ufi)); + unw_init_from_blocked_task(&ufi, task); + syscall_get_set_args_cb(&ufi, &data); + } +} diff --git a/kernel/arch/ia64/kernel/relocate_kernel.S b/kernel/arch/ia64/kernel/relocate_kernel.S new file mode 100644 index 000000000..c370e02f0 --- /dev/null +++ b/kernel/arch/ia64/kernel/relocate_kernel.S @@ -0,0 +1,325 @@ +/* + * arch/ia64/kernel/relocate_kernel.S + * + * Relocate kexec'able kernel and start it + * + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2005 Khalid Aziz + * Copyright (C) 2005 Intel Corp, Zou Nan hai + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ +#include +#include +#include +#include +#include + + /* Must be relocatable PIC code callable as a C function + */ +GLOBAL_ENTRY(relocate_new_kernel) + .prologue + alloc r31=ar.pfs,4,0,0,0 + .body +.reloc_entry: +{ + rsm psr.i| psr.ic + mov r2=ip +} + ;; +{ + flushrs // must be first insn in group + srlz.i +} + ;; + dep r2=0,r2,61,3 //to physical address + ;; + //first switch to physical mode + add r3=1f-.reloc_entry, r2 + movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC + mov ar.rsc=0 // put RSE in enforced lazy mode + ;; + add sp=(memory_stack_end - 16 - .reloc_entry),r2 + add r8=(register_stack - .reloc_entry),r2 + ;; + mov r18=ar.rnat + mov ar.bspstore=r8 + ;; + mov cr.ipsr=r16 + mov cr.iip=r3 + mov cr.ifs=r0 + srlz.i + ;; + mov ar.rnat=r18 + rfi // note: this unmask MCA/INIT (psr.mc) + ;; +1: + //physical mode code begin + mov b6=in1 + dep r28=0,in2,61,3 //to physical address + + // purge all TC entries +#define O(member) IA64_CPUINFO_##member##_OFFSET + GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2 + ;; + addl r17=O(PTCE_STRIDE),r2 + addl r2=O(PTCE_BASE),r2 + ;; + ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base + ld4 r19=[r2],4 // r19=ptce_count[0] + ld4 r21=[r17],4 // r21=ptce_stride[0] + ;; + ld4 r20=[r2] // r20=ptce_count[1] + ld4 r22=[r17] // r22=ptce_stride[1] + mov r24=r0 + ;; + adds r20=-1,r20 + ;; +#undef O +2: + cmp.ltu p6,p7=r24,r19 +(p7) br.cond.dpnt.few 4f + mov ar.lc=r20 +3: + ptc.e r18 + ;; + add r18=r22,r18 + br.cloop.sptk.few 3b + ;; + add r18=r21,r18 + add r24=1,r24 + ;; + br.sptk.few 2b +4: + srlz.i + ;; + // purge TR entry for kernel text and data + movl r16=KERNEL_START + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + ;; + ptr.i r16, r18 + ptr.d r16, r18 + ;; + srlz.i + ;; + + // purge TR entry for pal code + mov r16=in3 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.i r16,r18 + ;; + srlz.i + ;; + + // purge TR entry for stack + mov r16=IA64_KR(CURRENT_STACK) + ;; + shl r16=r16,IA64_GRANULE_SHIFT + movl 
r19=PAGE_OFFSET + ;; + add r16=r19,r16 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.i + ;; + + //copy segments + movl r16=PAGE_MASK + mov r30=in0 // in0 is page_list + br.sptk.few .dest_page + ;; +.loop: + ld8 r30=[in0], 8;; +.dest_page: + tbit.z p0, p6=r30, 0;; // 0x1 dest page +(p6) and r17=r30, r16 +(p6) br.cond.sptk.few .loop;; + + tbit.z p0, p6=r30, 1;; // 0x2 indirect page +(p6) and in0=r30, r16 +(p6) br.cond.sptk.few .loop;; + + tbit.z p0, p6=r30, 2;; // 0x4 end flag +(p6) br.cond.sptk.few .end_loop;; + + tbit.z p6, p0=r30, 3;; // 0x8 source page +(p6) br.cond.sptk.few .loop + + and r18=r30, r16 + + // simple copy page, may optimize later + movl r14=PAGE_SIZE/8 - 1;; + mov ar.lc=r14;; +1: + ld8 r14=[r18], 8;; + st8 [r17]=r14;; + fc.i r17 + add r17=8, r17 + br.ctop.sptk.few 1b + br.sptk.few .loop + ;; + +.end_loop: + sync.i // for fc.i + ;; + srlz.i + ;; + srlz.d + ;; + br.call.sptk.many b0=b6;; + +.align 32 +memory_stack: + .fill 8192, 1, 0 +memory_stack_end: +register_stack: + .fill 8192, 1, 0 +register_stack_end: +relocate_new_kernel_end: +END(relocate_new_kernel) + +.global relocate_new_kernel_size +relocate_new_kernel_size: + data8 relocate_new_kernel_end - relocate_new_kernel + +GLOBAL_ENTRY(ia64_dump_cpu_regs) + .prologue + alloc loc0=ar.pfs,1,2,0,0 + .body + mov ar.rsc=0 // put RSE in enforced lazy mode + add loc1=4*8, in0 // save r4 and r5 first + ;; +{ + flushrs // flush dirty regs to backing store + srlz.i +} + st8 [loc1]=r4, 8 + ;; + st8 [loc1]=r5, 8 + ;; + add loc1=32*8, in0 + mov r4=ar.rnat + ;; + st8 [in0]=r0, 8 // r0 + st8 [loc1]=r4, 8 // rnat + mov r5=pr + ;; + st8 [in0]=r1, 8 // r1 + st8 [loc1]=r5, 8 // pr + mov r4=b0 + ;; + st8 [in0]=r2, 8 // r2 + st8 [loc1]=r4, 8 // b0 + mov r5=b1; + ;; + st8 [in0]=r3, 24 // r3 + st8 [loc1]=r5, 8 // b1 + mov r4=b2 + ;; + st8 [in0]=r6, 8 // r6 + st8 [loc1]=r4, 8 // b2 + mov r5=b3 + ;; + st8 [in0]=r7, 8 // r7 + st8 [loc1]=r5, 8 // b3 + mov r4=b4 + ;; + st8 [in0]=r8, 8 // r8 + st8 
[loc1]=r4, 8 // b4 + mov r5=b5 + ;; + st8 [in0]=r9, 8 // r9 + st8 [loc1]=r5, 8 // b5 + mov r4=b6 + ;; + st8 [in0]=r10, 8 // r10 + st8 [loc1]=r4, 8 // b6 + mov r5=b7 + ;; + st8 [in0]=r11, 8 // r11 + st8 [loc1]=r5, 8 // b7 + mov r4=b0 + ;; + st8 [in0]=r12, 8 // r12 + st8 [loc1]=r4, 8 // ip + mov r5=loc0 + ;; + st8 [in0]=r13, 8 // r13 + extr.u r5=r5, 0, 38 // ar.pfs.pfm + mov r4=r0 // user mask + ;; + st8 [in0]=r14, 8 // r14 + st8 [loc1]=r5, 8 // cfm + ;; + st8 [in0]=r15, 8 // r15 + st8 [loc1]=r4, 8 // user mask + mov r5=ar.rsc + ;; + st8 [in0]=r16, 8 // r16 + st8 [loc1]=r5, 8 // ar.rsc + mov r4=ar.bsp + ;; + st8 [in0]=r17, 8 // r17 + st8 [loc1]=r4, 8 // ar.bsp + mov r5=ar.bspstore + ;; + st8 [in0]=r18, 8 // r18 + st8 [loc1]=r5, 8 // ar.bspstore + mov r4=ar.rnat + ;; + st8 [in0]=r19, 8 // r19 + st8 [loc1]=r4, 8 // ar.rnat + mov r5=ar.ccv + ;; + st8 [in0]=r20, 8 // r20 + st8 [loc1]=r5, 8 // ar.ccv + mov r4=ar.unat + ;; + st8 [in0]=r21, 8 // r21 + st8 [loc1]=r4, 8 // ar.unat + mov r5 = ar.fpsr + ;; + st8 [in0]=r22, 8 // r22 + st8 [loc1]=r5, 8 // ar.fpsr + mov r4 = ar.unat + ;; + st8 [in0]=r23, 8 // r23 + st8 [loc1]=r4, 8 // unat + mov r5 = ar.fpsr + ;; + st8 [in0]=r24, 8 // r24 + st8 [loc1]=r5, 8 // fpsr + mov r4 = ar.pfs + ;; + st8 [in0]=r25, 8 // r25 + st8 [loc1]=r4, 8 // ar.pfs + mov r5 = ar.lc + ;; + st8 [in0]=r26, 8 // r26 + st8 [loc1]=r5, 8 // ar.lc + mov r4 = ar.ec + ;; + st8 [in0]=r27, 8 // r27 + st8 [loc1]=r4, 8 // ar.ec + mov r5 = ar.csd + ;; + st8 [in0]=r28, 8 // r28 + st8 [loc1]=r5, 8 // ar.csd + mov r4 = ar.ssd + ;; + st8 [in0]=r29, 8 // r29 + st8 [loc1]=r4, 8 // ar.ssd + ;; + st8 [in0]=r30, 8 // r30 + ;; + st8 [in0]=r31, 8 // r31 + mov ar.pfs=loc0 + ;; + br.ret.sptk.many rp +END(ia64_dump_cpu_regs) + + diff --git a/kernel/arch/ia64/kernel/sal.c b/kernel/arch/ia64/kernel/sal.c new file mode 100644 index 000000000..0464173ea --- /dev/null +++ b/kernel/arch/ia64/kernel/sal.c @@ -0,0 +1,405 @@ +/* + * System Abstraction Layer (SAL) interface routines. 
+ * + * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + __cacheline_aligned DEFINE_SPINLOCK(sal_lock); +unsigned long sal_platform_features; + +unsigned short sal_revision; +unsigned short sal_version; + +#define SAL_MAJOR(x) ((x) >> 8) +#define SAL_MINOR(x) ((x) & 0xff) + +static struct { + void *addr; /* function entry point */ + void *gpval; /* gp value to use */ +} pdesc; + +static long +default_handler (void) +{ + return -1; +} + +ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler; +ia64_sal_desc_ptc_t *ia64_ptc_domain_info; + +const char * +ia64_sal_strerror (long status) +{ + const char *str; + switch (status) { + case 0: str = "Call completed without error"; break; + case 1: str = "Effect a warm boot of the system to complete " + "the update"; break; + case -1: str = "Not implemented"; break; + case -2: str = "Invalid argument"; break; + case -3: str = "Call completed with error"; break; + case -4: str = "Virtual address not registered"; break; + case -5: str = "No information available"; break; + case -6: str = "Insufficient space to add the entry"; break; + case -7: str = "Invalid entry_addr value"; break; + case -8: str = "Invalid interrupt vector"; break; + case -9: str = "Requested memory not available"; break; + case -10: str = "Unable to write to the NVM device"; break; + case -11: str = "Invalid partition type specified"; break; + case -12: str = "Invalid NVM_Object id specified"; break; + case -13: str = "NVM_Object already has the maximum number " + "of partitions"; break; + case -14: str = "Insufficient space in partition for the " + "requested write sub-function"; break; + case -15: str = "Insufficient data buffer space for the " + "requested read record sub-function"; break; + case -16: str = "Scratch buffer required for 
the write/delete " + "sub-function"; break; + case -17: str = "Insufficient space in the NVM_Object for the " + "requested create sub-function"; break; + case -18: str = "Invalid value specified in the partition_rec " + "argument"; break; + case -19: str = "Record oriented I/O not supported for this " + "partition"; break; + case -20: str = "Bad format of record to be written or " + "required keyword variable not " + "specified"; break; + default: str = "Unknown SAL status code"; break; + } + return str; +} + +void __init +ia64_sal_handler_init (void *entry_point, void *gpval) +{ + /* fill in the SAL procedure descriptor and point ia64_sal to it: */ + pdesc.addr = entry_point; + pdesc.gpval = gpval; + ia64_sal = (ia64_sal_handler) &pdesc; +} + +static void __init +check_versions (struct ia64_sal_systab *systab) +{ + sal_revision = (systab->sal_rev_major << 8) | systab->sal_rev_minor; + sal_version = (systab->sal_b_rev_major << 8) | systab->sal_b_rev_minor; + + /* Check for broken firmware */ + if ((sal_revision == SAL_VERSION_CODE(49, 29)) + && (sal_version == SAL_VERSION_CODE(49, 29))) + { + /* + * Old firmware for zx2000 prototypes have this weird version number, + * reset it to something sane. + */ + sal_revision = SAL_VERSION_CODE(2, 8); + sal_version = SAL_VERSION_CODE(0, 0); + } + + if (ia64_platform_is("sn2") && (sal_revision == SAL_VERSION_CODE(2, 9))) + /* + * SGI Altix has hard-coded version 2.9 in their prom + * but they actually implement 3.2, so let's fix it here. 
+ */ + sal_revision = SAL_VERSION_CODE(3, 2); +} + +static void __init +sal_desc_entry_point (void *p) +{ + struct ia64_sal_desc_entry_point *ep = p; + ia64_pal_handler_init(__va(ep->pal_proc)); + ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp)); +} + +#ifdef CONFIG_SMP +static void __init +set_smp_redirect (int flag) +{ +#ifndef CONFIG_HOTPLUG_CPU + if (no_int_routing) + smp_int_redirect &= ~flag; + else + smp_int_redirect |= flag; +#else + /* + * For CPU Hotplug we dont want to do any chipset supported + * interrupt redirection. The reason is this would require that + * All interrupts be stopped and hard bind the irq to a cpu. + * Later when the interrupt is fired we need to set the redir hint + * on again in the vector. This is cumbersome for something that the + * user mode irq balancer will solve anyways. + */ + no_int_routing=1; + smp_int_redirect &= ~flag; +#endif +} +#else +#define set_smp_redirect(flag) do { } while (0) +#endif + +static void __init +sal_desc_platform_feature (void *p) +{ + struct ia64_sal_desc_platform_feature *pf = p; + sal_platform_features = pf->feature_mask; + + printk(KERN_INFO "SAL Platform features:"); + if (!sal_platform_features) { + printk(" None\n"); + return; + } + + if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_BUS_LOCK) + printk(" BusLock"); + if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT) { + printk(" IRQ_Redirection"); + set_smp_redirect(SMP_IRQ_REDIRECTION); + } + if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT) { + printk(" IPI_Redirection"); + set_smp_redirect(SMP_IPI_REDIRECTION); + } + if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT) + printk(" ITC_Drift"); + printk("\n"); +} + +#ifdef CONFIG_SMP +static void __init +sal_desc_ap_wakeup (void *p) +{ + struct ia64_sal_desc_ap_wakeup *ap = p; + + switch (ap->mechanism) { + case IA64_SAL_AP_EXTERNAL_INT: + ap_wakeup_vector = ap->vector; + printk(KERN_INFO "SAL: AP wakeup using external interrupt " 
+ "vector 0x%lx\n", ap_wakeup_vector); + break; + default: + printk(KERN_ERR "SAL: AP wakeup mechanism unsupported!\n"); + break; + } +} + +static void __init +chk_nointroute_opt(void) +{ + char *cp; + + for (cp = boot_command_line; *cp; ) { + if (memcmp(cp, "nointroute", 10) == 0) { + no_int_routing = 1; + printk ("no_int_routing on\n"); + break; + } else { + while (*cp != ' ' && *cp) + ++cp; + while (*cp == ' ') + ++cp; + } + } +} + +#else +static void __init sal_desc_ap_wakeup(void *p) { } +#endif + +/* + * HP rx5670 firmware polls for interrupts during SAL_CACHE_FLUSH by reading + * cr.ivr, but it never writes cr.eoi. This leaves any interrupt marked as + * "in-service" and masks other interrupts of equal or lower priority. + * + * HP internal defect reports: F1859, F2775, F3031. + */ +static int sal_cache_flush_drops_interrupts; + +static int __init +force_pal_cache_flush(char *str) +{ + sal_cache_flush_drops_interrupts = 1; + return 0; +} +early_param("force_pal_cache_flush", force_pal_cache_flush); + +void __init +check_sal_cache_flush (void) +{ + unsigned long flags; + int cpu; + u64 vector, cache_type = 3; + struct ia64_sal_retval isrv; + + if (sal_cache_flush_drops_interrupts) + return; + + cpu = get_cpu(); + local_irq_save(flags); + + /* + * Send ourselves a timer interrupt, wait until it's reported, and see + * if SAL_CACHE_FLUSH drops it. 
+ */ + platform_send_ipi(cpu, IA64_TIMER_VECTOR, IA64_IPI_DM_INT, 0); + + while (!ia64_get_irr(IA64_TIMER_VECTOR)) + cpu_relax(); + + SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0); + + if (isrv.status) + printk(KERN_ERR "SAL_CACHE_FLUSH failed with %ld\n", isrv.status); + + if (ia64_get_irr(IA64_TIMER_VECTOR)) { + vector = ia64_get_ivr(); + ia64_eoi(); + WARN_ON(vector != IA64_TIMER_VECTOR); + } else { + sal_cache_flush_drops_interrupts = 1; + printk(KERN_ERR "SAL: SAL_CACHE_FLUSH drops interrupts; " + "PAL_CACHE_FLUSH will be used instead\n"); + ia64_eoi(); + } + + local_irq_restore(flags); + put_cpu(); +} + +s64 +ia64_sal_cache_flush (u64 cache_type) +{ + struct ia64_sal_retval isrv; + + if (sal_cache_flush_drops_interrupts) { + unsigned long flags; + u64 progress; + s64 rc; + + progress = 0; + local_irq_save(flags); + rc = ia64_pal_cache_flush(cache_type, + PAL_CACHE_FLUSH_INVALIDATE, &progress, NULL); + local_irq_restore(flags); + return rc; + } + + SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0); + return isrv.status; +} +EXPORT_SYMBOL_GPL(ia64_sal_cache_flush); + +void __init +ia64_sal_init (struct ia64_sal_systab *systab) +{ + char *p; + int i; + + if (!systab) { + printk(KERN_WARNING "Hmm, no SAL System Table.\n"); + return; + } + + if (strncmp(systab->signature, "SST_", 4) != 0) + printk(KERN_ERR "bad signature in system table!\n"); + + check_versions(systab); +#ifdef CONFIG_SMP + chk_nointroute_opt(); +#endif + + /* revisions are coded in BCD, so %x does the job for us */ + printk(KERN_INFO "SAL %x.%x: %.32s %.32s%sversion %x.%x\n", + SAL_MAJOR(sal_revision), SAL_MINOR(sal_revision), + systab->oem_id, systab->product_id, + systab->product_id[0] ? " " : "", + SAL_MAJOR(sal_version), SAL_MINOR(sal_version)); + + p = (char *) (systab + 1); + for (i = 0; i < systab->entry_count; i++) { + /* + * The first byte of each entry type contains the type + * descriptor. 
+ */ + switch (*p) { + case SAL_DESC_ENTRY_POINT: + sal_desc_entry_point(p); + break; + case SAL_DESC_PLATFORM_FEATURE: + sal_desc_platform_feature(p); + break; + case SAL_DESC_PTC: + ia64_ptc_domain_info = (ia64_sal_desc_ptc_t *)p; + break; + case SAL_DESC_AP_WAKEUP: + sal_desc_ap_wakeup(p); + break; + } + p += SAL_DESC_SIZE(*p); + } + +} + +int +ia64_sal_oemcall(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7) +{ + if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) + return -1; + SAL_CALL(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, arg7); + return 0; +} +EXPORT_SYMBOL(ia64_sal_oemcall); + +int +ia64_sal_oemcall_nolock(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, + u64 arg7) +{ + if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) + return -1; + SAL_CALL_NOLOCK(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, + arg7); + return 0; +} +EXPORT_SYMBOL(ia64_sal_oemcall_nolock); + +int +ia64_sal_oemcall_reentrant(struct ia64_sal_retval *isrvp, u64 oemfunc, + u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, + u64 arg6, u64 arg7) +{ + if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) + return -1; + SAL_CALL_REENTRANT(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, + arg7); + return 0; +} +EXPORT_SYMBOL(ia64_sal_oemcall_reentrant); + +long +ia64_sal_freq_base (unsigned long which, unsigned long *ticks_per_second, + unsigned long *drift_info) +{ + struct ia64_sal_retval isrv; + + SAL_CALL(isrv, SAL_FREQ_BASE, which, 0, 0, 0, 0, 0, 0); + *ticks_per_second = isrv.v0; + *drift_info = isrv.v1; + return isrv.status; +} +EXPORT_SYMBOL_GPL(ia64_sal_freq_base); diff --git a/kernel/arch/ia64/kernel/salinfo.c b/kernel/arch/ia64/kernel/salinfo.c new file mode 100644 index 000000000..1eeffb7fb --- /dev/null +++ b/kernel/arch/ia64/kernel/salinfo.c @@ -0,0 +1,704 @@ +/* + * salinfo.c + * 
+ * Creates entries in /proc/sal for various system features. + * + * Copyright (c) 2003, 2006 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) 2003 Hewlett-Packard Co + * Bjorn Helgaas + * + * 10/30/2001 jbarnes@sgi.com copied much of Stephane's palinfo + * code to create this file + * Oct 23 2003 kaos@sgi.com + * Replace IPI with set_cpus_allowed() to read a record from the required cpu. + * Redesign salinfo log processing to separate interrupt and user space + * contexts. + * Cache the record across multi-block reads from user space. + * Support > 64 cpus. + * Delete module_exit and MOD_INC/DEC_COUNT, salinfo cannot be a module. + * + * Jan 28 2004 kaos@sgi.com + * Periodically check for outstanding MCA or INIT records. + * + * Dec 5 2004 kaos@sgi.com + * Standardize which records are cleared automatically. + * + * Aug 18 2005 kaos@sgi.com + * mca.c may not pass a buffer, a NULL buffer just indicates that a new + * record is available in SAL. + * Replace some NR_CPUS by cpus_online, for hotplug cpu. + * + * Jan 5 2006 kaos@sgi.com + * Handle hotplug cpus coming online. + * Handle hotplug cpus going offline while they still have outstanding records. + * Use the cpu_* macros consistently. + * Replace the counting semaphore with a mutex and a test if the cpumask is non-empty. + * Modify the locking to make the test for "work to do" an atomic operation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Jesse Barnes "); +MODULE_DESCRIPTION("/proc interface to IA-64 SAL features"); +MODULE_LICENSE("GPL"); + +static const struct file_operations proc_salinfo_fops; + +typedef struct { + const char *name; /* name of the proc entry */ + unsigned long feature; /* feature bit */ + struct proc_dir_entry *entry; /* registered entry (removal) */ +} salinfo_entry_t; + +/* + * List {name,feature} pairs for every entry in /proc/sal/ + * that this module exports + */ +static const salinfo_entry_t salinfo_entries[]={ + { "bus_lock", IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, }, + { "irq_redirection", IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, }, + { "ipi_redirection", IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, }, + { "itc_drift", IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT, }, +}; + +#define NR_SALINFO_ENTRIES ARRAY_SIZE(salinfo_entries) + +static char *salinfo_log_name[] = { + "mca", + "init", + "cmc", + "cpe", +}; + +static struct proc_dir_entry *salinfo_proc_entries[ + ARRAY_SIZE(salinfo_entries) + /* /proc/sal/bus_lock */ + ARRAY_SIZE(salinfo_log_name) + /* /proc/sal/{mca,...} */ + (2 * ARRAY_SIZE(salinfo_log_name)) + /* /proc/sal/mca/{event,data} */ + 1]; /* /proc/sal */ + +/* Some records we get ourselves, some are accessed as saved data in buffers + * that are owned by mca.c. + */ +struct salinfo_data_saved { + u8* buffer; + u64 size; + u64 id; + int cpu; +}; + +/* State transitions. Actions are :- + * Write "read " to the data file. + * Write "clear " to the data file. + * Write "oemdata to the data file. + * Read from the data file. + * Close the data file. + * + * Start state is NO_DATA. + * + * NO_DATA + * write "read " -> NO_DATA or LOG_RECORD. + * write "clear " -> NO_DATA or LOG_RECORD. + * write "oemdata -> return -EINVAL. + * read data -> return EOF. + * close -> unchanged. Free record areas. 
+ * + * LOG_RECORD + * write "read " -> NO_DATA or LOG_RECORD. + * write "clear " -> NO_DATA or LOG_RECORD. + * write "oemdata -> format the oem data, goto OEMDATA. + * read data -> return the INIT/MCA/CMC/CPE record. + * close -> unchanged. Keep record areas. + * + * OEMDATA + * write "read " -> NO_DATA or LOG_RECORD. + * write "clear " -> NO_DATA or LOG_RECORD. + * write "oemdata -> format the oem data, goto OEMDATA. + * read data -> return the formatted oemdata. + * close -> unchanged. Keep record areas. + * + * Closing the data file does not change the state. This allows shell scripts + * to manipulate salinfo data, each shell redirection opens the file, does one + * action then closes it again. The record areas are only freed at close when + * the state is NO_DATA. + */ +enum salinfo_state { + STATE_NO_DATA, + STATE_LOG_RECORD, + STATE_OEMDATA, +}; + +struct salinfo_data { + cpumask_t cpu_event; /* which cpus have outstanding events */ + struct semaphore mutex; + u8 *log_buffer; + u64 log_size; + u8 *oemdata; /* decoded oem data */ + u64 oemdata_size; + int open; /* single-open to prevent races */ + u8 type; + u8 saved_num; /* using a saved record? */ + enum salinfo_state state :8; /* processing state */ + u8 padding; + int cpu_check; /* next CPU to check */ + struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */ +}; + +static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)]; + +static DEFINE_SPINLOCK(data_lock); +static DEFINE_SPINLOCK(data_saved_lock); + +/** salinfo_platform_oemdata - optional callback to decode oemdata from an error + * record. + * @sect_header: pointer to the start of the section to decode. + * @oemdata: returns vmalloc area containing the decoded output. + * @oemdata_size: returns length of decoded output (strlen). 
+ * + * Description: If user space asks for oem data to be decoded by the kernel + * and/or prom and the platform has set salinfo_platform_oemdata to the address + * of a platform specific routine then call that routine. salinfo_platform_oemdata + * vmalloc's and formats its output area, returning the address of the text + * and its strlen. Returns 0 for success, -ve for error. The callback is + * invoked on the cpu that generated the error record. + */ +int (*salinfo_platform_oemdata)(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size); + +struct salinfo_platform_oemdata_parms { + const u8 *efi_guid; + u8 **oemdata; + u64 *oemdata_size; + int ret; +}; + +/* Kick the mutex that tells user space that there is work to do. Instead of + * trying to track the state of the mutex across multiple cpus, in user + * context, interrupt context, non-maskable interrupt context and hotplug cpu, + * it is far easier just to grab the mutex if it is free then release it. + * + * This routine must be called with data_saved_lock held, to make the down/up + * operation atomic. + */ +static void +salinfo_work_to_do(struct salinfo_data *data) +{ + (void)(down_trylock(&data->mutex) ?: 0); + up(&data->mutex); +} + +static void +salinfo_platform_oemdata_cpu(void *context) +{ + struct salinfo_platform_oemdata_parms *parms = context; + parms->ret = salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size); +} + +static void +shift1_data_saved (struct salinfo_data *data, int shift) +{ + memcpy(data->data_saved+shift, data->data_saved+shift+1, + (ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0])); + memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0, + sizeof(data->data_saved[0])); +} + +/* This routine is invoked in interrupt context. Note: mca.c enables + * interrupts before calling this code for CMC/CPE. MCA and INIT events are + * not irq safe, do not call any routines that use spinlocks, they may deadlock. 
+ * MCA and INIT records are recorded, a timer event will look for any + * outstanding events and wake up the user space code. + * + * The buffer passed from mca.c points to the output from ia64_log_get. This is + * a persistent buffer but its contents can change between the interrupt and + * when user space processes the record. Save the record id to identify + * changes. If the buffer is NULL then just update the bitmap. + */ +void +salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe) +{ + struct salinfo_data *data = salinfo_data + type; + struct salinfo_data_saved *data_saved; + unsigned long flags = 0; + int i; + int saved_size = ARRAY_SIZE(data->data_saved); + + BUG_ON(type >= ARRAY_SIZE(salinfo_log_name)); + + if (irqsafe) + spin_lock_irqsave(&data_saved_lock, flags); + if (buffer) { + for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) { + if (!data_saved->buffer) + break; + } + if (i == saved_size) { + if (!data->saved_num) { + shift1_data_saved(data, 0); + data_saved = data->data_saved + saved_size - 1; + } else + data_saved = NULL; + } + if (data_saved) { + data_saved->cpu = smp_processor_id(); + data_saved->id = ((sal_log_record_header_t *)buffer)->id; + data_saved->size = size; + data_saved->buffer = buffer; + } + } + cpumask_set_cpu(smp_processor_id(), &data->cpu_event); + if (irqsafe) { + salinfo_work_to_do(data); + spin_unlock_irqrestore(&data_saved_lock, flags); + } +} + +/* Check for outstanding MCA/INIT records every minute (arbitrary) */ +#define SALINFO_TIMER_DELAY (60*HZ) +static struct timer_list salinfo_timer; +extern void ia64_mlogbuf_dump(void); + +static void +salinfo_timeout_check(struct salinfo_data *data) +{ + unsigned long flags; + if (!data->open) + return; + if (!cpumask_empty(&data->cpu_event)) { + spin_lock_irqsave(&data_saved_lock, flags); + salinfo_work_to_do(data); + spin_unlock_irqrestore(&data_saved_lock, flags); + } +} + +static void +salinfo_timeout (unsigned long arg) +{ + 
ia64_mlogbuf_dump(); + salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); + salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); + salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; + add_timer(&salinfo_timer); +} + +static int +salinfo_event_open(struct inode *inode, struct file *file) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + return 0; +} + +static ssize_t +salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) +{ + struct salinfo_data *data = PDE_DATA(file_inode(file)); + char cmd[32]; + size_t size; + int i, n, cpu = -1; + +retry: + if (cpumask_empty(&data->cpu_event) && down_trylock(&data->mutex)) { + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + if (down_interruptible(&data->mutex)) + return -EINTR; + } + + n = data->cpu_check; + for (i = 0; i < nr_cpu_ids; i++) { + if (cpumask_test_cpu(n, &data->cpu_event)) { + if (!cpu_online(n)) { + cpumask_clear_cpu(n, &data->cpu_event); + continue; + } + cpu = n; + break; + } + if (++n == nr_cpu_ids) + n = 0; + } + + if (cpu == -1) + goto retry; + + ia64_mlogbuf_dump(); + + /* for next read, start checking at next CPU */ + data->cpu_check = cpu; + if (++data->cpu_check == nr_cpu_ids) + data->cpu_check = 0; + + snprintf(cmd, sizeof(cmd), "read %d\n", cpu); + + size = strlen(cmd); + if (size > count) + size = count; + if (copy_to_user(buffer, cmd, size)) + return -EFAULT; + + return size; +} + +static const struct file_operations salinfo_event_fops = { + .open = salinfo_event_open, + .read = salinfo_event_read, + .llseek = noop_llseek, +}; + +static int +salinfo_log_open(struct inode *inode, struct file *file) +{ + struct salinfo_data *data = PDE_DATA(inode); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + spin_lock(&data_lock); + if (data->open) { + spin_unlock(&data_lock); + return -EBUSY; + } + data->open = 1; + spin_unlock(&data_lock); + + if (data->state == STATE_NO_DATA && + !(data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type)))) { 
+ data->open = 0; + return -ENOMEM; + } + + return 0; +} + +static int +salinfo_log_release(struct inode *inode, struct file *file) +{ + struct salinfo_data *data = PDE_DATA(inode); + + if (data->state == STATE_NO_DATA) { + vfree(data->log_buffer); + vfree(data->oemdata); + data->log_buffer = NULL; + data->oemdata = NULL; + } + spin_lock(&data_lock); + data->open = 0; + spin_unlock(&data_lock); + return 0; +} + +static void +call_on_cpu(int cpu, void (*fn)(void *), void *arg) +{ + cpumask_t save_cpus_allowed = current->cpus_allowed; + set_cpus_allowed_ptr(current, cpumask_of(cpu)); + (*fn)(arg); + set_cpus_allowed_ptr(current, &save_cpus_allowed); +} + +static void +salinfo_log_read_cpu(void *context) +{ + struct salinfo_data *data = context; + sal_log_record_header_t *rh; + data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer); + rh = (sal_log_record_header_t *)(data->log_buffer); + /* Clear corrected errors as they are read from SAL */ + if (rh->severity == sal_log_severity_corrected) + ia64_sal_clear_state_info(data->type); +} + +static void +salinfo_log_new_read(int cpu, struct salinfo_data *data) +{ + struct salinfo_data_saved *data_saved; + unsigned long flags; + int i; + int saved_size = ARRAY_SIZE(data->data_saved); + + data->saved_num = 0; + spin_lock_irqsave(&data_saved_lock, flags); +retry: + for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) { + if (data_saved->buffer && data_saved->cpu == cpu) { + sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer); + data->log_size = data_saved->size; + memcpy(data->log_buffer, rh, data->log_size); + barrier(); /* id check must not be moved */ + if (rh->id == data_saved->id) { + data->saved_num = i+1; + break; + } + /* saved record changed by mca.c since interrupt, discard it */ + shift1_data_saved(data, i); + goto retry; + } + } + spin_unlock_irqrestore(&data_saved_lock, flags); + + if (!data->saved_num) + call_on_cpu(cpu, 
salinfo_log_read_cpu, data); + if (!data->log_size) { + data->state = STATE_NO_DATA; + cpumask_clear_cpu(cpu, &data->cpu_event); + } else { + data->state = STATE_LOG_RECORD; + } +} + +static ssize_t +salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) +{ + struct salinfo_data *data = PDE_DATA(file_inode(file)); + u8 *buf; + u64 bufsize; + + if (data->state == STATE_LOG_RECORD) { + buf = data->log_buffer; + bufsize = data->log_size; + } else if (data->state == STATE_OEMDATA) { + buf = data->oemdata; + bufsize = data->oemdata_size; + } else { + buf = NULL; + bufsize = 0; + } + return simple_read_from_buffer(buffer, count, ppos, buf, bufsize); +} + +static void +salinfo_log_clear_cpu(void *context) +{ + struct salinfo_data *data = context; + ia64_sal_clear_state_info(data->type); +} + +static int +salinfo_log_clear(struct salinfo_data *data, int cpu) +{ + sal_log_record_header_t *rh; + unsigned long flags; + spin_lock_irqsave(&data_saved_lock, flags); + data->state = STATE_NO_DATA; + if (!cpumask_test_cpu(cpu, &data->cpu_event)) { + spin_unlock_irqrestore(&data_saved_lock, flags); + return 0; + } + cpumask_clear_cpu(cpu, &data->cpu_event); + if (data->saved_num) { + shift1_data_saved(data, data->saved_num - 1); + data->saved_num = 0; + } + spin_unlock_irqrestore(&data_saved_lock, flags); + rh = (sal_log_record_header_t *)(data->log_buffer); + /* Corrected errors have already been cleared from SAL */ + if (rh->severity != sal_log_severity_corrected) + call_on_cpu(cpu, salinfo_log_clear_cpu, data); + /* clearing a record may make a new record visible */ + salinfo_log_new_read(cpu, data); + if (data->state == STATE_LOG_RECORD) { + spin_lock_irqsave(&data_saved_lock, flags); + cpumask_set_cpu(cpu, &data->cpu_event); + salinfo_work_to_do(data); + spin_unlock_irqrestore(&data_saved_lock, flags); + } + return 0; +} + +static ssize_t +salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) +{ + struct 
salinfo_data *data = PDE_DATA(file_inode(file)); + char cmd[32]; + size_t size; + u32 offset; + int cpu; + + size = sizeof(cmd); + if (count < size) + size = count; + if (copy_from_user(cmd, buffer, size)) + return -EFAULT; + + if (sscanf(cmd, "read %d", &cpu) == 1) { + salinfo_log_new_read(cpu, data); + } else if (sscanf(cmd, "clear %d", &cpu) == 1) { + int ret; + if ((ret = salinfo_log_clear(data, cpu))) + count = ret; + } else if (sscanf(cmd, "oemdata %d %d", &cpu, &offset) == 2) { + if (data->state != STATE_LOG_RECORD && data->state != STATE_OEMDATA) + return -EINVAL; + if (offset > data->log_size - sizeof(efi_guid_t)) + return -EINVAL; + data->state = STATE_OEMDATA; + if (salinfo_platform_oemdata) { + struct salinfo_platform_oemdata_parms parms = { + .efi_guid = data->log_buffer + offset, + .oemdata = &data->oemdata, + .oemdata_size = &data->oemdata_size + }; + call_on_cpu(cpu, salinfo_platform_oemdata_cpu, &parms); + if (parms.ret) + count = parms.ret; + } else + data->oemdata_size = 0; + } else + return -EINVAL; + + return count; +} + +static const struct file_operations salinfo_data_fops = { + .open = salinfo_log_open, + .release = salinfo_log_release, + .read = salinfo_log_read, + .write = salinfo_log_write, + .llseek = default_llseek, +}; + +static int +salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) +{ + unsigned int i, cpu = (unsigned long)hcpu; + unsigned long flags; + struct salinfo_data *data; + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + spin_lock_irqsave(&data_saved_lock, flags); + for (i = 0, data = salinfo_data; + i < ARRAY_SIZE(salinfo_data); + ++i, ++data) { + cpumask_set_cpu(cpu, &data->cpu_event); + salinfo_work_to_do(data); + } + spin_unlock_irqrestore(&data_saved_lock, flags); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + spin_lock_irqsave(&data_saved_lock, flags); + for (i = 0, data = salinfo_data; + i < ARRAY_SIZE(salinfo_data); + ++i, ++data) { + struct salinfo_data_saved 
*data_saved; + int j; + for (j = ARRAY_SIZE(data->data_saved) - 1, data_saved = data->data_saved + j; + j >= 0; + --j, --data_saved) { + if (data_saved->buffer && data_saved->cpu == cpu) { + shift1_data_saved(data, j); + } + } + cpumask_clear_cpu(cpu, &data->cpu_event); + } + spin_unlock_irqrestore(&data_saved_lock, flags); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block salinfo_cpu_notifier = +{ + .notifier_call = salinfo_cpu_callback, + .priority = 0, +}; + +static int __init +salinfo_init(void) +{ + struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */ + struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */ + struct proc_dir_entry *dir, *entry; + struct salinfo_data *data; + int i, j; + + salinfo_dir = proc_mkdir("sal", NULL); + if (!salinfo_dir) + return 0; + + for (i=0; i < NR_SALINFO_ENTRIES; i++) { + /* pass the feature bit in question as misc data */ + *sdir++ = proc_create_data(salinfo_entries[i].name, 0, salinfo_dir, + &proc_salinfo_fops, + (void *)salinfo_entries[i].feature); + } + + cpu_notifier_register_begin(); + + for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) { + data = salinfo_data + i; + data->type = i; + sema_init(&data->mutex, 1); + dir = proc_mkdir(salinfo_log_name[i], salinfo_dir); + if (!dir) + continue; + + entry = proc_create_data("event", S_IRUSR, dir, + &salinfo_event_fops, data); + if (!entry) + continue; + *sdir++ = entry; + + entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir, + &salinfo_data_fops, data); + if (!entry) + continue; + *sdir++ = entry; + + /* we missed any events before now */ + for_each_online_cpu(j) + cpumask_set_cpu(j, &data->cpu_event); + + *sdir++ = dir; + } + + *sdir++ = salinfo_dir; + + init_timer(&salinfo_timer); + salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; + salinfo_timer.function = &salinfo_timeout; + add_timer(&salinfo_timer); + + __register_hotcpu_notifier(&salinfo_cpu_notifier); + + cpu_notifier_register_done(); + + return 0; +} 
+ +/* + * 'data' contains an integer that corresponds to the feature we're + * testing + */ +static int proc_salinfo_show(struct seq_file *m, void *v) +{ + unsigned long data = (unsigned long)v; + seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n"); + return 0; +} + +static int proc_salinfo_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_salinfo_show, PDE_DATA(inode)); +} + +static const struct file_operations proc_salinfo_fops = { + .open = proc_salinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +module_init(salinfo_init); diff --git a/kernel/arch/ia64/kernel/setup.c b/kernel/arch/ia64/kernel/setup.c new file mode 100644 index 000000000..b9761389c --- /dev/null +++ b/kernel/arch/ia64/kernel/setup.c @@ -0,0 +1,1071 @@ +/* + * Architecture-specific setup. + * + * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 2000, 2004 Intel Corp + * Rohit Seth + * Suresh Siddha + * Gordon Jin + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * + * 12/26/04 S.Siddha, G.Jin, R.Seth + * Add multi-threading and multi-core detection + * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo(). + * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map + * 03/31/00 R.Seth cpu_initialized and current->processor fixes + * 02/04/00 D.Mosberger some more get_cpuinfo fixes... + * 02/01/00 R.Seth fixed get_cpuinfo for SMP + * 01/07/99 S.Eranian added the support for command line argument + * 06/24/99 W.Drummond added boot_cpu_data. + * 05/28/05 Z. 
Menyhart Dynamic stride size for "flush_icache_range()" + */ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE) +# error "struct cpuinfo_ia64 too big!" +#endif + +#ifdef CONFIG_SMP +unsigned long __per_cpu_offset[NR_CPUS]; +EXPORT_SYMBOL(__per_cpu_offset); +#endif + +DEFINE_PER_CPU(struct cpuinfo_ia64, ia64_cpu_info); +DEFINE_PER_CPU(unsigned long, local_per_cpu_offset); +unsigned long ia64_cycles_per_usec; +struct ia64_boot_param *ia64_boot_param; +struct screen_info screen_info; +unsigned long vga_console_iobase; +unsigned long vga_console_membase; + +static struct resource data_resource = { + .name = "Kernel data", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource code_resource = { + .name = "Kernel code", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource bss_resource = { + .name = "Kernel bss", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +unsigned long ia64_max_cacheline_size; + +unsigned long ia64_iobase; /* virtual address for I/O accesses */ +EXPORT_SYMBOL(ia64_iobase); +struct io_space io_space[MAX_IO_SPACES]; +EXPORT_SYMBOL(io_space); +unsigned int num_io_spaces; + +/* + * "flush_icache_range()" needs to know what processor dependent stride size to use + * when it makes i-cache(s) coherent with d-caches. + */ +#define I_CACHE_STRIDE_SHIFT 5 /* Safest way to go: 32 bytes by 32 bytes */ +unsigned long ia64_i_cache_stride_shift = ~0; +/* + * "clflush_cache_range()" needs to know what processor dependent stride size to + * use when it flushes cache lines including both d-cache and i-cache. 
+ */ +/* Safest way to go: 32 bytes by 32 bytes */ +#define CACHE_STRIDE_SHIFT 5 +unsigned long ia64_cache_stride_shift = ~0; + +/* + * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This + * mask specifies a mask of address bits that must be 0 in order for two buffers to be + * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start + * address of the second buffer must be aligned to (merge_mask+1) in order to be + * mergeable). By default, we assume there is no I/O MMU which can merge physically + * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu + * page-size of 2^64. + */ +unsigned long ia64_max_iommu_merge_mask = ~0UL; +EXPORT_SYMBOL(ia64_max_iommu_merge_mask); + +/* + * We use a special marker for the end of memory and it uses the extra (+1) slot + */ +struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata; +int num_rsvd_regions __initdata; + + +/* + * Filter incoming memory segments based on the primitive map created from the boot + * parameters. Segments contained in the map are removed from the memory ranges. A + * caller-specified function is called with the memory ranges that remain after filtering. + * This routine does not assume the incoming segments are sorted. 
+ */ +int __init +filter_rsvd_memory (u64 start, u64 end, void *arg) +{ + u64 range_start, range_end, prev_start; + void (*func)(unsigned long, unsigned long, int); + int i; + +#if IGNORE_PFN0 + if (start == PAGE_OFFSET) { + printk(KERN_WARNING "warning: skipping physical page 0\n"); + start += PAGE_SIZE; + if (start >= end) return 0; + } +#endif + /* + * lowest possible address(walker uses virtual) + */ + prev_start = PAGE_OFFSET; + func = arg; + + for (i = 0; i < num_rsvd_regions; ++i) { + range_start = max(start, prev_start); + range_end = min(end, rsvd_region[i].start); + + if (range_start < range_end) + call_pernode_memory(__pa(range_start), range_end - range_start, func); + + /* nothing more available in this segment */ + if (range_end == end) return 0; + + prev_start = rsvd_region[i].end; + } + /* end of memory marker allows full processing inside loop body */ + return 0; +} + +/* + * Similar to "filter_rsvd_memory()", but the reserved memory ranges + * are not filtered out. + */ +int __init +filter_memory(u64 start, u64 end, void *arg) +{ + void (*func)(unsigned long, unsigned long, int); + +#if IGNORE_PFN0 + if (start == PAGE_OFFSET) { + printk(KERN_WARNING "warning: skipping physical page 0\n"); + start += PAGE_SIZE; + if (start >= end) + return 0; + } +#endif + func = arg; + if (start < end) + call_pernode_memory(__pa(start), end - start, func); + return 0; +} + +static void __init +sort_regions (struct rsvd_region *rsvd_region, int max) +{ + int j; + + /* simple bubble sorting */ + while (max--) { + for (j = 0; j < max; ++j) { + if (rsvd_region[j].start > rsvd_region[j+1].start) { + struct rsvd_region tmp; + tmp = rsvd_region[j]; + rsvd_region[j] = rsvd_region[j + 1]; + rsvd_region[j + 1] = tmp; + } + } + } +} + +/* merge overlaps */ +static int __init +merge_regions (struct rsvd_region *rsvd_region, int max) +{ + int i; + for (i = 1; i < max; ++i) { + if (rsvd_region[i].start >= rsvd_region[i-1].end) + continue; + if (rsvd_region[i].end > 
rsvd_region[i-1].end) + rsvd_region[i-1].end = rsvd_region[i].end; + --max; + memmove(&rsvd_region[i], &rsvd_region[i+1], + (max - i) * sizeof(struct rsvd_region)); + } + return max; +} + +/* + * Request address space for all standard resources + */ +static int __init register_memory(void) +{ + code_resource.start = ia64_tpa(_text); + code_resource.end = ia64_tpa(_etext) - 1; + data_resource.start = ia64_tpa(_etext); + data_resource.end = ia64_tpa(_edata) - 1; + bss_resource.start = ia64_tpa(__bss_start); + bss_resource.end = ia64_tpa(_end) - 1; + efi_initialize_iomem_resources(&code_resource, &data_resource, + &bss_resource); + + return 0; +} + +__initcall(register_memory); + + +#ifdef CONFIG_KEXEC + +/* + * This function checks if the reserved crashkernel is allowed on the specific + * IA64 machine flavour. Machines without an IO TLB use swiotlb and require + * some memory below 4 GB (i.e. in 32 bit area), see the implementation of + * lib/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that + * in kdump case. See the comment in sba_init() in sba_iommu.c. + * + * So, the only machvec that really supports loading the kdump kernel + * over 4 GB is "sn2". + */ +static int __init check_crashkernel_memory(unsigned long pbase, size_t size) +{ + if (ia64_platform_is("sn2") || ia64_platform_is("uv")) + return 1; + else + return pbase < (1UL << 32); +} + +static void __init setup_crashkernel(unsigned long total, int *n) +{ + unsigned long long base = 0, size = 0; + int ret; + + ret = parse_crashkernel(boot_command_line, total, + &size, &base); + if (ret == 0 && size > 0) { + if (!base) { + sort_regions(rsvd_region, *n); + *n = merge_regions(rsvd_region, *n); + base = kdump_find_rsvd_region(size, + rsvd_region, *n); + } + + if (!check_crashkernel_memory(base, size)) { + pr_warning("crashkernel: There would be kdump memory " + "at %ld GB but this is unusable because it " + "must\nbe below 4 GB. 
Change the memory " + "configuration of the machine.\n", + (unsigned long)(base >> 30)); + return; + } + + if (base != ~0UL) { + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " + "for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(size >> 20), + (unsigned long)(base >> 20), + (unsigned long)(total >> 20)); + rsvd_region[*n].start = + (unsigned long)__va(base); + rsvd_region[*n].end = + (unsigned long)__va(base + size); + (*n)++; + crashk_res.start = base; + crashk_res.end = base + size - 1; + } + } + efi_memmap_res.start = ia64_boot_param->efi_memmap; + efi_memmap_res.end = efi_memmap_res.start + + ia64_boot_param->efi_memmap_size; + boot_param_res.start = __pa(ia64_boot_param); + boot_param_res.end = boot_param_res.start + + sizeof(*ia64_boot_param); +} +#else +static inline void __init setup_crashkernel(unsigned long total, int *n) +{} +#endif + +/** + * reserve_memory - setup reserved memory areas + * + * Setup the reserved memory areas set aside for the boot parameters, + * initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined, + * see arch/ia64/include/asm/meminit.h if you need to define more. 
+ */ +void __init +reserve_memory (void) +{ + int n = 0; + unsigned long total_memory; + + /* + * none of the entries in this table overlap + */ + rsvd_region[n].start = (unsigned long) ia64_boot_param; + rsvd_region[n].end = rsvd_region[n].start + sizeof(*ia64_boot_param); + n++; + + rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap); + rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->efi_memmap_size; + n++; + + rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line); + rsvd_region[n].end = (rsvd_region[n].start + + strlen(__va(ia64_boot_param->command_line)) + 1); + n++; + + rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START); + rsvd_region[n].end = (unsigned long) ia64_imva(_end); + n++; + + n += paravirt_reserve_memory(&rsvd_region[n]); + +#ifdef CONFIG_BLK_DEV_INITRD + if (ia64_boot_param->initrd_start) { + rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start); + rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->initrd_size; + n++; + } +#endif + +#ifdef CONFIG_CRASH_DUMP + if (reserve_elfcorehdr(&rsvd_region[n].start, + &rsvd_region[n].end) == 0) + n++; +#endif + + total_memory = efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end); + n++; + + setup_crashkernel(total_memory, &n); + + /* end of memory marker */ + rsvd_region[n].start = ~0UL; + rsvd_region[n].end = ~0UL; + n++; + + num_rsvd_regions = n; + BUG_ON(IA64_MAX_RSVD_REGIONS + 1 < n); + + sort_regions(rsvd_region, num_rsvd_regions); + num_rsvd_regions = merge_regions(rsvd_region, num_rsvd_regions); +} + + +/** + * find_initrd - get initrd parameters from the boot parameter structure + * + * Grab the initrd start and end from the boot parameter struct given us by + * the boot loader. 
+ */ +void __init +find_initrd (void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + if (ia64_boot_param->initrd_start) { + initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start); + initrd_end = initrd_start+ia64_boot_param->initrd_size; + + printk(KERN_INFO "Initial ramdisk at: 0x%lx (%llu bytes)\n", + initrd_start, ia64_boot_param->initrd_size); + } +#endif +} + +static void __init +io_port_init (void) +{ + unsigned long phys_iobase; + + /* + * Set `iobase' based on the EFI memory map or, failing that, the + * value firmware left in ar.k0. + * + * Note that in ia32 mode, IN/OUT instructions use ar.k0 to compute + * the port's virtual address, so ia32_load_state() loads it with a + * user virtual address. But in ia64 mode, glibc uses the + * *physical* address in ar.k0 to mmap the appropriate area from + * /dev/mem, and the inX()/outX() interfaces use MMIO. In both + * cases, user-mode can only use the legacy 0-64K I/O port space. + * + * ar.k0 is not involved in kernel I/O port accesses, which can use + * any of the I/O port spaces and are done via MMIO using the + * virtual mmio_base from the appropriate io_space[]. + */ + phys_iobase = efi_get_iobase(); + if (!phys_iobase) { + phys_iobase = ia64_get_kr(IA64_KR_IO_BASE); + printk(KERN_INFO "No I/O port range found in EFI memory map, " + "falling back to AR.KR0 (0x%lx)\n", phys_iobase); + } + ia64_iobase = (unsigned long) ioremap(phys_iobase, 0); + ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); + + /* setup legacy IO port space */ + io_space[0].mmio_base = ia64_iobase; + io_space[0].sparse = 1; + num_io_spaces = 1; +} + +/** + * early_console_setup - setup debugging console + * + * Consoles started here require little enough setup that we can start using + * them very early in the boot process, either right after the machine + * vector initialization, or even before if the drivers can detect their hw. + * + * Returns non-zero if a console couldn't be setup. 
+ */ +static inline int __init +early_console_setup (char *cmdline) +{ + int earlycons = 0; + +#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE + { + extern int sn_serial_console_early_setup(void); + if (!sn_serial_console_early_setup()) + earlycons++; + } +#endif +#ifdef CONFIG_EFI_PCDP + if (!efi_setup_pcdp_console(cmdline)) + earlycons++; +#endif + if (!simcons_register()) + earlycons++; + + return (earlycons) ? 0 : -1; +} + +static inline void +mark_bsp_online (void) +{ +#ifdef CONFIG_SMP + /* If we register an early console, allow CPU 0 to printk */ + set_cpu_online(smp_processor_id(), true); +#endif +} + +static __initdata int nomca; +static __init int setup_nomca(char *s) +{ + nomca = 1; + return 0; +} +early_param("nomca", setup_nomca); + +#ifdef CONFIG_CRASH_DUMP +int __init reserve_elfcorehdr(u64 *start, u64 *end) +{ + u64 length; + + /* We get the address using the kernel command line, + * but the size is extracted from the EFI tables. + * Both address and size are required for reservation + * to work properly. 
/*
 * Architecture-specific boot-time setup.
 *
 * Publishes the kernel command line through @cmdline_p (and copies it into
 * boot_command_line), then brings up, in this order: unwinder and early
 * paravirt hooks, kernel patch lists, EFI, the legacy I/O port space, early
 * parameters and consoles, ACPI tables, the CPU map, memory discovery, SAL,
 * the bootstrap CPU state, the VT console choice, machine-check handling,
 * the platform hook and finally paging.  The calls depend on each other's
 * side effects, so the order below must be preserved.
 */
void __init
setup_arch (char **cmdline_p)
{
	unw_init();

	paravirt_arch_setup_early();

	ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
	paravirt_patch_apply();

	/* hand the boot loader's command line to the generic code */
	*cmdline_p = __va(ia64_boot_param->command_line);
	strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE);

	efi_init();
	io_port_init();

#ifdef CONFIG_IA64_GENERIC
	/* machvec needs to be parsed from the command line
	 * before parse_early_param() is called to ensure
	 * that ia64_mv is initialised before any command line
	 * settings may cause console setup to occur
	 */
	machvec_init_from_cmdline(*cmdline_p);
#endif

	parse_early_param();

	/* only mark the boot CPU online early if an early console exists */
	if (early_console_setup(*cmdline_p) == 0)
		mark_bsp_online();

#ifdef CONFIG_ACPI
	/* Initialize the ACPI boot-time table parser */
	acpi_table_init();
	early_acpi_boot_init();
# ifdef CONFIG_ACPI_NUMA
	acpi_numa_init();
#  ifdef CONFIG_ACPI_HOTPLUG_CPU
	prefill_possible_map();
#  endif
	/* an empty early possible map is replaced by a count of 32 */
	per_cpu_scan_finalize((cpumask_weight(&early_cpu_possible_map) == 0 ?
		32 : cpumask_weight(&early_cpu_possible_map)),
		additional_cpus > 0 ? additional_cpus : 0);
# endif
#endif /* CONFIG_ACPI */

#ifdef CONFIG_SMP
	smp_build_cpu_map();
#endif
	find_memory();

	/* process SAL system table: */
	ia64_sal_init(__va(efi.sal_systab));

#ifdef CONFIG_ITANIUM
	ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
#else
	{
		unsigned long num_phys_stacked;

		/* patch only when PAL reports more than 96 physical stacked registers */
		if (ia64_pal_rse_info(&num_phys_stacked, 0) == 0 && num_phys_stacked > 96)
			ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
	}
#endif

#ifdef CONFIG_SMP
	cpu_physical_id(0) = hard_smp_processor_id();
#endif

	cpu_init();	/* initialize the bootstrap CPU */
	mmu_context_init();	/* initialize context_id bitmap */

	paravirt_banner();
	paravirt_arch_setup_console(cmdline_p);

#ifdef CONFIG_VT
	/* pick a default console driver only if none has been chosen yet */
	if (!conswitchp) {
# if defined(CONFIG_DUMMY_CONSOLE)
		conswitchp = &dummy_con;
# endif
# if defined(CONFIG_VGA_CONSOLE)
		/*
		 * Non-legacy systems may route legacy VGA MMIO range to system
		 * memory.  vga_con probes the MMIO hole, so memory looks like
		 * a VGA device to it.  The EFI memory map can tell us if it's
		 * memory so we can avoid this problem.
		 */
		if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY)
			conswitchp = &vga_con;
# endif
	}
#endif

	/* enable IA-64 Machine Check Abort Handling unless disabled */
	if (paravirt_arch_setup_nomca())
		nomca = 1;
	if (!nomca)
		ia64_mca_init();

	platform_setup(cmdline_p);
#ifndef CONFIG_IA64_HP_SIM
	check_sal_cache_flush();
#endif
	paging_init();
}
+ */ +static int +show_cpuinfo (struct seq_file *m, void *v) +{ +#ifdef CONFIG_SMP +# define lpj c->loops_per_jiffy +# define cpunum c->cpu +#else +# define lpj loops_per_jiffy +# define cpunum 0 +#endif + static struct { + unsigned long mask; + const char *feature_name; + } feature_bits[] = { + { 1UL << 0, "branchlong" }, + { 1UL << 1, "spontaneous deferral"}, + { 1UL << 2, "16-byte atomic ops" } + }; + char features[128], *cp, *sep; + struct cpuinfo_ia64 *c = v; + unsigned long mask; + unsigned long proc_freq; + int i, size; + + mask = c->features; + + /* build the feature string: */ + memcpy(features, "standard", 9); + cp = features; + size = sizeof(features); + sep = ""; + for (i = 0; i < ARRAY_SIZE(feature_bits) && size > 1; ++i) { + if (mask & feature_bits[i].mask) { + cp += snprintf(cp, size, "%s%s", sep, + feature_bits[i].feature_name), + sep = ", "; + mask &= ~feature_bits[i].mask; + size = sizeof(features) - (cp - features); + } + } + if (mask && size > 1) { + /* print unknown features as a hex value */ + snprintf(cp, size, "%s0x%lx", sep, mask); + } + + proc_freq = cpufreq_quick_get(cpunum); + if (!proc_freq) + proc_freq = c->proc_freq / 1000; + + seq_printf(m, + "processor : %d\n" + "vendor : %s\n" + "arch : IA-64\n" + "family : %u\n" + "model : %u\n" + "model name : %s\n" + "revision : %u\n" + "archrev : %u\n" + "features : %s\n" + "cpu number : %lu\n" + "cpu regs : %u\n" + "cpu MHz : %lu.%03lu\n" + "itc MHz : %lu.%06lu\n" + "BogoMIPS : %lu.%02lu\n", + cpunum, c->vendor, c->family, c->model, + c->model_name, c->revision, c->archrev, + features, c->ppn, c->number, + proc_freq / 1000, proc_freq % 1000, + c->itc_freq / 1000000, c->itc_freq % 1000000, + lpj*HZ/500000, (lpj*HZ/5000) % 100); +#ifdef CONFIG_SMP + seq_printf(m, "siblings : %u\n", + cpumask_weight(&cpu_core_map[cpunum])); + if (c->socket_id != -1) + seq_printf(m, "physical id: %u\n", c->socket_id); + if (c->threads_per_core > 1 || c->cores_per_socket > 1) + seq_printf(m, + "core id : %u\n" + 
"thread id : %u\n", + c->core_id, c->thread_id); +#endif + seq_printf(m,"\n"); + + return 0; +} + +static void * +c_start (struct seq_file *m, loff_t *pos) +{ +#ifdef CONFIG_SMP + while (*pos < nr_cpu_ids && !cpu_online(*pos)) + ++*pos; +#endif + return *pos < nr_cpu_ids ? cpu_data(*pos) : NULL; +} + +static void * +c_next (struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void +c_stop (struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo +}; + +#define MAX_BRANDS 8 +static char brandname[MAX_BRANDS][128]; + +static char * +get_model_name(__u8 family, __u8 model) +{ + static int overflow; + char brand[128]; + int i; + + memcpy(brand, "Unknown", 8); + if (ia64_pal_get_brand_info(brand)) { + if (family == 0x7) + memcpy(brand, "Merced", 7); + else if (family == 0x1f) switch (model) { + case 0: memcpy(brand, "McKinley", 9); break; + case 1: memcpy(brand, "Madison", 8); break; + case 2: memcpy(brand, "Madison up to 9M cache", 23); break; + } + } + for (i = 0; i < MAX_BRANDS; i++) + if (strcmp(brandname[i], brand) == 0) + return brandname[i]; + for (i = 0; i < MAX_BRANDS; i++) + if (brandname[i][0] == '\0') + return strcpy(brandname[i], brand); + if (overflow++ == 0) + printk(KERN_ERR + "%s: Table overflow. 
Some processor model information will be missing\n", + __func__); + return "Unknown"; +} + +static void +identify_cpu (struct cpuinfo_ia64 *c) +{ + union { + unsigned long bits[5]; + struct { + /* id 0 & 1: */ + char vendor[16]; + + /* id 2 */ + u64 ppn; /* processor serial number */ + + /* id 3: */ + unsigned number : 8; + unsigned revision : 8; + unsigned model : 8; + unsigned family : 8; + unsigned archrev : 8; + unsigned reserved : 24; + + /* id 4: */ + u64 features; + } field; + } cpuid; + pal_vm_info_1_u_t vm1; + pal_vm_info_2_u_t vm2; + pal_status_t status; + unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */ + int i; + for (i = 0; i < 5; ++i) + cpuid.bits[i] = ia64_get_cpuid(i); + + memcpy(c->vendor, cpuid.field.vendor, 16); +#ifdef CONFIG_SMP + c->cpu = smp_processor_id(); + + /* below default values will be overwritten by identify_siblings() + * for Multi-Threading/Multi-Core capable CPUs + */ + c->threads_per_core = c->cores_per_socket = c->num_log = 1; + c->socket_id = -1; + + identify_siblings(c); + + if (c->threads_per_core > smp_num_siblings) + smp_num_siblings = c->threads_per_core; +#endif + c->ppn = cpuid.field.ppn; + c->number = cpuid.field.number; + c->revision = cpuid.field.revision; + c->model = cpuid.field.model; + c->family = cpuid.field.family; + c->archrev = cpuid.field.archrev; + c->features = cpuid.field.features; + c->model_name = get_model_name(c->family, c->model); + + status = ia64_pal_vm_summary(&vm1, &vm2); + if (status == PAL_STATUS_SUCCESS) { + impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb; + phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size; + } + c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1)); + c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); +} + +/* + * Do the following calculations: + * + * 1. the max. cache line size. + * 2. the minimum of the i-cache stride sizes for "flush_icache_range()". + * 3. the minimum of the cache stride sizes for "clflush_cache_range()". 
/*
 * Query PAL for the cache hierarchy and update three globals:
 *
 *   ia64_max_cacheline_size   - raised to the largest line size seen
 *   ia64_cache_stride_shift   - lowered to the smallest data/unified stride
 *   ia64_i_cache_stride_shift - lowered to the smallest i-cache stride
 *
 * Whenever a PAL call fails, an error is logged and the conservative
 * defaults (SMP_CACHE_BYTES, CACHE_STRIDE_SHIFT, I_CACHE_STRIDE_SHIFT) are
 * used for the affected values.
 */
static void
get_cache_info(void)
{
	unsigned long line_size, max = 1;
	unsigned long l, levels, unique_caches;
	pal_cache_config_info_t cci;
	long status;

	status = ia64_pal_cache_summary(&levels, &unique_caches);
	if (status != 0) {
		printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
		       __func__, status);
		max = SMP_CACHE_BYTES;
		/* Safest setup for "flush_icache_range()" */
		ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;
		/* Safest setup for "clflush_cache_range()" */
		ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
		goto out;
	}

	for (l = 0; l < levels; ++l) {
		/* cache_type (data_or_unified)=2 */
		status = ia64_pal_cache_config_info(l, 2, &cci);
		if (status != 0) {
			printk(KERN_ERR "%s: ia64_pal_cache_config_info"
				"(l=%lu, 2) failed (status=%ld)\n",
				__func__, l, status);
			max = SMP_CACHE_BYTES;
			/* The safest setup for "flush_icache_range()" */
			cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
			/* The safest setup for "clflush_cache_range()" */
			ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
			/*
			 * Mark the level as unified so the separate i-cache
			 * query below is skipped and the default stride set
			 * above feeds the i-cache comparison.
			 */
			cci.pcci_unified = 1;
		} else {
			if (cci.pcci_stride < ia64_cache_stride_shift)
				ia64_cache_stride_shift = cci.pcci_stride;

			/* pcci_line_size is used as a shift count here */
			line_size = 1 << cci.pcci_line_size;
			if (line_size > max)
				max = line_size;
		}

		if (!cci.pcci_unified) {
			/* cache_type (instruction)=1 */
			status = ia64_pal_cache_config_info(l, 1, &cci);
			if (status != 0) {
				printk(KERN_ERR "%s: ia64_pal_cache_config_info"
					"(l=%lu, 1) failed (status=%ld)\n",
					__func__, l, status);
				/* The safest setup for flush_icache_range() */
				cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
			}
		}
		/*
		 * cci now describes either the unified cache of this level or
		 * its instruction cache (or carries the default stride).
		 */
		if (cci.pcci_stride < ia64_i_cache_stride_shift)
			ia64_i_cache_stride_shift = cci.pcci_stride;
	}
  out:
	/* the global only ever grows */
	if (max > ia64_max_cacheline_size)
		ia64_max_cacheline_size = max;
}
+ */ +void +cpu_init (void) +{ + extern void ia64_mmu_init(void *); + static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG; + unsigned long num_phys_stacked; + pal_vm_info_2_u_t vmi; + unsigned int max_ctx; + struct cpuinfo_ia64 *cpu_info; + void *cpu_data; + + cpu_data = per_cpu_init(); +#ifdef CONFIG_SMP + /* + * insert boot cpu into sibling and core maps + * (must be done after per_cpu area is setup) + */ + if (smp_processor_id() == 0) { + cpumask_set_cpu(0, &per_cpu(cpu_sibling_map, 0)); + cpumask_set_cpu(0, &cpu_core_map[0]); + } else { + /* + * Set ar.k3 so that assembly code in MCA handler can compute + * physical addresses of per cpu variables with a simple: + * phys = ar.k3 + &per_cpu_var + * and the alt-dtlb-miss handler can set per-cpu mapping into + * the TLB when needed. head.S already did this for cpu0. + */ + ia64_set_kr(IA64_KR_PER_CPU_DATA, + ia64_tpa(cpu_data) - (long) __per_cpu_start); + } +#endif + + get_cache_info(); + + /* + * We can't pass "local_cpu_data" to identify_cpu() because we haven't called + * ia64_mmu_init() yet. And we can't call ia64_mmu_init() first because it + * depends on the data returned by identify_cpu(). We break the dependency by + * accessing cpu_data() through the canonical per-CPU address. + */ + cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(ia64_cpu_info) - __per_cpu_start); + identify_cpu(cpu_info); + +#ifdef CONFIG_MCKINLEY + { +# define FEATURE_SET 16 + struct ia64_pal_retval iprv; + + if (cpu_info->family == 0x1f) { + PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0); + if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80)) + PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, + (iprv.v1 | 0x80), FEATURE_SET, 0); + } + } +#endif + + /* Clear the stack memory reserved for pt_regs: */ + memset(task_pt_regs(current), 0, sizeof(struct pt_regs)); + + ia64_set_kr(IA64_KR_FPU_OWNER, 0); + + /* + * Initialize the page-table base register to a global + * directory with all zeroes. 
This ensures that we can handle + * TLB-misses to user address-space even before we created the + * first user address-space. This may happen, e.g., due to + * aggressive use of lfetch.fault. + */ + ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page))); + + /* + * Initialize default control register to defer speculative faults except + * for those arising from TLB misses, which are not deferred. The + * kernel MUST NOT depend on a particular setting of these bits (in other words, + * the kernel must have recovery code for all speculative accesses). Turn on + * dcr.lc as per recommendation by the architecture team. Most IA-32 apps + * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll + * be fine). + */ + ia64_setreg(_IA64_REG_CR_DCR, ( IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR + | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC)); + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + BUG_ON(current->mm); + + ia64_mmu_init(ia64_imva(cpu_data)); + ia64_mca_cpu_init(ia64_imva(cpu_data)); + + /* Clear ITC to eliminate sched_clock() overflows in human time. 
*/ + ia64_set_itc(0); + + /* disable all local interrupt sources: */ + ia64_set_itv(1 << 16); + ia64_set_lrr0(1 << 16); + ia64_set_lrr1(1 << 16); + ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); + ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); + + /* clear TPR & XTP to enable all interrupt classes: */ + ia64_setreg(_IA64_REG_CR_TPR, 0); + + /* Clear any pending interrupts left by SAL/EFI */ + while (ia64_get_ivr() != IA64_SPURIOUS_INT_VECTOR) + ia64_eoi(); + +#ifdef CONFIG_SMP + normal_xtp(); +#endif + + /* set ia64_ctx.max_ctx (derived from the RID size) to the maximum value that is supported by all CPUs: */ + if (ia64_pal_vm_summary(NULL, &vmi) == 0) { + max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1; + setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL); + } else { + printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n"); + max_ctx = (1U << 15) - 1; /* use architected minimum */ + } + while (max_ctx < ia64_ctx.max_ctx) { /* only ever lower the shared limit */ + unsigned int old = ia64_ctx.max_ctx; + if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old) /* retry if the value changed since it was read */ + break; + } + + if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) { + printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical " + "stacked regs\n"); + num_phys_stacked = 96; + } + /* size of physical stacked register partition plus 8 bytes: */ + if (num_phys_stacked > max_num_phys_stacked) { /* patch only when this CPU exceeds the largest count seen so far */ + ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8); + max_num_phys_stacked = num_phys_stacked; + } + platform_cpu_init(); +} + +void __init +check_bugs (void) +{ + ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles, /* patch list bounded by the two linker symbols */ + (unsigned long) __end___mckinley_e9_bundles); +} + +static int __init run_dmi_scan(void) +{ + dmi_scan_machine(); + dmi_memdev_walk(); + dmi_set_dump_stack_arch_desc(); + return 0; +} +core_initcall(run_dmi_scan); diff --git a/kernel/arch/ia64/kernel/sigframe.h b/kernel/arch/ia64/kernel/sigframe.h new file mode 100644 index 000000000..9fd9a1933 --- /dev/null +++ b/kernel/arch/ia64/kernel/sigframe.h @@ -0,0 
+1,25 @@ +struct sigscratch { + unsigned long scratch_unat; /* ar.unat for the general registers saved in pt */ + unsigned long ar_pfs; /* for syscalls, the user-level function-state */ + struct pt_regs pt; /* register frame that signal setup and sigreturn read and rewrite */ +}; + +struct sigframe { + /* + * Place signal handler args where user-level unwinder can find them easily. + * DO NOT MOVE THESE. They are part of the IA-64 Linux ABI and there is + * user-level code that depends on their presence! + */ + unsigned long arg0; /* signum */ + unsigned long arg1; /* siginfo pointer */ + unsigned long arg2; /* sigcontext pointer */ + /* + * End of architected state. + */ + + void __user *handler; /* pointer to the plabel of the signal handler */ + struct siginfo info; /* siginfo storage; setup_frame() points arg1 here */ + struct sigcontext sc; /* sigcontext storage; setup_frame() points arg2 here */ +}; + +extern void ia64_do_signal (struct sigscratch *, long); diff --git a/kernel/arch/ia64/kernel/signal.c b/kernel/arch/ia64/kernel/signal.c new file mode 100644 index 000000000..b3a124da7 --- /dev/null +++ b/kernel/arch/ia64/kernel/signal.c @@ -0,0 +1,496 @@ +/* + * Architecture-specific signal handling support. + * + * Copyright (C) 1999-2004 Hewlett-Packard Co + * David Mosberger-Tang + * + * Derived from i386 and Alpha versions. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "sigframe.h" + +#define DEBUG_SIG 0 +#define STACK_ALIGN 16 /* minimal alignment for stack pointer */ + +#if _NSIG_WORDS > 1 +# define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t)) +# define GET_SIGSET(k,u) __copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t)) +#else +# define PUT_SIGSET(k,u) __put_user((k)->sig[0], &(u)->sig[0]) +# define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0]) +#endif + +static long +restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) +{ + unsigned long ip, flags, nat, um, cfm, rsc; + long err; /* OR of all user-access results; nonzero means at least one access failed */ + + /* Always make any pending restarted system calls return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; + + /* restore the scratch state that always gets updated during signal delivery: */ + err = __get_user(flags, &sc->sc_flags); + err |= __get_user(nat, &sc->sc_nat); + err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */ + err |= __get_user(cfm, &sc->sc_cfm); + err |= __get_user(um, &sc->sc_um); /* user mask */ + err |= __get_user(rsc, &sc->sc_ar_rsc); + err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat); + err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); + err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs); + err |= __get_user(scr->pt.pr, &sc->sc_pr); /* predicates */ + err |= __get_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */ + err |= __get_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */ + err |= __copy_from_user(&scr->pt.r1, &sc->sc_gr[1], 8); /* r1 */ + err |= __copy_from_user(&scr->pt.r8, &sc->sc_gr[8], 4*8); /* r8-r11 */ + err |= __copy_from_user(&scr->pt.r12, &sc->sc_gr[12], 2*8); /* r12-r13 */ + err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8); /* r15 */ + + scr->pt.cr_ifs = cfm | (1UL << 63); /* bit 63 is the bit setup_sigcontext() tests as the cr_ifs valid bit */ + scr->pt.ar_rsc = rsc | (3 << 2); /* force PL3 */ + + /* establish new 
instruction pointer: */ + scr->pt.cr_iip = ip & ~0x3UL; /* the low two bits of the saved ip are not part of the address ... */ + ia64_psr(&scr->pt)->ri = ip & 0x3; /* ... they are restored into psr.ri instead */ + scr->pt.cr_ipsr = (scr->pt.cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM); /* only the IA64_PSR_UM bits are taken from user space */ + + scr->scratch_unat = ia64_put_scratch_nat_bits(&scr->pt, nat); + + if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) { + /* Restore most scratch-state only when not in syscall. */ + err |= __get_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */ + err |= __get_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */ + err |= __get_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */ + err |= __copy_from_user(&scr->pt.ar_csd, &sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */ + err |= __copy_from_user(&scr->pt.r2, &sc->sc_gr[2], 2*8); /* r2-r3 */ + err |= __copy_from_user(&scr->pt.r16, &sc->sc_gr[16], 16*8); /* r16-r31 */ + } + + if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) { + struct ia64_psr *psr = ia64_psr(&scr->pt); + + err |= __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16); /* 96 entries of 16 bytes, starting at sc_fr[32] */ + psr->mfh = 0; /* drop signal handler's fph contents... */ + preempt_disable(); + if (psr->dfh) + ia64_drop_fpu(current); + else { + /* We already own the local fph, otherwise psr->dfh wouldn't be 0. */ + __ia64_load_fpu(current->thread.fph); + ia64_set_local_fpu_owner(current); + } + preempt_enable(); + } + return err; +} + +int +copy_siginfo_to_user (siginfo_t __user *to, const siginfo_t *from) +{ + if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t))) + return -EFAULT; + if (from->si_code < 0) { /* negative si_code: the whole siginfo_t is copied verbatim */ + if (__copy_to_user(to, from, sizeof(siginfo_t))) + return -EFAULT; + return 0; + } else { + int err; + + /* + * If you change siginfo_t structure, please be sure this code is fixed + * accordingly. It should never copy any pad contained in the structure + * to avoid security leaks, but must copy the generic 3 ints plus the + * relevant union member. 
+ */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + switch (from->si_code >> 16) { /* the upper bits of si_code select which union member is copied */ + case __SI_FAULT >> 16: + err |= __put_user(from->si_flags, &to->si_flags); + err |= __put_user(from->si_isr, &to->si_isr); /* fall through: si_addr and si_imm are copied as well */ + case __SI_POLL >> 16: + err |= __put_user(from->si_addr, &to->si_addr); + err |= __put_user(from->si_imm, &to->si_imm); + break; + case __SI_TIMER >> 16: + err |= __put_user(from->si_tid, &to->si_tid); + err |= __put_user(from->si_overrun, &to->si_overrun); + err |= __put_user(from->si_ptr, &to->si_ptr); + break; + case __SI_RT >> 16: /* Not generated by the kernel as of now. */ + case __SI_MESGQ >> 16: + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(from->si_pid, &to->si_pid); + err |= __put_user(from->si_ptr, &to->si_ptr); + break; + case __SI_CHLD >> 16: + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + err |= __put_user(from->si_status, &to->si_status); /* fall through: si_uid and si_pid are copied as well */ + default: + err |= __put_user(from->si_uid, &to->si_uid); + err |= __put_user(from->si_pid, &to->si_pid); + break; + } + return err; + } +} + +long +ia64_rt_sigreturn (struct sigscratch *scr) +{ + extern char ia64_strace_leave_kernel, ia64_leave_kernel; + struct sigcontext __user *sc; + struct siginfo si; + sigset_t set; + long retval; /* address of the kernel-exit routine; returned on success and on failure alike */ + + sc = &((struct sigframe __user *) (scr->pt.r12 + 16))->sc; /* setup_frame() stores frame - 16 in r12, so the frame is at r12 + 16 */ + + /* + * When we return to the previously executing context, r8 and r10 have already + * been setup the way we want them. Indeed, if the signal wasn't delivered while + * in a system call, we must not touch r8 or r10 as otherwise user-level state + * could be corrupted. 
+ */ + retval = (long) &ia64_leave_kernel; + if (test_thread_flag(TIF_SYSCALL_TRACE) + || test_thread_flag(TIF_SYSCALL_AUDIT)) + /* + * strace expects to be notified after sigreturn returns even though the + * context to which we return may not be in the middle of a syscall. + * Thus, the return-value that strace displays for sigreturn is + * meaningless. + */ + retval = (long) &ia64_strace_leave_kernel; + + if (!access_ok(VERIFY_READ, sc, sizeof(*sc))) + goto give_sigsegv; + + if (GET_SIGSET(&set, &sc->sc_mask)) + goto give_sigsegv; + + set_current_blocked(&set); /* the saved mask is installed before the register state is restored */ + + if (restore_sigcontext(sc, scr)) + goto give_sigsegv; + +#if DEBUG_SIG + printk("SIG return (%s:%d): sp=%lx ip=%lx\n", + current->comm, current->pid, scr->pt.r12, scr->pt.cr_iip); +#endif + if (restore_altstack(&sc->sc_stack)) + goto give_sigsegv; + return retval; + + give_sigsegv: /* any failed user access above ends here: force a SIGSEGV that reports sc as the address */ + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; + si.si_pid = task_pid_vnr(current); + si.si_uid = from_kuid_munged(current_user_ns(), current_uid()); + si.si_addr = sc; + force_sig_info(SIGSEGV, &si, current); + return retval; +} + +/* + * This does just the minimum required setup of sigcontext. + * Specifically, it only installs data that is either not knowable at + * the user-level or that gets modified before execution in the + * trampoline starts. Everything else is done at the user-level. 
+ */ +static long +setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratch *scr) +{ + unsigned long flags = 0, ifs, cfm, nat; + long err = 0; /* OR of all user-access results; nonzero means at least one access failed */ + + ifs = scr->pt.cr_ifs; + + if (on_sig_stack((unsigned long) sc)) + flags |= IA64_SC_FLAG_ONSTACK; + if ((ifs & (1UL << 63)) == 0) + /* if cr_ifs doesn't have the valid bit set, we got here through a syscall */ + flags |= IA64_SC_FLAG_IN_SYSCALL; + cfm = ifs & ((1UL << 38) - 1); /* keep the low 38 bits of cr_ifs */ + ia64_flush_fph(current); + if ((current->thread.flags & IA64_THREAD_FPH_VALID)) { + flags |= IA64_SC_FLAG_FPH_VALID; + err = __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16); /* 96 entries of 16 bytes, starting at sc_fr[32] */ + } + + nat = ia64_get_scratch_nat_bits(&scr->pt, scr->scratch_unat); + + err |= __put_user(flags, &sc->sc_flags); + err |= __put_user(nat, &sc->sc_nat); + err |= PUT_SIGSET(mask, &sc->sc_mask); + err |= __put_user(cfm, &sc->sc_cfm); + err |= __put_user(scr->pt.cr_ipsr & IA64_PSR_UM, &sc->sc_um); + err |= __put_user(scr->pt.ar_rsc, &sc->sc_ar_rsc); + err |= __put_user(scr->pt.ar_unat, &sc->sc_ar_unat); /* ar.unat */ + err |= __put_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); /* ar.fpsr */ + err |= __put_user(scr->pt.ar_pfs, &sc->sc_ar_pfs); + err |= __put_user(scr->pt.pr, &sc->sc_pr); /* predicates */ + err |= __put_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */ + err |= __put_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */ + err |= __copy_to_user(&sc->sc_gr[1], &scr->pt.r1, 8); /* r1 */ + err |= __copy_to_user(&sc->sc_gr[8], &scr->pt.r8, 4*8); /* r8-r11 */ + err |= __copy_to_user(&sc->sc_gr[12], &scr->pt.r12, 2*8); /* r12-r13 */ + err |= __copy_to_user(&sc->sc_gr[15], &scr->pt.r15, 8); /* r15 */ + err |= __put_user(scr->pt.cr_iip + ia64_psr(&scr->pt)->ri, &sc->sc_ip); /* psr.ri is folded into the low bits; restore_sigcontext() splits it out again */ + + if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) { + /* Copy scratch regs to sigcontext if the signal didn't interrupt a syscall. 
*/ + err |= __put_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */ + err |= __put_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */ + err |= __put_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */ + err |= __copy_to_user(&sc->sc_ar25, &scr->pt.ar_csd, 2*8); /* ar.csd & ar.ssd */ + err |= __copy_to_user(&sc->sc_gr[2], &scr->pt.r2, 2*8); /* r2-r3 */ + err |= __copy_to_user(&sc->sc_gr[16], &scr->pt.r16, 16*8); /* r16-r31 */ + } + return err; +} + +/* + * Check whether the register-backing store is already on the signal stack. + */ +static inline int +rbs_on_sig_stack (unsigned long bsp) +{ + return (bsp - current->sas_ss_sp < current->sas_ss_size); /* a bsp below sas_ss_sp wraps to a huge unsigned value and fails the test */ +} + +static long +force_sigsegv_info (int sig, void __user *addr) +{ + unsigned long flags; + struct siginfo si; + + if (sig == SIGSEGV) { + /* + * Acquiring siglock around the sa_handler-update is almost + * certainly overkill, but this isn't a + * performance-critical path and I'd rather play it safe + * here than having to debug a nasty race if and when + * something changes in kernel/signal.c that would make it + * no longer safe to modify sa_handler without holding the + * lock. 
+ */ + spin_lock_irqsave(&current->sighand->siglock, flags); + current->sighand->action[sig - 1].sa.sa_handler = SIG_DFL; /* the SIGSEGV forced below then takes the default action */ + spin_unlock_irqrestore(&current->sighand->siglock, flags); + } + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; + si.si_pid = task_pid_vnr(current); + si.si_uid = from_kuid_munged(current_user_ns(), current_uid()); + si.si_addr = addr; + force_sig_info(SIGSEGV, &si, current); + return 1; /* nonzero: setup_frame() passes this on as its failure result */ +} + +static long +setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr) +{ + extern char __kernel_sigtramp[]; + unsigned long tramp_addr, new_rbs = 0, new_sp; + struct sigframe __user *frame; + long err; + + new_sp = scr->pt.r12; + tramp_addr = (unsigned long) __kernel_sigtramp; + if (ksig->ka.sa.sa_flags & SA_ONSTACK) { + int onstack = sas_ss_flags(new_sp); + + if (onstack == 0) { + new_sp = current->sas_ss_sp + current->sas_ss_size; + /* + * We need to check for the register stack being on the + * signal stack separately, because it's switched + * separately (memory stack is switched in the kernel, + * register stack is switched in the signal trampoline). + */ + if (!rbs_on_sig_stack(scr->pt.ar_bspstore)) + new_rbs = ALIGN(current->sas_ss_sp, + sizeof(long)); + } else if (onstack == SS_ONSTACK) { + unsigned long check_sp; + + /* + * If we are on the alternate signal stack and would + * overflow it, don't. Return an always-bogus address + * instead so we will die with SIGSEGV. 
+ */ + check_sp = (new_sp - sizeof(*frame)) & -STACK_ALIGN; /* where the frame would start */ + if (!likely(on_sig_stack(check_sp))) + return force_sigsegv_info(ksig->sig, (void __user *) + check_sp); + } + } + frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN); /* frame sits below new_sp, aligned down to STACK_ALIGN */ + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return force_sigsegv_info(ksig->sig, frame); + + err = __put_user(ksig->sig, &frame->arg0); + err |= __put_user(&frame->info, &frame->arg1); + err |= __put_user(&frame->sc, &frame->arg2); + err |= __put_user(new_rbs, &frame->sc.sc_rbs_base); /* stays 0 unless the SA_ONSTACK branch above chose a new backing-store base */ + err |= __put_user(0, &frame->sc.sc_loadrs); /* initialize to zero */ + err |= __put_user(ksig->ka.sa.sa_handler, &frame->handler); + + err |= copy_siginfo_to_user(&frame->info, &ksig->info); + + err |= __save_altstack(&frame->sc.sc_stack, scr->pt.r12); + err |= setup_sigcontext(&frame->sc, set, scr); + + if (unlikely(err)) /* any failed user access while filling the frame */ + return force_sigsegv_info(ksig->sig, frame); + + scr->pt.r12 = (unsigned long) frame - 16; /* new stack pointer */ + scr->pt.ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */ + scr->pt.cr_iip = tramp_addr; /* execution resumes at __kernel_sigtramp */ + ia64_psr(&scr->pt)->ri = 0; /* start executing in first slot */ + ia64_psr(&scr->pt)->be = 0; /* force little-endian byte-order */ + /* + * Force the interruption function mask to zero. This has no effect when a + * system-call got interrupted by a signal (since, in that case, scr->pt.cr_ifs is + * ignored), but it has the desirable effect of making it possible to deliver a + * signal with an incomplete register frame (which happens when a mandatory RSE + * load faults). Furthermore, it has no negative effect on getting the user's + * dirty partition preserved, because that's governed by scr->pt.loadrs. + */ + scr->pt.cr_ifs = (1UL << 63); + + /* + * Note: this affects only the NaT bits of the scratch regs (the ones saved in + * pt_regs), which is exactly what we want. 
+ */ + scr->scratch_unat = 0; /* ensure NaT bit of r12 is clear */ + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%p\n", + current->comm, current->pid, ksig->sig, scr->pt.r12, frame->sc.sc_ip, frame->handler); +#endif + return 0; /* 0: the frame was set up */ +} + +static long +handle_signal (struct ksignal *ksig, struct sigscratch *scr) +{ + int ret = setup_frame(ksig, sigmask_to_save(), scr); + + if (!ret) /* only a successful frame setup is reported to signal_setup_done() */ + signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP)); + + return ret; /* 0 on success, nonzero when setup_frame() went through force_sigsegv_info() */ +} + +/* + * Note that `init' is a special process: it doesn't get signals it doesn't want to + * handle. Thus you cannot kill init even with a SIGKILL even by mistake. + */ +void +ia64_do_signal (struct sigscratch *scr, long in_syscall) +{ + long restart = in_syscall; + long errno = scr->pt.r8; /* captured once; later writes to scr->pt.r8 do not change it */ + struct ksignal ksig; + + /* + * This only loops in the rare cases of handle_signal() failing, in which case we + * need to push through a forced SIGSEGV. + */ + while (1) { + get_signal(&ksig); + + /* + * get_signal() may have run a debugger (via notify_parent()) + * and the debugger may have modified the state (e.g., to arrange for an + * inferior call), thus it's important to check for restarting _after_ + * get_signal(). + */ + if ((long) scr->pt.r10 != -1) + /* + * A system call has to be restarted only if one of the error codes + * ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned. If r10 + * isn't -1 then r8 doesn't hold an error code and we don't need to + * restart the syscall, so we can clear the "restart" flag here. 
+ */ + restart = 0; + + if (ksig.sig <= 0) /* nothing to deliver */ + break; + + if (unlikely(restart)) { + switch (errno) { + case ERESTART_RESTARTBLOCK: + case ERESTARTNOHAND: + scr->pt.r8 = EINTR; + /* note: scr->pt.r10 is already -1 */ + break; + + case ERESTARTSYS: + if ((ksig.ka.sa.sa_flags & SA_RESTART) == 0) { + scr->pt.r8 = EINTR; + /* note: scr->pt.r10 is already -1 */ + break; + } /* SA_RESTART is set: fall through and restart the syscall */ + case ERESTARTNOINTR: + ia64_decrement_ip(&scr->pt); + restart = 0; /* don't restart twice if handle_signal() fails... */ + } + } + + /* + * Whee! Actually deliver the signal. If the delivery failed, we need to + * continue to iterate in this loop so we can deliver the SIGSEGV... + */ + if (handle_signal(&ksig, scr)) /* NOTE(review): handle_signal() returns 0 on success and nonzero on failure, so this returns on failure and keeps looping on success -- the opposite of the comment above; confirm the intended polarity */ + return; + } + + /* Did we come from a system call? */ + if (restart) { + /* Restart the system call - no handlers present */ + if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR + || errno == ERESTART_RESTARTBLOCK) + { + /* + * Note: the syscall number is in r15 which is saved in + * pt_regs so all we need to do here is adjust ip so that + * the "break" instruction gets re-executed. + */ + ia64_decrement_ip(&scr->pt); + if (errno == ERESTART_RESTARTBLOCK) + scr->pt.r15 = __NR_restart_syscall; + } + } + + /* if there's no signal to deliver, we just put the saved sigmask + * back */ + restore_saved_sigmask(); +} diff --git a/kernel/arch/ia64/kernel/smp.c b/kernel/arch/ia64/kernel/smp.c new file mode 100644 index 000000000..7f706d4f8 --- /dev/null +++ b/kernel/arch/ia64/kernel/smp.c @@ -0,0 +1,342 @@ +/* + * SMP Support + * + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang + * + * Lots of stuff stolen from arch/alpha/kernel/smp.c + * + * 01/05/16 Rohit Seth IA64-SMP functions. Reorganized + * the existing code (on the lines of x86 port). + * 00/09/11 David Mosberger Do loops_per_jiffy + * calibration on each CPU. 
+ * 00/08/23 Asit Mallick fixed logical processor id + * 00/03/31 Rohit Seth Fixes for Bootstrap Processor + * & cpu_online_map now gets done here (instead of setup.c) + * 99/10/05 davidm Update to bring it in sync with new command-line processing + * scheme. + * 10/13/00 Goutham Rao Updated smp_call_function and + * smp_call_function_single to resend IPI on timeouts + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Note: alignment of 4 entries/cacheline was empirically determined + * to be a good tradeoff between hot cachelines & spreading the array + * across too many cachelines. + */ +static struct local_tlb_flush_counts { + unsigned int count; +} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS]; + +static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned short [NR_CPUS], + shadow_flush_counts); + +#define IPI_CALL_FUNC 0 +#define IPI_CPU_STOP 1 +#define IPI_CALL_FUNC_SINGLE 2 +#define IPI_KDUMP_CPU_STOP 3 + +/* This needs to be cacheline aligned because it is written to by *other* CPUs. */ +static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, ipi_operation); + +extern void cpu_halt (void); + +static void +stop_this_cpu(void) +{ + /* + * Remove this CPU: + */ + set_cpu_online(smp_processor_id(), false); + max_xtp(); + local_irq_disable(); + cpu_halt(); +} + +void +cpu_die(void) +{ + max_xtp(); + local_irq_disable(); + cpu_halt(); + /* Should never be here */ + BUG(); + for (;;); +} + +irqreturn_t +handle_IPI (int irq, void *dev_id) +{ + int this_cpu = get_cpu(); + unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation); + unsigned long ops; /* snapshot of the pending IPI_* bits, taken and cleared in one xchg */ + + mb(); /* Order interrupt and bit testing. */ + while ((ops = xchg(pending_ipis, 0)) != 0) { + mb(); /* Order bit clearing and data access. 
*/ + do { + unsigned long which; + + which = ffz(~ops); /* first zero bit of ~ops, i.e. the lowest set bit of ops */ + ops &= ~(1UL << which); /* mask built in unsigned long so any bit position of ops can be cleared */ + + switch (which) { + case IPI_CPU_STOP: + stop_this_cpu(); + break; + case IPI_CALL_FUNC: + generic_smp_call_function_interrupt(); + break; + case IPI_CALL_FUNC_SINGLE: + generic_smp_call_function_single_interrupt(); + break; +#ifdef CONFIG_KEXEC + case IPI_KDUMP_CPU_STOP: + unw_init_running(kdump_cpu_freeze, NULL); + break; +#endif + default: + printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", + this_cpu, which); + break; + } + } while (ops); + mb(); /* Order data access and bit testing. */ + } + put_cpu(); + return IRQ_HANDLED; +} + + + +/* + * Called with preemption disabled. + */ +static inline void +send_IPI_single (int dest_cpu, int op) +{ + set_bit(op, &per_cpu(ipi_operation, dest_cpu)); /* record the request before raising the interrupt */ + platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0); +} + +/* + * Called with preemption disabled. + */ +static inline void +send_IPI_allbutself (int op) +{ + unsigned int i; + + for_each_online_cpu(i) { + if (i != smp_processor_id()) + send_IPI_single(i, op); + } +} + +/* + * Called with preemption disabled. + */ +static inline void +send_IPI_mask(const struct cpumask *mask, int op) +{ + unsigned int cpu; + + for_each_cpu(cpu, mask) { + send_IPI_single(cpu, op); + } +} + +/* + * Called with preemption disabled. + */ +static inline void +send_IPI_all (int op) +{ + int i; + + for_each_online_cpu(i) { + send_IPI_single(i, op); + } +} + +/* + * Called with preemption disabled. + */ +static inline void +send_IPI_self (int op) +{ + send_IPI_single(smp_processor_id(), op); +} + +#ifdef CONFIG_KEXEC +void +kdump_smp_send_stop(void) +{ + send_IPI_allbutself(IPI_KDUMP_CPU_STOP); +} + +void +kdump_smp_send_init(void) +{ + unsigned int cpu, self_cpu; + self_cpu = smp_processor_id(); + for_each_online_cpu(cpu) { + if (cpu != self_cpu) { + if(kdump_status[cpu] == 0) /* only CPUs whose kdump_status entry is still 0 get the INIT */ + platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0); + } + } +} +#endif +/* + * Called with preemption disabled. 
+ */ +void +smp_send_reschedule (int cpu) +{ + platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); +} +EXPORT_SYMBOL_GPL(smp_send_reschedule); + +/* + * Called with preemption disabled. + */ +static void +smp_send_local_flush_tlb (int cpu) +{ + platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0); +} + +void +smp_local_flush_tlb(void) +{ + /* + * Use atomic ops. Otherwise, the load/increment/store sequence from + * a "++" operation can have the line stolen between the load & store. + * The overhead of the atomic op is negligible in this case & offers + * significant benefit for the brief periods where lots of cpus + * are simultaneously flushing TLBs. + */ + ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq); /* smp_flush_tlb_cpumask() polls this counter to see that the flush ran */ + local_flush_tlb_all(); +} + +#define FLUSH_DELAY 5 /* Usec backoff to eliminate excessive cacheline bouncing */ + +void +smp_flush_tlb_cpumask(cpumask_t xcpumask) +{ + unsigned short *counts = __ia64_per_cpu_var(shadow_flush_counts); + cpumask_t cpumask = xcpumask; + int mycpu, cpu, flush_mycpu = 0; + + preempt_disable(); + mycpu = smp_processor_id(); + + for_each_cpu(cpu, &cpumask) /* snapshot the low 16 bits of each target's flush counter */ + counts[cpu] = local_tlb_flush_counts[cpu].count & 0xffff; + + mb(); + for_each_cpu(cpu, &cpumask) { + if (cpu == mycpu) + flush_mycpu = 1; /* flushed directly below instead of by IPI */ + else + smp_send_local_flush_tlb(cpu); + } + + if (flush_mycpu) + smp_local_flush_tlb(); + + for_each_cpu(cpu, &cpumask) /* wait until every target's counter differs from its snapshot */ + while(counts[cpu] == (local_tlb_flush_counts[cpu].count & 0xffff)) + udelay(FLUSH_DELAY); + + preempt_enable(); +} + +void +smp_flush_tlb_all (void) +{ + on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1); +} + +void +smp_flush_tlb_mm (struct mm_struct *mm) +{ + cpumask_var_t cpus; + preempt_disable(); + /* this happens for the common case of a single-threaded fork(): */ + if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1)) + { + local_finish_flush_tlb_mm(mm); + preempt_enable(); + return; + } + if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) { /* no mask could be allocated: use smp_call_function() instead of targeting mm_cpumask(mm) */ 
+ smp_call_function((void (*)(void *))local_finish_flush_tlb_mm, + mm, 1); + } else { + cpumask_copy(cpus, mm_cpumask(mm)); + smp_call_function_many(cpus, + (void (*)(void *))local_finish_flush_tlb_mm, mm, 1); + free_cpumask_var(cpus); + } + local_irq_disable(); /* the calls above do not cover this CPU's own flush; it is done here with interrupts off */ + local_finish_flush_tlb_mm(mm); + local_irq_enable(); + preempt_enable(); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + send_IPI_mask(mask, IPI_CALL_FUNC); +} + +/* + * this function calls the 'stop' function on all other CPUs in the system. + */ +void +smp_send_stop (void) +{ + send_IPI_allbutself(IPI_CPU_STOP); +} + +int +setup_profiling_timer (unsigned int multiplier) +{ + return -EINVAL; /* always fails; multiplier is ignored */ +} diff --git a/kernel/arch/ia64/kernel/smpboot.c b/kernel/arch/ia64/kernel/smpboot.c new file mode 100644 index 000000000..b054c5c6e --- /dev/null +++ b/kernel/arch/ia64/kernel/smpboot.c @@ -0,0 +1,861 @@ +/* + * SMP boot-related support + * + * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 2001, 2004-2005 Intel Corp + * Rohit Seth + * Suresh Siddha + * Gordon Jin + * Ashok Raj + * + * 01/05/16 Rohit Seth Moved SMP booting functions from smp.c to here. + * 01/04/27 David Mosberger Added ITC synching code. + * 02/07/31 David Mosberger Switch over to hotplug-CPU boot-sequence. + * smp_boot_cpus()/smp_commence() is replaced by + * smp_prepare_cpus()/__cpu_up()/smp_cpus_done(). 
+ * 04/06/21 Ashok Raj Added CPU Hotplug Support + * 04/12/26 Jin Gordon + * 04/12/26 Rohit Seth + * Add multi-threading and multi-core detection + * 05/01/30 Suresh Siddha + * Setup cpu_sibling_map and cpu_core_map + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SMP_DEBUG 0 + +#if SMP_DEBUG +#define Dprintk(x...) printk(x) +#else +#define Dprintk(x...) +#endif + +#ifdef CONFIG_HOTPLUG_CPU +#ifdef CONFIG_PERMIT_BSP_REMOVE +#define bsp_remove_ok 1 +#else +#define bsp_remove_ok 0 +#endif + +/* + * Global array allocated for NR_CPUS at boot time + */ +struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS]; + +/* + * start_ap in head.S uses this to store current booting cpu + * info. 
+ */ +struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0]; + +#define set_brendez_area(x) (sal_state_for_booting_cpu = &sal_boot_rendez_state[(x)]); /* NOTE(review): the expansion already ends in ';' */ + +#else +#define set_brendez_area(x) +#endif + + +/* + * ITC synchronization related stuff: + */ +#define MASTER (0) /* go[MASTER]: set by get_delta(), polled and cleared by sync_master() */ +#define SLAVE (SMP_CACHE_BYTES/8) /* go[SLAVE]: where sync_master() publishes its ITC value; presumably one cache line from go[MASTER] given 8-byte longs -- confirm */ + +#define NUM_ROUNDS 64 /* magic value */ +#define NUM_ITERS 5 /* likewise */ + +static DEFINE_SPINLOCK(itc_sync_lock); +static volatile unsigned long go[SLAVE + 1]; /* handshake cells shared by sync_master() and get_delta() */ + +#define DEBUG_ITC_SYNC 0 + +extern void start_ap (void); +extern unsigned long ia64_iobase; + +struct task_struct *task_for_booting_cpu; + +/* + * State for each CPU + */ +DEFINE_PER_CPU(int, cpu_state); + +cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_core_map); +DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map); +EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); + +int smp_num_siblings = 1; + +/* which logical CPU number maps to which CPU (physical APIC ID) */ +volatile int ia64_cpu_to_sapicid[NR_CPUS]; +EXPORT_SYMBOL(ia64_cpu_to_sapicid); + +static cpumask_t cpu_callin_map; + +struct smp_boot_data smp_boot_data __initdata; + +unsigned long ap_wakeup_vector = -1; /* External Int use to wakeup APs */ + +char __initdata no_int_routing; + +unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? 
*/ + +#ifdef CONFIG_FORCE_CPEI_RETARGET +#define CPEI_OVERRIDE_DEFAULT (1) +#else +#define CPEI_OVERRIDE_DEFAULT (0) +#endif + +unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT; + +static int __init +cmdl_force_cpei(char *str) +{ + int value=0; /* copied to force_cpei_retarget after get_option() has parsed str */ + + get_option (&str, &value); + force_cpei_retarget = value; + + return 1; +} + +__setup("force_cpei=", cmdl_force_cpei); + +static int __init +nointroute (char *str) +{ + no_int_routing = 1; /* str is ignored: the option takes no value */ + printk ("no_int_routing on\n"); + return 1; +} + +__setup("nointroute", nointroute); + +static void fix_b0_for_bsp(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int cpuid; + static int fix_bsp_b0 = 1; /* cleared after the first copy so the copy happens only once */ + + cpuid = smp_processor_id(); + + /* + * Cache the b0 value on the first AP that comes up + */ + if (!(fix_bsp_b0 && cpuid)) /* nothing to do on CPU 0 or once the copy has been made */ + return; + + sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0]; + printk ("Fixed BSP b0 value from CPU %d\n", cpuid); + + fix_bsp_b0 = 0; +#endif +} + +void +sync_master (void *arg) +{ + unsigned long flags, i; + + go[MASTER] = 0; /* ia64_sync_itc() spins until it sees this */ + + local_irq_save(flags); + { + for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { /* answers up to NUM_ROUNDS*NUM_ITERS requests */ + while (!go[MASTER]) /* wait for get_delta() to ask */ + cpu_relax(); + go[MASTER] = 0; + go[SLAVE] = ia64_get_itc(); /* publish this CPU's ITC value as the reply */ + } + } + local_irq_restore(flags); +} + +/* + * Return the number of cycles by which our itc differs from the itc on the master + * (time-keeper) CPU. A positive number indicates our itc is ahead of the master, + * negative that it is behind. 
+ */
+static inline long
+get_delta (long *rt, long *master)
+{
+	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
+	unsigned long tcenter, t0, t1, tm;
+	long i;
+
+	/*
+	 * NOTE(review): ia64_sync_itc() passes the addresses of unsigned long
+	 * variables for rt and master -- confirm the intended signedness.
+	 */
+	for (i = 0; i < NUM_ITERS; ++i) {
+		t0 = ia64_get_itc();
+		go[MASTER] = 1;		/* request a timestamp */
+		while (!(tm = go[SLAVE]))
+			cpu_relax();
+		go[SLAVE] = 0;
+		t1 = ia64_get_itc();
+
+		/* remember the sample with the shortest round trip so far */
+		if (t1 - t0 < best_t1 - best_t0) {
+			best_t0 = t0;
+			best_t1 = t1;
+			best_tm = tm;
+		}
+	}
+
+	*rt = best_t1 - best_t0;	/* round-trip time of the best sample */
+	*master = best_tm - best_t0;	/* master's timestamp relative to t0 */
+
+	/* average best_t0 and best_t1 without overflow: */
+	tcenter = (best_t0/2 + best_t1/2);
+	if (best_t0 % 2 + best_t1 % 2 == 2)
+		++tcenter;
+	return tcenter - best_tm;
+}
+
+/*
+ * Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU
+ * (normally the time-keeper CPU).  We use a closed loop to eliminate the possibility of
+ * unaccounted-for errors (such as getting a machine check in the middle of a calibration
+ * step).  The basic idea is for the slave to ask the master what itc value it has and to
+ * read its own itc before and after the master responds.  Each iteration gives us three
+ * timestamps:
+ *
+ *	slave		master
+ *
+ *	t0 ---\
+ *             ---\
+ *		   --->
+ *			tm
+ *		   /---
+ *	       /---
+ *	t1 <---
+ *
+ *
+ * The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0
+ * and t1.  If we achieve this, the clocks are synchronized provided the interconnect
+ * between the slave and the master is symmetric.  Even if the interconnect were
+ * asymmetric, we would still know that the synchronization error is smaller than the
+ * roundtrip latency (t1 - t0).
+ *
+ * When the interconnect is quiet and symmetric, this lets us synchronize the itc to
+ * within one or two cycles.  However, we can only *guarantee* that the synchronization is
+ * accurate to within a round-trip time, which is typically in the range of several
+ * hundred cycles (e.g., ~500 cycles).
In practice, this means that the itc's are usually + * almost perfectly synchronized, but we shouldn't assume that the accuracy is much better + * than half a micro second or so. + */ +void +ia64_sync_itc (unsigned int master) +{ + long i, delta, adj, adjust_latency = 0, done = 0; + unsigned long flags, rt, master_time_stamp, bound; +#if DEBUG_ITC_SYNC + struct { + long rt; /* roundtrip time */ + long master; /* master's timestamp */ + long diff; /* difference between midpoint and master's timestamp */ + long lat; /* estimate of itc adjustment latency */ + } t[NUM_ROUNDS]; +#endif + + /* + * Make sure local timer ticks are disabled while we sync. If + * they were enabled, we'd have to worry about nasty issues + * like setting the ITC ahead of (or a long time before) the + * next scheduled tick. + */ + BUG_ON((ia64_get_itv() & (1 << 16)) == 0); + + go[MASTER] = 1; + + if (smp_call_function_single(master, sync_master, NULL, 0) < 0) { + printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master); + return; + } + + while (go[MASTER]) + cpu_relax(); /* wait for master to be ready */ + + spin_lock_irqsave(&itc_sync_lock, flags); + { + for (i = 0; i < NUM_ROUNDS; ++i) { + delta = get_delta(&rt, &master_time_stamp); + if (delta == 0) { + done = 1; /* let's lock on to this... 
*/ + bound = rt; + } + + if (!done) { + if (i > 0) { + adjust_latency += -delta; + adj = -delta + adjust_latency/4; + } else + adj = -delta; + + ia64_set_itc(ia64_get_itc() + adj); + } +#if DEBUG_ITC_SYNC + t[i].rt = rt; + t[i].master = master_time_stamp; + t[i].diff = delta; + t[i].lat = adjust_latency/4; +#endif + } + } + spin_unlock_irqrestore(&itc_sync_lock, flags); + +#if DEBUG_ITC_SYNC + for (i = 0; i < NUM_ROUNDS; ++i) + printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", + t[i].rt, t[i].master, t[i].diff, t[i].lat); +#endif + + printk(KERN_INFO "CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, " + "maxerr %lu cycles)\n", smp_processor_id(), master, delta, rt); +} + +/* + * Ideally sets up per-cpu profiling hooks. Doesn't do much now... + */ +static inline void smp_setup_percpu_timer(void) +{ +} + +static void +smp_callin (void) +{ + int cpuid, phys_id, itc_master; + struct cpuinfo_ia64 *last_cpuinfo, *this_cpuinfo; + extern void ia64_init_itm(void); + extern volatile int time_keeper_id; + +#ifdef CONFIG_PERFMON + extern void pfm_init_percpu(void); +#endif + + cpuid = smp_processor_id(); + phys_id = hard_smp_processor_id(); + itc_master = time_keeper_id; + + if (cpu_online(cpuid)) { + printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", + phys_id, cpuid); + BUG(); + } + + fix_b0_for_bsp(); + + /* + * numa_node_id() works after this. 
+ */ + set_numa_node(cpu_to_node_map[cpuid]); + set_numa_mem(local_memory_node(cpu_to_node_map[cpuid])); + + spin_lock(&vector_lock); + /* Setup the per cpu irq handling data structures */ + __setup_vector_irq(cpuid); + notify_cpu_starting(cpuid); + set_cpu_online(cpuid, true); + per_cpu(cpu_state, cpuid) = CPU_ONLINE; + spin_unlock(&vector_lock); + + smp_setup_percpu_timer(); + + ia64_mca_cmc_vector_setup(); /* Setup vector on AP */ + +#ifdef CONFIG_PERFMON + pfm_init_percpu(); +#endif + + local_irq_enable(); + + if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) { + /* + * Synchronize the ITC with the BP. Need to do this after irqs are + * enabled because ia64_sync_itc() calls smp_call_function_single(), which + * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls + * local_bh_enable(), which bugs out if irqs are not enabled... + */ + Dprintk("Going to syncup ITC with ITC Master.\n"); + ia64_sync_itc(itc_master); + } + + /* + * Get our bogomips. + */ + ia64_init_itm(); + + /* + * Delay calibration can be skipped if new processor is identical to the + * previous processor. + */ + last_cpuinfo = cpu_data(cpuid - 1); + this_cpuinfo = local_cpu_data; + if (last_cpuinfo->itc_freq != this_cpuinfo->itc_freq || + last_cpuinfo->proc_freq != this_cpuinfo->proc_freq || + last_cpuinfo->features != this_cpuinfo->features || + last_cpuinfo->revision != this_cpuinfo->revision || + last_cpuinfo->family != this_cpuinfo->family || + last_cpuinfo->archrev != this_cpuinfo->archrev || + last_cpuinfo->model != this_cpuinfo->model) + calibrate_delay(); + local_cpu_data->loops_per_jiffy = loops_per_jiffy; + + /* + * Allow the master to continue. + */ + cpumask_set_cpu(cpuid, &cpu_callin_map); + Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid); +} + + +/* + * Activate a secondary processor. head.S calls this. 
+ */ +int +start_secondary (void *unused) +{ + /* Early console may use I/O ports */ + ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); +#ifndef CONFIG_PRINTK_TIME + Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id()); +#endif + efi_map_pal_code(); + cpu_init(); + preempt_disable(); + smp_callin(); + + cpu_startup_entry(CPUHP_ONLINE); + return 0; +} + +static int +do_boot_cpu (int sapicid, int cpu, struct task_struct *idle) +{ + int timeout; + + task_for_booting_cpu = idle; + Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid); + + set_brendez_area(cpu); + platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0); + + /* + * Wait 10s total for the AP to start + */ + Dprintk("Waiting on callin_map ..."); + for (timeout = 0; timeout < 100000; timeout++) { + if (cpumask_test_cpu(cpu, &cpu_callin_map)) + break; /* It has booted */ + barrier(); /* Make sure we re-read cpu_callin_map */ + udelay(100); + } + Dprintk("\n"); + + if (!cpumask_test_cpu(cpu, &cpu_callin_map)) { + printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid); + ia64_cpu_to_sapicid[cpu] = -1; + set_cpu_online(cpu, false); /* was set in smp_callin() */ + return -EINVAL; + } + return 0; +} + +static int __init +decay (char *str) +{ + int ticks; + get_option (&str, &ticks); + return 1; +} + +__setup("decay=", decay); + +/* + * Initialize the logical CPU number to SAPICID mapping + */ +void __init +smp_build_cpu_map (void) +{ + int sapicid, cpu, i; + int boot_cpu_id = hard_smp_processor_id(); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + ia64_cpu_to_sapicid[cpu] = -1; + } + + ia64_cpu_to_sapicid[0] = boot_cpu_id; + init_cpu_present(cpumask_of(0)); + set_cpu_possible(0, true); + for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) { + sapicid = smp_boot_data.cpu_phys_id[i]; + if (sapicid == boot_cpu_id) + continue; + set_cpu_present(cpu, true); + set_cpu_possible(cpu, true); + ia64_cpu_to_sapicid[cpu] = sapicid; + cpu++; + } +} + +/* + * 
Cycle through the APs sending Wakeup IPIs to boot each. + */ +void __init +smp_prepare_cpus (unsigned int max_cpus) +{ + int boot_cpu_id = hard_smp_processor_id(); + + /* + * Initialize the per-CPU profiling counter/multiplier + */ + + smp_setup_percpu_timer(); + + cpumask_set_cpu(0, &cpu_callin_map); + + local_cpu_data->loops_per_jiffy = loops_per_jiffy; + ia64_cpu_to_sapicid[0] = boot_cpu_id; + + printk(KERN_INFO "Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id); + + current_thread_info()->cpu = 0; + + /* + * If SMP should be disabled, then really disable it! + */ + if (!max_cpus) { + printk(KERN_INFO "SMP mode deactivated.\n"); + init_cpu_online(cpumask_of(0)); + init_cpu_present(cpumask_of(0)); + init_cpu_possible(cpumask_of(0)); + return; + } +} + +void smp_prepare_boot_cpu(void) +{ + set_cpu_online(smp_processor_id(), true); + cpumask_set_cpu(smp_processor_id(), &cpu_callin_map); + set_numa_node(cpu_to_node_map[smp_processor_id()]); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; + paravirt_post_smp_prepare_boot_cpu(); +} + +#ifdef CONFIG_HOTPLUG_CPU +static inline void +clear_cpu_sibling_map(int cpu) +{ + int i; + + for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu)) + cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i)); + for_each_cpu(i, &cpu_core_map[cpu]) + cpumask_clear_cpu(cpu, &cpu_core_map[i]); + + per_cpu(cpu_sibling_map, cpu) = cpu_core_map[cpu] = CPU_MASK_NONE; +} + +static void +remove_siblinginfo(int cpu) +{ + int last = 0; + + if (cpu_data(cpu)->threads_per_core == 1 && + cpu_data(cpu)->cores_per_socket == 1) { + cpumask_clear_cpu(cpu, &cpu_core_map[cpu]); + cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, cpu)); + return; + } + + last = (cpumask_weight(&cpu_core_map[cpu]) == 1 ? 
1 : 0); + + /* remove it from all sibling map's */ + clear_cpu_sibling_map(cpu); +} + +extern void fixup_irqs(void); + +int migrate_platform_irqs(unsigned int cpu) +{ + int new_cpei_cpu; + struct irq_data *data = NULL; + const struct cpumask *mask; + int retval = 0; + + /* + * dont permit CPEI target to removed. + */ + if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) { + printk ("CPU (%d) is CPEI Target\n", cpu); + if (can_cpei_retarget()) { + /* + * Now re-target the CPEI to a different processor + */ + new_cpei_cpu = cpumask_any(cpu_online_mask); + mask = cpumask_of(new_cpei_cpu); + set_cpei_target_cpu(new_cpei_cpu); + data = irq_get_irq_data(ia64_cpe_irq); + /* + * Switch for now, immediately, we need to do fake intr + * as other interrupts, but need to study CPEI behaviour with + * polling before making changes. + */ + if (data && data->chip) { + data->chip->irq_disable(data); + data->chip->irq_set_affinity(data, mask, false); + data->chip->irq_enable(data); + printk ("Re-targeting CPEI to cpu %d\n", new_cpei_cpu); + } + } + if (!data) { + printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu); + retval = -EBUSY; + } + } + return retval; +} + +/* must be called with cpucontrol mutex held */ +int __cpu_disable(void) +{ + int cpu = smp_processor_id(); + + /* + * dont permit boot processor for now + */ + if (cpu == 0 && !bsp_remove_ok) { + printk ("Your platform does not support removal of BSP\n"); + return (-EBUSY); + } + + if (ia64_platform_is("sn2")) { + if (!sn_cpu_disable_allowed(cpu)) + return -EBUSY; + } + + set_cpu_online(cpu, false); + + if (migrate_platform_irqs(cpu)) { + set_cpu_online(cpu, true); + return -EBUSY; + } + + remove_siblinginfo(cpu); + fixup_irqs(); + local_flush_tlb_all(); + cpumask_clear_cpu(cpu, &cpu_callin_map); + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + unsigned int i; + + for (i = 0; i < 100; i++) { + /* They ack this in play_dead by setting CPU_DEAD */ + if (per_cpu(cpu_state, cpu) == CPU_DEAD) + { + printk 
("CPU %d is now offline\n", cpu); + return; + } + msleep(100); + } + printk(KERN_ERR "CPU %u didn't die...\n", cpu); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +void +smp_cpus_done (unsigned int dummy) +{ + int cpu; + unsigned long bogosum = 0; + + /* + * Allow the user to impress friends. + */ + + for_each_online_cpu(cpu) { + bogosum += cpu_data(cpu)->loops_per_jiffy; + } + + printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); +} + +static inline void set_cpu_sibling_map(int cpu) +{ + int i; + + for_each_online_cpu(i) { + if ((cpu_data(cpu)->socket_id == cpu_data(i)->socket_id)) { + cpumask_set_cpu(i, &cpu_core_map[cpu]); + cpumask_set_cpu(cpu, &cpu_core_map[i]); + if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) { + cpumask_set_cpu(i, + &per_cpu(cpu_sibling_map, cpu)); + cpumask_set_cpu(cpu, + &per_cpu(cpu_sibling_map, i)); + } + } + } +} + +int +__cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + int ret; + int sapicid; + + sapicid = ia64_cpu_to_sapicid[cpu]; + if (sapicid == -1) + return -EINVAL; + + /* + * Already booted cpu? not valid anymore since we dont + * do idle loop tightspin anymore. + */ + if (cpumask_test_cpu(cpu, &cpu_callin_map)) + return -EINVAL; + + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + /* Processor goes to start_secondary(), sets online flag */ + ret = do_boot_cpu(sapicid, cpu, tidle); + if (ret < 0) + return ret; + + if (cpu_data(cpu)->threads_per_core == 1 && + cpu_data(cpu)->cores_per_socket == 1) { + cpumask_set_cpu(cpu, &per_cpu(cpu_sibling_map, cpu)); + cpumask_set_cpu(cpu, &cpu_core_map[cpu]); + return 0; + } + + set_cpu_sibling_map(cpu); + + return 0; +} + +/* + * Assume that CPUs have been discovered by some platform-dependent interface. For + * SoftSDV/Lion, that would be ACPI. + * + * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP(). 
+ */
+void __init
+init_smp_config(void)
+{
+	/* start_ap is read through a two-word { fp, gp } layout */
+	struct fptr {
+		unsigned long fp;
+		unsigned long gp;
+	};
+	const struct fptr *entry = (struct fptr *) start_ap;
+	long rc;
+
+	/* Tell SAL where to drop the APs.  */
+	rc = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
+				  ia64_tpa(entry->fp), ia64_tpa(entry->gp), 0, 0, 0, 0);
+	if (rc >= 0)
+		return;
+
+	printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n",
+	       ia64_sal_strerror(rc));
+}
+
+/*
+ * identify_siblings(cpu) gets called from identify_cpu. This populates the
+ * information related to logical execution units in per_cpu_data structure.
+ *
+ * Nothing is written to @c when either firmware query fails.  When the PAL
+ * query is merely unimplemented, a single-core, single-thread package with
+ * ppid 0 is assumed; in that case (or whenever PAL reports one core and one
+ * thread) only socket_id is filled in.
+ */
+void identify_siblings(struct cpuinfo_ia64 *c)
+{
+	pal_logical_to_physical_t info;
+	u16 pltid;
+	long pal_rc, sal_rc;
+
+	pal_rc = ia64_pal_logical_to_phys(-1, &info);
+	if (pal_rc == PAL_STATUS_UNIMPLEMENTED) {
+		info.overview_ppid = 0;
+		info.overview_cpp  = 1;
+		info.overview_tpc  = 1;
+	} else if (pal_rc != PAL_STATUS_SUCCESS) {
+		printk(KERN_ERR
+		       "ia64_pal_logical_to_phys failed with %ld\n",
+		       pal_rc);
+		return;
+	}
+
+	sal_rc = ia64_sal_physical_id_info(&pltid);
+	if (sal_rc != PAL_STATUS_SUCCESS) {
+		/* an unimplemented call is not worth a message */
+		if (sal_rc != PAL_STATUS_UNIMPLEMENTED)
+			printk(KERN_ERR
+			       "ia64_sal_pltid failed with %ld\n",
+			       sal_rc);
+		return;
+	}
+
+	c->socket_id = (pltid << 8) | info.overview_ppid;
+
+	if (info.overview_cpp != 1 || info.overview_tpc != 1) {
+		c->cores_per_socket = info.overview_cpp;
+		c->threads_per_core = info.overview_tpc;
+		c->num_log = info.overview_num_log;
+
+		c->core_id = info.log1_cid;
+		c->thread_id = info.log1_tid;
+	}
+}
+
+/*
+ * returns non zero, if multi-threading is enabled
+ * on at least one physical package. Due to hotplug cpu
+ * and (maxcpus=), all threads may not necessarily be enabled
+ * even though the processor supports multi-threading.
+ */ +int is_multithreading_enabled(void) +{ + int i, j; + + for_each_present_cpu(i) { + for_each_present_cpu(j) { + if (j == i) + continue; + if ((cpu_data(j)->socket_id == cpu_data(i)->socket_id)) { + if (cpu_data(j)->core_id == cpu_data(i)->core_id) + return 1; + } + } + } + return 0; +} +EXPORT_SYMBOL_GPL(is_multithreading_enabled); diff --git a/kernel/arch/ia64/kernel/stacktrace.c b/kernel/arch/ia64/kernel/stacktrace.c new file mode 100644 index 000000000..5af2783a8 --- /dev/null +++ b/kernel/arch/ia64/kernel/stacktrace.c @@ -0,0 +1,39 @@ +/* + * arch/ia64/kernel/stacktrace.c + * + * Stack trace management functions + * + */ +#include +#include +#include + +static void +ia64_do_save_stack(struct unw_frame_info *info, void *arg) +{ + struct stack_trace *trace = arg; + unsigned long ip; + int skip = trace->skip; + + trace->nr_entries = 0; + do { + unw_get_ip(info, &ip); + if (ip == 0) + break; + if (skip == 0) { + trace->entries[trace->nr_entries++] = ip; + if (trace->nr_entries == trace->max_entries) + break; + } else + skip--; + } while (unw_unwind(info) >= 0); +} + +/* + * Save stack-backtrace addresses into a stack_trace buffer. + */ +void save_stack_trace(struct stack_trace *trace) +{ + unw_init_running(ia64_do_save_stack, trace); +} +EXPORT_SYMBOL(save_stack_trace); diff --git a/kernel/arch/ia64/kernel/sys_ia64.c b/kernel/arch/ia64/kernel/sys_ia64.c new file mode 100644 index 000000000..41e33f84c --- /dev/null +++ b/kernel/arch/ia64/kernel/sys_ia64.c @@ -0,0 +1,183 @@ +/* + * This file contains various system calls that have different calling + * conventions on different platforms. + * + * Copyright (C) 1999-2000, 2002-2003, 2005 Hewlett-Packard Co + * David Mosberger-Tang + */ +#include +#include +#include +#include +#include +#include +#include /* doh, must come after sched.h... 
*/ +#include +#include +#include +#include + +#include +#include + +unsigned long +arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + long map_shared = (flags & MAP_SHARED); + unsigned long align_mask = 0; + struct mm_struct *mm = current->mm; + struct vm_unmapped_area_info info; + + if (len > RGN_MAP_LIMIT) + return -ENOMEM; + + /* handle fixed mapping: prevent overlap with huge pages */ + if (flags & MAP_FIXED) { + if (is_hugepage_only_range(mm, addr, len)) + return -EINVAL; + return addr; + } + +#ifdef CONFIG_HUGETLB_PAGE + if (REGION_NUMBER(addr) == RGN_HPAGE) + addr = 0; +#endif + if (!addr) + addr = TASK_UNMAPPED_BASE; + + if (map_shared && (TASK_SIZE > 0xfffffffful)) + /* + * For 64-bit tasks, align shared segments to 1MB to avoid potential + * performance penalty due to virtual aliasing (see ASDM). For 32-bit + * tasks, we prefer to avoid exhausting the address space too quickly by + * limiting alignment to a single page. + */ + align_mask = PAGE_MASK & (SHMLBA - 1); + + info.flags = 0; + info.length = len; + info.low_limit = addr; + info.high_limit = TASK_SIZE; + info.align_mask = align_mask; + info.align_offset = 0; + return vm_unmapped_area(&info); +} + +asmlinkage long +ia64_getpriority (int which, int who) +{ + long prio; + + prio = sys_getpriority(which, who); + if (prio >= 0) { + force_successful_syscall_return(); + prio = 20 - prio; + } + return prio; +} + +/* XXX obsolete, but leave it here until the old libc is gone... */ +asmlinkage unsigned long +sys_getpagesize (void) +{ + return PAGE_SIZE; +} + +asmlinkage unsigned long +ia64_brk (unsigned long brk) +{ + unsigned long retval = sys_brk(brk); + force_successful_syscall_return(); + return retval; +} + +/* + * On IA-64, we return the two file descriptors in ret0 and ret1 (r8 + * and r9) as this is faster than doing a copy_to_user(). 
+ */ +asmlinkage long +sys_ia64_pipe (void) +{ + struct pt_regs *regs = task_pt_regs(current); + int fd[2]; + int retval; + + retval = do_pipe_flags(fd, 0); + if (retval) + goto out; + retval = fd[0]; + regs->r9 = fd[1]; + out: + return retval; +} + +int ia64_mmap_check(unsigned long addr, unsigned long len, + unsigned long flags) +{ + unsigned long roff; + + /* + * Don't permit mappings into unmapped space, the virtual page table + * of a region, or across a region boundary. Note: RGN_MAP_LIMIT is + * equal to 2^n-PAGE_SIZE (for some integer n <= 61) and len > 0. + */ + roff = REGION_OFFSET(addr); + if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len))) + return -EINVAL; + return 0; +} + +/* + * mmap2() is like mmap() except that the offset is expressed in units + * of PAGE_SIZE (instead of bytes). This allows to mmap2() (pieces + * of) files that are larger than the address space of the CPU. + */ +asmlinkage unsigned long +sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff) +{ + addr = sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); + if (!IS_ERR((void *) addr)) + force_successful_syscall_return(); + return addr; +} + +asmlinkage unsigned long +sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, long off) +{ + if (offset_in_page(off) != 0) + return -EINVAL; + + addr = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + if (!IS_ERR((void *) addr)) + force_successful_syscall_return(); + return addr; +} + +asmlinkage unsigned long +ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, + unsigned long new_addr) +{ + addr = sys_mremap(addr, old_len, new_len, flags, new_addr); + if (!IS_ERR((void *) addr)) + force_successful_syscall_return(); + return addr; +} + +#ifndef CONFIG_PCI + +asmlinkage long +sys_pciconfig_read (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, + void *buf) +{ + return -ENOSYS; +} + 
+asmlinkage long +sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, + void *buf) +{ + return -ENOSYS; +} + +#endif /* CONFIG_PCI */ diff --git a/kernel/arch/ia64/kernel/time.c b/kernel/arch/ia64/kernel/time.c new file mode 100644 index 000000000..9a0104a38 --- /dev/null +++ b/kernel/arch/ia64/kernel/time.c @@ -0,0 +1,456 @@ +/* + * linux/arch/ia64/kernel/time.c + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger + * Copyright (C) 1999 Don Dugger + * Copyright (C) 1999-2000 VA Linux Systems + * Copyright (C) 1999-2000 Walt Drummond + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "fsyscall_gtod_data.h" + +static cycle_t itc_get_cycles(struct clocksource *cs); + +struct fsyscall_gtod_data_t fsyscall_gtod_data; + +struct itc_jitter_data_t itc_jitter_data; + +volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */ + +#ifdef CONFIG_IA64_DEBUG_IRQ + +unsigned long last_cli_ip; +EXPORT_SYMBOL(last_cli_ip); + +#endif + +#ifdef CONFIG_PARAVIRT +/* We need to define a real function for sched_clock, to override the + weak default version */ +unsigned long long sched_clock(void) +{ + return paravirt_sched_clock(); +} +#endif + +#ifdef CONFIG_PARAVIRT +static void +paravirt_clocksource_resume(struct clocksource *cs) +{ + if (pv_time_ops.clocksource_resume) + pv_time_ops.clocksource_resume(); +} +#endif + +static struct clocksource clocksource_itc = { + .name = "itc", + .rating = 350, + .read = itc_get_cycles, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +#ifdef CONFIG_PARAVIRT + .resume = paravirt_clocksource_resume, +#endif +}; +static struct clocksource *itc_clocksource; + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + +#include + +extern cputime_t cycle_to_cputime(u64 cyc); + +void 
vtime_account_user(struct task_struct *tsk) +{ + cputime_t delta_utime; + struct thread_info *ti = task_thread_info(tsk); + + if (ti->ac_utime) { + delta_utime = cycle_to_cputime(ti->ac_utime); + account_user_time(tsk, delta_utime, delta_utime); + ti->ac_utime = 0; + } +} + +/* + * Called from the context switch with interrupts disabled, to charge all + * accumulated times to the current process, and to prepare accounting on + * the next process. + */ +void arch_vtime_task_switch(struct task_struct *prev) +{ + struct thread_info *pi = task_thread_info(prev); + struct thread_info *ni = task_thread_info(current); + + pi->ac_stamp = ni->ac_stamp; + ni->ac_stime = ni->ac_utime = 0; +} + +/* + * Account time for a transition between system, hard irq or soft irq state. + * Note that this function is called with interrupts enabled. + */ +static cputime_t vtime_delta(struct task_struct *tsk) +{ + struct thread_info *ti = task_thread_info(tsk); + cputime_t delta_stime; + __u64 now; + + WARN_ON_ONCE(!irqs_disabled()); + + now = ia64_get_itc(); + + delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); + ti->ac_stime = 0; + ti->ac_stamp = now; + + return delta_stime; +} + +void vtime_account_system(struct task_struct *tsk) +{ + cputime_t delta = vtime_delta(tsk); + + account_system_time(tsk, 0, delta, delta); +} +EXPORT_SYMBOL_GPL(vtime_account_system); + +void vtime_account_idle(struct task_struct *tsk) +{ + account_idle_time(vtime_delta(tsk)); +} + +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +static irqreturn_t +timer_interrupt (int irq, void *dev_id) +{ + unsigned long new_itm; + + if (cpu_is_offline(smp_processor_id())) { + return IRQ_HANDLED; + } + + platform_timer_interrupt(irq, dev_id); + + new_itm = local_cpu_data->itm_next; + + if (!time_after(ia64_get_itc(), new_itm)) + printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n", + ia64_get_itc(), new_itm); + + profile_tick(CPU_PROFILING); + + if 
(paravirt_do_steal_accounting(&new_itm)) + goto skip_process_time_accounting; + + while (1) { + update_process_times(user_mode(get_irq_regs())); + + new_itm += local_cpu_data->itm_delta; + + if (smp_processor_id() == time_keeper_id) + xtime_update(1); + + local_cpu_data->itm_next = new_itm; + + if (time_after(new_itm, ia64_get_itc())) + break; + + /* + * Allow IPIs to interrupt the timer loop. + */ + local_irq_enable(); + local_irq_disable(); + } + +skip_process_time_accounting: + + do { + /* + * If we're too close to the next clock tick for + * comfort, we increase the safety margin by + * intentionally dropping the next tick(s). We do NOT + * update itm.next because that would force us to call + * xtime_update() which in turn would let our clock run + * too fast (with the potentially devastating effect + * of losing monotony of time). + */ + while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2)) + new_itm += local_cpu_data->itm_delta; + ia64_set_itm(new_itm); + /* double check, in case we got hit by a (slow) PMI: */ + } while (time_after_eq(ia64_get_itc(), new_itm)); + return IRQ_HANDLED; +} + +/* + * Encapsulate access to the itm structure for SMP. 
+ */ +void +ia64_cpu_local_tick (void) +{ + int cpu = smp_processor_id(); + unsigned long shift = 0, delta; + + /* arrange for the cycle counter to generate a timer interrupt: */ + ia64_set_itv(IA64_TIMER_VECTOR); + + delta = local_cpu_data->itm_delta; + /* + * Stagger the timer tick for each CPU so they don't occur all at (almost) the + * same time: + */ + if (cpu) { + unsigned long hi = 1UL << ia64_fls(cpu); + shift = (2*(cpu - hi) + 1) * delta/hi/2; + } + local_cpu_data->itm_next = ia64_get_itc() + delta + shift; + ia64_set_itm(local_cpu_data->itm_next); +} + +static int nojitter; + +static int __init nojitter_setup(char *str) +{ + nojitter = 1; + printk("Jitter checking for ITC timers disabled\n"); + return 1; +} + +__setup("nojitter", nojitter_setup); + + +void ia64_init_itm(void) +{ + unsigned long platform_base_freq, itc_freq; + struct pal_freq_ratio itc_ratio, proc_ratio; + long status, platform_base_drift, itc_drift; + + /* + * According to SAL v2.6, we need to use a SAL call to determine the platform base + * frequency and then a PAL call to determine the frequency ratio between the ITC + * and the base frequency. 
+ */ + status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, + &platform_base_freq, &platform_base_drift); + if (status != 0) { + printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status)); + } else { + status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio); + if (status != 0) + printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status); + } + if (status != 0) { + /* invent "random" values */ + printk(KERN_ERR + "SAL/PAL failed to obtain frequency info---inventing reasonable values\n"); + platform_base_freq = 100000000; + platform_base_drift = -1; /* no drift info */ + itc_ratio.num = 3; + itc_ratio.den = 1; + } + if (platform_base_freq < 40000000) { + printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n", + platform_base_freq); + platform_base_freq = 75000000; + platform_base_drift = -1; + } + if (!proc_ratio.den) + proc_ratio.den = 1; /* avoid division by zero */ + if (!itc_ratio.den) + itc_ratio.den = 1; /* avoid division by zero */ + + itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den; + + local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ; + printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%u/%u, " + "ITC freq=%lu.%03luMHz", smp_processor_id(), + platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000, + itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000); + + if (platform_base_drift != -1) { + itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den; + printk("+/-%ldppm\n", itc_drift); + } else { + itc_drift = -1; + printk("\n"); + } + + local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den; + local_cpu_data->itc_freq = itc_freq; + local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC; + local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<itc_freq); + itc_clocksource = &clocksource_itc; + } +} + +static cycle_t itc_get_cycles(struct clocksource *cs) +{ + unsigned long lcycle, now, ret; + + if 
(!itc_jitter_data.itc_jitter) + return get_cycles(); + + lcycle = itc_jitter_data.itc_lastcycle; + now = get_cycles(); + if (lcycle && time_after(lcycle, now)) + return lcycle; + + /* + * Keep track of the last timer value returned. + * In an SMP environment, you could lose out in contention of + * cmpxchg. If so, your cmpxchg returns new value which the + * winner of contention updated to. Use the new value instead. + */ + ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, now); + if (unlikely(ret != lcycle)) + return ret; + + return now; +} + + +static struct irqaction timer_irqaction = { + .handler = timer_interrupt, + .flags = IRQF_IRQPOLL, + .name = "timer" +}; + +void read_persistent_clock(struct timespec *ts) +{ + efi_gettimeofday(ts); +} + +void __init +time_init (void) +{ + register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction); + ia64_init_itm(); +} + +/* + * Generic udelay assumes that if preemption is allowed and the thread + * migrates to another CPU, that the ITC values are synchronized across + * all CPUs. 
+ */ +static void +ia64_itc_udelay (unsigned long usecs) +{ + unsigned long start = ia64_get_itc(); + unsigned long end = start + usecs*local_cpu_data->cyc_per_usec; + + while (time_before(ia64_get_itc(), end)) + cpu_relax(); +} + +void (*ia64_udelay)(unsigned long usecs) = &ia64_itc_udelay; + +void +udelay (unsigned long usecs) +{ + (*ia64_udelay)(usecs); +} +EXPORT_SYMBOL(udelay); + +/* IA64 doesn't cache the timezone */ +void update_vsyscall_tz(void) +{ +} + +void update_vsyscall_old(struct timespec *wall, struct timespec *wtm, + struct clocksource *c, u32 mult, cycle_t cycle_last) +{ + write_seqcount_begin(&fsyscall_gtod_data.seq); + + /* copy fsyscall clock data */ + fsyscall_gtod_data.clk_mask = c->mask; + fsyscall_gtod_data.clk_mult = mult; + fsyscall_gtod_data.clk_shift = c->shift; + fsyscall_gtod_data.clk_fsys_mmio = c->archdata.fsys_mmio; + fsyscall_gtod_data.clk_cycle_last = cycle_last; + + /* copy kernel time structures */ + fsyscall_gtod_data.wall_time.tv_sec = wall->tv_sec; + fsyscall_gtod_data.wall_time.tv_nsec = wall->tv_nsec; + fsyscall_gtod_data.monotonic_time.tv_sec = wtm->tv_sec + + wall->tv_sec; + fsyscall_gtod_data.monotonic_time.tv_nsec = wtm->tv_nsec + + wall->tv_nsec; + + /* normalize */ + while (fsyscall_gtod_data.monotonic_time.tv_nsec >= NSEC_PER_SEC) { + fsyscall_gtod_data.monotonic_time.tv_nsec -= NSEC_PER_SEC; + fsyscall_gtod_data.monotonic_time.tv_sec++; + } + + write_seqcount_end(&fsyscall_gtod_data.seq); +} + diff --git a/kernel/arch/ia64/kernel/topology.c b/kernel/arch/ia64/kernel/topology.c new file mode 100644 index 000000000..c01fe8991 --- /dev/null +++ b/kernel/arch/ia64/kernel/topology.c @@ -0,0 +1,470 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ *
+ * This file contains NUMA specific variables and functions which can
+ * be split away from DISCONTIGMEM and are used on NUMA machines with
+ * contiguous memory.
+ * 		2002/08/07 Erich Focht
+ * Populate cpu entries in sysfs for non-numa systems as well
+ *  	Intel Corporation - Ashok Raj
+ * 02/27/2006 Zhang, Yanmin
+ *	Populate cpu cache entries in sysfs for cpu cache info
+ */
+
+/*
+ * NOTE(review): the header names are missing from the #include lines
+ * below in this copy of the patch; restore them from the upstream file
+ * before attempting to build.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Per-CPU sysfs objects, NR_CPUS entries, allocated in topology_init(). */
+static struct ia64_cpu *sysfs_cpus;
+
+/*
+ * arch_fix_phys_package_id - on SMP, record @slot as the socket id of
+ * CPU @num if none has been assigned yet (socket_id == -1).  Compiles
+ * to an empty function without CONFIG_SMP.
+ */
+void arch_fix_phys_package_id(int num, u32 slot)
+{
+#ifdef CONFIG_SMP
+	if (cpu_data(num)->socket_id == -1)
+		cpu_data(num)->socket_id = slot;
+#endif
+}
+EXPORT_SYMBOL_GPL(arch_fix_phys_package_id);
+
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * arch_register_cpu - register CPU @num with the cpu subsystem.
+ *
+ * With CONFIG_ACPI the CPU is first marked hotpluggable when allowed
+ * (see the comment below) and mapped to its node.  Returns the result
+ * of register_cpu().
+ */
+int __ref arch_register_cpu(int num)
+{
+#ifdef CONFIG_ACPI
+	/*
+	 * If CPEI can be re-targeted or if this is not
+	 * CPEI target, then it is hotpluggable
+	 */
+	if (can_cpei_retarget() || !is_cpu_cpei_target(num))
+		sysfs_cpus[num].cpu.hotpluggable = 1;
+	map_cpu_to_node(num, node_cpuid[num].nid);
+#endif
+	return register_cpu(&sysfs_cpus[num].cpu, num);
+}
+EXPORT_SYMBOL(arch_register_cpu);
+
+/*
+ * arch_unregister_cpu - undo arch_register_cpu(): unregister the cpu
+ * device and, with CONFIG_ACPI, remove the CPU from its node mapping.
+ */
+void __ref arch_unregister_cpu(int num)
+{
+	unregister_cpu(&sysfs_cpus[num].cpu);
+#ifdef CONFIG_ACPI
+	unmap_cpu_from_node(num, cpu_to_node(num));
+#endif
+}
+EXPORT_SYMBOL(arch_unregister_cpu);
+#else
+/* Non-hotplug variant: plain registration, only needed during init. */
+static int __init arch_register_cpu(int num)
+{
+	return register_cpu(&sysfs_cpus[num].cpu, num);
+}
+#endif /*CONFIG_HOTPLUG_CPU*/
+
+
+/*
+ * topology_init - register online nodes (CONFIG_NUMA only) and all
+ * present CPUs in sysfs.
+ *
+ * Returns 0 on success or the first error reported by
+ * register_one_node()/arch_register_cpu().  Nothing registered before
+ * the failure is unwound.  Panics if the sysfs_cpus array cannot be
+ * allocated.
+ */
+static int __init topology_init(void)
+{
+	int i, err = 0;
+
+#ifdef CONFIG_NUMA
+	/*
+	 * MCD - Do we want to register all ONLINE nodes, or all POSSIBLE nodes?
+	 */
+	for_each_online_node(i) {
+		if ((err = register_one_node(i)))
+			goto out;
+	}
+#endif
+
+	/* kcalloc() zeroes like kzalloc() and checks the count * size product */
+	sysfs_cpus = kcalloc(NR_CPUS, sizeof(*sysfs_cpus), GFP_KERNEL);
+	if (!sysfs_cpus)
+		panic("kzalloc in topology_init failed - NR_CPUS too big?");
+
+	for_each_present_cpu(i) {
+		if((err = arch_register_cpu(i)))
+			goto out;
+	}
+out:
+	return err;
+}
+
+subsys_initcall(topology_init);
+
+
+/*
+ * Export cpu cache information through sysfs
+ */
+
+/*
+ * String tables used to pretty-print cache attributes.
+ * cache_types is indexed by (type + pcci_unified) in show_type();
+ * cache_mattrib is indexed by pcci_cache_attr in show_attributes().
+ */
+static const char *cache_types[] = {
+	"",			/* not used */
+	"Instruction",
+	"Data",
+	"Unified"	/* unified */
+};
+
+static const char *cache_mattrib[]={
+	"WriteThrough",
+	"WriteBack",
+	"",		/* reserved */
+	""		/* reserved */
+};
+
+/* One cache leaf of one CPU, exposed as a sysfs kobject. */
+struct cache_info {
+	pal_cache_config_info_t	cci;	/* configuration as returned by PAL */
+	cpumask_t shared_cpu_map;	/* CPUs sharing this cache */
+	int level;			/* 1-based cache level */
+	int type;			/* PAL cache type used for the query (2 or 1) */
+	struct kobject kobj;
+};
+
+/* All cache leaves of one CPU plus the parent "cache" kobject. */
+struct cpu_cache_info {
+	struct cache_info *cache_leaves;
+	int	num_cache_leaves;
+	struct kobject kobj;
+};
+
+static struct cpu_cache_info	all_cpu_cache_info[NR_CPUS];
+/* Pointer to leaf y of CPU x (a struct cache_info *, despite the name). */
+#define LEAF_KOBJECT_PTR(x,y)    (&all_cpu_cache_info[x].cache_leaves[y])
+
+#ifdef CONFIG_SMP
+/*
+ * cache_shared_cpu_map_setup - fill this_leaf->shared_cpu_map with the
+ * CPUs that share the cache described by @this_leaf with @cpu.
+ *
+ * A CPU with at most one thread per core and one core per socket shares
+ * with nobody, so only its own bit is set.  Otherwise PAL is asked for
+ * each sharing logical processor in turn, and every possible CPU in the
+ * same socket whose core/thread id matches is added.  The map is left
+ * untouched if the first PAL query fails.
+ */
+static void cache_shared_cpu_map_setup(unsigned int cpu,
+		struct cache_info * this_leaf)
+{
+	pal_cache_shared_info_t	csi;
+	int num_shared, i = 0;
+	unsigned int j;
+
+	if (cpu_data(cpu)->threads_per_core <= 1 &&
+		cpu_data(cpu)->cores_per_socket <= 1) {
+		cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
+		return;
+	}
+
+	if (ia64_pal_cache_shared_info(this_leaf->level,
+					this_leaf->type,
+					0,
+					&csi) != PAL_STATUS_SUCCESS)
+		return;
+
+	num_shared = (int) csi.num_shared;
+	do {
+		for_each_possible_cpu(j)
+			if (cpu_data(cpu)->socket_id == cpu_data(j)->socket_id
+				&& cpu_data(j)->core_id == csi.log1_cid
+				&& cpu_data(j)->thread_id == csi.log1_tid)
+				cpumask_set_cpu(j, &this_leaf->shared_cpu_map);
+
+		i++;
+	} while (i < num_shared &&
+		ia64_pal_cache_shared_info(this_leaf->level,
+				this_leaf->type,
+				i,
+				&csi) == PAL_STATUS_SUCCESS);
+}
+#else +static void cache_shared_cpu_map_setup(unsigned int cpu, + struct cache_info * this_leaf) +{ + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); + return; +} +#endif + +static ssize_t show_coherency_line_size(struct cache_info *this_leaf, + char *buf) +{ + return sprintf(buf, "%u\n", 1 << this_leaf->cci.pcci_line_size); +} + +static ssize_t show_ways_of_associativity(struct cache_info *this_leaf, + char *buf) +{ + return sprintf(buf, "%u\n", this_leaf->cci.pcci_assoc); +} + +static ssize_t show_attributes(struct cache_info *this_leaf, char *buf) +{ + return sprintf(buf, + "%s\n", + cache_mattrib[this_leaf->cci.pcci_cache_attr]); +} + +static ssize_t show_size(struct cache_info *this_leaf, char *buf) +{ + return sprintf(buf, "%uK\n", this_leaf->cci.pcci_cache_size / 1024); +} + +static ssize_t show_number_of_sets(struct cache_info *this_leaf, char *buf) +{ + unsigned number_of_sets = this_leaf->cci.pcci_cache_size; + number_of_sets /= this_leaf->cci.pcci_assoc; + number_of_sets /= 1 << this_leaf->cci.pcci_line_size; + + return sprintf(buf, "%u\n", number_of_sets); +} + +static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf) +{ + cpumask_t shared_cpu_map; + + cpumask_and(&shared_cpu_map, + &this_leaf->shared_cpu_map, cpu_online_mask); + return scnprintf(buf, PAGE_SIZE, "%*pb\n", + cpumask_pr_args(&shared_cpu_map)); +} + +static ssize_t show_type(struct cache_info *this_leaf, char *buf) +{ + int type = this_leaf->type + this_leaf->cci.pcci_unified; + return sprintf(buf, "%s\n", cache_types[type]); +} + +static ssize_t show_level(struct cache_info *this_leaf, char *buf) +{ + return sprintf(buf, "%u\n", this_leaf->level); +} + +struct cache_attr { + struct attribute attr; + ssize_t (*show)(struct cache_info *, char *); + ssize_t (*store)(struct cache_info *, const char *, size_t count); +}; + +#ifdef define_one_ro + #undef define_one_ro +#endif +#define define_one_ro(_name) \ + static struct cache_attr _name = \ +__ATTR(_name, 0444, 
show_##_name, NULL) + +define_one_ro(level); +define_one_ro(type); +define_one_ro(coherency_line_size); +define_one_ro(ways_of_associativity); +define_one_ro(size); +define_one_ro(number_of_sets); +define_one_ro(shared_cpu_map); +define_one_ro(attributes); + +static struct attribute * cache_default_attrs[] = { + &type.attr, + &level.attr, + &coherency_line_size.attr, + &ways_of_associativity.attr, + &attributes.attr, + &size.attr, + &number_of_sets.attr, + &shared_cpu_map.attr, + NULL +}; + +#define to_object(k) container_of(k, struct cache_info, kobj) +#define to_attr(a) container_of(a, struct cache_attr, attr) + +static ssize_t ia64_cache_show(struct kobject * kobj, struct attribute * attr, char * buf) +{ + struct cache_attr *fattr = to_attr(attr); + struct cache_info *this_leaf = to_object(kobj); + ssize_t ret; + + ret = fattr->show ? fattr->show(this_leaf, buf) : 0; + return ret; +} + +static const struct sysfs_ops cache_sysfs_ops = { + .show = ia64_cache_show +}; + +static struct kobj_type cache_ktype = { + .sysfs_ops = &cache_sysfs_ops, + .default_attrs = cache_default_attrs, +}; + +static struct kobj_type cache_ktype_percpu_entry = { + .sysfs_ops = &cache_sysfs_ops, +}; + +static void cpu_cache_sysfs_exit(unsigned int cpu) +{ + kfree(all_cpu_cache_info[cpu].cache_leaves); + all_cpu_cache_info[cpu].cache_leaves = NULL; + all_cpu_cache_info[cpu].num_cache_leaves = 0; + memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject)); + return; +} + +static int cpu_cache_sysfs_init(unsigned int cpu) +{ + unsigned long i, levels, unique_caches; + pal_cache_config_info_t cci; + int j; + long status; + struct cache_info *this_cache; + int num_cache_leaves = 0; + + if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) { + printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status); + return -1; + } + + this_cache=kzalloc(sizeof(struct cache_info)*unique_caches, + GFP_KERNEL); + if (this_cache == NULL) + return -ENOMEM; + + for (i=0; i < levels; i++) { + 
for (j=2; j >0 ; j--) { + if ((status=ia64_pal_cache_config_info(i,j, &cci)) != + PAL_STATUS_SUCCESS) + continue; + + this_cache[num_cache_leaves].cci = cci; + this_cache[num_cache_leaves].level = i + 1; + this_cache[num_cache_leaves].type = j; + + cache_shared_cpu_map_setup(cpu, + &this_cache[num_cache_leaves]); + num_cache_leaves ++; + } + } + + all_cpu_cache_info[cpu].cache_leaves = this_cache; + all_cpu_cache_info[cpu].num_cache_leaves = num_cache_leaves; + + memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject)); + + return 0; +} + +/* Add cache interface for CPU device */ +static int cache_add_dev(struct device *sys_dev) +{ + unsigned int cpu = sys_dev->id; + unsigned long i, j; + struct cache_info *this_object; + int retval = 0; + cpumask_t oldmask; + + if (all_cpu_cache_info[cpu].kobj.parent) + return 0; + + oldmask = current->cpus_allowed; + retval = set_cpus_allowed_ptr(current, cpumask_of(cpu)); + if (unlikely(retval)) + return retval; + + retval = cpu_cache_sysfs_init(cpu); + set_cpus_allowed_ptr(current, &oldmask); + if (unlikely(retval < 0)) + return retval; + + retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj, + &cache_ktype_percpu_entry, &sys_dev->kobj, + "%s", "cache"); + if (unlikely(retval < 0)) { + cpu_cache_sysfs_exit(cpu); + return retval; + } + + for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) { + this_object = LEAF_KOBJECT_PTR(cpu,i); + retval = kobject_init_and_add(&(this_object->kobj), + &cache_ktype, + &all_cpu_cache_info[cpu].kobj, + "index%1lu", i); + if (unlikely(retval)) { + for (j = 0; j < i; j++) { + kobject_put(&(LEAF_KOBJECT_PTR(cpu,j)->kobj)); + } + kobject_put(&all_cpu_cache_info[cpu].kobj); + cpu_cache_sysfs_exit(cpu); + return retval; + } + kobject_uevent(&(this_object->kobj), KOBJ_ADD); + } + kobject_uevent(&all_cpu_cache_info[cpu].kobj, KOBJ_ADD); + return retval; +} + +/* Remove cache interface for CPU device */ +static int cache_remove_dev(struct device *sys_dev) +{ + unsigned int cpu = 
sys_dev->id; + unsigned long i; + + for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) + kobject_put(&(LEAF_KOBJECT_PTR(cpu,i)->kobj)); + + if (all_cpu_cache_info[cpu].kobj.parent) { + kobject_put(&all_cpu_cache_info[cpu].kobj); + memset(&all_cpu_cache_info[cpu].kobj, + 0, + sizeof(struct kobject)); + } + + cpu_cache_sysfs_exit(cpu); + + return 0; +} + +/* + * When a cpu is hot-plugged, do a check and initiate + * cache kobject if necessary + */ +static int cache_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct device *sys_dev; + + sys_dev = get_cpu_device(cpu); + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + cache_add_dev(sys_dev); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + cache_remove_dev(sys_dev); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block cache_cpu_notifier = +{ + .notifier_call = cache_cpu_callback +}; + +static int __init cache_sysfs_init(void) +{ + int i; + + cpu_notifier_register_begin(); + + for_each_online_cpu(i) { + struct device *sys_dev = get_cpu_device((unsigned int)i); + cache_add_dev(sys_dev); + } + + __register_hotcpu_notifier(&cache_cpu_notifier); + + cpu_notifier_register_done(); + + return 0; +} + +device_initcall(cache_sysfs_init); + diff --git a/kernel/arch/ia64/kernel/traps.c b/kernel/arch/ia64/kernel/traps.c new file mode 100644 index 000000000..6f7d4a4dc --- /dev/null +++ b/kernel/arch/ia64/kernel/traps.c @@ -0,0 +1,652 @@ +/* + * Architecture-specific trap handling. 
+ * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * 05/12/00 grao : added isr in siginfo for SIGFPE + */ + +#include +#include +#include +#include +#include /* For unblank_screen() */ +#include /* for EXPORT_SYMBOL */ +#include +#include +#include /* for ssleep() */ +#include + +#include +#include +#include +#include +#include + +fpswa_interface_t *fpswa_interface; +EXPORT_SYMBOL(fpswa_interface); + +void __init +trap_init (void) +{ + if (ia64_boot_param->fpswa) + /* FPSWA fixup: make the interface pointer a kernel virtual address: */ + fpswa_interface = __va(ia64_boot_param->fpswa); +} + +int +die (const char *str, struct pt_regs *regs, long err) +{ + static struct { + spinlock_t lock; + u32 lock_owner; + int lock_owner_depth; + } die = { + .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock_owner = -1, + .lock_owner_depth = 0 + }; + static int die_counter; + int cpu = get_cpu(); + + if (die.lock_owner != cpu) { + console_verbose(); + spin_lock_irq(&die.lock); + die.lock_owner = cpu; + die.lock_owner_depth = 0; + bust_spinlocks(1); + } + put_cpu(); + + if (++die.lock_owner_depth < 3) { + printk("%s[%d]: %s %ld [%d]\n", + current->comm, task_pid_nr(current), str, err, ++die_counter); + if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) + != NOTIFY_STOP) + show_regs(regs); + else + regs = NULL; + } else + printk(KERN_ERR "Recursive die() failure, output suppressed\n"); + + bust_spinlocks(0); + die.lock_owner = -1; + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + spin_unlock_irq(&die.lock); + + if (!regs) + return 1; + + if (panic_on_oops) + panic("Fatal exception"); + + do_exit(SIGSEGV); + return 0; +} + +int +die_if_kernel (char *str, struct pt_regs *regs, long err) +{ + if (!user_mode(regs)) + return die(str, regs, err); + return 0; +} + +void +__kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs) +{ + siginfo_t siginfo; + int sig, code; + + /* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */ 
+ siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); + siginfo.si_imm = break_num; + siginfo.si_flags = 0; /* clear __ISR_VALID */ + siginfo.si_isr = 0; + + switch (break_num) { + case 0: /* unknown error (used by GCC for __builtin_abort()) */ + if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP) + == NOTIFY_STOP) + return; + if (die_if_kernel("bugcheck!", regs, break_num)) + return; + sig = SIGILL; code = ILL_ILLOPC; + break; + + case 1: /* integer divide by zero */ + sig = SIGFPE; code = FPE_INTDIV; + break; + + case 2: /* integer overflow */ + sig = SIGFPE; code = FPE_INTOVF; + break; + + case 3: /* range check/bounds check */ + sig = SIGFPE; code = FPE_FLTSUB; + break; + + case 4: /* null pointer dereference */ + sig = SIGSEGV; code = SEGV_MAPERR; + break; + + case 5: /* misaligned data */ + sig = SIGSEGV; code = BUS_ADRALN; + break; + + case 6: /* decimal overflow */ + sig = SIGFPE; code = __FPE_DECOVF; + break; + + case 7: /* decimal divide by zero */ + sig = SIGFPE; code = __FPE_DECDIV; + break; + + case 8: /* packed decimal error */ + sig = SIGFPE; code = __FPE_DECERR; + break; + + case 9: /* invalid ASCII digit */ + sig = SIGFPE; code = __FPE_INVASC; + break; + + case 10: /* invalid decimal digit */ + sig = SIGFPE; code = __FPE_INVDEC; + break; + + case 11: /* paragraph stack overflow */ + sig = SIGSEGV; code = __SEGV_PSTKOVF; + break; + + case 0x3f000 ... 
0x3ffff: /* bundle-update in progress */ + sig = SIGILL; code = __ILL_BNDMOD; + break; + + default: + if ((break_num < 0x40000 || break_num > 0x100000) + && die_if_kernel("Bad break", regs, break_num)) + return; + + if (break_num < 0x80000) { + sig = SIGILL; code = __ILL_BREAK; + } else { + if (notify_die(DIE_BREAK, "bad break", regs, break_num, TRAP_BRKPT, SIGTRAP) + == NOTIFY_STOP) + return; + sig = SIGTRAP; code = TRAP_BRKPT; + } + } + siginfo.si_signo = sig; + siginfo.si_errno = 0; + siginfo.si_code = code; + force_sig_info(sig, &siginfo, current); +} + +/* + * disabled_fph_fault() is called when a user-level process attempts to access f32..f127 + * and it doesn't own the fp-high register partition. When this happens, we save the + * current fph partition in the task_struct of the fpu-owner (if necessary) and then load + * the fp-high partition of the current task (if necessary). Note that the kernel has + * access to fph by the time we get here, as the IVT's "Disabled FP-Register" handler takes + * care of clearing psr.dfh. + */ +static inline void +disabled_fph_fault (struct pt_regs *regs) +{ + struct ia64_psr *psr = ia64_psr(regs); + + /* first, grant user-level access to fph partition: */ + psr->dfh = 0; + + /* + * Make sure that no other task gets in on this processor + * while we're claiming the FPU + */ + preempt_disable(); +#ifndef CONFIG_SMP + { + struct task_struct *fpu_owner + = (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER); + + if (ia64_is_local_fpu_owner(current)) { + preempt_enable_no_resched(); + return; + } + + if (fpu_owner) + ia64_flush_fph(fpu_owner); + } +#endif /* !CONFIG_SMP */ + ia64_set_local_fpu_owner(current); + if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) { + __ia64_load_fpu(current->thread.fph); + psr->mfh = 0; + } else { + __ia64_init_fpu(); + /* + * Set mfh because the state in thread.fph does not match the state in + * the fph partition. 
+ */ + psr->mfh = 1; + } + preempt_enable_no_resched(); +} + +static inline int +fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs, + struct pt_regs *regs) +{ + fp_state_t fp_state; + fpswa_ret_t ret; + + if (!fpswa_interface) + return -1; + + memset(&fp_state, 0, sizeof(fp_state_t)); + + /* + * compute fp_state. only FP registers f6 - f11 are used by the + * kernel, so set those bits in the mask and set the low volatile + * pointer to point to these registers. + */ + fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */ + + fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) ®s->f6; + /* + * unsigned long (*EFI_FPSWA) ( + * unsigned long trap_type, + * void *Bundle, + * unsigned long *pipsr, + * unsigned long *pfsr, + * unsigned long *pisr, + * unsigned long *ppreds, + * unsigned long *pifs, + * void *fp_state); + */ + ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle, + (unsigned long *) ipsr, (unsigned long *) fpsr, + (unsigned long *) isr, (unsigned long *) pr, + (unsigned long *) ifs, &fp_state); + + return ret.status; +} + +struct fpu_swa_msg { + unsigned long count; + unsigned long time; +}; +static DEFINE_PER_CPU(struct fpu_swa_msg, cpulast); +DECLARE_PER_CPU(struct fpu_swa_msg, cpulast); +static struct fpu_swa_msg last __cacheline_aligned; + + +/* + * Handle floating-point assist faults and traps. 
+ */ +static int +handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) +{ + long exception, bundle[2]; + unsigned long fault_ip; + struct siginfo siginfo; + + fault_ip = regs->cr_iip; + if (!fp_fault && (ia64_psr(regs)->ri == 0)) + fault_ip -= 16; + if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle))) + return -1; + + if (!(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) { + unsigned long count, current_jiffies = jiffies; + struct fpu_swa_msg *cp = this_cpu_ptr(&cpulast); + + if (unlikely(current_jiffies > cp->time)) + cp->count = 0; + if (unlikely(cp->count < 5)) { + cp->count++; + cp->time = current_jiffies + 5 * HZ; + + /* minimize races by grabbing a copy of count BEFORE checking last.time. */ + count = last.count; + barrier(); + + /* + * Lower 4 bits are used as a count. Upper bits are a sequence + * number that is updated when count is reset. The cmpxchg will + * fail is seqno has changed. This minimizes mutiple cpus + * resetting the count. + */ + if (current_jiffies > last.time) + (void) cmpxchg_acq(&last.count, count, 16 + (count & ~15)); + + /* used fetchadd to atomically update the count */ + if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) { + last.time = current_jiffies + 5 * HZ; + printk(KERN_WARNING + "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", + current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr); + } + } + } + + exception = fp_emulate(fp_fault, bundle, ®s->cr_ipsr, ®s->ar_fpsr, &isr, ®s->pr, + ®s->cr_ifs, regs); + if (fp_fault) { + if (exception == 0) { + /* emulation was successful */ + ia64_increment_ip(regs); + } else if (exception == -1) { + printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n"); + return -1; + } else { + /* is next instruction a trap? 
*/ + if (exception & 2) { + ia64_increment_ip(regs); + } + siginfo.si_signo = SIGFPE; + siginfo.si_errno = 0; + siginfo.si_code = __SI_FAULT; /* default code */ + siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); + if (isr & 0x11) { + siginfo.si_code = FPE_FLTINV; + } else if (isr & 0x22) { + /* denormal operand gets the same si_code as underflow + * see arch/i386/kernel/traps.c:math_error() */ + siginfo.si_code = FPE_FLTUND; + } else if (isr & 0x44) { + siginfo.si_code = FPE_FLTDIV; + } + siginfo.si_isr = isr; + siginfo.si_flags = __ISR_VALID; + siginfo.si_imm = 0; + force_sig_info(SIGFPE, &siginfo, current); + } + } else { + if (exception == -1) { + printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n"); + return -1; + } else if (exception != 0) { + /* raise exception */ + siginfo.si_signo = SIGFPE; + siginfo.si_errno = 0; + siginfo.si_code = __SI_FAULT; /* default code */ + siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); + if (isr & 0x880) { + siginfo.si_code = FPE_FLTOVF; + } else if (isr & 0x1100) { + siginfo.si_code = FPE_FLTUND; + } else if (isr & 0x2200) { + siginfo.si_code = FPE_FLTRES; + } + siginfo.si_isr = isr; + siginfo.si_flags = __ISR_VALID; + siginfo.si_imm = 0; + force_sig_info(SIGFPE, &siginfo, current); + } + } + return 0; +} + +struct illegal_op_return { + unsigned long fkt, arg1, arg2, arg3; +}; + +struct illegal_op_return +ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, + struct pt_regs regs) +{ + struct illegal_op_return rv; + struct siginfo si; + char buf[128]; + +#ifdef CONFIG_IA64_BRL_EMU + { + extern struct illegal_op_return ia64_emulate_brl (struct pt_regs *, unsigned long); + + rv = ia64_emulate_brl(®s, ec); + if (rv.fkt != (unsigned long) -1) + return rv; + } +#endif + + sprintf(buf, "IA-64 Illegal operation fault"); + rv.fkt = 0; + if (die_if_kernel(buf, ®s, 0)) + return rv; + + memset(&si, 0, sizeof(si)); + 
si.si_signo = SIGILL; + si.si_code = ILL_ILLOPC; + si.si_addr = (void __user *) (regs.cr_iip + ia64_psr(®s)->ri); + force_sig_info(SIGILL, &si, current); + return rv; +} + +void __kprobes +ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, + unsigned long iim, unsigned long itir, long arg5, long arg6, + long arg7, struct pt_regs regs) +{ + unsigned long code, error = isr, iip; + struct siginfo siginfo; + char buf[128]; + int result, sig; + static const char *reason[] = { + "IA-64 Illegal Operation fault", + "IA-64 Privileged Operation fault", + "IA-64 Privileged Register fault", + "IA-64 Reserved Register/Field fault", + "Disabled Instruction Set Transition fault", + "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault", + "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12", + "Unknown fault 13", "Unknown fault 14", "Unknown fault 15" + }; + + if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) { + /* + * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel + * the lfetch. + */ + ia64_psr(®s)->ed = 1; + return; + } + + iip = regs.cr_iip + ia64_psr(®s)->ri; + + switch (vector) { + case 24: /* General Exception */ + code = (isr >> 4) & 0xf; + sprintf(buf, "General Exception: %s%s", reason[code], + (code == 3) ? ((isr & (1UL << 37)) + ? 
" (RSE access)" : " (data access)") : ""); + if (code == 8) { +# ifdef CONFIG_IA64_PRINT_HAZARDS + printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n", + current->comm, task_pid_nr(current), + regs.cr_iip + ia64_psr(®s)->ri, regs.pr); +# endif + return; + } + break; + + case 25: /* Disabled FP-Register */ + if (isr & 2) { + disabled_fph_fault(®s); + return; + } + sprintf(buf, "Disabled FPL fault---not supposed to happen!"); + break; + + case 26: /* NaT Consumption */ + if (user_mode(®s)) { + void __user *addr; + + if (((isr >> 4) & 0xf) == 2) { + /* NaT page consumption */ + sig = SIGSEGV; + code = SEGV_ACCERR; + addr = (void __user *) ifa; + } else { + /* register NaT consumption */ + sig = SIGILL; + code = ILL_ILLOPN; + addr = (void __user *) (regs.cr_iip + + ia64_psr(®s)->ri); + } + siginfo.si_signo = sig; + siginfo.si_code = code; + siginfo.si_errno = 0; + siginfo.si_addr = addr; + siginfo.si_imm = vector; + siginfo.si_flags = __ISR_VALID; + siginfo.si_isr = isr; + force_sig_info(sig, &siginfo, current); + return; + } else if (ia64_done_with_exception(®s)) + return; + sprintf(buf, "NaT consumption"); + break; + + case 31: /* Unsupported Data Reference */ + if (user_mode(®s)) { + siginfo.si_signo = SIGILL; + siginfo.si_code = ILL_ILLOPN; + siginfo.si_errno = 0; + siginfo.si_addr = (void __user *) iip; + siginfo.si_imm = vector; + siginfo.si_flags = __ISR_VALID; + siginfo.si_isr = isr; + force_sig_info(SIGILL, &siginfo, current); + return; + } + sprintf(buf, "Unsupported data reference"); + break; + + case 29: /* Debug */ + case 35: /* Taken Branch Trap */ + case 36: /* Single Step Trap */ + if (fsys_mode(current, ®s)) { + extern char __kernel_syscall_via_break[]; + /* + * Got a trap in fsys-mode: Taken Branch Trap + * and Single Step trap need special handling; + * Debug trap is ignored (we disable it here + * and re-enable it in the lower-privilege trap). 
+ */ + if (unlikely(vector == 29)) { + set_thread_flag(TIF_DB_DISABLED); + ia64_psr(®s)->db = 0; + ia64_psr(®s)->lp = 1; + return; + } + /* re-do the system call via break 0x100000: */ + regs.cr_iip = (unsigned long) __kernel_syscall_via_break; + ia64_psr(®s)->ri = 0; + ia64_psr(®s)->cpl = 3; + return; + } + switch (vector) { + case 29: + siginfo.si_code = TRAP_HWBKPT; +#ifdef CONFIG_ITANIUM + /* + * Erratum 10 (IFA may contain incorrect address) now has + * "NoFix" status. There are no plans for fixing this. + */ + if (ia64_psr(®s)->is == 0) + ifa = regs.cr_iip; +#endif + break; + case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break; + case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break; + } + if (notify_die(DIE_FAULT, "ia64_fault", ®s, vector, siginfo.si_code, SIGTRAP) + == NOTIFY_STOP) + return; + siginfo.si_signo = SIGTRAP; + siginfo.si_errno = 0; + siginfo.si_addr = (void __user *) ifa; + siginfo.si_imm = 0; + siginfo.si_flags = __ISR_VALID; + siginfo.si_isr = isr; + force_sig_info(SIGTRAP, &siginfo, current); + return; + + case 32: /* fp fault */ + case 33: /* fp trap */ + result = handle_fpu_swa((vector == 32) ? 1 : 0, ®s, isr); + if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) { + siginfo.si_signo = SIGFPE; + siginfo.si_errno = 0; + siginfo.si_code = FPE_FLTINV; + siginfo.si_addr = (void __user *) iip; + siginfo.si_flags = __ISR_VALID; + siginfo.si_isr = isr; + siginfo.si_imm = 0; + force_sig_info(SIGFPE, &siginfo, current); + } + return; + + case 34: + if (isr & 0x2) { + /* Lower-Privilege Transfer Trap */ + + /* If we disabled debug traps during an fsyscall, + * re-enable them here. + */ + if (test_thread_flag(TIF_DB_DISABLED)) { + clear_thread_flag(TIF_DB_DISABLED); + ia64_psr(®s)->db = 1; + } + + /* + * Just clear PSR.lp and then return immediately: + * all the interesting work (e.g., signal delivery) + * is done in the kernel exit path. + */ + ia64_psr(®s)->lp = 0; + return; + } else { + /* Unimplemented Instr. 
Address Trap */ + if (user_mode(®s)) { + siginfo.si_signo = SIGILL; + siginfo.si_code = ILL_BADIADDR; + siginfo.si_errno = 0; + siginfo.si_flags = 0; + siginfo.si_isr = 0; + siginfo.si_imm = 0; + siginfo.si_addr = (void __user *) iip; + force_sig_info(SIGILL, &siginfo, current); + return; + } + sprintf(buf, "Unimplemented Instruction Address fault"); + } + break; + + case 45: + printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n"); + printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", + iip, ifa, isr); + force_sig(SIGSEGV, current); + return; + + case 46: + printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n"); + printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", + iip, ifa, isr, iim); + force_sig(SIGSEGV, current); + return; + + case 47: + sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16); + break; + + default: + sprintf(buf, "Fault %lu", vector); + break; + } + if (!die_if_kernel(buf, ®s, error)) + force_sig(SIGILL, current); +} diff --git a/kernel/arch/ia64/kernel/unaligned.c b/kernel/arch/ia64/kernel/unaligned.c new file mode 100644 index 000000000..622772b7f --- /dev/null +++ b/kernel/arch/ia64/kernel/unaligned.c @@ -0,0 +1,1542 @@ +/* + * Architecture-specific unaligned trap handling. + * + * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger-Tang + * + * 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix + * get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame + * stacked register returns an undefined value; it does NOT trigger a + * "rsvd register fault"). + * 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops. + * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes. + * 2001/01/17 Add support emulation of unaligned kernel accesses. 
+ */
+#include	/* NOTE(review): header names are missing from the #include lines in this copy of the patch */
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+extern int die_if_kernel(char *str, struct pt_regs *regs, long err);
+
+#undef DEBUG_UNALIGNED_TRAP
+/*
+ * Debug helpers.  They only produce output when DEBUG_UNALIGNED_TRAP is
+ * defined; the #undef above means they normally expand to nothing.
+ */
+#ifdef DEBUG_UNALIGNED_TRAP
+# define DPRINT(a...)	do { printk("%s %u: ", __func__, __LINE__); printk (a); } while (0)
+# define DDUMP(str,vp,len)	dump(str, vp, len)
+/* dump - print str followed by the len bytes at vp as two-digit hex values. */
+static void
+dump (const char *str, void *vp, size_t len)
+{
+	unsigned char *cp = vp;
+	int i;
+
+	printk("%s", str);
+	for (i = 0; i < len; ++i)
+		printk (" %02x", *cp++);
+	printk("\n");
+}
+#else
+# define DPRINT(a...)
+# define DDUMP(str,vp,len)
+#endif
+
+#define IA64_FIRST_STACKED_GR	32
+#define IA64_FIRST_ROTATING_FR	32
+#define SIGN_EXT9		0xffffffffffffff00ul	/* bits 8..63 set */
+
+/*
+ * sysctl settable hook which tells the kernel whether to honor the
+ * IA64_THREAD_UAC_NOPRINT prctl. Because this is user settable, we want
+ * to allow the super user to enable/disable this for security reasons
+ * (i.e. don't allow attacker to fill up logs with unaligned accesses).
+ */
+int no_unaligned_warning;
+int unaligned_dump_stack;
+
+/*
+ * For M-unit:
+ *
+ *  opcode |   m  |   x6    |
+ * --------|------|---------|
+ * [40-37] | [36] | [35:30] |
+ * --------|------|---------|
+ *     4   |   1  |    6    | = 11 bits
+ * --------------------------
+ * However bits [31:30] are not directly useful to distinguish between
+ * load/store so we can use [35:32] instead, which gives the following
+ * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
+ * checking the m-bit until later in the load/store emulation.
+ */
+#define IA64_OPCODE_MASK	0x1ef
+#define IA64_OPCODE_SHIFT	32
+
+/*
+ * Table C-28 Integer Load/Store
+ *
+ * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
+ *
+ * ld8.fill, st8.fill MUST be aligned because the RNATs are based on
+ * the address (bits [8:3]), so we must failed.
+ */
+#define LD_OP            0x080	/* NOTE(review): mnemonics in these trailing comments are read off the macro names -- verify against the manual table */
+#define LDS_OP           0x081	/* ld.s */
+#define LDA_OP           0x082	/* ld.a */
+#define LDSA_OP          0x083	/* ld.sa */
+#define LDBIAS_OP        0x084	/* ld.bias */
+#define LDACQ_OP         0x085	/* ld.acq */
+/* 0x086, 0x087 are not relevant */
+#define LDCCLR_OP        0x088	/* ld.c.clr */
+#define LDCNC_OP         0x089	/* ld.c.nc */
+#define LDCCLRACQ_OP     0x08a	/* ld.c.clr.acq */
+#define ST_OP            0x08c	/* st */
+#define STREL_OP         0x08d	/* st.rel */
+/* 0x08e, 0x08f are not relevant */
+
+/*
+ * Table C-29 Integer Load +Reg
+ *
+ * we use the ld->m (bit [36:36]) field to determine whether or not we have
+ * a load/store of this form.
+ */
+
+/*
+ * Table C-30 Integer Load/Store +Imm
+ *
+ * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
+ *
+ * ld8.fill, st8.fill must be aligned because the NaT registers are based on
+ * the address, so we must fail and the program must be fixed.
+ *
+ * Each value below equals the corresponding Table C-28 value plus 0x020.
+ */
+#define LD_IMM_OP            0x0a0
+#define LDS_IMM_OP           0x0a1
+#define LDA_IMM_OP           0x0a2
+#define LDSA_IMM_OP          0x0a3
+#define LDBIAS_IMM_OP        0x0a4
+#define LDACQ_IMM_OP         0x0a5
+/* 0x0a6, 0x0a7 are not relevant */
+#define LDCCLR_IMM_OP        0x0a8
+#define LDCNC_IMM_OP         0x0a9
+#define LDCCLRACQ_IMM_OP     0x0aa
+#define ST_IMM_OP            0x0ac
+#define STREL_IMM_OP         0x0ad
+/* 0x0ae, 0x0af are not relevant */
+
+/*
+ * Table C-32 Floating-point Load/Store
+ */
+#define LDF_OP           0x0c0
+#define LDFS_OP          0x0c1
+#define LDFA_OP          0x0c2
+#define LDFSA_OP         0x0c3
+/* 0x0c6 is irrelevant */
+#define LDFCCLR_OP       0x0c8
+#define LDFCNC_OP        0x0c9
+/* 0x0cb is irrelevant */
+#define STF_OP           0x0cc
+
+/*
+ * Table C-33 Floating-point Load +Reg
+ *
+ * we use the ld->m (bit [36:36]) field to determine whether or not we have
+ * a load/store of this form.
+ */ + +/* + * Table C-34 Floating-point Load/Store +Imm + */ +#define LDF_IMM_OP 0x0e0 +#define LDFS_IMM_OP 0x0e1 +#define LDFA_IMM_OP 0x0e2 +#define LDFSA_IMM_OP 0x0e3 +/* 0x0e6 is irrelevant */ +#define LDFCCLR_IMM_OP 0x0e8 +#define LDFCNC_IMM_OP 0x0e9 +#define STF_IMM_OP 0x0ec + +/* + * Bit-field view of one 41-bit M-unit load/store instruction. For stores the + * source register number is taken from 'imm' and the low seven immediate bits + * from 'r1' (see emulate_store_int() and emulate_store_float()). + */ +typedef struct { + unsigned long qp:6; /* [0:5] */ + unsigned long r1:7; /* [6:12] */ + unsigned long imm:7; /* [13:19] */ + unsigned long r3:7; /* [20:26] */ + unsigned long x:1; /* [27:27] */ + unsigned long hint:2; /* [28:29] */ + unsigned long x6_sz:2; /* [30:31] */ + unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */ + unsigned long m:1; /* [36:36] */ + unsigned long op:4; /* [37:40] */ + unsigned long pad:23; /* [41:63] */ +} load_store_t; + + +/* Kind of base-register update requested from emulate_load_updates(). */ +typedef enum { + UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */ + UPD_REG /* ldXZ r1=[r3],r2 */ +} update_t; + +/* + * We use tables to keep track of the offsets of registers in the saved state. + * This way we save having big switch/case statements. + * + * We use bit 0 to indicate switch_stack or pt_regs. + * The offset is simply shifted by 1 bit. + * A 2-byte value should be enough to hold any kind of offset + * + * In case the calling convention changes (and thus pt_regs/switch_stack) + * simply use RSW instead of RPT or vice-versa.
+ */ + +/* byte offset of member x within struct pt_regs (RPO) or struct switch_stack (RSO) */ +#define RPO(x) ((size_t) &((struct pt_regs *)0)->x) +#define RSO(x) ((size_t) &((struct switch_stack *)0)->x) + +/* table encodings: RPT = pt_regs offset with bit 0 clear, RSW = switch_stack offset with bit 0 set */ +#define RPT(x) (RPO(x) << 1) +#define RSW(x) (1| RSO(x)<<1) + +/* decode a gr_info[] entry: byte offset, and whether the register lives in switch_stack */ +#define GR_OFFS(x) (gr_info[x]>>1) +#define GR_IN_SW(x) (gr_info[x] & 0x1) + +/* same decoding for fr_info[] */ +#define FR_OFFS(x) (fr_info[x]>>1) +#define FR_IN_SW(x) (fr_info[x] & 0x1) + +/* location of r0-r31 in the saved state, indexed by register number */ +static u16 gr_info[32]={ + 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ + + RPT(r1), RPT(r2), RPT(r3), + + RSW(r4), RSW(r5), RSW(r6), RSW(r7), + + RPT(r8), RPT(r9), RPT(r10), RPT(r11), + RPT(r12), RPT(r13), RPT(r14), RPT(r15), + + RPT(r16), RPT(r17), RPT(r18), RPT(r19), + RPT(r20), RPT(r21), RPT(r22), RPT(r23), + RPT(r24), RPT(r25), RPT(r26), RPT(r27), + RPT(r28), RPT(r29), RPT(r30), RPT(r31) +}; + +/* location of f0-f31 in the saved state, indexed by register number */ +static u16 fr_info[32]={ + 0, /* constant : WE SHOULD NEVER GET THIS */ + 0, /* constant : WE SHOULD NEVER GET THIS */ + + RSW(f2), RSW(f3), RSW(f4), RSW(f5), + + RPT(f6), RPT(f7), RPT(f8), RPT(f9), + RPT(f10), RPT(f11), + + RSW(f12), RSW(f13), RSW(f14), + RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19), + RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24), + RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29), + RSW(f30), RSW(f31) +}; + +/* Invalidate ALAT entry for integer register REGNO.
*/ +static void +invala_gr (int regno) +{ + /* + * One case per register number so that ia64_invala_gr() is always called with a + * literal (presumably it needs a compile-time constant; confirm against its + * definition). Values outside 0-127 match no case and do nothing. + */ +# define F(reg) case reg: ia64_invala_gr(reg); break + + switch (regno) { + F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7); + F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15); + F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23); + F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31); + F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39); + F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47); + F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55); + F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63); + F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71); + F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79); + F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87); + F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95); + F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103); + F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111); + F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119); + F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127); + } +# undef F +} + +/* Invalidate ALAT entry for floating-point register REGNO.
*/ +static void +invala_fr (int regno) +{ +# define F(reg) case reg: ia64_invala_fr(reg); break + + switch (regno) { + F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7); + F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15); + F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23); + F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31); + F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39); + F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47); + F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55); + F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63); + F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71); + F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79); + F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87); + F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95); + F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103); + F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111); + F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119); + F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127); + } +# undef F +} + +/* + * Apply rotating register base RRB to register index REG inside a rotating + * region of SOR registers: add the base and wrap once if the sum reaches SOR. + */ +static inline unsigned long +rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg) +{ + reg += rrb; + if (reg >= sor) + reg -= sor; + return reg; +} + +/* + * Write VAL, with NaT bit NAT, to stacked register R1 (r32 and up) of the frame + * described by REGS->cr_ifs. The slot is updated in place when it lies on the + * kernel register backing store, otherwise through ia64_poke() on the user + * backing store. Writes outside the current frame, or for a non-user stack whose + * slot is not on the kernel backing store, are silently dropped. + */ +static void +set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end; + unsigned long *kbs = (void *) current + IA64_RBS_OFFSET; + unsigned long rnats, nat_mask; + unsigned long on_kbs; + long sof = (regs->cr_ifs) & 0x7f; + long sor = 8 * ((regs->cr_ifs >> 14) & 0xf); + long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; + long ridx = r1 - 32; + + if (ridx >= sof) { + /* this should never happen, as the "rsvd register fault" has higher priority */ + DPRINT("ignoring write to r%lu; only %lu registers
are allocated!\n", r1, sof); + return; + } + + if (ridx < sor) + ridx = rotate_reg(sor, rrb_gr, ridx); + + DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n", + r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx); + + on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore); + addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx); + if (addr >= kbs) { + /* the register is on the kernel backing store: easy... */ + rnat_addr = ia64_rse_rnat_addr(addr); + if ((unsigned long) rnat_addr >= sw->ar_bspstore) + rnat_addr = &sw->ar_rnat; + nat_mask = 1UL << ia64_rse_slot_num(addr); + + *addr = val; + if (nat) + *rnat_addr |= nat_mask; + else + *rnat_addr &= ~nat_mask; + return; + } + + if (!user_stack(current, regs)) { + DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1); + return; + } + + bspstore = (unsigned long *)regs->ar_bspstore; + ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); + bsp = ia64_rse_skip_regs(ubs_end, -sof); + addr = ia64_rse_skip_regs(bsp, ridx); + + DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr); + + ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val); + + rnat_addr = ia64_rse_rnat_addr(addr); + + ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats); + DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n", + (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1); + + nat_mask = 1UL << ia64_rse_slot_num(addr); + if (nat) + rnats |= nat_mask; + else + rnats &= ~nat_mask; + ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats); + + DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats); +} + + +/* + * Read stacked register R1 (r32 and up) into *VAL and, when NAT is non-NULL, its + * NaT bit into *NAT. Mirrors set_rse_reg(): the kernel backing store is read + * directly, the user backing store through ia64_peek(). An out-of-frame register, + * or one that is neither on the kernel backing store nor on a user stack, reads + * as value 0 with NaT 0. + */ +static void +get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long *bsp, *addr, *rnat_addr, *ubs_end,
*bspstore; + unsigned long *kbs = (void *) current + IA64_RBS_OFFSET; + unsigned long rnats, nat_mask; + unsigned long on_kbs; + long sof = (regs->cr_ifs) & 0x7f; + long sor = 8 * ((regs->cr_ifs >> 14) & 0xf); + long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; + long ridx = r1 - 32; + + if (ridx >= sof) { + /* read of out-of-frame register returns an undefined value; 0 in our case. */ + DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof); + goto fail; + } + + if (ridx < sor) + ridx = rotate_reg(sor, rrb_gr, ridx); + + DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n", + r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx); + + on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore); + addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx); + if (addr >= kbs) { + /* the register is on the kernel backing store: easy... */ + *val = *addr; + if (nat) { + rnat_addr = ia64_rse_rnat_addr(addr); + if ((unsigned long) rnat_addr >= sw->ar_bspstore) + rnat_addr = &sw->ar_rnat; + nat_mask = 1UL << ia64_rse_slot_num(addr); + *nat = (*rnat_addr & nat_mask) != 0; + } + return; + } + + if (!user_stack(current, regs)) { + DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1); + goto fail; + } + + bspstore = (unsigned long *)regs->ar_bspstore; + ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); + bsp = ia64_rse_skip_regs(ubs_end, -sof); + addr = ia64_rse_skip_regs(bsp, ridx); + + DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr); + + ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val); + + if (nat) { + rnat_addr = ia64_rse_rnat_addr(addr); + nat_mask = 1UL << ia64_rse_slot_num(addr); + + /* + * Fetch the RNAT word before tracing it: printing rnats ahead of the + * peek would read an uninitialized variable when tracing is enabled. + */ + ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats); + + DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats); + + *nat = (rnats & nat_mask) != 0; + } + return; + + fail: + *val = 0; + if (nat) +
*nat = 0; + return; +} + + +/* + * Write VAL and NaT bit NAT to general register REGNUM of the interrupted + * context. Stacked registers (r32 and up) are handed to set_rse_reg(); r1-r31 are + * stored into pt_regs or switch_stack as selected by gr_info[], and the NaT bit + * is kept in sw->ar_unat (switch_stack registers) or sw->caller_unat (pt_regs + * registers). + */ +static void +setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long addr; + unsigned long bitmask; + unsigned long *unat; + + /* + * First takes care of stacked registers + */ + if (regnum >= IA64_FIRST_STACKED_GR) { + set_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * Using r0 as a target raises a General Exception fault which has higher priority + * than the Unaligned Reference fault. + */ + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + if (GR_IN_SW(regnum)) { + addr = (unsigned long)sw; + unat = &sw->ar_unat; + } else { + addr = (unsigned long)regs; + unat = &sw->caller_unat; + } + DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n", + addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum)); + /* + * add offset from base of struct + * and do it ! + */ + addr += GR_OFFS(regnum); + + *(unsigned long *)addr = val; + + /* + * We need to clear the corresponding UNAT bit to fully emulate the load + * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4 + */ + bitmask = 1UL << (addr >> 3 & 0x3f); + DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat); + if (nat) { + *unat |= bitmask; + } else { + *unat &= ~bitmask; + } + DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat); +} + +/* + * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the + * range from 32-127, result is in the range from 0-95).
+ */ +static inline unsigned long +fph_index (struct pt_regs *regs, long regnum) +{ + unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f; + return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR)); +} + +/* + * Store *FPVAL into floating-point register REGNUM of the interrupted context: + * f32-f127 go to current->thread.fph[] (after ia64_sync_fph()), lower registers go + * to their slot in pt_regs or switch_stack as selected by fr_info[]. + */ +static void +setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *)regs - 1; + unsigned long addr; + + /* + * From EAS-2.5: FPDisableFault has higher priority than Unaligned + * Fault. Thus, when we get here, we know the partition is enabled. + * To update f32-f127, there are three choices: + * + * (1) save f32-f127 to thread.fph and update the values there + * (2) use a gigantic switch statement to directly access the registers + * (3) generate code on the fly to update the desired register + * + * For now, we are using approach (1). + */ + if (regnum >= IA64_FIRST_ROTATING_FR) { + ia64_sync_fph(current); + current->thread.fph[fph_index(regs, regnum)] = *fpval; + } else { + /* + * pt_regs or switch_stack ? + */ + if (FR_IN_SW(regnum)) { + addr = (unsigned long)sw; + } else { + addr = (unsigned long)regs; + } + + DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum)); + + addr += FR_OFFS(regnum); + *(struct ia64_fpreg *)addr = *fpval; + + /* + * mark the low partition as being used now + * + * It is highly unlikely that this bit is not already set, but + * let's do it for safety.
+ */ + regs->cr_ipsr |= IA64_PSR_MFL; + } +} + +/* + * Those 2 inline functions generate the spilled versions of the constant floating point + * registers which can be used with stfX + */ +static inline void +float_spill_f0 (struct ia64_fpreg *final) +{ + ia64_stf_spill(final, 0); +} + +static inline void +float_spill_f1 (struct ia64_fpreg *final) +{ + ia64_stf_spill(final, 1); +} + +/* + * Fetch the spilled image of floating-point register REGNUM into *FPVAL: + * f32-f127 come from current->thread.fph[] (after ia64_flush_fph()), f0/f1 are + * generated on the fly, and the remaining registers are read from pt_regs or + * switch_stack as selected by fr_info[]. + */ +static void +getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long addr; + + /* + * From EAS-2.5: FPDisableFault has higher priority than + * Unaligned Fault. Thus, when we get here, we know the partition is + * enabled. + * + * When regnum > 31, the register is still live and we need to force a save + * to current->thread.fph to get access to it. See discussion in setfpreg() + * for reasons and other ways of doing this. + */ + if (regnum >= IA64_FIRST_ROTATING_FR) { + ia64_flush_fph(current); + *fpval = current->thread.fph[fph_index(regs, regnum)]; + } else { + /* + * f0 = 0.0, f1= 1.0. Those registers are constant and are thus + * not saved, we must generate their spilled form on the fly + */ + switch(regnum) { + case 0: + float_spill_f0(fpval); + break; + case 1: + float_spill_f1(fpval); + break; + default: + /* + * pt_regs or switch_stack ? + */ + addr = FR_IN_SW(regnum) ?
(unsigned long)sw + : (unsigned long)regs; + + DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n", + FR_IN_SW(regnum), addr, FR_OFFS(regnum)); + + addr += FR_OFFS(regnum); + *fpval = *(struct ia64_fpreg *)addr; + } + } +} + + +/* + * Read general register REGNUM of the interrupted context into *VAL and, when + * NAT is non-NULL, its NaT bit into *NAT. Stacked registers (r32 and up) are + * handed to get_rse_reg(); r0 always reads as 0 with NaT 0. + */ +static void +getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long addr, *unat; + + if (regnum >= IA64_FIRST_STACKED_GR) { + get_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * take care of r0 (read-only always evaluate to 0) + */ + if (regnum == 0) { + *val = 0; + if (nat) + *nat = 0; + return; + } + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + if (GR_IN_SW(regnum)) { + addr = (unsigned long)sw; + unat = &sw->ar_unat; + } else { + addr = (unsigned long)regs; + unat = &sw->caller_unat; + } + + DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum)); + + addr += GR_OFFS(regnum); + + *val = *(unsigned long *)addr; + + /* + * do it only when requested + */ + if (nat) + *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL; +} + +/* + * Perform the base-register update of a load: r3 becomes IFA plus the 9-bit + * immediate (TYPE == UPD_IMMEDIATE) or plus register r2 (otherwise, and only + * when ld.m is set; the NaT bit of r2 is propagated to r3). + */ +static void +emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa) +{ + /* + * IMPORTANT: + * Given the way we handle unaligned speculative loads, we should + * not get to this point in the code but we keep this sanity check, + * just in case.
+ */ + if (ld.x6_op == 1 || ld.x6_op == 3) { + printk(KERN_ERR "%s: register update on speculative load, error\n", __func__); + if (die_if_kernel("unaligned reference on speculative load with register update\n", + regs, 30)) + return; + } + + + /* + * at this point, we know that the base register to update is valid i.e., + * it's not r0 + */ + if (type == UPD_IMMEDIATE) { + unsigned long imm; + + /* + * Load +Imm: ldXZ r1=[r3],imm(9) + * + * + * form imm9: [13:19] contain the first 7 bits + */ + imm = ld.x << 7 | ld.imm; + + /* + * sign extend (1+8bits) if m set + */ + if (ld.m) imm |= SIGN_EXT9; + + /* + * ifa == r3 and we know that the NaT bit on r3 was clear so + * we can directly use ifa. + */ + ifa += imm; + + setreg(ld.r3, ifa, 0, regs); + + DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa); + + } else if (ld.m) { + unsigned long r2; + int nat_r2; + + /* + * Load +Reg Opcode: ldXZ r1=[r3],r2 + * + * Note: that we update r3 even in the case of ldfX.a + * (where the load does not happen) + * + * The way the load algorithm works, we know that r3 does not + * have its NaT bit set (would have gotten NaT consumption + * before getting the unaligned fault). So we can use ifa + * which equals r3 at this point. + * + * IMPORTANT: + * The above statement holds ONLY because we know that we + * never reach this code when trying to do a ldX.s. + * If we ever make it to here on an ldfX.s then the assumption + * about the NaT bit of r3 no longer holds. + */ + getreg(ld.imm, &r2, &nat_r2, regs); + + ifa += r2; + + /* + * propagate Nat r2 -> r3 + */ + setreg(ld.r3, ifa, nat_r2, regs); + + DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2); + } +} + + +/* + * Emulate an unaligned 2-, 4- or 8-byte integer load from address IFA into + * register ld.r1 (NaT cleared), then apply any base-register update, the fence + * for .acq forms and the ALAT invalidation for advanced loads. + * Returns 0 on success, -1 for any other size or when copy_from_user() fails. + */ +static int +emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs) +{ + unsigned int len = 1 << ld.x6_sz; + unsigned long val = 0; + + /* + * r0, as target, doesn't need to be checked because Illegal Instruction + * faults have higher priority than unaligned faults.
+ * + * r0 cannot be found as the base as it would never generate an + * unaligned reference. + */ + + /* + * ldX.a we will emulate load and also invalidate the ALAT entry. + * See comment below for explanation on how we handle ldX.a + */ + + if (len != 2 && len != 4 && len != 8) { + DPRINT("unknown size: x6=%d\n", ld.x6_sz); + return -1; + } + /* this assumes little-endian byte-order: */ + if (copy_from_user(&val, (void __user *) ifa, len)) + return -1; + setreg(ld.r1, val, 0, regs); + + /* + * check for updates on any kind of loads + */ + if (ld.op == 0x5 || ld.m) + emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa); + + /* + * handling of various loads (based on EAS2.4): + * + * ldX.acq (ordered load): + * - acquire semantics would have been used, so force fence instead. + * + * ldX.c.clr (check load and clear): + * - if we get to this handler, it's because the entry was not in the ALAT. + * Therefore the operation reverts to a normal load + * + * ldX.c.nc (check load no clear): + * - same as previous one + * + * ldX.c.clr.acq (ordered check load and clear): + * - same as above for c.clr part. The load needs to have acquire semantics. So + * we use the fence semantics which is stronger and thus ensures correctness. + * + * ldX.a (advanced load): + * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the + * address doesn't match requested size alignment. This means that we would + * possibly need more than one load to get the result. + * + * The load part can be handled just like a normal load, however the difficult + * part is to get the right thing into the ALAT. The critical piece of information + * is the base address of the load & size. To do that, a ld.a must be executed, + * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now + * if we use the same target register, we will be okay for the check.a instruction.
+ * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry + * which would overlap within [r3,r3+X] (the size of the load is stored in the + * ALAT). If such an entry is found the entry is invalidated. But this is not good + * enough, take the following example: + * r3=3 + * ld4.a r1=[r3] + * + * Could be emulated by doing: + * ld1.a r1=[r3],1 + * store to temporary; + * ld1.a r1=[r3],1 + * store & shift to temporary; + * ld1.a r1=[r3],1 + * store & shift to temporary; + * ld1.a r1=[r3] + * store & shift to temporary; + * r1=temporary + * + * So in this case, you would get the right value in r1 but the wrong info in + * the ALAT. Notice that you could do it in reverse to finish with address 3 + * but you would still get the size wrong. To get the size right, one needs to + * execute exactly the same kind of load. You could do it from an aligned + * temporary location, but you would get the address wrong. + * + * So no matter what, it is not possible to emulate an advanced load + * correctly. But is that really critical? + * + * We will always convert ld.a into a normal load with ALAT invalidated. This + * will enable the compiler to do optimizations where certain code paths after ld.a + * are not required to have ld.c/chk.a, e.g., code paths with no intervening stores. + * + * If there is a store after the advanced load, one must either do a ld.c.* or + * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no + * entry found in ALAT), and that's perfectly ok because: + * + * - ld.c.*, if the entry is not present a normal load is executed + * - chk.a.*, if the entry is not present, execution jumps to recovery code + * + * In either case, the load can be potentially retried in another form. + * + * ALAT must be invalidated for the register (so that chk.a or ld.c don't pick + * up a stale entry later). The register base update MUST also be performed. + */ + + /* + * when the load has the .acq completer then + * use ordering fence.
+ */ + if (ld.x6_op == 0x5 || ld.x6_op == 0xa) + mb(); + + /* + * invalidate ALAT entry in case of advanced load + */ + if (ld.x6_op == 0x2) + invala_gr(ld.r1); + + return 0; +} + +/* + * Emulate an unaligned 2-, 4- or 8-byte integer store of the register named by + * ld.imm to address IFA, then apply the immediate base update (ld.op == 0x5), + * flush the ALAT and fence for the .rel form. + * Returns 0 on success, -1 for any other size or when copy_to_user() fails. + */ +static int +emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs) +{ + unsigned long r2; + unsigned int len = 1 << ld.x6_sz; + + /* + * if we get to this handler, Nat bits on both r3 and r2 have already + * been checked. so we don't need to do it + * + * extract the value to be stored + */ + getreg(ld.imm, &r2, NULL, regs); + + /* + * The store itself is done below with a single copy_to_user() of the + * low LEN bytes of r2; sizes other than 2, 4 and 8 bytes are rejected + * first. + */ + DPRINT("st%d [%lx]=%lx\n", len, ifa, r2); + + if (len != 2 && len != 4 && len != 8) { + DPRINT("unknown size: x6=%d\n", ld.x6_sz); + return -1; + } + + /* this assumes little-endian byte-order: */ + if (copy_to_user((void __user *) ifa, &r2, len)) + return -1; + + /* + * stX [r3]=r2,imm(9) + * + * NOTE: + * ld.r3 can never be r0, because r0 would not generate an + * unaligned access.
+ */ + if (ld.op == 0x5) { + unsigned long imm; + + /* + * form imm9: the r1 field (bits [12:6]) holds the low 7 bits, + * the x bit supplies bit 7 + */ + imm = ld.x << 7 | ld.r1; + /* + * sign extend (8bits) if m set + */ + if (ld.m) imm |= SIGN_EXT9; + /* + * ifa == r3 (NaT is necessarily cleared) + */ + ifa += imm; + + DPRINT("imm=%lx r3=%lx\n", imm, ifa); + + setreg(ld.r3, ifa, 0, regs); + } + /* + * we don't have alat_invalidate_multiple() so we need + * to do the complete flush :-<< + */ + ia64_invala(); + + /* + * stX.rel: use fence instead of release + */ + if (ld.x6_op == 0xd) + mb(); + + return 0; +} + +/* + * floating point operations sizes in bytes, indexed by the x6_sz field + */ +static const unsigned char float_fsz[4]={ + 10, /* extended precision (e) */ + 8, /* integer (8) */ + 4, /* single precision (s) */ + 8 /* double precision (d) */ +}; + +/* + * mem2float_*: load *INIT in the memory format named by the suffix into register + * number 6 (presumably f6), then write its spilled image to *FINAL with + * ia64_stf_spill(). + */ +static inline void +mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldfe(6, init); + ia64_stop(); + ia64_stf_spill(final, 6); +} + +static inline void +mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldf8(6, init); + ia64_stop(); + ia64_stf_spill(final, 6); +} + +static inline void +mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldfs(6, init); + ia64_stop(); + ia64_stf_spill(final, 6); +} + +static inline void +mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldfd(6, init); + ia64_stop(); + ia64_stf_spill(final, 6); +} + +/* + * float2mem_*: the reverse direction; refill register number 6 from the spilled + * image in *INIT with ia64_ldf_fill(), then store it to *FINAL in the memory + * format named by the suffix. + */ +static inline void +float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldf_fill(6, init); + ia64_stop(); + ia64_stfe(final, 6); +} + +static inline void +float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldf_fill(6, init); + ia64_stop(); + ia64_stf8(final, 6); +} + +static inline void +float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldf_fill(6, init); + ia64_stop(); + ia64_stfs(final, 6); +} + +static inline void +float2mem_double (struct ia64_fpreg
*init, struct ia64_fpreg *final) +{ + ia64_ldf_fill(6, init); + ia64_stop(); + ia64_stfd(final, 6); +} + +/* + * Emulate an unaligned floating-point load pair: the two consecutive LEN-byte + * values at IFA are converted and written to FP registers ld.r1 and ld.imm + * (the load itself is skipped when x6_op == 0x2), then the implicit base + * update and ALAT invalidation are applied. + * Returns 0 on success, -1 when copy_from_user() fails. + */ +static int +emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init[2]; + struct ia64_fpreg fpr_final[2]; + unsigned long len = float_fsz[ld.x6_sz]; + + /* + * fr0 & fr1 don't need to be checked because Illegal Instruction faults have + * higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an unaligned + * reference. + */ + + /* + * make sure we get clean buffers + */ + memset(&fpr_init, 0, sizeof(fpr_init)); + memset(&fpr_final, 0, sizeof(fpr_final)); + + /* + * ldfpX.a: we don't try to emulate anything but we must + * invalidate the ALAT entry and execute updates, if any. + */ + if (ld.x6_op != 0x2) { + /* + * This assumes little-endian byte-order. Note that there is no "ldfpe" + * instruction: + */ + if (copy_from_user(&fpr_init[0], (void __user *) ifa, len) + || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len)) + return -1; + + DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz); + DDUMP("frp_init =", &fpr_init, 2*len); + /* + * XXX fixme + * Could optimize inlines by using ldfpX & 2 spills + */ + switch( ld.x6_sz ) { + case 0: + mem2float_extended(&fpr_init[0], &fpr_final[0]); + mem2float_extended(&fpr_init[1], &fpr_final[1]); + break; + case 1: + mem2float_integer(&fpr_init[0], &fpr_final[0]); + mem2float_integer(&fpr_init[1], &fpr_final[1]); + break; + case 2: + mem2float_single(&fpr_init[0], &fpr_final[0]); + mem2float_single(&fpr_init[1], &fpr_final[1]); + break; + case 3: + mem2float_double(&fpr_init[0], &fpr_final[0]); + mem2float_double(&fpr_init[1], &fpr_final[1]); + break; + } + DDUMP("fpr_final =", &fpr_final, 2*len); + /* + * XXX fixme + * + * A possible optimization would be to drop fpr_final and directly + * use the storage from the saved context i.e., the actual final + * destination (pt_regs, switch_stack
or thread structure). + */ + setfpreg(ld.r1, &fpr_final[0], regs); + setfpreg(ld.imm, &fpr_final[1], regs); + } + + /* + * Check for updates: only immediate updates are available for this + * instruction. + */ + if (ld.m) { + /* + * the immediate is implicit given the ldsz of the operation: + * single: 8 (2x4) and for all others it's 16 (2x8) + */ + ifa += len<<1; + + /* + * IMPORTANT: + * the fact that we force the NaT of r3 to zero is ONLY valid + * as long as we don't come here with a ldfpX.s. + * For this reason we keep this sanity check + */ + if (ld.x6_op == 1 || ld.x6_op == 3) + printk(KERN_ERR "%s: register update on speculative load pair, error\n", + __func__); + + setreg(ld.r3, ifa, 0, regs); + } + + /* + * Invalidate ALAT entries, if any, for both registers. + */ + if (ld.x6_op == 0x2) { + invala_fr(ld.r1); + invala_fr(ld.imm); + } + return 0; +} + + +/* + * Emulate an unaligned floating-point load of LEN bytes from IFA into FP + * register ld.r1 (the load itself is skipped when x6_op == 0x2), then apply any + * base-register update and the ALAT invalidation for the advanced form. + * Returns 0 on success, -1 when copy_from_user() fails. + */ +static int +emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init; + struct ia64_fpreg fpr_final; + unsigned long len = float_fsz[ld.x6_sz]; + + /* + * fr0 & fr1 don't need to be checked because Illegal Instruction + * faults have higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an + * unaligned reference. + */ + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + /* + * ldfX.a we don't try to emulate anything but we must + * invalidate the ALAT entry. + * See comments in ldX for descriptions on how the various loads are handled.
+ */ + if (ld.x6_op != 0x2) { + if (copy_from_user(&fpr_init, (void __user *) ifa, len)) + return -1; + + DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz); + DDUMP("fpr_init =", &fpr_init, len); + /* + * we only do something for x6_op={0,8,9} + */ + switch( ld.x6_sz ) { + case 0: + mem2float_extended(&fpr_init, &fpr_final); + break; + case 1: + mem2float_integer(&fpr_init, &fpr_final); + break; + case 2: + mem2float_single(&fpr_init, &fpr_final); + break; + case 3: + mem2float_double(&fpr_init, &fpr_final); + break; + } + DDUMP("fpr_final =", &fpr_final, len); + /* + * XXX fixme + * + * A possible optimization would be to drop fpr_final and directly + * use the storage from the saved context i.e., the actual final + * destination (pt_regs, switch_stack or thread structure). + */ + setfpreg(ld.r1, &fpr_final, regs); + } + + /* + * check for updates on any loads + */ + if (ld.op == 0x7 || ld.m) + emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa); + + /* + * invalidate ALAT entry in case of advanced floating point loads + */ + if (ld.x6_op == 0x2) + invala_fr(ld.r1); + + return 0; +} + + +/* + * Emulate an unaligned floating-point store: the FP register named by ld.imm is + * converted to its LEN-byte memory format and copied to IFA, then the immediate + * base update (ld.op == 0x7) is applied and the ALAT is flushed. + * Returns 0 on success, -1 when copy_to_user() fails. + */ +static int +emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init; + struct ia64_fpreg fpr_final; + unsigned long len = float_fsz[ld.x6_sz]; + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + /* + * if we get to this handler, Nat bits on both r3 and r2 have already + * been checked. so we don't need to do it + * + * extract the value to be stored + */ + getfpreg(ld.imm, &fpr_init, regs); + /* + * during this step, we extract the spilled registers from the saved + * context i.e., we refill.
Then we store (no spill) to temporary + * aligned location + */ + switch( ld.x6_sz ) { + case 0: + float2mem_extended(&fpr_init, &fpr_final); + break; + case 1: + float2mem_integer(&fpr_init, &fpr_final); + break; + case 2: + float2mem_single(&fpr_init, &fpr_final); + break; + case 3: + float2mem_double(&fpr_init, &fpr_final); + break; + } + DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz); + DDUMP("fpr_init =", &fpr_init, len); + DDUMP("fpr_final =", &fpr_final, len); + + if (copy_to_user((void __user *) ifa, &fpr_final, len)) + return -1; + + /* + * stfX [r3]=r2,imm(9) + * + * NOTE: + * ld.r3 can never be r0, because r0 would not generate an + * unaligned access. + */ + if (ld.op == 0x7) { + unsigned long imm; + + /* + * form imm9: the r1 field (bits [12:6]) holds the low 7 bits, + * the x bit supplies bit 7 + */ + imm = ld.x << 7 | ld.r1; + /* + * sign extend (8bits) if m set + */ + if (ld.m) + imm |= SIGN_EXT9; + /* + * ifa == r3 (NaT is necessarily cleared) + */ + ifa += imm; + + DPRINT("imm=%lx r3=%lx\n", imm, ifa); + + setreg(ld.r3, ifa, 0, regs); + } + /* + * we don't have alat_invalidate_multiple() so we need + * to do the complete flush :-<< + */ + ia64_invala(); + + return 0; +} + +/* + * Make sure we log the unaligned access, so that user/sysadmin can notice it and + * eventually fix the program. However, we don't want to do that for every access so we + * rate-limit it with the ratelimit state defined below.
/* Rate limiter shared by the user-level and kernel-level warnings below (interval 5*HZ, burst 5). */
static DEFINE_RATELIMIT_STATE(logging_rate_limit, 5 * HZ, 5);

/*
 * Handle an unaligned-reference fault.
 *
 * @ifa:  the faulting (unaligned) address
 * @regs: the interrupted register state; cr_iip/ipsr.ri identify the
 *        faulting instruction slot
 *
 * The handler optionally logs the event, fetches the 16-byte bundle at
 * regs->cr_iip, extracts the instruction in slot ipsr.ri and dispatches on
 * its opcode to one of the emulate_*() helpers.  On success the instruction
 * pointer is advanced past the emulated slot.  On failure a kernel access
 * with an exception-table entry is redirected through
 * ia64_handle_exception(), any other kernel access goes to die_if_kernel(),
 * and a user access gets SIGBUS/BUS_ADRALN.
 *
 * The address limit saved in old_fs is restored on every path that goes
 * through the "done" label.
 */
void
ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
{
	struct ia64_psr *ipsr = ia64_psr(regs);
	mm_segment_t old_fs = get_fs();
	unsigned long bundle[2];
	unsigned long opcode;
	struct siginfo si;
	const struct exception_table_entry *eh = NULL;
	union {
		unsigned long l;
		load_store_t insn;
	} u;
	int ret = -1;	/* stays -1 unless an emulate_*() helper reports success */

	if (ia64_psr(regs)->be) {
		/* we don't support big-endian accesses */
		if (die_if_kernel("big-endian unaligned accesses are not supported", regs, 0))
			return;
		goto force_sigbus;
	}

	/*
	 * Treat kernel accesses for which there is an exception handler entry the same as
	 * user-level unaligned accesses.  Otherwise, a clever program could trick this
	 * handler into reading an arbitrary kernel address...
	 */
	if (!user_mode(regs))
		eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
	if (user_mode(regs) || eh) {
		/* the task asked for a signal instead of emulation */
		if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
			goto force_sigbus;

		if (!no_unaligned_warning &&
		    !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
		    __ratelimit(&logging_rate_limit))
		{
			char buf[200];	/* comm[] is at most 16 bytes... */
			size_t len;

			len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
				      "ip=0x%016lx\n\r", current->comm,
				      task_pid_nr(current),
				      ifa, regs->cr_iip + ipsr->ri);
			/*
			 * Don't call tty_write_message() if we're in the kernel; we might
			 * be holding locks...
			 */
			if (user_mode(regs))
				tty_write_message(current->signal->tty, buf);
			buf[len-1] = '\0';	/* drop '\r' */
			/* watch for command names containing %s */
			printk(KERN_WARNING "%s", buf);
		} else {
			/* warnings globally disabled: say so exactly once */
			if (no_unaligned_warning) {
				printk_once(KERN_WARNING "%s(%d) encountered an "
				       "unaligned exception which required\n"
				       "kernel assistance, which degrades "
				       "the performance of the application.\n"
				       "Unaligned exception warnings have "
				       "been disabled by the system "
				       "administrator\n"
				       "echo 0 > /proc/sys/kernel/ignore-"
				       "unaligned-usertrap to re-enable\n",
				       current->comm, task_pid_nr(current));
			}
		}
	} else {
		/* kernel access without an exception-table entry */
		if (__ratelimit(&logging_rate_limit)) {
			printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
			       ifa, regs->cr_iip + ipsr->ri);
			if (unaligned_dump_stack)
				dump_stack();
		}
		/* widen the address limit so the user-copy helpers accept kernel addresses */
		set_fs(KERNEL_DS);
	}

	DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
	       regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);

	/* fetch the whole 16-byte bundle containing the faulting instruction */
	if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
		goto failure;

	/*
	 * extract the instruction from the bundle given the slot number
	 */
	switch (ipsr->ri) {
	      case 0: u.l = (bundle[0] >>  5); break;
	      case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
	      case 2: u.l = (bundle[1] >> 23); break;
	}
	opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;

	DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
	       "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
	       u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);

	/*
	 * IMPORTANT:
	 * Notice that the switch statement DOES NOT cover all possible instructions
	 * that DO generate unaligned references.  This is on purpose, because for some
	 * instructions it DOES NOT make sense to try and emulate the access.  Sometimes it
	 * is WRONG to try and emulate.  Here is a list of instructions we don't emulate, i.e.,
	 * the program will get a signal and die:
	 *
	 *	load/store:
	 *		- ldX.spill
	 *		- stX.spill
	 *	Reason: RNATs are based on addresses
	 *		- ld16
	 *		- st16
	 *	Reason: ld16 and st16 are supposed to occur in a single
	 *		memory op
	 *
	 *	synchronization:
	 *		- cmpxchg
	 *		- fetchadd
	 *		- xchg
	 *	Reason: ATOMIC operations cannot be emulated properly using multiple
	 *	        instructions.
	 *
	 *	speculative loads:
	 *		- ldX.sZ
	 *	Reason: side effects, code must be ready to deal with failure so simpler
	 *		to let the load fail.
	 * ---------------------------------------------------------------------------------
	 * XXX fixme
	 *
	 * I would like to get rid of this switch case and do something
	 * more elegant.
	 */
	switch (opcode) {
	      case LDS_OP:
	      case LDSA_OP:
		if (u.insn.x)
			/* oops, really a semaphore op (cmpxchg, etc) */
			goto failure;
		/* no break */
	      case LDS_IMM_OP:
	      case LDSA_IMM_OP:
	      case LDFS_OP:
	      case LDFSA_OP:
	      case LDFS_IMM_OP:
		/*
		 * The instruction will be retried with deferred exceptions turned on, and
		 * we should get the NaT bit installed
		 *
		 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
		 * are actually executed even though the operation failed. So we don't
		 * need to take care of this.
		 */
		DPRINT("forcing PSR_ED\n");
		regs->cr_ipsr |= IA64_PSR_ED;
		/* skip the iip/ri advance below so the instruction is re-executed */
		goto done;

	      case LD_OP:
	      case LDA_OP:
	      case LDBIAS_OP:
	      case LDACQ_OP:
	      case LDCCLR_OP:
	      case LDCNC_OP:
	      case LDCCLRACQ_OP:
		if (u.insn.x)
			/* oops, really a semaphore op (cmpxchg, etc) */
			goto failure;
		/* no break */
	      case LD_IMM_OP:
	      case LDA_IMM_OP:
	      case LDBIAS_IMM_OP:
	      case LDACQ_IMM_OP:
	      case LDCCLR_IMM_OP:
	      case LDCNC_IMM_OP:
	      case LDCCLRACQ_IMM_OP:
		ret = emulate_load_int(ifa, u.insn, regs);
		break;

	      case ST_OP:
	      case STREL_OP:
		if (u.insn.x)
			/* oops, really a semaphore op (cmpxchg, etc) */
			goto failure;
		/* no break */
	      case ST_IMM_OP:
	      case STREL_IMM_OP:
		ret = emulate_store_int(ifa, u.insn, regs);
		break;

	      case LDF_OP:
	      case LDFA_OP:
	      case LDFCCLR_OP:
	      case LDFCNC_OP:
		/* for these opcodes the x bit selects the floating-point pair form */
		if (u.insn.x)
			ret = emulate_load_floatpair(ifa, u.insn, regs);
		else
			ret = emulate_load_float(ifa, u.insn, regs);
		break;

	      case LDF_IMM_OP:
	      case LDFA_IMM_OP:
	      case LDFCCLR_IMM_OP:
	      case LDFCNC_IMM_OP:
		ret = emulate_load_float(ifa, u.insn, regs);
		break;

	      case STF_OP:
	      case STF_IMM_OP:
		ret = emulate_store_float(ifa, u.insn, regs);
		break;

	      default:
		goto failure;
	}
	DPRINT("ret=%d\n", ret);
	if (ret)
		goto failure;

	/* emulation succeeded: step to the next instruction slot */
	if (ipsr->ri == 2)
		/*
		 * given today's architecture this case is not likely to happen because a
		 * memory access instruction (M) can never be in the last slot of a
		 * bundle. But let's keep it for now.
		 */
		regs->cr_iip += 16;
	ipsr->ri = (ipsr->ri + 1) & 0x3;

	DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
  done:
	set_fs(old_fs);		/* restore original address limit */
	return;

  failure:
	/* something went wrong... */
	if (!user_mode(regs)) {
		if (eh) {
			/* let the registered fixup for this kernel instruction take over */
			ia64_handle_exception(regs, eh);
			goto done;
		}
		if (die_if_kernel("error during unaligned kernel access\n", regs, ret))
			return;
		/* NOT_REACHED */
	}
  force_sigbus:
	si.si_signo = SIGBUS;
	si.si_errno = 0;
	si.si_code = BUS_ADRALN;
	si.si_addr = (void __user *) ifa;
	si.si_flags = 0;
	si.si_isr = 0;
	si.si_imm = 0;
	force_sig_info(SIGBUS, &si, current);
	goto done;
}
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +extern void __init efi_memmap_walk_uc(efi_freemem_callback_t, void *); + +struct uncached_pool { + struct gen_pool *pool; + struct mutex add_chunk_mutex; /* serialize adding a converted chunk */ + int nchunks_added; /* #of converted chunks added to pool */ + atomic_t status; /* smp called function's return status*/ +}; + +#define MAX_CONVERTED_CHUNKS_PER_NODE 2 + +struct uncached_pool uncached_pools[MAX_NUMNODES]; + + +static void uncached_ipi_visibility(void *data) +{ + int status; + struct uncached_pool *uc_pool = (struct uncached_pool *)data; + + status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); + if ((status != PAL_VISIBILITY_OK) && + (status != PAL_VISIBILITY_OK_REMOTE_NEEDED)) + atomic_inc(&uc_pool->status); +} + + +static void uncached_ipi_mc_drain(void *data) +{ + int status; + struct uncached_pool *uc_pool = (struct uncached_pool *)data; + + status = ia64_pal_mc_drain(); + if (status != PAL_STATUS_SUCCESS) + atomic_inc(&uc_pool->status); +} + + +/* + * Add a new chunk of uncached memory pages to the specified pool. + * + * @pool: pool to add new chunk of uncached memory to + * @nid: node id of node to allocate memory from, or -1 + * + * This is accomplished by first allocating a granule of cached memory pages + * and then converting them to uncached memory pages. 
/*
 * Convert one granule of cached memory on node @nid to uncached memory and
 * add it to @uc_pool.
 *
 * Returns 0 if a chunk was added (by this caller, or by another caller while
 * this one waited for the mutex), and -1 if the wait was interrupted by a
 * signal, the per-node limit was reached, the page allocation failed, or the
 * conversion or gen_pool_add() failed.
 */
static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
{
	struct page *page;
	/* snapshot taken before locking, to detect a concurrent add below */
	int status, i, nchunks_added = uc_pool->nchunks_added;
	unsigned long c_addr, uc_addr;

	if (mutex_lock_interruptible(&uc_pool->add_chunk_mutex) != 0)
		return -1;	/* interrupted by a signal */

	if (uc_pool->nchunks_added > nchunks_added) {
		/* someone added a new chunk while we were waiting */
		mutex_unlock(&uc_pool->add_chunk_mutex);
		return 0;
	}

	/* per-node cap on converted chunks */
	if (uc_pool->nchunks_added >= MAX_CONVERTED_CHUNKS_PER_NODE) {
		mutex_unlock(&uc_pool->add_chunk_mutex);
		return -1;
	}

	/* attempt to allocate a granule's worth of cached memory pages */

	page = alloc_pages_exact_node(nid,
				GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
				IA64_GRANULE_SHIFT-PAGE_SHIFT);
	if (!page) {
		mutex_unlock(&uc_pool->add_chunk_mutex);
		return -1;
	}

	/* convert the memory pages from cached to uncached */

	c_addr = (unsigned long)page_address(page);
	/* same physical memory, addressed through the uncached offset */
	uc_addr = c_addr - PAGE_OFFSET + __IA64_UNCACHED_OFFSET;

	/*
	 * There's a small race here where it's possible for someone to
	 * access the page through /dev/mem halfway through the conversion
	 * to uncached - not sure it's really worth bothering about
	 */
	for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
		SetPageUncached(&page[i]);

	flush_tlb_kernel_range(uc_addr, uc_addr + IA64_GRANULE_SIZE);

	/* run the PAL call locally; repeat it on the other CPUs if PAL asks for it */
	status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
	if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) {
		atomic_set(&uc_pool->status, 0);
		status = smp_call_function(uncached_ipi_visibility, uc_pool, 1);
		if (status || atomic_read(&uc_pool->status))
			goto failed;
	} else if (status != PAL_VISIBILITY_OK)
		goto failed;

	preempt_disable();

	if (ia64_platform_is("sn2"))
		sn_flush_all_caches(uc_addr, IA64_GRANULE_SIZE);
	else
		flush_icache_range(uc_addr, uc_addr + IA64_GRANULE_SIZE);

	/* flush the just introduced uncached translation from the TLB */
	local_flush_tlb_all();

	preempt_enable();

	/* drain locally first, then on every other CPU */
	status = ia64_pal_mc_drain();
	if (status != PAL_STATUS_SUCCESS)
		goto failed;
	atomic_set(&uc_pool->status, 0);
	status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
	if (status || atomic_read(&uc_pool->status))
		goto failed;

	/*
	 * The chunk of memory pages has been converted to uncached so now we
	 * can add it to the pool.
	 */
	status = gen_pool_add(uc_pool->pool, uc_addr, IA64_GRANULE_SIZE, nid);
	if (status)
		goto failed;

	uc_pool->nchunks_added++;
	mutex_unlock(&uc_pool->add_chunk_mutex);
	return 0;

	/* failed to convert or add the chunk so give it back to the kernel */
failed:
	for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
		ClearPageUncached(&page[i]);

	free_pages(c_addr, IA64_GRANULE_SHIFT-PAGE_SHIFT);
	mutex_unlock(&uc_pool->add_chunk_mutex);
	return -1;
}
+ */ +unsigned long uncached_alloc_page(int starting_nid, int n_pages) +{ + unsigned long uc_addr; + struct uncached_pool *uc_pool; + int nid; + + if (unlikely(starting_nid >= MAX_NUMNODES)) + return 0; + + if (starting_nid < 0) + starting_nid = numa_node_id(); + nid = starting_nid; + + do { + if (!node_state(nid, N_HIGH_MEMORY)) + continue; + uc_pool = &uncached_pools[nid]; + if (uc_pool->pool == NULL) + continue; + do { + uc_addr = gen_pool_alloc(uc_pool->pool, + n_pages * PAGE_SIZE); + if (uc_addr != 0) + return uc_addr; + } while (uncached_add_chunk(uc_pool, nid) == 0); + + } while ((nid = (nid + 1) % MAX_NUMNODES) != starting_nid); + + return 0; +} +EXPORT_SYMBOL(uncached_alloc_page); + + +/* + * uncached_free_page + * + * @uc_addr: uncached address of first page to free + * @n_pages: number of contiguous pages to free + * + * Free the specified number of uncached pages. + */ +void uncached_free_page(unsigned long uc_addr, int n_pages) +{ + int nid = paddr_to_nid(uc_addr - __IA64_UNCACHED_OFFSET); + struct gen_pool *pool = uncached_pools[nid].pool; + + if (unlikely(pool == NULL)) + return; + + if ((uc_addr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET) + panic("uncached_free_page invalid address %lx\n", uc_addr); + + gen_pool_free(pool, uc_addr, n_pages * PAGE_SIZE); +} +EXPORT_SYMBOL(uncached_free_page); + + +/* + * uncached_build_memmap, + * + * @uc_start: uncached starting address of a chunk of uncached memory + * @uc_end: uncached ending address of a chunk of uncached memory + * @arg: ignored, (NULL argument passed in on call to efi_memmap_walk_uc()) + * + * Called at boot time to build a map of pages that can be used for + * memory special operations. 
+ */ +static int __init uncached_build_memmap(u64 uc_start, u64 uc_end, void *arg) +{ + int nid = paddr_to_nid(uc_start - __IA64_UNCACHED_OFFSET); + struct gen_pool *pool = uncached_pools[nid].pool; + size_t size = uc_end - uc_start; + + touch_softlockup_watchdog(); + + if (pool != NULL) { + memset((char *)uc_start, 0, size); + (void) gen_pool_add(pool, uc_start, size, nid); + } + return 0; +} + + +static int __init uncached_init(void) +{ + int nid; + + for_each_node_state(nid, N_ONLINE) { + uncached_pools[nid].pool = gen_pool_create(PAGE_SHIFT, nid); + mutex_init(&uncached_pools[nid].add_chunk_mutex); + } + + efi_memmap_walk_uc(uncached_build_memmap, NULL); + return 0; +} + +__initcall(uncached_init); diff --git a/kernel/arch/ia64/kernel/unwind.c b/kernel/arch/ia64/kernel/unwind.c new file mode 100644 index 000000000..8f6619599 --- /dev/null +++ b/kernel/arch/ia64/kernel/unwind.c @@ -0,0 +1,2319 @@ +/* + * Copyright (C) 1999-2004 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 2003 Fenghua Yu + * - Change pt_regs_off() to make it less dependent on pt_regs structure. + */ +/* + * This file implements call frame unwind support for the Linux + * kernel. Parsing and processing the unwind information is + * time-consuming, so this implementation translates the unwind + * descriptors into unwind scripts. These scripts are very simple + * (basically a sequence of assignments) and efficient to execute. + * They are cached for later re-use. Each script is specific for a + * given instruction pointer address and the set of predicate values + * that the script depends on (most unwind descriptors are + * unconditional and scripts often do not depend on predicates at + * all). This code is based on the unwind conventions described in + * the "IA-64 Software Conventions and Runtime Architecture" manual. 
/*
 * Debug output helpers.
 *
 * UNW_DEBUG_ON(n) is true when the debug level is at least n.
 * UNW_DPRINT(n, fmt, ...) prints through printk() when UNW_DEBUG_ON(n).
 *
 * Both macros are fully parenthesized / wrapped in do { } while (0) so they
 * behave like an ordinary expression and an ordinary statement: safe under
 * '!', inside larger conditions, and in an unbraced if/else arm.
 */
#ifdef UNW_DEBUG
  static unsigned int unw_debug_level = UNW_DEBUG;
#  define UNW_DEBUG_ON(n)	(unw_debug_level >= (n))
   /* Do not code a printk level, not all debug lines end in newline */
#  define UNW_DPRINT(n, ...)				\
	do {						\
		if (UNW_DEBUG_ON(n))			\
			printk(__VA_ARGS__);		\
	} while (0)
   /* debug builds compile every helper out of line */
#  undef inline
#  define inline
#else /* !UNW_DEBUG */
#  define UNW_DEBUG_ON(n)	0
#  define UNW_DPRINT(n, ...)	do { } while (0)
#endif /* UNW_DEBUG */
+#endif + +#define alloc_reg_state() kmalloc(sizeof(struct unw_reg_state), GFP_ATOMIC) +#define free_reg_state(usr) kfree(usr) +#define alloc_labeled_state() kmalloc(sizeof(struct unw_labeled_state), GFP_ATOMIC) +#define free_labeled_state(usr) kfree(usr) + +typedef unsigned long unw_word; +typedef unsigned char unw_hash_index_t; + +static struct { + spinlock_t lock; /* spinlock for unwind data */ + + /* list of unwind tables (one per load-module) */ + struct unw_table *tables; + + unsigned long r0; /* constant 0 for r0 */ + + /* table of registers that prologues can save (and order in which they're saved): */ + const unsigned char save_order[8]; + + /* maps a preserved register index (preg_index) to corresponding switch_stack offset: */ + unsigned short sw_off[sizeof(struct unw_frame_info) / 8]; + + unsigned short lru_head; /* index of lead-recently used script */ + unsigned short lru_tail; /* index of most-recently used script */ + + /* index into unw_frame_info for preserved register i */ + unsigned short preg_index[UNW_NUM_REGS]; + + short pt_regs_offsets[32]; + + /* unwind table for the kernel: */ + struct unw_table kernel_table; + + /* unwind table describing the gate page (kernel code that is mapped into user space): */ + size_t gate_table_size; + unsigned long *gate_table; + + /* hash table that maps instruction pointer to script index: */ + unsigned short hash[UNW_HASH_SIZE]; + + /* script cache: */ + struct unw_script cache[UNW_CACHE_SIZE]; + +# ifdef UNW_DEBUG + const char *preg_name[UNW_NUM_REGS]; +# endif +# if UNW_STATS + struct { + struct { + int lookups; + int hinted_hits; + int normal_hits; + int collision_chain_traversals; + } cache; + struct { + unsigned long build_time; + unsigned long run_time; + unsigned long parse_time; + int builds; + int news; + int collisions; + int runs; + } script; + struct { + unsigned long init_time; + unsigned long unwind_time; + int inits; + int unwinds; + } api; + } stat; +# endif +} unw = { + .tables = 
&unw.kernel_table, + .lock = __SPIN_LOCK_UNLOCKED(unw.lock), + .save_order = { + UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR, + UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR + }, + .preg_index = { + offsetof(struct unw_frame_info, pri_unat_loc)/8, /* PRI_UNAT_GR */ + offsetof(struct unw_frame_info, pri_unat_loc)/8, /* PRI_UNAT_MEM */ + offsetof(struct unw_frame_info, bsp_loc)/8, + offsetof(struct unw_frame_info, bspstore_loc)/8, + offsetof(struct unw_frame_info, pfs_loc)/8, + offsetof(struct unw_frame_info, rnat_loc)/8, + offsetof(struct unw_frame_info, psp)/8, + offsetof(struct unw_frame_info, rp_loc)/8, + offsetof(struct unw_frame_info, r4)/8, + offsetof(struct unw_frame_info, r5)/8, + offsetof(struct unw_frame_info, r6)/8, + offsetof(struct unw_frame_info, r7)/8, + offsetof(struct unw_frame_info, unat_loc)/8, + offsetof(struct unw_frame_info, pr_loc)/8, + offsetof(struct unw_frame_info, lc_loc)/8, + offsetof(struct unw_frame_info, fpsr_loc)/8, + offsetof(struct unw_frame_info, b1_loc)/8, + offsetof(struct unw_frame_info, b2_loc)/8, + offsetof(struct unw_frame_info, b3_loc)/8, + offsetof(struct unw_frame_info, b4_loc)/8, + offsetof(struct unw_frame_info, b5_loc)/8, + offsetof(struct unw_frame_info, f2_loc)/8, + offsetof(struct unw_frame_info, f3_loc)/8, + offsetof(struct unw_frame_info, f4_loc)/8, + offsetof(struct unw_frame_info, f5_loc)/8, + offsetof(struct unw_frame_info, fr_loc[16 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[17 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[18 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[19 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[20 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[21 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[22 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[23 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[24 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[25 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[26 - 16])/8, + offsetof(struct 
unw_frame_info, fr_loc[27 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[28 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[29 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[30 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[31 - 16])/8, + }, + .pt_regs_offsets = { + [0] = -1, + offsetof(struct pt_regs, r1), + offsetof(struct pt_regs, r2), + offsetof(struct pt_regs, r3), + [4] = -1, [5] = -1, [6] = -1, [7] = -1, + offsetof(struct pt_regs, r8), + offsetof(struct pt_regs, r9), + offsetof(struct pt_regs, r10), + offsetof(struct pt_regs, r11), + offsetof(struct pt_regs, r12), + offsetof(struct pt_regs, r13), + offsetof(struct pt_regs, r14), + offsetof(struct pt_regs, r15), + offsetof(struct pt_regs, r16), + offsetof(struct pt_regs, r17), + offsetof(struct pt_regs, r18), + offsetof(struct pt_regs, r19), + offsetof(struct pt_regs, r20), + offsetof(struct pt_regs, r21), + offsetof(struct pt_regs, r22), + offsetof(struct pt_regs, r23), + offsetof(struct pt_regs, r24), + offsetof(struct pt_regs, r25), + offsetof(struct pt_regs, r26), + offsetof(struct pt_regs, r27), + offsetof(struct pt_regs, r28), + offsetof(struct pt_regs, r29), + offsetof(struct pt_regs, r30), + offsetof(struct pt_regs, r31), + }, + .hash = { [0 ... UNW_HASH_SIZE - 1] = -1 }, +#ifdef UNW_DEBUG + .preg_name = { + "pri_unat_gr", "pri_unat_mem", "bsp", "bspstore", "ar.pfs", "ar.rnat", "psp", "rp", + "r4", "r5", "r6", "r7", + "ar.unat", "pr", "ar.lc", "ar.fpsr", + "b1", "b2", "b3", "b4", "b5", + "f2", "f3", "f4", "f5", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" + } +#endif +}; + +static inline int +read_only (void *addr) +{ + return (unsigned long) ((char *) addr - (char *) &unw.r0) < sizeof(unw.r0); +} + +/* + * Returns offset of rREG in struct pt_regs. 
+ */ +static inline unsigned long +pt_regs_off (unsigned long reg) +{ + short off = -1; + + if (reg < ARRAY_SIZE(unw.pt_regs_offsets)) + off = unw.pt_regs_offsets[reg]; + + if (off < 0) { + UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n", __func__, reg); + off = 0; + } + return (unsigned long) off; +} + +static inline struct pt_regs * +get_scratch_regs (struct unw_frame_info *info) +{ + if (!info->pt) { + /* This should not happen with valid unwind info. */ + UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting info->pt\n", __func__); + if (info->flags & UNW_FLAG_INTERRUPT_FRAME) + info->pt = (unsigned long) ((struct pt_regs *) info->psp - 1); + else + info->pt = info->sp - 16; + } + UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __func__, info->sp, info->pt); + return (struct pt_regs *) info->pt; +} + +/* Unwind accessors. */ + +int +unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char *nat, int write) +{ + unsigned long *addr, *nat_addr, nat_mask = 0, dummy_nat; + struct unw_ireg *ireg; + struct pt_regs *pt; + + if ((unsigned) regnum - 1 >= 127) { + if (regnum == 0 && !write) { + *val = 0; /* read r0 always returns 0 */ + *nat = 0; + return 0; + } + UNW_DPRINT(0, "unwind.%s: trying to access non-existent r%u\n", + __func__, regnum); + return -1; + } + + if (regnum < 32) { + if (regnum >= 4 && regnum <= 7) { + /* access a preserved register */ + ireg = &info->r4 + (regnum - 4); + addr = ireg->loc; + if (addr) { + nat_addr = addr + ireg->nat.off; + switch (ireg->nat.type) { + case UNW_NAT_VAL: + /* simulate getf.sig/setf.sig */ + if (write) { + if (*nat) { + /* write NaTVal and be done with it */ + addr[0] = 0; + addr[1] = 0x1fffe; + return 0; + } + addr[1] = 0x1003e; + } else { + if (addr[0] == 0 && addr[1] == 0x1ffe) { + /* return NaT and be done with it */ + *val = 0; + *nat = 1; + return 0; + } + } + /* fall through */ + case UNW_NAT_NONE: + dummy_nat = 0; + nat_addr = &dummy_nat; + break; + + case UNW_NAT_MEMSTK: + nat_mask = 
(1UL << ((long) addr & 0x1f8)/8); + break; + + case UNW_NAT_REGSTK: + nat_addr = ia64_rse_rnat_addr(addr); + if ((unsigned long) addr < info->regstk.limit + || (unsigned long) addr >= info->regstk.top) + { + UNW_DPRINT(0, "unwind.%s: %p outside of regstk " + "[0x%lx-0x%lx)\n", + __func__, (void *) addr, + info->regstk.limit, + info->regstk.top); + return -1; + } + if ((unsigned long) nat_addr >= info->regstk.top) + nat_addr = &info->sw->ar_rnat; + nat_mask = (1UL << ia64_rse_slot_num(addr)); + break; + } + } else { + addr = &info->sw->r4 + (regnum - 4); + nat_addr = &info->sw->ar_unat; + nat_mask = (1UL << ((long) addr & 0x1f8)/8); + } + } else { + /* access a scratch register */ + pt = get_scratch_regs(info); + addr = (unsigned long *) ((unsigned long)pt + pt_regs_off(regnum)); + if (info->pri_unat_loc) + nat_addr = info->pri_unat_loc; + else + nat_addr = &info->sw->caller_unat; + nat_mask = (1UL << ((long) addr & 0x1f8)/8); + } + } else { + /* access a stacked register */ + addr = ia64_rse_skip_regs((unsigned long *) info->bsp, regnum - 32); + nat_addr = ia64_rse_rnat_addr(addr); + if ((unsigned long) addr < info->regstk.limit + || (unsigned long) addr >= info->regstk.top) + { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to access register outside " + "of rbs\n", __func__); + return -1; + } + if ((unsigned long) nat_addr >= info->regstk.top) + nat_addr = &info->sw->ar_rnat; + nat_mask = (1UL << ia64_rse_slot_num(addr)); + } + + if (write) { + if (read_only(addr)) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __func__); + } else { + *addr = *val; + if (*nat) + *nat_addr |= nat_mask; + else + *nat_addr &= ~nat_mask; + } + } else { + if ((*nat_addr & nat_mask) == 0) { + *val = *addr; + *nat = 0; + } else { + *val = 0; /* if register is a NaT, *addr may contain kernel data! 
*/ + *nat = 1; + } + } + return 0; +} +EXPORT_SYMBOL(unw_access_gr); + +int +unw_access_br (struct unw_frame_info *info, int regnum, unsigned long *val, int write) +{ + unsigned long *addr; + struct pt_regs *pt; + + switch (regnum) { + /* scratch: */ + case 0: pt = get_scratch_regs(info); addr = &pt->b0; break; + case 6: pt = get_scratch_regs(info); addr = &pt->b6; break; + case 7: pt = get_scratch_regs(info); addr = &pt->b7; break; + + /* preserved: */ + case 1: case 2: case 3: case 4: case 5: + addr = *(&info->b1_loc + (regnum - 1)); + if (!addr) + addr = &info->sw->b1 + (regnum - 1); + break; + + default: + UNW_DPRINT(0, "unwind.%s: trying to access non-existent b%u\n", + __func__, regnum); + return -1; + } + if (write) + if (read_only(addr)) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __func__); + } else + *addr = *val; + else + *val = *addr; + return 0; +} +EXPORT_SYMBOL(unw_access_br); + +int +unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg *val, int write) +{ + struct ia64_fpreg *addr = NULL; + struct pt_regs *pt; + + if ((unsigned) (regnum - 2) >= 126) { + UNW_DPRINT(0, "unwind.%s: trying to access non-existent f%u\n", + __func__, regnum); + return -1; + } + + if (regnum <= 5) { + addr = *(&info->f2_loc + (regnum - 2)); + if (!addr) + addr = &info->sw->f2 + (regnum - 2); + } else if (regnum <= 15) { + if (regnum <= 11) { + pt = get_scratch_regs(info); + addr = &pt->f6 + (regnum - 6); + } + else + addr = &info->sw->f12 + (regnum - 12); + } else if (regnum <= 31) { + addr = info->fr_loc[regnum - 16]; + if (!addr) + addr = &info->sw->f16 + (regnum - 16); + } else { + struct task_struct *t = info->task; + + if (write) + ia64_sync_fph(t); + else + ia64_flush_fph(t); + addr = t->thread.fph + (regnum - 32); + } + + if (write) + if (read_only(addr)) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __func__); + } else + *addr = *val; + else + *val = *addr; + return 0; +} 
+EXPORT_SYMBOL(unw_access_fr); + +int +unw_access_ar (struct unw_frame_info *info, int regnum, unsigned long *val, int write) +{ + unsigned long *addr; + struct pt_regs *pt; + + switch (regnum) { + case UNW_AR_BSP: + addr = info->bsp_loc; + if (!addr) + addr = &info->sw->ar_bspstore; + break; + + case UNW_AR_BSPSTORE: + addr = info->bspstore_loc; + if (!addr) + addr = &info->sw->ar_bspstore; + break; + + case UNW_AR_PFS: + addr = info->pfs_loc; + if (!addr) + addr = &info->sw->ar_pfs; + break; + + case UNW_AR_RNAT: + addr = info->rnat_loc; + if (!addr) + addr = &info->sw->ar_rnat; + break; + + case UNW_AR_UNAT: + addr = info->unat_loc; + if (!addr) + addr = &info->sw->caller_unat; + break; + + case UNW_AR_LC: + addr = info->lc_loc; + if (!addr) + addr = &info->sw->ar_lc; + break; + + case UNW_AR_EC: + if (!info->cfm_loc) + return -1; + if (write) + *info->cfm_loc = + (*info->cfm_loc & ~(0x3fUL << 52)) | ((*val & 0x3f) << 52); + else + *val = (*info->cfm_loc >> 52) & 0x3f; + return 0; + + case UNW_AR_FPSR: + addr = info->fpsr_loc; + if (!addr) + addr = &info->sw->ar_fpsr; + break; + + case UNW_AR_RSC: + pt = get_scratch_regs(info); + addr = &pt->ar_rsc; + break; + + case UNW_AR_CCV: + pt = get_scratch_regs(info); + addr = &pt->ar_ccv; + break; + + case UNW_AR_CSD: + pt = get_scratch_regs(info); + addr = &pt->ar_csd; + break; + + case UNW_AR_SSD: + pt = get_scratch_regs(info); + addr = &pt->ar_ssd; + break; + + default: + UNW_DPRINT(0, "unwind.%s: trying to access non-existent ar%u\n", + __func__, regnum); + return -1; + } + + if (write) { + if (read_only(addr)) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __func__); + } else + *addr = *val; + } else + *val = *addr; + return 0; +} +EXPORT_SYMBOL(unw_access_ar); + +int +unw_access_pr (struct unw_frame_info *info, unsigned long *val, int write) +{ + unsigned long *addr; + + addr = info->pr_loc; + if (!addr) + addr = &info->sw->pr; + + if (write) { + if (read_only(addr)) { + 
UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __func__); + } else + *addr = *val; + } else + *val = *addr; + return 0; +} +EXPORT_SYMBOL(unw_access_pr); + + +/* Routines to manipulate the state stack. */ + +static inline void +push (struct unw_state_record *sr) +{ + struct unw_reg_state *rs; + + rs = alloc_reg_state(); + if (!rs) { + printk(KERN_ERR "unwind: cannot stack reg state!\n"); + return; + } + memcpy(rs, &sr->curr, sizeof(*rs)); + sr->curr.next = rs; +} + +static void +pop (struct unw_state_record *sr) +{ + struct unw_reg_state *rs = sr->curr.next; + + if (!rs) { + printk(KERN_ERR "unwind: stack underflow!\n"); + return; + } + memcpy(&sr->curr, rs, sizeof(*rs)); + free_reg_state(rs); +} + +/* Make a copy of the state stack. Non-recursive to avoid stack overflows. */ +static struct unw_reg_state * +dup_state_stack (struct unw_reg_state *rs) +{ + struct unw_reg_state *copy, *prev = NULL, *first = NULL; + + while (rs) { + copy = alloc_reg_state(); + if (!copy) { + printk(KERN_ERR "unwind.dup_state_stack: out of memory\n"); + return NULL; + } + memcpy(copy, rs, sizeof(*copy)); + if (first) + prev->next = copy; + else + first = copy; + rs = rs->next; + prev = copy; + } + return first; +} + +/* Free all stacked register states (but not RS itself). */ +static void +free_state_stack (struct unw_reg_state *rs) +{ + struct unw_reg_state *p, *next; + + for (p = rs->next; p != NULL; p = next) { + next = p->next; + free_reg_state(p); + } + rs->next = NULL; +} + +/* Unwind decoder routines */ + +static enum unw_register_index __attribute_const__ +decode_abreg (unsigned char abreg, int memory) +{ + switch (abreg) { + case 0x04 ... 0x07: return UNW_REG_R4 + (abreg - 0x04); + case 0x22 ... 0x25: return UNW_REG_F2 + (abreg - 0x22); + case 0x30 ... 0x3f: return UNW_REG_F16 + (abreg - 0x30); + case 0x41 ... 0x45: return UNW_REG_B1 + (abreg - 0x41); + case 0x60: return UNW_REG_PR; + case 0x61: return UNW_REG_PSP; + case 0x62: return memory ? 
UNW_REG_PRI_UNAT_MEM : UNW_REG_PRI_UNAT_GR; + case 0x63: return UNW_REG_RP; + case 0x64: return UNW_REG_BSP; + case 0x65: return UNW_REG_BSPSTORE; + case 0x66: return UNW_REG_RNAT; + case 0x67: return UNW_REG_UNAT; + case 0x68: return UNW_REG_FPSR; + case 0x69: return UNW_REG_PFS; + case 0x6a: return UNW_REG_LC; + default: + break; + } + UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __func__, abreg); + return UNW_REG_LC; +} + +static void +set_reg (struct unw_reg_info *reg, enum unw_where where, int when, unsigned long val) +{ + reg->val = val; + reg->where = where; + if (reg->when == UNW_WHEN_NEVER) + reg->when = when; +} + +static void +alloc_spill_area (unsigned long *offp, unsigned long regsize, + struct unw_reg_info *lo, struct unw_reg_info *hi) +{ + struct unw_reg_info *reg; + + for (reg = hi; reg >= lo; --reg) { + if (reg->where == UNW_WHERE_SPILL_HOME) { + reg->where = UNW_WHERE_PSPREL; + *offp -= regsize; + reg->val = *offp; + } + } +} + +static inline void +spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim, unw_word t) +{ + struct unw_reg_info *reg; + + for (reg = *regp; reg <= lim; ++reg) { + if (reg->where == UNW_WHERE_SPILL_HOME) { + reg->when = t; + *regp = reg + 1; + return; + } + } + UNW_DPRINT(0, "unwind.%s: excess spill!\n", __func__); +} + +static inline void +finish_prologue (struct unw_state_record *sr) +{ + struct unw_reg_info *reg; + unsigned long off; + int i; + + /* + * First, resolve implicit register save locations (see Section "11.4.2.3 Rules + * for Using Unwind Descriptors", rule 3): + */ + for (i = 0; i < (int) ARRAY_SIZE(unw.save_order); ++i) { + reg = sr->curr.reg + unw.save_order[i]; + if (reg->where == UNW_WHERE_GR_SAVE) { + reg->where = UNW_WHERE_GR; + reg->val = sr->gr_save_loc++; + } + } + + /* + * Next, compute when the fp, general, and branch registers get + * saved. This must come before alloc_spill_area() because + * we need to know which registers are spilled to their home + * locations. 
+ */ + if (sr->imask) { + unsigned char kind, mask = 0, *cp = sr->imask; + int t; + static const unsigned char limit[3] = { + UNW_REG_F31, UNW_REG_R7, UNW_REG_B5 + }; + struct unw_reg_info *(regs[3]); + + regs[0] = sr->curr.reg + UNW_REG_F2; + regs[1] = sr->curr.reg + UNW_REG_R4; + regs[2] = sr->curr.reg + UNW_REG_B1; + + for (t = 0; t < sr->region_len; ++t) { + if ((t & 3) == 0) + mask = *cp++; + kind = (mask >> 2*(3-(t & 3))) & 3; + if (kind > 0) + spill_next_when(®s[kind - 1], sr->curr.reg + limit[kind - 1], + sr->region_start + t); + } + } + /* + * Next, lay out the memory stack spill area: + */ + if (sr->any_spills) { + off = sr->spill_offset; + alloc_spill_area(&off, 16, sr->curr.reg + UNW_REG_F2, sr->curr.reg + UNW_REG_F31); + alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_B1, sr->curr.reg + UNW_REG_B5); + alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_R4, sr->curr.reg + UNW_REG_R7); + } +} + +/* + * Region header descriptors. + */ + +static void +desc_prologue (int body, unw_word rlen, unsigned char mask, unsigned char grsave, + struct unw_state_record *sr) +{ + int i, region_start; + + if (!(sr->in_body || sr->first_region)) + finish_prologue(sr); + sr->first_region = 0; + + /* check if we're done: */ + if (sr->when_target < sr->region_start + sr->region_len) { + sr->done = 1; + return; + } + + region_start = sr->region_start + sr->region_len; + + for (i = 0; i < sr->epilogue_count; ++i) + pop(sr); + sr->epilogue_count = 0; + sr->epilogue_start = UNW_WHEN_NEVER; + + sr->region_start = region_start; + sr->region_len = rlen; + sr->in_body = body; + + if (!body) { + push(sr); + + for (i = 0; i < 4; ++i) { + if (mask & 0x8) + set_reg(sr->curr.reg + unw.save_order[i], UNW_WHERE_GR, + sr->region_start + sr->region_len - 1, grsave++); + mask <<= 1; + } + sr->gr_save_loc = grsave; + sr->any_spills = 0; + sr->imask = NULL; + sr->spill_offset = 0x10; /* default to psp+16 */ + } +} + +/* + * Prologue descriptors. 
+ */ + +static inline void +desc_abi (unsigned char abi, unsigned char context, struct unw_state_record *sr) +{ + if (abi == 3 && context == 'i') { + sr->flags |= UNW_FLAG_INTERRUPT_FRAME; + UNW_DPRINT(3, "unwind.%s: interrupt frame\n", __func__); + } + else + UNW_DPRINT(0, "unwind%s: ignoring unwabi(abi=0x%x,context=0x%x)\n", + __func__, abi, context); +} + +static inline void +desc_br_gr (unsigned char brmask, unsigned char gr, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 5; ++i) { + if (brmask & 1) + set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_GR, + sr->region_start + sr->region_len - 1, gr++); + brmask >>= 1; + } +} + +static inline void +desc_br_mem (unsigned char brmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 5; ++i) { + if (brmask & 1) { + set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + brmask >>= 1; + } +} + +static inline void +desc_frgr_mem (unsigned char grmask, unw_word frmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) { + if ((grmask & 1) != 0) { + set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + grmask >>= 1; + } + for (i = 0; i < 20; ++i) { + if ((frmask & 1) != 0) { + int base = (i < 4) ? 
UNW_REG_F2 : UNW_REG_F16 - 4; + set_reg(sr->curr.reg + base + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + frmask >>= 1; + } +} + +static inline void +desc_fr_mem (unsigned char frmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) { + if ((frmask & 1) != 0) { + set_reg(sr->curr.reg + UNW_REG_F2 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + frmask >>= 1; + } +} + +static inline void +desc_gr_gr (unsigned char grmask, unsigned char gr, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) { + if ((grmask & 1) != 0) + set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_GR, + sr->region_start + sr->region_len - 1, gr++); + grmask >>= 1; + } +} + +static inline void +desc_gr_mem (unsigned char grmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) { + if ((grmask & 1) != 0) { + set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + grmask >>= 1; + } +} + +static inline void +desc_mem_stack_f (unw_word t, unw_word size, struct unw_state_record *sr) +{ + set_reg(sr->curr.reg + UNW_REG_PSP, UNW_WHERE_NONE, + sr->region_start + min_t(int, t, sr->region_len - 1), 16*size); +} + +static inline void +desc_mem_stack_v (unw_word t, struct unw_state_record *sr) +{ + sr->curr.reg[UNW_REG_PSP].when = sr->region_start + min_t(int, t, sr->region_len - 1); +} + +static inline void +desc_reg_gr (unsigned char reg, unsigned char dst, struct unw_state_record *sr) +{ + set_reg(sr->curr.reg + reg, UNW_WHERE_GR, sr->region_start + sr->region_len - 1, dst); +} + +static inline void +desc_reg_psprel (unsigned char reg, unw_word pspoff, struct unw_state_record *sr) +{ + set_reg(sr->curr.reg + reg, UNW_WHERE_PSPREL, sr->region_start + sr->region_len - 1, + 0x10 - 4*pspoff); +} + +static inline void +desc_reg_sprel (unsigned char reg, unw_word spoff, 
struct unw_state_record *sr) +{ + set_reg(sr->curr.reg + reg, UNW_WHERE_SPREL, sr->region_start + sr->region_len - 1, + 4*spoff); +} + +static inline void +desc_rp_br (unsigned char dst, struct unw_state_record *sr) +{ + sr->return_link_reg = dst; +} + +static inline void +desc_reg_when (unsigned char regnum, unw_word t, struct unw_state_record *sr) +{ + struct unw_reg_info *reg = sr->curr.reg + regnum; + + if (reg->where == UNW_WHERE_NONE) + reg->where = UNW_WHERE_GR_SAVE; + reg->when = sr->region_start + min_t(int, t, sr->region_len - 1); +} + +static inline void +desc_spill_base (unw_word pspoff, struct unw_state_record *sr) +{ + sr->spill_offset = 0x10 - 4*pspoff; +} + +static inline unsigned char * +desc_spill_mask (unsigned char *imaskp, struct unw_state_record *sr) +{ + sr->imask = imaskp; + return imaskp + (2*sr->region_len + 7)/8; +} + +/* + * Body descriptors. + */ +static inline void +desc_epilogue (unw_word t, unw_word ecount, struct unw_state_record *sr) +{ + sr->epilogue_start = sr->region_start + sr->region_len - 1 - t; + sr->epilogue_count = ecount + 1; +} + +static inline void +desc_copy_state (unw_word label, struct unw_state_record *sr) +{ + struct unw_labeled_state *ls; + + for (ls = sr->labeled_states; ls; ls = ls->next) { + if (ls->label == label) { + free_state_stack(&sr->curr); + memcpy(&sr->curr, &ls->saved_state, sizeof(sr->curr)); + sr->curr.next = dup_state_stack(ls->saved_state.next); + return; + } + } + printk(KERN_ERR "unwind: failed to find state labeled 0x%lx\n", label); +} + +static inline void +desc_label_state (unw_word label, struct unw_state_record *sr) +{ + struct unw_labeled_state *ls; + + ls = alloc_labeled_state(); + if (!ls) { + printk(KERN_ERR "unwind.desc_label_state(): out of memory\n"); + return; + } + ls->label = label; + memcpy(&ls->saved_state, &sr->curr, sizeof(ls->saved_state)); + ls->saved_state.next = dup_state_stack(sr->curr.next); + + /* insert into list of labeled states: */ + ls->next = sr->labeled_states; + 
sr->labeled_states = ls; +} + +/* + * General descriptors. + */ + +static inline int +desc_is_active (unsigned char qp, unw_word t, struct unw_state_record *sr) +{ + if (sr->when_target <= sr->region_start + min_t(int, t, sr->region_len - 1)) + return 0; + if (qp > 0) { + if ((sr->pr_val & (1UL << qp)) == 0) + return 0; + sr->pr_mask |= (1UL << qp); + } + return 1; +} + +static inline void +desc_restore_p (unsigned char qp, unw_word t, unsigned char abreg, struct unw_state_record *sr) +{ + struct unw_reg_info *r; + + if (!desc_is_active(qp, t, sr)) + return; + + r = sr->curr.reg + decode_abreg(abreg, 0); + r->where = UNW_WHERE_NONE; + r->when = UNW_WHEN_NEVER; + r->val = 0; +} + +static inline void +desc_spill_reg_p (unsigned char qp, unw_word t, unsigned char abreg, unsigned char x, + unsigned char ytreg, struct unw_state_record *sr) +{ + enum unw_where where = UNW_WHERE_GR; + struct unw_reg_info *r; + + if (!desc_is_active(qp, t, sr)) + return; + + if (x) + where = UNW_WHERE_BR; + else if (ytreg & 0x80) + where = UNW_WHERE_FR; + + r = sr->curr.reg + decode_abreg(abreg, 0); + r->where = where; + r->when = sr->region_start + min_t(int, t, sr->region_len - 1); + r->val = (ytreg & 0x7f); +} + +static inline void +desc_spill_psprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word pspoff, + struct unw_state_record *sr) +{ + struct unw_reg_info *r; + + if (!desc_is_active(qp, t, sr)) + return; + + r = sr->curr.reg + decode_abreg(abreg, 1); + r->where = UNW_WHERE_PSPREL; + r->when = sr->region_start + min_t(int, t, sr->region_len - 1); + r->val = 0x10 - 4*pspoff; +} + +static inline void +desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word spoff, + struct unw_state_record *sr) +{ + struct unw_reg_info *r; + + if (!desc_is_active(qp, t, sr)) + return; + + r = sr->curr.reg + decode_abreg(abreg, 1); + r->where = UNW_WHERE_SPREL; + r->when = sr->region_start + min_t(int, t, sr->region_len - 1); + r->val = 4*spoff; +} + +#define 
UNW_DEC_BAD_CODE(code) printk(KERN_ERR "unwind: unknown code 0x%02x\n", \ + code); + +/* + * region headers: + */ +#define UNW_DEC_PROLOGUE_GR(fmt,r,m,gr,arg) desc_prologue(0,r,m,gr,arg) +#define UNW_DEC_PROLOGUE(fmt,b,r,arg) desc_prologue(b,r,0,32,arg) +/* + * prologue descriptors: + */ +#define UNW_DEC_ABI(fmt,a,c,arg) desc_abi(a,c,arg) +#define UNW_DEC_BR_GR(fmt,b,g,arg) desc_br_gr(b,g,arg) +#define UNW_DEC_BR_MEM(fmt,b,arg) desc_br_mem(b,arg) +#define UNW_DEC_FRGR_MEM(fmt,g,f,arg) desc_frgr_mem(g,f,arg) +#define UNW_DEC_FR_MEM(fmt,f,arg) desc_fr_mem(f,arg) +#define UNW_DEC_GR_GR(fmt,m,g,arg) desc_gr_gr(m,g,arg) +#define UNW_DEC_GR_MEM(fmt,m,arg) desc_gr_mem(m,arg) +#define UNW_DEC_MEM_STACK_F(fmt,t,s,arg) desc_mem_stack_f(t,s,arg) +#define UNW_DEC_MEM_STACK_V(fmt,t,arg) desc_mem_stack_v(t,arg) +#define UNW_DEC_REG_GR(fmt,r,d,arg) desc_reg_gr(r,d,arg) +#define UNW_DEC_REG_PSPREL(fmt,r,o,arg) desc_reg_psprel(r,o,arg) +#define UNW_DEC_REG_SPREL(fmt,r,o,arg) desc_reg_sprel(r,o,arg) +#define UNW_DEC_REG_WHEN(fmt,r,t,arg) desc_reg_when(r,t,arg) +#define UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_GR,t,arg) +#define UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_MEM,t,arg) +#define UNW_DEC_PRIUNAT_GR(fmt,r,arg) desc_reg_gr(UNW_REG_PRI_UNAT_GR,r,arg) +#define UNW_DEC_PRIUNAT_PSPREL(fmt,o,arg) desc_reg_psprel(UNW_REG_PRI_UNAT_MEM,o,arg) +#define UNW_DEC_PRIUNAT_SPREL(fmt,o,arg) desc_reg_sprel(UNW_REG_PRI_UNAT_MEM,o,arg) +#define UNW_DEC_RP_BR(fmt,d,arg) desc_rp_br(d,arg) +#define UNW_DEC_SPILL_BASE(fmt,o,arg) desc_spill_base(o,arg) +#define UNW_DEC_SPILL_MASK(fmt,m,arg) (m = desc_spill_mask(m,arg)) +/* + * body descriptors: + */ +#define UNW_DEC_EPILOGUE(fmt,t,c,arg) desc_epilogue(t,c,arg) +#define UNW_DEC_COPY_STATE(fmt,l,arg) desc_copy_state(l,arg) +#define UNW_DEC_LABEL_STATE(fmt,l,arg) desc_label_state(l,arg) +/* + * general unwind descriptors: + */ +#define UNW_DEC_SPILL_REG_P(f,p,t,a,x,y,arg) 
desc_spill_reg_p(p,t,a,x,y,arg) +#define UNW_DEC_SPILL_REG(f,t,a,x,y,arg) desc_spill_reg_p(0,t,a,x,y,arg) +#define UNW_DEC_SPILL_PSPREL_P(f,p,t,a,o,arg) desc_spill_psprel_p(p,t,a,o,arg) +#define UNW_DEC_SPILL_PSPREL(f,t,a,o,arg) desc_spill_psprel_p(0,t,a,o,arg) +#define UNW_DEC_SPILL_SPREL_P(f,p,t,a,o,arg) desc_spill_sprel_p(p,t,a,o,arg) +#define UNW_DEC_SPILL_SPREL(f,t,a,o,arg) desc_spill_sprel_p(0,t,a,o,arg) +#define UNW_DEC_RESTORE_P(f,p,t,a,arg) desc_restore_p(p,t,a,arg) +#define UNW_DEC_RESTORE(f,t,a,arg) desc_restore_p(0,t,a,arg) + +#include "unwind_decoder.c" + + +/* Unwind scripts. */ + +static inline unw_hash_index_t +hash (unsigned long ip) +{ + /* magic number = ((sqrt(5)-1)/2)*2^64 */ + static const unsigned long hashmagic = 0x9e3779b97f4a7c16UL; + + return (ip >> 4) * hashmagic >> (64 - UNW_LOG_HASH_SIZE); +} + +static inline long +cache_match (struct unw_script *script, unsigned long ip, unsigned long pr) +{ + read_lock(&script->lock); + if (ip == script->ip && ((pr ^ script->pr_val) & script->pr_mask) == 0) + /* keep the read lock... 
*/ + return 1; + read_unlock(&script->lock); + return 0; +} + +static inline struct unw_script * +script_lookup (struct unw_frame_info *info) +{ + struct unw_script *script = unw.cache + info->hint; + unsigned short index; + unsigned long ip, pr; + + if (UNW_DEBUG_ON(0)) + return NULL; /* Always regenerate scripts in debug mode */ + + STAT(++unw.stat.cache.lookups); + + ip = info->ip; + pr = info->pr; + + if (cache_match(script, ip, pr)) { + STAT(++unw.stat.cache.hinted_hits); + return script; + } + + index = unw.hash[hash(ip)]; + if (index >= UNW_CACHE_SIZE) + return NULL; + + script = unw.cache + index; + while (1) { + if (cache_match(script, ip, pr)) { + /* update hint; no locking required as single-word writes are atomic */ + STAT(++unw.stat.cache.normal_hits); + unw.cache[info->prev_script].hint = script - unw.cache; + return script; + } + if (script->coll_chain >= UNW_HASH_SIZE) + return NULL; + script = unw.cache + script->coll_chain; + STAT(++unw.stat.cache.collision_chain_traversals); + } +} + +/* + * On returning, a write lock for the SCRIPT is still being held. + */ +static inline struct unw_script * +script_new (unsigned long ip) +{ + struct unw_script *script, *prev, *tmp; + unw_hash_index_t index; + unsigned short head; + + STAT(++unw.stat.script.news); + + /* + * Can't (easily) use cmpxchg() here because of ABA problem + * that is intrinsic in cmpxchg()... + */ + head = unw.lru_head; + script = unw.cache + head; + unw.lru_head = script->lru_chain; + + /* + * We'd deadlock here if we interrupted a thread that is holding a read lock on + * script->lock. Thus, if the write_trylock() fails, we simply bail out. The + * alternative would be to disable interrupts whenever we hold a read-lock, but + * that seems silly. 
+ */ + if (!write_trylock(&script->lock)) + return NULL; + + /* re-insert script at the tail of the LRU chain: */ + unw.cache[unw.lru_tail].lru_chain = head; + unw.lru_tail = head; + + /* remove the old script from the hash table (if it's there): */ + if (script->ip) { + index = hash(script->ip); + tmp = unw.cache + unw.hash[index]; + prev = NULL; + while (1) { + if (tmp == script) { + if (prev) + prev->coll_chain = tmp->coll_chain; + else + unw.hash[index] = tmp->coll_chain; + break; + } else + prev = tmp; + if (tmp->coll_chain >= UNW_CACHE_SIZE) + /* old script wasn't in the hash-table */ + break; + tmp = unw.cache + tmp->coll_chain; + } + } + + /* enter new script in the hash table */ + index = hash(ip); + script->coll_chain = unw.hash[index]; + unw.hash[index] = script - unw.cache; + + script->ip = ip; /* set new IP while we're holding the locks */ + + STAT(if (script->coll_chain < UNW_CACHE_SIZE) ++unw.stat.script.collisions); + + script->flags = 0; + script->hint = 0; + script->count = 0; + return script; +} + +static void +script_finalize (struct unw_script *script, struct unw_state_record *sr) +{ + script->pr_mask = sr->pr_mask; + script->pr_val = sr->pr_val; + /* + * We could down-grade our write-lock on script->lock here but + * the rwlock API doesn't offer atomic lock downgrading, so + * we'll just keep the write-lock and release it later when + * we're done using the script. 
+ */ +} + +static inline void +script_emit (struct unw_script *script, struct unw_insn insn) +{ + if (script->count >= UNW_MAX_SCRIPT_LEN) { + UNW_DPRINT(0, "unwind.%s: script exceeds maximum size of %u instructions!\n", + __func__, UNW_MAX_SCRIPT_LEN); + return; + } + script->insn[script->count++] = insn; +} + +static inline void +emit_nat_info (struct unw_state_record *sr, int i, struct unw_script *script) +{ + struct unw_reg_info *r = sr->curr.reg + i; + enum unw_insn_opcode opc; + struct unw_insn insn; + unsigned long val = 0; + + switch (r->where) { + case UNW_WHERE_GR: + if (r->val >= 32) { + /* register got spilled to a stacked register */ + opc = UNW_INSN_SETNAT_TYPE; + val = UNW_NAT_REGSTK; + } else + /* register got spilled to a scratch register */ + opc = UNW_INSN_SETNAT_MEMSTK; + break; + + case UNW_WHERE_FR: + opc = UNW_INSN_SETNAT_TYPE; + val = UNW_NAT_VAL; + break; + + case UNW_WHERE_BR: + opc = UNW_INSN_SETNAT_TYPE; + val = UNW_NAT_NONE; + break; + + case UNW_WHERE_PSPREL: + case UNW_WHERE_SPREL: + opc = UNW_INSN_SETNAT_MEMSTK; + break; + + default: + UNW_DPRINT(0, "unwind.%s: don't know how to emit nat info for where = %u\n", + __func__, r->where); + return; + } + insn.opc = opc; + insn.dst = unw.preg_index[i]; + insn.val = val; + script_emit(script, insn); +} + +static void +compile_reg (struct unw_state_record *sr, int i, struct unw_script *script) +{ + struct unw_reg_info *r = sr->curr.reg + i; + enum unw_insn_opcode opc; + unsigned long val, rval; + struct unw_insn insn; + long need_nat_info; + + if (r->where == UNW_WHERE_NONE || r->when >= sr->when_target) + return; + + opc = UNW_INSN_MOVE; + val = rval = r->val; + need_nat_info = (i >= UNW_REG_R4 && i <= UNW_REG_R7); + + switch (r->where) { + case UNW_WHERE_GR: + if (rval >= 32) { + opc = UNW_INSN_MOVE_STACKED; + val = rval - 32; + } else if (rval >= 4 && rval <= 7) { + if (need_nat_info) { + opc = UNW_INSN_MOVE2; + need_nat_info = 0; + } + val = unw.preg_index[UNW_REG_R4 + (rval - 4)]; + } 
else if (rval == 0) { + opc = UNW_INSN_MOVE_CONST; + val = 0; + } else { + /* register got spilled to a scratch register */ + opc = UNW_INSN_MOVE_SCRATCH; + val = pt_regs_off(rval); + } + break; + + case UNW_WHERE_FR: + if (rval <= 5) + val = unw.preg_index[UNW_REG_F2 + (rval - 2)]; + else if (rval >= 16 && rval <= 31) + val = unw.preg_index[UNW_REG_F16 + (rval - 16)]; + else { + opc = UNW_INSN_MOVE_SCRATCH; + if (rval <= 11) + val = offsetof(struct pt_regs, f6) + 16*(rval - 6); + else + UNW_DPRINT(0, "unwind.%s: kernel may not touch f%lu\n", + __func__, rval); + } + break; + + case UNW_WHERE_BR: + if (rval >= 1 && rval <= 5) + val = unw.preg_index[UNW_REG_B1 + (rval - 1)]; + else { + opc = UNW_INSN_MOVE_SCRATCH; + if (rval == 0) + val = offsetof(struct pt_regs, b0); + else if (rval == 6) + val = offsetof(struct pt_regs, b6); + else + val = offsetof(struct pt_regs, b7); + } + break; + + case UNW_WHERE_SPREL: + opc = UNW_INSN_ADD_SP; + break; + + case UNW_WHERE_PSPREL: + opc = UNW_INSN_ADD_PSP; + break; + + default: + UNW_DPRINT(0, "unwind%s: register %u has unexpected `where' value of %u\n", + __func__, i, r->where); + break; + } + insn.opc = opc; + insn.dst = unw.preg_index[i]; + insn.val = val; + script_emit(script, insn); + if (need_nat_info) + emit_nat_info(sr, i, script); + + if (i == UNW_REG_PSP) { + /* + * info->psp must contain the _value_ of the previous + * sp, not it's save location. 
We get this by + * dereferencing the value we just stored in + * info->psp: + */ + insn.opc = UNW_INSN_LOAD; + insn.dst = insn.val = unw.preg_index[UNW_REG_PSP]; + script_emit(script, insn); + } +} + +static inline const struct unw_table_entry * +lookup (struct unw_table *table, unsigned long rel_ip) +{ + const struct unw_table_entry *e = NULL; + unsigned long lo, hi, mid; + + /* do a binary search for right entry: */ + for (lo = 0, hi = table->length; lo < hi; ) { + mid = (lo + hi) / 2; + e = &table->array[mid]; + if (rel_ip < e->start_offset) + hi = mid; + else if (rel_ip >= e->end_offset) + lo = mid + 1; + else + break; + } + if (rel_ip < e->start_offset || rel_ip >= e->end_offset) + return NULL; + return e; +} + +/* + * Build an unwind script that unwinds from state OLD_STATE to the + * entrypoint of the function that called OLD_STATE. + */ +static inline struct unw_script * +build_script (struct unw_frame_info *info) +{ + const struct unw_table_entry *e = NULL; + struct unw_script *script = NULL; + struct unw_labeled_state *ls, *next; + unsigned long ip = info->ip; + struct unw_state_record sr; + struct unw_table *table, *prev; + struct unw_reg_info *r; + struct unw_insn insn; + u8 *dp, *desc_end; + u64 hdr; + int i; + STAT(unsigned long start, parse_start;) + + STAT(++unw.stat.script.builds; start = ia64_get_itc()); + + /* build state record */ + memset(&sr, 0, sizeof(sr)); + for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) + r->when = UNW_WHEN_NEVER; + sr.pr_val = info->pr; + + UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __func__, ip); + script = script_new(ip); + if (!script) { + UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n", __func__); + STAT(unw.stat.script.build_time += ia64_get_itc() - start); + return NULL; + } + unw.cache[info->prev_script].hint = script - unw.cache; + + /* search the kernels and the modules' unwind tables for IP: */ + + STAT(parse_start = ia64_get_itc()); + + prev = NULL; + for (table = unw.tables; table; table = 
table->next) { + if (ip >= table->start && ip < table->end) { + /* + * Leave the kernel unwind table at the very front, + * lest moving it breaks some assumption elsewhere. + * Otherwise, move the matching table to the second + * position in the list so that traversals can benefit + * from commonality in backtrace paths. + */ + if (prev && prev != unw.tables) { + /* unw is safe - we're already spinlocked */ + prev->next = table->next; + table->next = unw.tables->next; + unw.tables->next = table; + } + e = lookup(table, ip - table->segment_base); + break; + } + prev = table; + } + if (!e) { + /* no info, return default unwinder (leaf proc, no mem stack, no saved regs) */ + UNW_DPRINT(1, "unwind.%s: no unwind info for ip=0x%lx (prev ip=0x%lx)\n", + __func__, ip, unw.cache[info->prev_script].ip); + sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR; + sr.curr.reg[UNW_REG_RP].when = -1; + sr.curr.reg[UNW_REG_RP].val = 0; + compile_reg(&sr, UNW_REG_RP, script); + script_finalize(script, &sr); + STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); + STAT(unw.stat.script.build_time += ia64_get_itc() - start); + return script; + } + + sr.when_target = (3*((ip & ~0xfUL) - (table->segment_base + e->start_offset))/16 + + (ip & 0xfUL)); + hdr = *(u64 *) (table->segment_base + e->info_offset); + dp = (u8 *) (table->segment_base + e->info_offset + 8); + desc_end = dp + 8*UNW_LENGTH(hdr); + + while (!sr.done && dp < desc_end) + dp = unw_decode(dp, sr.in_body, &sr); + + if (sr.when_target > sr.epilogue_start) { + /* + * sp has been restored and all values on the memory stack below + * psp also have been restored. 
+ */ + sr.curr.reg[UNW_REG_PSP].val = 0; + sr.curr.reg[UNW_REG_PSP].where = UNW_WHERE_NONE; + sr.curr.reg[UNW_REG_PSP].when = UNW_WHEN_NEVER; + for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) + if ((r->where == UNW_WHERE_PSPREL && r->val <= 0x10) + || r->where == UNW_WHERE_SPREL) + { + r->val = 0; + r->where = UNW_WHERE_NONE; + r->when = UNW_WHEN_NEVER; + } + } + + script->flags = sr.flags; + + /* + * If RP did't get saved, generate entry for the return link + * register. + */ + if (sr.curr.reg[UNW_REG_RP].when >= sr.when_target) { + sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR; + sr.curr.reg[UNW_REG_RP].when = -1; + sr.curr.reg[UNW_REG_RP].val = sr.return_link_reg; + UNW_DPRINT(1, "unwind.%s: using default for rp at ip=0x%lx where=%d val=0x%lx\n", + __func__, ip, sr.curr.reg[UNW_REG_RP].where, + sr.curr.reg[UNW_REG_RP].val); + } + +#ifdef UNW_DEBUG + UNW_DPRINT(1, "unwind.%s: state record for func 0x%lx, t=%u:\n", + __func__, table->segment_base + e->start_offset, sr.when_target); + for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) { + if (r->where != UNW_WHERE_NONE || r->when != UNW_WHEN_NEVER) { + UNW_DPRINT(1, " %s <- ", unw.preg_name[r - sr.curr.reg]); + switch (r->where) { + case UNW_WHERE_GR: UNW_DPRINT(1, "r%lu", r->val); break; + case UNW_WHERE_FR: UNW_DPRINT(1, "f%lu", r->val); break; + case UNW_WHERE_BR: UNW_DPRINT(1, "b%lu", r->val); break; + case UNW_WHERE_SPREL: UNW_DPRINT(1, "[sp+0x%lx]", r->val); break; + case UNW_WHERE_PSPREL: UNW_DPRINT(1, "[psp+0x%lx]", r->val); break; + case UNW_WHERE_NONE: + UNW_DPRINT(1, "%s+0x%lx", unw.preg_name[r - sr.curr.reg], r->val); + break; + + default: + UNW_DPRINT(1, "BADWHERE(%d)", r->where); + break; + } + UNW_DPRINT(1, "\t\t%d\n", r->when); + } + } +#endif + + STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); + + /* translate state record into unwinder instructions: */ + + /* + * First, set psp if we're dealing with a fixed-size frame; + * subsequent instructions may depend on 
this value. + */ + if (sr.when_target > sr.curr.reg[UNW_REG_PSP].when + && (sr.curr.reg[UNW_REG_PSP].where == UNW_WHERE_NONE) + && sr.curr.reg[UNW_REG_PSP].val != 0) { + /* new psp is sp plus frame size */ + insn.opc = UNW_INSN_ADD; + insn.dst = offsetof(struct unw_frame_info, psp)/8; + insn.val = sr.curr.reg[UNW_REG_PSP].val; /* frame size */ + script_emit(script, insn); + } + + /* determine where the primary UNaT is: */ + if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_GR].when) + i = UNW_REG_PRI_UNAT_MEM; + else if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when) + i = UNW_REG_PRI_UNAT_GR; + else if (sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when > sr.curr.reg[UNW_REG_PRI_UNAT_GR].when) + i = UNW_REG_PRI_UNAT_MEM; + else + i = UNW_REG_PRI_UNAT_GR; + + compile_reg(&sr, i, script); + + for (i = UNW_REG_BSP; i < UNW_NUM_REGS; ++i) + compile_reg(&sr, i, script); + + /* free labeled register states & stack: */ + + STAT(parse_start = ia64_get_itc()); + for (ls = sr.labeled_states; ls; ls = next) { + next = ls->next; + free_state_stack(&ls->saved_state); + free_labeled_state(ls); + } + free_state_stack(&sr.curr); + STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); + + script_finalize(script, &sr); + STAT(unw.stat.script.build_time += ia64_get_itc() - start); + return script; +} + +/* + * Apply the unwinding actions represented by OPS and update SR to + * reflect the state that existed upon entry to the function that this + * unwinder represents. 
+ */ +static inline void +run_script (struct unw_script *script, struct unw_frame_info *state) +{ + struct unw_insn *ip, *limit, next_insn; + unsigned long opc, dst, val, off; + unsigned long *s = (unsigned long *) state; + STAT(unsigned long start;) + + STAT(++unw.stat.script.runs; start = ia64_get_itc()); + state->flags = script->flags; + ip = script->insn; + limit = script->insn + script->count; + next_insn = *ip; + + while (ip++ < limit) { + opc = next_insn.opc; + dst = next_insn.dst; + val = next_insn.val; + next_insn = *ip; + + redo: + switch (opc) { + case UNW_INSN_ADD: + s[dst] += val; + break; + + case UNW_INSN_MOVE2: + if (!s[val]) + goto lazy_init; + s[dst+1] = s[val+1]; + s[dst] = s[val]; + break; + + case UNW_INSN_MOVE: + if (!s[val]) + goto lazy_init; + s[dst] = s[val]; + break; + + case UNW_INSN_MOVE_SCRATCH: + if (state->pt) { + s[dst] = (unsigned long) get_scratch_regs(state) + val; + } else { + s[dst] = 0; + UNW_DPRINT(0, "unwind.%s: no state->pt, dst=%ld, val=%ld\n", + __func__, dst, val); + } + break; + + case UNW_INSN_MOVE_CONST: + if (val == 0) + s[dst] = (unsigned long) &unw.r0; + else { + s[dst] = 0; + UNW_DPRINT(0, "unwind.%s: UNW_INSN_MOVE_CONST bad val=%ld\n", + __func__, val); + } + break; + + + case UNW_INSN_MOVE_STACKED: + s[dst] = (unsigned long) ia64_rse_skip_regs((unsigned long *)state->bsp, + val); + break; + + case UNW_INSN_ADD_PSP: + s[dst] = state->psp + val; + break; + + case UNW_INSN_ADD_SP: + s[dst] = state->sp + val; + break; + + case UNW_INSN_SETNAT_MEMSTK: + if (!state->pri_unat_loc) + state->pri_unat_loc = &state->sw->caller_unat; + /* register off. 
is a multiple of 8, so the least 3 bits (type) are 0 */ + s[dst+1] = ((unsigned long) state->pri_unat_loc - s[dst]) | UNW_NAT_MEMSTK; + break; + + case UNW_INSN_SETNAT_TYPE: + s[dst+1] = val; + break; + + case UNW_INSN_LOAD: +#ifdef UNW_DEBUG + if ((s[val] & (local_cpu_data->unimpl_va_mask | 0x7)) != 0 + || s[val] < TASK_SIZE) + { + UNW_DPRINT(0, "unwind.%s: rejecting bad psp=0x%lx\n", + __func__, s[val]); + break; + } +#endif + s[dst] = *(unsigned long *) s[val]; + break; + } + } + STAT(unw.stat.script.run_time += ia64_get_itc() - start); + return; + + lazy_init: + off = unw.sw_off[val]; + s[val] = (unsigned long) state->sw + off; + if (off >= offsetof(struct switch_stack, r4) && off <= offsetof(struct switch_stack, r7)) + /* + * We're initializing a general register: init NaT info, too. Note that + * the offset is a multiple of 8 which gives us the 3 bits needed for + * the type field. + */ + s[val+1] = (offsetof(struct switch_stack, ar_unat) - off) | UNW_NAT_MEMSTK; + goto redo; +} + +static int +find_save_locs (struct unw_frame_info *info) +{ + int have_write_lock = 0; + struct unw_script *scr; + unsigned long flags = 0; + + if ((info->ip & (local_cpu_data->unimpl_va_mask | 0xf)) || info->ip < TASK_SIZE) { + /* don't let obviously bad addresses pollute the cache */ + /* FIXME: should really be level 0 but it occurs too often. 
KAO */ + UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", __func__, info->ip); + info->rp_loc = NULL; + return -1; + } + + scr = script_lookup(info); + if (!scr) { + spin_lock_irqsave(&unw.lock, flags); + scr = build_script(info); + if (!scr) { + spin_unlock_irqrestore(&unw.lock, flags); + UNW_DPRINT(0, + "unwind.%s: failed to locate/build unwind script for ip %lx\n", + __func__, info->ip); + return -1; + } + have_write_lock = 1; + } + info->hint = scr->hint; + info->prev_script = scr - unw.cache; + + run_script(scr, info); + + if (have_write_lock) { + write_unlock(&scr->lock); + spin_unlock_irqrestore(&unw.lock, flags); + } else + read_unlock(&scr->lock); + return 0; +} + +static int +unw_valid(const struct unw_frame_info *info, unsigned long* p) +{ + unsigned long loc = (unsigned long)p; + return (loc >= info->regstk.limit && loc < info->regstk.top) || + (loc >= info->memstk.top && loc < info->memstk.limit); +} + +int +unw_unwind (struct unw_frame_info *info) +{ + unsigned long prev_ip, prev_sp, prev_bsp; + unsigned long ip, pr, num_regs; + STAT(unsigned long start, flags;) + int retval; + + STAT(local_irq_save(flags); ++unw.stat.api.unwinds; start = ia64_get_itc()); + + prev_ip = info->ip; + prev_sp = info->sp; + prev_bsp = info->bsp; + + /* validate the return IP pointer */ + if (!unw_valid(info, info->rp_loc)) { + /* FIXME: should really be level 0 but it occurs too often. 
KAO */ + UNW_DPRINT(1, "unwind.%s: failed to locate return link (ip=0x%lx)!\n", + __func__, info->ip); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + /* restore the ip */ + ip = info->ip = *info->rp_loc; + if (ip < GATE_ADDR) { + UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __func__, ip); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + + /* validate the previous stack frame pointer */ + if (!unw_valid(info, info->pfs_loc)) { + UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __func__); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + /* restore the cfm: */ + info->cfm_loc = info->pfs_loc; + + /* restore the bsp: */ + pr = info->pr; + num_regs = 0; + if ((info->flags & UNW_FLAG_INTERRUPT_FRAME)) { + info->pt = info->sp + 16; + if ((pr & (1UL << PRED_NON_SYSCALL)) != 0) + num_regs = *info->cfm_loc & 0x7f; /* size of frame */ + info->pfs_loc = + (unsigned long *) (info->pt + offsetof(struct pt_regs, ar_pfs)); + UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", __func__, info->pt); + } else + num_regs = (*info->cfm_loc >> 7) & 0x7f; /* size of locals */ + info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->bsp, -num_regs); + if (info->bsp < info->regstk.limit || info->bsp > info->regstk.top) { + UNW_DPRINT(0, "unwind.%s: bsp (0x%lx) out of range [0x%lx-0x%lx]\n", + __func__, info->bsp, info->regstk.limit, info->regstk.top); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + + /* restore the sp: */ + info->sp = info->psp; + if (info->sp < info->memstk.top || info->sp > info->memstk.limit) { + UNW_DPRINT(0, "unwind.%s: sp (0x%lx) out of range [0x%lx-0x%lx]\n", + __func__, info->sp, info->memstk.top, info->memstk.limit); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + 
return -1; + } + + if (info->ip == prev_ip && info->sp == prev_sp && info->bsp == prev_bsp) { + UNW_DPRINT(0, "unwind.%s: ip, sp, bsp unchanged; stopping here (ip=0x%lx)\n", + __func__, ip); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + + /* as we unwind, the saved ar.unat becomes the primary unat: */ + info->pri_unat_loc = info->unat_loc; + + /* finally, restore the predicates: */ + unw_get_pr(info, &info->pr); + + retval = find_save_locs(info); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return retval; +} +EXPORT_SYMBOL(unw_unwind); + +int +unw_unwind_to_user (struct unw_frame_info *info) +{ + unsigned long ip, sp, pr = info->pr; + + do { + unw_get_sp(info, &sp); + if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp) + < IA64_PT_REGS_SIZE) { + UNW_DPRINT(0, "unwind.%s: ran off the top of the kernel stack\n", + __func__); + break; + } + if (unw_is_intr_frame(info) && + (pr & (1UL << PRED_USER_STACK))) + return 0; + if (unw_get_pr (info, &pr) < 0) { + unw_get_rp(info, &ip); + UNW_DPRINT(0, "unwind.%s: failed to read " + "predicate register (ip=0x%lx)\n", + __func__, ip); + return -1; + } + } while (unw_unwind(info) >= 0); + unw_get_ip(info, &ip); + UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", + __func__, ip); + return -1; +} +EXPORT_SYMBOL(unw_unwind_to_user); + +static void +init_frame_info (struct unw_frame_info *info, struct task_struct *t, + struct switch_stack *sw, unsigned long stktop) +{ + unsigned long rbslimit, rbstop, stklimit; + STAT(unsigned long start, flags;) + + STAT(local_irq_save(flags); ++unw.stat.api.inits; start = ia64_get_itc()); + + /* + * Subtle stuff here: we _could_ unwind through the switch_stack frame but we + * don't want to do that because it would be slow as each preserved register would + * have to be processed. 
Instead, what we do here is zero out the frame info and + * start the unwind process at the function that created the switch_stack frame. + * When a preserved value in switch_stack needs to be accessed, run_script() will + * initialize the appropriate pointer on demand. + */ + memset(info, 0, sizeof(*info)); + + rbslimit = (unsigned long) t + IA64_RBS_OFFSET; + stklimit = (unsigned long) t + IA64_STK_OFFSET; + + rbstop = sw->ar_bspstore; + if (rbstop > stklimit || rbstop < rbslimit) + rbstop = rbslimit; + + if (stktop <= rbstop) + stktop = rbstop; + if (stktop > stklimit) + stktop = stklimit; + + info->regstk.limit = rbslimit; + info->regstk.top = rbstop; + info->memstk.limit = stklimit; + info->memstk.top = stktop; + info->task = t; + info->sw = sw; + info->sp = info->psp = stktop; + info->pr = sw->pr; + UNW_DPRINT(3, "unwind.%s:\n" + " task 0x%lx\n" + " rbs = [0x%lx-0x%lx)\n" + " stk = [0x%lx-0x%lx)\n" + " pr 0x%lx\n" + " sw 0x%lx\n" + " sp 0x%lx\n", + __func__, (unsigned long) t, rbslimit, rbstop, stktop, stklimit, + info->pr, (unsigned long) info->sw, info->sp); + STAT(unw.stat.api.init_time += ia64_get_itc() - start; local_irq_restore(flags)); +} + +void +unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct switch_stack *sw) +{ + unsigned long sol; + + init_frame_info(info, t, sw, (unsigned long) (sw + 1) - 16); + info->cfm_loc = &sw->ar_pfs; + sol = (*info->cfm_loc >> 7) & 0x7f; + info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sol); + info->ip = sw->b0; + UNW_DPRINT(3, "unwind.%s:\n" + " bsp 0x%lx\n" + " sol 0x%lx\n" + " ip 0x%lx\n", + __func__, info->bsp, sol, info->ip); + find_save_locs(info); +} + +EXPORT_SYMBOL(unw_init_frame_info); + +void +unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t) +{ + struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16); + + UNW_DPRINT(1, "unwind.%s\n", __func__); + unw_init_frame_info(info, t, sw); +} 
+EXPORT_SYMBOL(unw_init_from_blocked_task); + +static void +init_unwind_table (struct unw_table *table, const char *name, unsigned long segment_base, + unsigned long gp, const void *table_start, const void *table_end) +{ + const struct unw_table_entry *start = table_start, *end = table_end; + + table->name = name; + table->segment_base = segment_base; + table->gp = gp; + table->start = segment_base + start[0].start_offset; + table->end = segment_base + end[-1].end_offset; + table->array = start; + table->length = end - start; +} + +void * +unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp, + const void *table_start, const void *table_end) +{ + const struct unw_table_entry *start = table_start, *end = table_end; + struct unw_table *table; + unsigned long flags; + + if (end - start <= 0) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to insert empty unwind table\n", + __func__); + return NULL; + } + + table = kmalloc(sizeof(*table), GFP_USER); + if (!table) + return NULL; + + init_unwind_table(table, name, segment_base, gp, table_start, table_end); + + spin_lock_irqsave(&unw.lock, flags); + { + /* keep kernel unwind table at the front (it's searched most commonly): */ + table->next = unw.tables->next; + unw.tables->next = table; + } + spin_unlock_irqrestore(&unw.lock, flags); + + return table; +} + +void +unw_remove_unwind_table (void *handle) +{ + struct unw_table *table, *prev; + struct unw_script *tmp; + unsigned long flags; + long index; + + if (!handle) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to remove non-existent unwind table\n", + __func__); + return; + } + + table = handle; + if (table == &unw.kernel_table) { + UNW_DPRINT(0, "unwind.%s: sorry, freeing the kernel's unwind table is a " + "no-can-do!\n", __func__); + return; + } + + spin_lock_irqsave(&unw.lock, flags); + { + /* first, delete the table: */ + + for (prev = (struct unw_table *) &unw.tables; prev; prev = prev->next) + if (prev->next == table) + break; + if 
(!prev) { + UNW_DPRINT(0, "unwind.%s: failed to find unwind table %p\n", + __func__, (void *) table); + spin_unlock_irqrestore(&unw.lock, flags); + return; + } + prev->next = table->next; + } + spin_unlock_irqrestore(&unw.lock, flags); + + /* next, remove hash table entries for this table */ + + for (index = 0; index < UNW_HASH_SIZE; ++index) { + tmp = unw.cache + unw.hash[index]; + if (unw.hash[index] >= UNW_CACHE_SIZE + || tmp->ip < table->start || tmp->ip >= table->end) + continue; + + write_lock(&tmp->lock); + { + if (tmp->ip >= table->start && tmp->ip < table->end) { + unw.hash[index] = tmp->coll_chain; + tmp->ip = 0; + } + } + write_unlock(&tmp->lock); + } + + kfree(table); +} + +static int __init +create_gate_table (void) +{ + const struct unw_table_entry *entry, *start, *end; + unsigned long *lp, segbase = GATE_ADDR; + size_t info_size, size; + char *info; + Elf64_Phdr *punw = NULL, *phdr = (Elf64_Phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i, ++phdr) + if (phdr->p_type == PT_IA_64_UNWIND) { + punw = phdr; + break; + } + + if (!punw) { + printk("%s: failed to find gate DSO's unwind table!\n", __func__); + return 0; + } + + start = (const struct unw_table_entry *) punw->p_vaddr; + end = (struct unw_table_entry *) ((char *) start + punw->p_memsz); + size = 0; + + unw_add_unwind_table("linux-gate.so", segbase, 0, start, end); + + for (entry = start; entry < end; ++entry) + size += 3*8 + 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset)); + size += 8; /* reserve space for "end of table" marker */ + + unw.gate_table = kmalloc(size, GFP_KERNEL); + if (!unw.gate_table) { + unw.gate_table_size = 0; + printk(KERN_ERR "%s: unable to create unwind data for gate page!\n", __func__); + return 0; + } + unw.gate_table_size = size; + + lp = unw.gate_table; + info = (char *) unw.gate_table + size; + + for (entry = start; entry < end; ++entry, lp += 3) { + info_size = 8 + 8*UNW_LENGTH(*(u64 *) (segbase + 
entry->info_offset)); + info -= info_size; + memcpy(info, (char *) segbase + entry->info_offset, info_size); + + lp[0] = segbase + entry->start_offset; /* start */ + lp[1] = segbase + entry->end_offset; /* end */ + lp[2] = info - (char *) unw.gate_table; /* info */ + } + *lp = 0; /* end-of-table marker */ + return 0; +} + +__initcall(create_gate_table); + +void __init +unw_init (void) +{ + extern char __gp[]; + extern void unw_hash_index_t_is_too_narrow (void); + long i, off; + + if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE) + unw_hash_index_t_is_too_narrow(); + + unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(CALLER_UNAT); + unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE); + unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_PFS); + unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0); + unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(CALLER_UNAT); + unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR); + unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC); + unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR); + for (i = UNW_REG_R4, off = SW(R4); i <= UNW_REG_R7; ++i, off += 8) + unw.sw_off[unw.preg_index[i]] = off; + for (i = UNW_REG_B1, off = SW(B1); i <= UNW_REG_B5; ++i, off += 8) + unw.sw_off[unw.preg_index[i]] = off; + for (i = UNW_REG_F2, off = SW(F2); i <= UNW_REG_F5; ++i, off += 16) + unw.sw_off[unw.preg_index[i]] = off; + for (i = UNW_REG_F16, off = SW(F16); i <= UNW_REG_F31; ++i, off += 16) + unw.sw_off[unw.preg_index[i]] = off; + + for (i = 0; i < UNW_CACHE_SIZE; ++i) { + if (i > 0) + unw.cache[i].lru_chain = (i - 1); + unw.cache[i].coll_chain = -1; + rwlock_init(&unw.cache[i].lock); + } + unw.lru_head = UNW_CACHE_SIZE - 1; + unw.lru_tail = 0; + + init_unwind_table(&unw.kernel_table, "kernel", KERNEL_START, (unsigned long) __gp, + __start_unwind, __end_unwind); +} + +/* + * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED + * + * This system call has been deprecated. 
The new and improved way to get + * at the kernel's unwind info is via the gate DSO. The address of the + * ELF header for this DSO is passed to user-level via AT_SYSINFO_EHDR. + * + * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED + * + * This system call copies the unwind data into the buffer pointed to by BUF and returns + * the size of the unwind data. If BUF_SIZE is smaller than the size of the unwind data + * or if BUF is NULL, nothing is copied, but the system call still returns the size of the + * unwind data. + * + * The first portion of the unwind data contains an unwind table and the rest contains the + * associated unwind info (in no particular order). The unwind table consists of a table + * of entries of the form: + * + * u64 start; (64-bit address of start of function) + * u64 end; (64-bit address of end of function) + * u64 info; (BUF-relative offset to unwind info) + * + * The end of the unwind table is indicated by an entry with a START address of zero. + * + * Please see the IA-64 Software Conventions and Runtime Architecture manual for details + * on the format of the unwind info. + * + * ERRORS + * EFAULT BUF points outside your accessible address space. + */ +asmlinkage long +sys_getunwind (void __user *buf, size_t buf_size) +{ + if (buf && buf_size >= unw.gate_table_size) + if (copy_to_user(buf, unw.gate_table, unw.gate_table_size) != 0) + return -EFAULT; + return unw.gate_table_size; +} diff --git a/kernel/arch/ia64/kernel/unwind_decoder.c b/kernel/arch/ia64/kernel/unwind_decoder.c new file mode 100644 index 000000000..50ac2d82f --- /dev/null +++ b/kernel/arch/ia64/kernel/unwind_decoder.c @@ -0,0 +1,459 @@ +/* + * Copyright (C) 2000 Hewlett-Packard Co + * Copyright (C) 2000 David Mosberger-Tang + * + * Generic IA-64 unwind info decoder. + * + * This file is used both by the Linux kernel and objdump. Please keep + * the two copies of this file in sync. 
+ * + * You need to customize the decoder by defining the following + * macros/constants before including this file: + * + * Types: + * unw_word Unsigned integer type with at least 64 bits + * + * Register names: + * UNW_REG_BSP + * UNW_REG_BSPSTORE + * UNW_REG_FPSR + * UNW_REG_LC + * UNW_REG_PFS + * UNW_REG_PR + * UNW_REG_RNAT + * UNW_REG_PSP + * UNW_REG_RP + * UNW_REG_UNAT + * + * Decoder action macros: + * UNW_DEC_BAD_CODE(code) + * UNW_DEC_ABI(fmt,abi,context,arg) + * UNW_DEC_BR_GR(fmt,brmask,gr,arg) + * UNW_DEC_BR_MEM(fmt,brmask,arg) + * UNW_DEC_COPY_STATE(fmt,label,arg) + * UNW_DEC_EPILOGUE(fmt,t,ecount,arg) + * UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg) + * UNW_DEC_FR_MEM(fmt,frmask,arg) + * UNW_DEC_GR_GR(fmt,grmask,gr,arg) + * UNW_DEC_GR_MEM(fmt,grmask,arg) + * UNW_DEC_LABEL_STATE(fmt,label,arg) + * UNW_DEC_MEM_STACK_F(fmt,t,size,arg) + * UNW_DEC_MEM_STACK_V(fmt,t,arg) + * UNW_DEC_PRIUNAT_GR(fmt,r,arg) + * UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) + * UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) + * UNW_DEC_PRIUNAT_WHEN_PSPREL(fmt,pspoff,arg) + * UNW_DEC_PRIUNAT_WHEN_SPREL(fmt,spoff,arg) + * UNW_DEC_PROLOGUE(fmt,body,rlen,arg) + * UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg) + * UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg) + * UNW_DEC_REG_REG(fmt,src,dst,arg) + * UNW_DEC_REG_SPREL(fmt,reg,spoff,arg) + * UNW_DEC_REG_WHEN(fmt,reg,t,arg) + * UNW_DEC_RESTORE(fmt,t,abreg,arg) + * UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg) + * UNW_DEC_SPILL_BASE(fmt,pspoff,arg) + * UNW_DEC_SPILL_MASK(fmt,imaskp,arg) + * UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg) + * UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg) + * UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg) + * UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg) + * UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg) + * UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,spoff,arg) + */ + +static unw_word +unw_decode_uleb128 (unsigned char **dpp) +{ + unsigned shift = 0; + unw_word byte, result = 0; + unsigned char *bp = *dpp; + + while (1) + { + byte = *bp++; + result |= (byte 
& 0x7f) << shift; + if ((byte & 0x80) == 0) + break; + shift += 7; + } + *dpp = bp; + return result; +} + +static unsigned char * +unw_decode_x1 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, abreg; + unw_word t, off; + + byte1 = *dp++; + t = unw_decode_uleb128 (&dp); + off = unw_decode_uleb128 (&dp); + abreg = (byte1 & 0x7f); + if (byte1 & 0x80) + UNW_DEC_SPILL_SPREL(X1, t, abreg, off, arg); + else + UNW_DEC_SPILL_PSPREL(X1, t, abreg, off, arg); + return dp; +} + +static unsigned char * +unw_decode_x2 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, byte2, abreg, x, ytreg; + unw_word t; + + byte1 = *dp++; byte2 = *dp++; + t = unw_decode_uleb128 (&dp); + abreg = (byte1 & 0x7f); + ytreg = byte2; + x = (byte1 >> 7) & 1; + if ((byte1 & 0x80) == 0 && ytreg == 0) + UNW_DEC_RESTORE(X2, t, abreg, arg); + else + UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg); + return dp; +} + +static unsigned char * +unw_decode_x3 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, byte2, abreg, qp; + unw_word t, off; + + byte1 = *dp++; byte2 = *dp++; + t = unw_decode_uleb128 (&dp); + off = unw_decode_uleb128 (&dp); + + qp = (byte1 & 0x3f); + abreg = (byte2 & 0x7f); + + if (byte1 & 0x80) + UNW_DEC_SPILL_SPREL_P(X3, qp, t, abreg, off, arg); + else + UNW_DEC_SPILL_PSPREL_P(X3, qp, t, abreg, off, arg); + return dp; +} + +static unsigned char * +unw_decode_x4 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, byte2, byte3, qp, abreg, x, ytreg; + unw_word t; + + byte1 = *dp++; byte2 = *dp++; byte3 = *dp++; + t = unw_decode_uleb128 (&dp); + + qp = (byte1 & 0x3f); + abreg = (byte2 & 0x7f); + x = (byte2 >> 7) & 1; + ytreg = byte3; + + if ((byte2 & 0x80) == 0 && byte3 == 0) + UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg); + else + UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg); + return dp; +} + +static unsigned char * +unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg) +{ + 
int body = (code & 0x20) != 0; + unw_word rlen; + + rlen = (code & 0x1f); + UNW_DEC_PROLOGUE(R1, body, rlen, arg); + return dp; +} + +static unsigned char * +unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, mask, grsave; + unw_word rlen; + + byte1 = *dp++; + + mask = ((code & 0x7) << 1) | ((byte1 >> 7) & 1); + grsave = (byte1 & 0x7f); + rlen = unw_decode_uleb128 (&dp); + UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg); + return dp; +} + +static unsigned char * +unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg) +{ + unw_word rlen; + + rlen = unw_decode_uleb128 (&dp); + UNW_DEC_PROLOGUE(R3, ((code & 0x3) == 1), rlen, arg); + return dp; +} + +static unsigned char * +unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char brmask = (code & 0x1f); + + UNW_DEC_BR_MEM(P1, brmask, arg); + return dp; +} + +static unsigned char * +unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg) +{ + if ((code & 0x10) == 0) + { + unsigned char byte1 = *dp++; + + UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1), + (byte1 & 0x7f), arg); + } + else if ((code & 0x08) == 0) + { + unsigned char byte1 = *dp++, r, dst; + + r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1); + dst = (byte1 & 0x7f); + switch (r) + { + case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break; + case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break; + case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break; + case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break; + case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break; + case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break; + case 6: UNW_DEC_RP_BR(P3, dst, arg); break; + case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break; + case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break; + case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break; + case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break; + case 11: UNW_DEC_PRIUNAT_GR(P3, dst, 
arg); break; + default: UNW_DEC_BAD_CODE(r); break; + } + } + else if ((code & 0x7) == 0) + UNW_DEC_SPILL_MASK(P4, dp, arg); + else if ((code & 0x7) == 1) + { + unw_word grmask, frmask, byte1, byte2, byte3; + + byte1 = *dp++; byte2 = *dp++; byte3 = *dp++; + grmask = ((byte1 >> 4) & 0xf); + frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3; + UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg); + } + else + UNW_DEC_BAD_CODE(code); + return dp; +} + +static unsigned char * +unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg) +{ + int gregs = (code & 0x10) != 0; + unsigned char mask = (code & 0x0f); + + if (gregs) + UNW_DEC_GR_MEM(P6, mask, arg); + else + UNW_DEC_FR_MEM(P6, mask, arg); + return dp; +} + +static unsigned char * +unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char r, byte1, byte2; + unw_word t, size; + + if ((code & 0x10) == 0) + { + r = (code & 0xf); + t = unw_decode_uleb128 (&dp); + switch (r) + { + case 0: + size = unw_decode_uleb128 (&dp); + UNW_DEC_MEM_STACK_F(P7, t, size, arg); + break; + + case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break; + case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break; + case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break; + case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break; + case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break; + case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break; + case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break; + case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break; + case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break; + case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break; + case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break; + case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break; + case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break; + case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break; + case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break; + default: UNW_DEC_BAD_CODE(r); break; 
+ } + } + else + { + switch (code & 0xf) + { + case 0x0: /* p8 */ + { + r = *dp++; + t = unw_decode_uleb128 (&dp); + switch (r) + { + case 1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break; + case 2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break; + case 3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break; + case 4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break; + case 5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break; + case 6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break; + case 7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break; + case 8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break; + case 9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break; + case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break; + case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break; + case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break; + case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break; + case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break; + case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break; + case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break; + case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break; + case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break; + case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break; + default: UNW_DEC_BAD_CODE(r); break; + } + } + break; + + case 0x1: + byte1 = *dp++; byte2 = *dp++; + UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg); + break; + + case 0xf: /* p10 */ + byte1 = *dp++; byte2 = *dp++; + UNW_DEC_ABI(P10, byte1, byte2, arg); + break; + + case 0x9: + return unw_decode_x1 (dp, code, arg); + + case 0xa: + return unw_decode_x2 (dp, code, arg); + + case 0xb: + return unw_decode_x3 (dp, code, arg); + + case 0xc: + return unw_decode_x4 (dp, code, arg); + + default: + UNW_DEC_BAD_CODE(code); + break; + } + } + return dp; +} + +static unsigned char * +unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg) +{ + unw_word label = (code & 0x1f); + + if ((code & 0x20) 
!= 0) + UNW_DEC_COPY_STATE(B1, label, arg); + else + UNW_DEC_LABEL_STATE(B1, label, arg); + return dp; +} + +static unsigned char * +unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg) +{ + unw_word t; + + t = unw_decode_uleb128 (&dp); + UNW_DEC_EPILOGUE(B2, t, (code & 0x1f), arg); + return dp; +} + +static unsigned char * +unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg) +{ + unw_word t, ecount, label; + + if ((code & 0x10) == 0) + { + t = unw_decode_uleb128 (&dp); + ecount = unw_decode_uleb128 (&dp); + UNW_DEC_EPILOGUE(B3, t, ecount, arg); + } + else if ((code & 0x07) == 0) + { + label = unw_decode_uleb128 (&dp); + if ((code & 0x08) != 0) + UNW_DEC_COPY_STATE(B4, label, arg); + else + UNW_DEC_LABEL_STATE(B4, label, arg); + } + else + switch (code & 0x7) + { + case 1: return unw_decode_x1 (dp, code, arg); + case 2: return unw_decode_x2 (dp, code, arg); + case 3: return unw_decode_x3 (dp, code, arg); + case 4: return unw_decode_x4 (dp, code, arg); + default: UNW_DEC_BAD_CODE(code); break; + } + return dp; +} + +typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *); + +static unw_decoder unw_decode_table[2][8] = +{ + /* prologue table: */ + { + unw_decode_r1, /* 0 */ + unw_decode_r1, + unw_decode_r2, + unw_decode_r3, + unw_decode_p1, /* 4 */ + unw_decode_p2_p5, + unw_decode_p6, + unw_decode_p7_p10 + }, + { + unw_decode_r1, /* 0 */ + unw_decode_r1, + unw_decode_r2, + unw_decode_r3, + unw_decode_b1, /* 4 */ + unw_decode_b1, + unw_decode_b2, + unw_decode_b3_x4 + } +}; + +/* + * Decode one descriptor and return address of next descriptor. 
+ */ +static inline unsigned char * +unw_decode (unsigned char *dp, int inside_body, void *arg) +{ + unw_decoder decoder; + unsigned char code; + + code = *dp++; + decoder = unw_decode_table[inside_body][code >> 5]; + dp = (*decoder) (dp, code, arg); + return dp; +} diff --git a/kernel/arch/ia64/kernel/unwind_i.h b/kernel/arch/ia64/kernel/unwind_i.h new file mode 100644 index 000000000..96693a6ae --- /dev/null +++ b/kernel/arch/ia64/kernel/unwind_i.h @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * Kernel unwind support. + */ + +#define UNW_VER(x) ((x) >> 48) +#define UNW_FLAG_MASK 0x0000ffff00000000 +#define UNW_FLAG_OSMASK 0x0000f00000000000 +#define UNW_FLAG_EHANDLER(x) ((x) & 0x0000000100000000L) +#define UNW_FLAG_UHANDLER(x) ((x) & 0x0000000200000000L) +#define UNW_LENGTH(x) ((x) & 0x00000000ffffffffL) + +enum unw_register_index { + /* primary unat: */ + UNW_REG_PRI_UNAT_GR, + UNW_REG_PRI_UNAT_MEM, + + /* register stack */ + UNW_REG_BSP, /* register stack pointer */ + UNW_REG_BSPSTORE, + UNW_REG_PFS, /* previous function state */ + UNW_REG_RNAT, + /* memory stack */ + UNW_REG_PSP, /* previous memory stack pointer */ + /* return pointer: */ + UNW_REG_RP, + + /* preserved registers: */ + UNW_REG_R4, UNW_REG_R5, UNW_REG_R6, UNW_REG_R7, + UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR, + UNW_REG_B1, UNW_REG_B2, UNW_REG_B3, UNW_REG_B4, UNW_REG_B5, + UNW_REG_F2, UNW_REG_F3, UNW_REG_F4, UNW_REG_F5, + UNW_REG_F16, UNW_REG_F17, UNW_REG_F18, UNW_REG_F19, + UNW_REG_F20, UNW_REG_F21, UNW_REG_F22, UNW_REG_F23, + UNW_REG_F24, UNW_REG_F25, UNW_REG_F26, UNW_REG_F27, + UNW_REG_F28, UNW_REG_F29, UNW_REG_F30, UNW_REG_F31, + UNW_NUM_REGS +}; + +struct unw_info_block { + u64 header; + u64 desc[0]; /* unwind descriptors */ + /* personality routine and language-specific data follow behind descriptors */ +}; + +struct unw_table { + struct unw_table *next; /* must be first member! 
*/ + const char *name; + unsigned long gp; /* global pointer for this load-module */ + unsigned long segment_base; /* base for offsets in the unwind table entries */ + unsigned long start; + unsigned long end; + const struct unw_table_entry *array; + unsigned long length; +}; + +enum unw_where { + UNW_WHERE_NONE, /* register isn't saved at all */ + UNW_WHERE_GR, /* register is saved in a general register */ + UNW_WHERE_FR, /* register is saved in a floating-point register */ + UNW_WHERE_BR, /* register is saved in a branch register */ + UNW_WHERE_SPREL, /* register is saved on memstack (sp-relative) */ + UNW_WHERE_PSPREL, /* register is saved on memstack (psp-relative) */ + /* + * At the end of each prologue these locations get resolved to + * UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively: + */ + UNW_WHERE_SPILL_HOME, /* register is saved in its spill home */ + UNW_WHERE_GR_SAVE /* register is saved in next general register */ +}; + +#define UNW_WHEN_NEVER 0x7fffffff + +struct unw_reg_info { + unsigned long val; /* save location: register number or offset */ + enum unw_where where; /* where the register gets saved */ + int when; /* when the register gets saved */ +}; + +struct unw_reg_state { + struct unw_reg_state *next; /* next (outer) element on state stack */ + struct unw_reg_info reg[UNW_NUM_REGS]; /* register save locations */ +}; + +struct unw_labeled_state { + struct unw_labeled_state *next; /* next labeled state (or NULL) */ + unsigned long label; /* label for this state */ + struct unw_reg_state saved_state; +}; + +struct unw_state_record { + unsigned int first_region : 1; /* is this the first region? */ + unsigned int done : 1; /* are we done scanning descriptors? */ + unsigned int any_spills : 1; /* got any register spills? */ + unsigned int in_body : 1; /* are we inside a body (as opposed to a prologue)? 
*/ + unsigned long flags; /* see UNW_FLAG_* in unwind.h */ + + u8 *imask; /* imask of spill_mask record or NULL */ + unsigned long pr_val; /* predicate values */ + unsigned long pr_mask; /* predicate mask */ + long spill_offset; /* psp-relative offset for spill base */ + int region_start; + int region_len; + int epilogue_start; + int epilogue_count; + int when_target; + + u8 gr_save_loc; /* next general register to use for saving a register */ + u8 return_link_reg; /* branch register in which the return link is passed */ + + struct unw_labeled_state *labeled_states; /* list of all labeled states */ + struct unw_reg_state curr; /* current state */ +}; + +enum unw_nat_type { + UNW_NAT_NONE, /* NaT not represented */ + UNW_NAT_VAL, /* NaT represented by NaT value (fp reg) */ + UNW_NAT_MEMSTK, /* NaT value is in unat word at offset OFF */ + UNW_NAT_REGSTK /* NaT is in rnat */ +}; + +enum unw_insn_opcode { + UNW_INSN_ADD, /* s[dst] += val */ + UNW_INSN_ADD_PSP, /* s[dst] = (s.psp + val) */ + UNW_INSN_ADD_SP, /* s[dst] = (s.sp + val) */ + UNW_INSN_MOVE, /* s[dst] = s[val] */ + UNW_INSN_MOVE2, /* s[dst] = s[val]; s[dst+1] = s[val+1] */ + UNW_INSN_MOVE_STACKED, /* s[dst] = ia64_rse_skip(*s.bsp, val) */ + UNW_INSN_SETNAT_MEMSTK, /* s[dst+1].nat.type = MEMSTK; + s[dst+1].nat.off = *s.pri_unat - s[dst] */ + UNW_INSN_SETNAT_TYPE, /* s[dst+1].nat.type = val */ + UNW_INSN_LOAD, /* s[dst] = *s[val] */ + UNW_INSN_MOVE_SCRATCH, /* s[dst] = scratch reg "val" */ + UNW_INSN_MOVE_CONST, /* s[dst] = constant reg "val" */ +}; + +struct unw_insn { + unsigned int opc : 4; + unsigned int dst : 9; + signed int val : 19; +}; + +/* + * Preserved general static registers (r4-r7) give rise to two script + * instructions; everything else yields at most one instruction; at + * the end of the script, the psp gets popped, accounting for one more + * instruction. 
+ */ +#define UNW_MAX_SCRIPT_LEN (UNW_NUM_REGS + 5) + +struct unw_script { + unsigned long ip; /* ip this script is for */ + unsigned long pr_mask; /* mask of predicates script depends on */ + unsigned long pr_val; /* predicate values this script is for */ + rwlock_t lock; + unsigned int flags; /* see UNW_FLAG_* in unwind.h */ + unsigned short lru_chain; /* used for least-recently-used chain */ + unsigned short coll_chain; /* used for hash collisions */ + unsigned short hint; /* hint for next script to try (or -1) */ + unsigned short count; /* number of instructions in script */ + struct unw_insn insn[UNW_MAX_SCRIPT_LEN]; +}; diff --git a/kernel/arch/ia64/kernel/vmlinux.lds.S b/kernel/arch/ia64/kernel/vmlinux.lds.S new file mode 100644 index 000000000..84f8a52ac --- /dev/null +++ b/kernel/arch/ia64/kernel/vmlinux.lds.S @@ -0,0 +1,248 @@ + +#include +#include +#include + +#include + +OUTPUT_FORMAT("elf64-ia64-little") +OUTPUT_ARCH(ia64) +ENTRY(phys_start) +jiffies = jiffies_64; + +PHDRS { + code PT_LOAD; + percpu PT_LOAD; + data PT_LOAD; + note PT_NOTE; + unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */ +} + +SECTIONS { + /* + * unwind exit sections must be discarded before + * the rest of the sections get included. + */ + /DISCARD/ : { + *(.IA_64.unwind.exit.text) + *(.IA_64.unwind_info.exit.text) + *(.comment) + *(.note) + } + + v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ + phys_start = _start - LOAD_OFFSET; + + code : { + } :code + . 
= KERNEL_START; + + _text = .; + _stext = .; + + .text : AT(ADDR(.text) - LOAD_OFFSET) { + __start_ivt_text = .; + *(.text..ivt) + __end_ivt_text = .; + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + *(.gnu.linkonce.t*) + } + + .text2 : AT(ADDR(.text2) - LOAD_OFFSET) { + *(.text2) + } + +#ifdef CONFIG_SMP + .text..lock : AT(ADDR(.text..lock) - LOAD_OFFSET) { + *(.text..lock) + } +#endif + _etext = .; + + /* + * Read-only data + */ + NOTES :code :note /* put .notes in text and mark in PT_NOTE */ + code_continues : { + } : code /* switch back to regular program... */ + + EXCEPTION_TABLE(16) + + /* MCA table */ + . = ALIGN(16); + __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET) { + __start___mca_table = .; + *(__mca_table) + __stop___mca_table = .; + } + + .data..patch.phys_stack_reg : AT(ADDR(.data..patch.phys_stack_reg) - LOAD_OFFSET) { + __start___phys_stack_reg_patchlist = .; + *(.data..patch.phys_stack_reg) + __end___phys_stack_reg_patchlist = .; + } + + /* + * Global data + */ + _data = .; + + /* Unwind info & table: */ + . = ALIGN(8); + .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) { + *(.IA_64.unwind_info*) + } + .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) { + __start_unwind = .; + *(.IA_64.unwind*) + __end_unwind = .; + } :code :unwind + code_continues2 : { + } : code + + RODATA + + .opd : AT(ADDR(.opd) - LOAD_OFFSET) { + *(.opd) + } + + /* + * Initialization code and data: + */ + . 
= ALIGN(PAGE_SIZE); + __init_begin = .; + + INIT_TEXT_SECTION(PAGE_SIZE) + INIT_DATA_SECTION(16) + + .data..patch.vtop : AT(ADDR(.data..patch.vtop) - LOAD_OFFSET) { + __start___vtop_patchlist = .; + *(.data..patch.vtop) + __end___vtop_patchlist = .; + } + + .data..patch.rse : AT(ADDR(.data..patch.rse) - LOAD_OFFSET) { + __start___rse_patchlist = .; + *(.data..patch.rse) + __end___rse_patchlist = .; + } + + .data..patch.mckinley_e9 : AT(ADDR(.data..patch.mckinley_e9) - LOAD_OFFSET) { + __start___mckinley_e9_bundles = .; + *(.data..patch.mckinley_e9) + __end___mckinley_e9_bundles = .; + } + +#if defined(CONFIG_PARAVIRT) + . = ALIGN(16); + .paravirt_bundles : AT(ADDR(.paravirt_bundles) - LOAD_OFFSET) { + __start_paravirt_bundles = .; + *(.paravirt_bundles) + __stop_paravirt_bundles = .; + } + . = ALIGN(16); + .paravirt_insts : AT(ADDR(.paravirt_insts) - LOAD_OFFSET) { + __start_paravirt_insts = .; + *(.paravirt_insts) + __stop_paravirt_insts = .; + } + . = ALIGN(16); + .paravirt_branches : AT(ADDR(.paravirt_branches) - LOAD_OFFSET) { + __start_paravirt_branches = .; + *(.paravirt_branches) + __stop_paravirt_branches = .; + } +#endif + +#if defined(CONFIG_IA64_GENERIC) + /* Machine Vector */ + . = ALIGN(16); + .machvec : AT(ADDR(.machvec) - LOAD_OFFSET) { + machvec_start = .; + *(.machvec) + machvec_end = .; + } +#endif + +#ifdef CONFIG_SMP + . = ALIGN(PERCPU_PAGE_SIZE); + __cpu0_per_cpu = .; + . = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */ +#endif + + . = ALIGN(PAGE_SIZE); + __init_end = .; + + .data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) { + PAGE_ALIGNED_DATA(PAGE_SIZE) + . = ALIGN(PAGE_SIZE); + __start_gate_section = .; + *(.data..gate) + __stop_gate_section = .; + } + /* + * make sure the gate page doesn't expose + * kernel data + */ + . = ALIGN(PAGE_SIZE); + + /* Per-cpu data: */ + . 
= ALIGN(PERCPU_PAGE_SIZE); + PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu) + __phys_per_cpu_start = __per_cpu_load; + /* + * ensure percpu data fits + * into percpu page size + */ + . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; + + data : { + } :data + .data : AT(ADDR(.data) - LOAD_OFFSET) { + _sdata = .; + INIT_TASK_DATA(PAGE_SIZE) + CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES) + READ_MOSTLY_DATA(SMP_CACHE_BYTES) + DATA_DATA + *(.data1) + *(.gnu.linkonce.d*) + CONSTRUCTORS + } + + . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ + .got : AT(ADDR(.got) - LOAD_OFFSET) { + *(.got.plt) + *(.got) + } + __gp = ADDR(.got) + 0x200000; + + /* + * We want the small data sections together, + * so single-instruction offsets can access + * them all, and initialized data all before + * uninitialized, so we can shorten the + * on-disk segment size. + */ + .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) { + *(.sdata) + *(.sdata1) + *(.srdata) + } + _edata = .; + + BSS_SECTION(0, 0, 0) + + _end = .; + + code : { + } :code + + STABS_DEBUG + DWARF_DEBUG + + /* Default discards */ + DISCARDS +} diff --git a/kernel/arch/ia64/lib/Makefile b/kernel/arch/ia64/lib/Makefile new file mode 100644 index 000000000..98771e2a7 --- /dev/null +++ b/kernel/arch/ia64/lib/Makefile @@ -0,0 +1,50 @@ +# +# Makefile for ia64-specific library routines.. 
+# + +obj-y := io.o + +lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ + __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \ + checksum.o clear_page.o csum_partial_copy.o \ + clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \ + flush.o ip_fast_csum.o do_csum.o \ + memset.o strlen.o xor.o + +obj-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o +obj-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o +lib-$(CONFIG_PERFMON) += carta_random.o + +AFLAGS___divdi3.o = +AFLAGS___udivdi3.o = -DUNSIGNED +AFLAGS___moddi3.o = -DMODULO +AFLAGS___umoddi3.o = -DUNSIGNED -DMODULO + +AFLAGS___divsi3.o = +AFLAGS___udivsi3.o = -DUNSIGNED +AFLAGS___modsi3.o = -DMODULO +AFLAGS___umodsi3.o = -DUNSIGNED -DMODULO + +$(obj)/__divdi3.o: $(src)/idiv64.S FORCE + $(call if_changed_dep,as_o_S) + +$(obj)/__udivdi3.o: $(src)/idiv64.S FORCE + $(call if_changed_dep,as_o_S) + +$(obj)/__moddi3.o: $(src)/idiv64.S FORCE + $(call if_changed_dep,as_o_S) + +$(obj)/__umoddi3.o: $(src)/idiv64.S FORCE + $(call if_changed_dep,as_o_S) + +$(obj)/__divsi3.o: $(src)/idiv32.S FORCE + $(call if_changed_dep,as_o_S) + +$(obj)/__udivsi3.o: $(src)/idiv32.S FORCE + $(call if_changed_dep,as_o_S) + +$(obj)/__modsi3.o: $(src)/idiv32.S FORCE + $(call if_changed_dep,as_o_S) + +$(obj)/__umodsi3.o: $(src)/idiv32.S FORCE + $(call if_changed_dep,as_o_S) diff --git a/kernel/arch/ia64/lib/carta_random.S b/kernel/arch/ia64/lib/carta_random.S new file mode 100644 index 000000000..d0674c360 --- /dev/null +++ b/kernel/arch/ia64/lib/carta_random.S @@ -0,0 +1,54 @@ +/* + * Fast, simple, yet decent quality random number generator based on + * a paper by David G. Carta ("Two Fast Implementations of the + * `Minimal Standard' Random Number Generator," Communications of the + * ACM, January, 1990). 
+ * + * Copyright (C) 2002 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include + +#define a r2 +#define m r3 +#define lo r8 +#define hi r9 +#define t0 r16 +#define t1 r17 +#define seed r32 + +GLOBAL_ENTRY(carta_random32) + movl a = (16807 << 16) | 16807 + ;; + pmpyshr2.u t0 = a, seed, 0 + pmpyshr2.u t1 = a, seed, 16 + ;; + unpack2.l t0 = t1, t0 + dep m = -1, r0, 0, 31 + ;; + zxt4 lo = t0 + shr.u hi = t0, 32 + ;; + dep t0 = 0, hi, 15, 49 // t0 = (hi & 0x7fff) + ;; + shl t0 = t0, 16 // t0 = (hi & 0x7fff) << 16 + shr t1 = hi, 15 // t1 = (hi >> 15) + ;; + add lo = lo, t0 + ;; + cmp.gtu p6, p0 = lo, m + ;; +(p6) and lo = lo, m + ;; +(p6) add lo = 1, lo + ;; + add lo = lo, t1 + ;; + cmp.gtu p6, p0 = lo, m + ;; +(p6) and lo = lo, m + ;; +(p6) add lo = 1, lo + br.ret.sptk.many rp +END(carta_random32) diff --git a/kernel/arch/ia64/lib/checksum.c b/kernel/arch/ia64/lib/checksum.c new file mode 100644 index 000000000..9fc955026 --- /dev/null +++ b/kernel/arch/ia64/lib/checksum.c @@ -0,0 +1,101 @@ +/* + * Network checksum routines + * + * Copyright (C) 1999, 2003 Hewlett-Packard Co + * Stephane Eranian + * + * Most of the code coming from arch/alpha/lib/checksum.c + * + * This file contains network checksum routines that are better done + * in an architecture-specific manner due to speed.. + */ + +#include +#include + +#include + +static inline unsigned short +from64to16 (unsigned long x) +{ + /* add up 32-bit words for 33 bits */ + x = (x & 0xffffffff) + (x >> 32); + /* add up 16-bit and 17-bit words for 17+c bits */ + x = (x & 0xffff) + (x >> 16); + /* add up 16-bit and 2-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented. 
+ */ +__sum16 +csum_tcpudp_magic (__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + return (__force __sum16)~from64to16( + (__force u64)saddr + (__force u64)daddr + + (__force u64)sum + ((len + proto) << 8)); +} + +EXPORT_SYMBOL(csum_tcpudp_magic); + +__wsum +csum_tcpudp_nofold (__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + unsigned long result; + + result = (__force u64)saddr + (__force u64)daddr + + (__force u64)sum + ((len + proto) << 8); + + /* Fold down to 32-bits so we don't lose in the typedef-less network stack. */ + /* 64 to 33 */ + result = (result & 0xffffffff) + (result >> 32); + /* 33 to 32 */ + result = (result & 0xffffffff) + (result >> 32); + return (__force __wsum)result; +} +EXPORT_SYMBOL(csum_tcpudp_nofold); + +extern unsigned long do_csum (const unsigned char *, long); + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum csum_partial(const void *buff, int len, __wsum sum) +{ + u64 result = do_csum(buff, len); + + /* add in old sum, and carry.. 
*/ + result += (__force u32)sum; + /* 32+c bits -> 32 bits */ + result = (result & 0xffffffff) + (result >> 32); + return (__force __wsum)result; +} + +EXPORT_SYMBOL(csum_partial); + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +__sum16 ip_compute_csum (const void *buff, int len) +{ + return (__force __sum16)~do_csum(buff,len); +} + +EXPORT_SYMBOL(ip_compute_csum); diff --git a/kernel/arch/ia64/lib/clear_page.S b/kernel/arch/ia64/lib/clear_page.S new file mode 100644 index 000000000..2d814e7ed --- /dev/null +++ b/kernel/arch/ia64/lib/clear_page.S @@ -0,0 +1,76 @@ +/* + * Copyright (C) 1999-2002 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger-Tang + * Copyright (C) 2002 Ken Chen + * + * 1/06/01 davidm Tuned for Itanium. + * 2/12/02 kchen Tuned for both Itanium and McKinley + * 3/08/02 davidm Some more tweaking + */ + +#include +#include + +#ifdef CONFIG_ITANIUM +# define L3_LINE_SIZE 64 // Itanium L3 line size +# define PREFETCH_LINES 9 // magic number +#else +# define L3_LINE_SIZE 128 // McKinley L3 line size +# define PREFETCH_LINES 12 // magic number +#endif + +#define saved_lc r2 +#define dst_fetch r3 +#define dst1 r8 +#define dst2 r9 +#define dst3 r10 +#define dst4 r11 + +#define dst_last r31 + +GLOBAL_ENTRY(clear_page) + .prologue + .regstk 1,0,0,0 + mov r16 = PAGE_SIZE/L3_LINE_SIZE-1 // main loop count, -1=repeat/until + .save ar.lc, saved_lc + mov saved_lc = ar.lc + + .body + mov ar.lc = (PREFETCH_LINES - 1) + mov dst_fetch = in0 + adds dst1 = 16, in0 + adds dst2 = 32, in0 + ;; +.fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE + adds dst3 = 48, in0 // executing this multiple times is harmless + br.cloop.sptk.few .fetch + ;; + addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch + mov ar.lc = r16 // one L3 line per iteration + adds dst4 = 64, in0 + ;; +#ifdef CONFIG_ITANIUM + // Optimized for Itanium +1: stf.spill.nta [dst1] = f0, 64 + stf.spill.nta [dst2] = f0, 64 + cmp.lt 
p8,p0=dst_fetch, dst_last + ;; +#else + // Optimized for McKinley +1: stf.spill.nta [dst1] = f0, 64 + stf.spill.nta [dst2] = f0, 64 + stf.spill.nta [dst3] = f0, 64 + stf.spill.nta [dst4] = f0, 128 + cmp.lt p8,p0=dst_fetch, dst_last + ;; + stf.spill.nta [dst1] = f0, 64 + stf.spill.nta [dst2] = f0, 64 +#endif + stf.spill.nta [dst3] = f0, 64 +(p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE + br.cloop.sptk.few 1b + ;; + mov ar.lc = saved_lc // restore lc + br.ret.sptk.many rp +END(clear_page) diff --git a/kernel/arch/ia64/lib/clear_user.S b/kernel/arch/ia64/lib/clear_user.S new file mode 100644 index 000000000..eecd8577b --- /dev/null +++ b/kernel/arch/ia64/lib/clear_user.S @@ -0,0 +1,209 @@ +/* + * This routine clears to zero a linear memory buffer in user space. + * + * Inputs: + * in0: address of buffer + * in1: length of buffer in bytes + * Outputs: + * r8: number of bytes that didn't get cleared due to a fault + * + * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co + * Stephane Eranian + */ + +#include + +// +// arguments +// +#define buf r32 +#define len r33 + +// +// local registers +// +#define cnt r16 +#define buf2 r17 +#define saved_lc r18 +#define saved_pfs r19 +#define tmp r20 +#define len2 r21 +#define len3 r22 + +// +// Theory of operations: +// - we check whether or not the buffer is small, i.e., less than 17 +// in which case we do the byte by byte loop. +// +// - Otherwise we go progressively from 1 byte store to 8byte store in +// the head part, the body is a 16byte store loop and we finish with the +// tail for the last 15 bytes. +// The good point about this breakdown is that the long buffer handling +// contains only 2 branches. +// +// The reason for not using shifting & masking for both the head and the +// tail is to stay semantically correct. This routine is not supposed +// to write bytes outside of the buffer. While most of the time this would +// be ok, we can't tolerate a mistake.
A classical example is the case +// of multithreaded code where the extra bytes touched are actually owned +// by another thread which runs concurrently to ours. Another, less likely, +// example is with device drivers where reading an I/O mapped location may +// have side effects (same thing for writing). +// + +GLOBAL_ENTRY(__do_clear_user) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,2,0,0,0 + cmp.eq p6,p0=r0,len // check for zero length + .save ar.lc, saved_lc + mov saved_lc=ar.lc // preserve ar.lc (slow) + .body + ;; // avoid WAW on CFM + adds tmp=-1,len // br.cloop is repeat/until + mov ret0=len // return value is length at this point +(p6) br.ret.spnt.many rp + ;; + cmp.lt p6,p0=16,len // if len > 16 then long memset + mov ar.lc=tmp // initialize lc for small count +(p6) br.cond.dptk .long_do_clear + ;; // WAR on ar.lc + // + // worst case 16 iterations, avg 8 iterations + // + // We could have played with the predicates to use the extra + // M slot for 2 stores/iteration but the cost of initializing + // the various counters compared to how long the loop is supposed + // to last on average does not make this solution viable. + // +1: + EX( .Lexit1, st1 [buf]=r0,1 ) + adds len=-1,len // countdown length using len + br.cloop.dptk 1b + ;; // avoid RAW on ar.lc + // + // .Lexit1: comes from byte by byte loop + // len contains bytes left +.Lexit1: + mov ret0=len // faster than using ar.lc + mov ar.lc=saved_lc + br.ret.sptk.many rp // end of short clear_user + + + // + // At this point we know we have more than 16 bytes to clear + // so we focus on alignment (no branches required) + // + // The use of len/len2 for countdown of the number of bytes left + // instead of ret0 is due to the fact that the exception code + // changes the value of r8.
+ // +.long_do_clear: + tbit.nz p6,p0=buf,0 // odd alignment (for long_do_clear) + ;; + EX( .Lexit3, (p6) st1 [buf]=r0,1 ) // 1-byte aligned +(p6) adds len=-1,len;; // sync because buf is modified + tbit.nz p6,p0=buf,1 + ;; + EX( .Lexit3, (p6) st2 [buf]=r0,2 ) // 2-byte aligned +(p6) adds len=-2,len;; + tbit.nz p6,p0=buf,2 + ;; + EX( .Lexit3, (p6) st4 [buf]=r0,4 ) // 4-byte aligned +(p6) adds len=-4,len;; + tbit.nz p6,p0=buf,3 + ;; + EX( .Lexit3, (p6) st8 [buf]=r0,8 ) // 8-byte aligned +(p6) adds len=-8,len;; + shr.u cnt=len,4 // number of 128-bit (2x64bit) words + ;; + cmp.eq p6,p0=r0,cnt + adds tmp=-1,cnt +(p6) br.cond.dpnt .dotail // we have less than 16 bytes left + ;; + adds buf2=8,buf // setup second base pointer + mov ar.lc=tmp + ;; + + // + // 16bytes/iteration core loop + // + // The second store can never generate a fault because + // we come into the loop only when we are 16-byte aligned. + // This means that if we cross a page then it will always be + // in the first store and never in the second. + // + // + // We need to keep track of the remaining length. A possible (optimistic) + // way would be to use ar.lc and derive how many bytes were left by + // doing: left = 16*ar.lc + 16. This would avoid the addition at + // every iteration. + // However we need to keep the synchronization point. A template + // M;;MB does not exist and thus we can keep the addition at no + // extra cycle cost (use a nop slot anyway). It also simplifies the + // (unlikely) error recovery code + // + +2: EX(.Lexit3, st8 [buf]=r0,16 ) + ;; // needed to get len correct when error + st8 [buf2]=r0,16 + adds len=-16,len + br.cloop.dptk 2b + ;; + mov ar.lc=saved_lc + // + // tail correction based on len only + // + // We alternate the use of len3,len2 to allow parallelism and correct + // error handling. We also reuse p6/p7 to return correct value. + // The addition of len2/len3 does not cost anything more compared to + // the regular memset as we had empty slots.
+ // +.dotail: + mov len2=len // for parallelization of error handling + mov len3=len + tbit.nz p6,p0=len,3 + ;; + EX( .Lexit2, (p6) st8 [buf]=r0,8 ) // at least 8 bytes +(p6) adds len3=-8,len2 + tbit.nz p7,p6=len,2 + ;; + EX( .Lexit2, (p7) st4 [buf]=r0,4 ) // at least 4 bytes +(p7) adds len2=-4,len3 + tbit.nz p6,p7=len,1 + ;; + EX( .Lexit2, (p6) st2 [buf]=r0,2 ) // at least 2 bytes +(p6) adds len3=-2,len2 + tbit.nz p7,p6=len,0 + ;; + EX( .Lexit2, (p7) st1 [buf]=r0 ) // only 1 byte left + mov ret0=r0 // success + br.ret.sptk.many rp // end of most likely path + + // + // Outlined error handling code + // + + // + // .Lexit3: comes from head and core loop, restores lc + // len contains bytes left + // + // + // .Lexit2: + // if p6 -> coming from st8 or st2 : len2 contains what's left + // if p7 -> coming from st4 or st1 : len3 contains what's left + // We must restore lc even though it might not have been used. +.Lexit2: + .pred.rel "mutex", p6, p7 +(p6) mov len=len2 +(p7) mov len=len3 + ;; + // + // .Lexit3: also reached by falling through from .Lexit2 + // len contains bytes left + // +.Lexit3: + mov ret0=len + mov ar.lc=saved_lc + br.ret.sptk.many rp +END(__do_clear_user) diff --git a/kernel/arch/ia64/lib/copy_page.S b/kernel/arch/ia64/lib/copy_page.S new file mode 100644 index 000000000..127d1d050 --- /dev/null +++ b/kernel/arch/ia64/lib/copy_page.S @@ -0,0 +1,98 @@ +/* + * + * Optimized version of the standard copy_page() function + * + * Inputs: + * in0: address of target page + * in1: address of source page + * Output: + * no return value + * + * Copyright (C) 1999, 2001 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger + * + * 4/06/01 davidm Tuned to make it perform well both for cached and uncached copies.
+ */ +#include +#include + +#define PIPE_DEPTH 3 +#define EPI p[PIPE_DEPTH-1] + +#define lcount r16 +#define saved_pr r17 +#define saved_lc r18 +#define saved_pfs r19 +#define src1 r20 +#define src2 r21 +#define tgt1 r22 +#define tgt2 r23 +#define srcf r24 +#define tgtf r25 +#define tgt_last r26 + +#define Nrot ((8*PIPE_DEPTH+7)&~7) + +GLOBAL_ENTRY(copy_page) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot + + .rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \ + t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH] + .rotp p[PIPE_DEPTH] + + .save ar.lc, saved_lc + mov saved_lc=ar.lc + mov ar.ec=PIPE_DEPTH + + mov lcount=PAGE_SIZE/64-1 + .save pr, saved_pr + mov saved_pr=pr + mov pr.rot=1<<16 + + .body + + mov src1=in1 + adds src2=8,in1 + mov tgt_last = PAGE_SIZE + ;; + adds tgt2=8,in0 + add srcf=512,in1 + mov ar.lc=lcount + mov tgt1=in0 + add tgtf=512,in0 + add tgt_last = tgt_last, in0 + ;; +1: +(p[0]) ld8 t1[0]=[src1],16 +(EPI) st8 [tgt1]=t1[PIPE_DEPTH-1],16 +(p[0]) ld8 t2[0]=[src2],16 +(EPI) st8 [tgt2]=t2[PIPE_DEPTH-1],16 + cmp.ltu p6,p0 = tgtf, tgt_last + ;; +(p[0]) ld8 t3[0]=[src1],16 +(EPI) st8 [tgt1]=t3[PIPE_DEPTH-1],16 +(p[0]) ld8 t4[0]=[src2],16 +(EPI) st8 [tgt2]=t4[PIPE_DEPTH-1],16 + ;; +(p[0]) ld8 t5[0]=[src1],16 +(EPI) st8 [tgt1]=t5[PIPE_DEPTH-1],16 +(p[0]) ld8 t6[0]=[src2],16 +(EPI) st8 [tgt2]=t6[PIPE_DEPTH-1],16 + ;; +(p[0]) ld8 t7[0]=[src1],16 +(EPI) st8 [tgt1]=t7[PIPE_DEPTH-1],16 +(p[0]) ld8 t8[0]=[src2],16 +(EPI) st8 [tgt2]=t8[PIPE_DEPTH-1],16 + +(p6) lfetch [srcf], 64 +(p6) lfetch [tgtf], 64 + br.ctop.sptk.few 1b + ;; + mov pr=saved_pr,0xffffffffffff0000 // restore predicates + mov ar.pfs=saved_pfs + mov ar.lc=saved_lc + br.ret.sptk.many rp +END(copy_page) diff --git a/kernel/arch/ia64/lib/copy_page_mck.S b/kernel/arch/ia64/lib/copy_page_mck.S new file mode 100644 index 000000000..3c45d60a8 --- /dev/null +++ b/kernel/arch/ia64/lib/copy_page_mck.S @@ -0,0 +1,185 @@ +/* + * 
McKinley-optimized version of copy_page(). + * + * Copyright (C) 2002 Hewlett-Packard Co + * David Mosberger + * + * Inputs: + * in0: address of target page + * in1: address of source page + * Output: + * no return value + * + * General idea: + * - use regular loads and stores to prefetch data to avoid consuming M-slot just for + * lfetches => good for in-cache performance + * - avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single + * cycle + * + * Principle of operation: + * First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes. + * To avoid secondary misses in L2, we prefetch both source and destination with a line-size + * of 128 bytes. When both of these lines are in the L2 and the first half of the + * source line is in L1, we start copying the remaining words. The second half of the + * source line is prefetched in an earlier iteration, so that by the time we start + * accessing it, it's also present in the L1. + * + * We use a software-pipelined loop to control the overall operation. The pipeline + * has 2*PREFETCH_DIST+K stages. The first PREFETCH_DIST stages are used for prefetching + * source cache-lines. The second PREFETCH_DIST stages are used for prefetching destination + * cache-lines; the last K stages are used to copy the cache-line words not copied by + * the prefetches. The four relevant points in the pipeline are called A, B, C, D: + * p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line + * should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought + * into L1D and p[D] is TRUE if a cacheline needs to be copied. + * + * This all sounds very complicated, but thanks to the modulo-scheduled loop support, + * the resulting code is very regular and quite easy to follow (once you get the idea). + * + * As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented + * as the separate .prefetch_loop.
Logically, this loop performs exactly like the + * main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed, + * so that each loop iteration is faster (again, good for cached case). + * + * When reading the code, it helps to keep the following picture in mind: + * + * word 0 word 1 + * +------+------+--- + * | v[x] | t1 | ^ + * | t2 | t3 | | + * | t4 | t5 | | + * | t6 | t7 | | 128 bytes + * | n[y] | t9 | | (L2 cache line) + * | t10 | t11 | | + * | t12 | t13 | | + * | t14 | t15 | v + * +------+------+--- + * + * Here, v[x] is copied by the (memory) prefetch. n[y] is loaded at p[C] + * to fetch the second-half of the L2 cache line into L1, and the tX words are copied in + * an order that avoids bank conflicts. + */ +#include +#include + +#define PREFETCH_DIST 8 // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st) + +#define src0 r2 +#define src1 r3 +#define dst0 r9 +#define dst1 r10 +#define src_pre_mem r11 +#define dst_pre_mem r14 +#define src_pre_l2 r15 +#define dst_pre_l2 r16 +#define t1 r17 +#define t2 r18 +#define t3 r19 +#define t4 r20 +#define t5 t1 // alias! +#define t6 t2 // alias! +#define t7 t3 // alias! +#define t9 t5 // alias! +#define t10 t4 // alias! +#define t11 t7 // alias! +#define t12 t6 // alias! +#define t14 t10 // alias! 
+#define t13 r21 +#define t15 r22 + +#define saved_lc r23 +#define saved_pr r24 + +#define A 0 +#define B (PREFETCH_DIST) +#define C (B + PREFETCH_DIST) +#define D (C + 3) +#define N (D + 1) +#define Nrot ((N + 7) & ~7) + +GLOBAL_ENTRY(copy_page) + .prologue + alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot + + .rotr v[2*PREFETCH_DIST], n[D-C+1] + .rotp p[N] + + .save ar.lc, saved_lc + mov saved_lc = ar.lc + .save pr, saved_pr + mov saved_pr = pr + .body + + mov src_pre_mem = in1 + mov pr.rot = 0x10000 + mov ar.ec = 1 // special unrolled loop + + mov dst_pre_mem = in0 + mov ar.lc = 2*PREFETCH_DIST - 1 + + add src_pre_l2 = 8*8, in1 + add dst_pre_l2 = 8*8, in0 + add src0 = 8, in1 // first t1 src + add src1 = 3*8, in1 // first t3 src + add dst0 = 8, in0 // first t1 dst + add dst1 = 3*8, in0 // first t3 dst + mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1 + nop.m 0 + nop.i 0 + ;; + // same as .line_copy loop, but with all predicated-off instructions removed: +.prefetch_loop: +(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 +(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 + br.ctop.sptk .prefetch_loop + ;; + cmp.eq p16, p0 = r0, r0 // reset p16 to 1 (br.ctop cleared it to zero) + mov ar.lc = t1 // with 64KB pages, t1 is too big to fit in 8 bits! 
+ mov ar.ec = N // # of stages in pipeline + ;; +.line_copy: +(p[D]) ld8 t2 = [src0], 3*8 // M0 +(p[D]) ld8 t4 = [src1], 3*8 // M1 +(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 prefetch dst from memory +(p[D]) st8 [dst_pre_l2] = n[D-C], 128 // M3 prefetch dst from L2 + ;; +(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 prefetch src from memory +(p[C]) ld8 n[0] = [src_pre_l2], 128 // M1 prefetch src from L2 +(p[D]) st8 [dst0] = t1, 8 // M2 +(p[D]) st8 [dst1] = t3, 8 // M3 + ;; +(p[D]) ld8 t5 = [src0], 8 +(p[D]) ld8 t7 = [src1], 3*8 +(p[D]) st8 [dst0] = t2, 3*8 +(p[D]) st8 [dst1] = t4, 3*8 + ;; +(p[D]) ld8 t6 = [src0], 3*8 +(p[D]) ld8 t10 = [src1], 8 +(p[D]) st8 [dst0] = t5, 8 +(p[D]) st8 [dst1] = t7, 3*8 + ;; +(p[D]) ld8 t9 = [src0], 3*8 +(p[D]) ld8 t11 = [src1], 3*8 +(p[D]) st8 [dst0] = t6, 3*8 +(p[D]) st8 [dst1] = t10, 8 + ;; +(p[D]) ld8 t12 = [src0], 8 +(p[D]) ld8 t14 = [src1], 8 +(p[D]) st8 [dst0] = t9, 3*8 +(p[D]) st8 [dst1] = t11, 3*8 + ;; +(p[D]) ld8 t13 = [src0], 4*8 +(p[D]) ld8 t15 = [src1], 4*8 +(p[D]) st8 [dst0] = t12, 8 +(p[D]) st8 [dst1] = t14, 8 + ;; +(p[D-1])ld8 t1 = [src0], 8 +(p[D-1])ld8 t3 = [src1], 8 +(p[D]) st8 [dst0] = t13, 4*8 +(p[D]) st8 [dst1] = t15, 4*8 + br.ctop.sptk .line_copy + ;; + mov ar.lc = saved_lc + mov pr = saved_pr, -1 + br.ret.sptk.many rp +END(copy_page) diff --git a/kernel/arch/ia64/lib/copy_user.S b/kernel/arch/ia64/lib/copy_user.S new file mode 100644 index 000000000..c952bdc6a --- /dev/null +++ b/kernel/arch/ia64/lib/copy_user.S @@ -0,0 +1,610 @@ +/* + * + * Optimized version of the copy_user() routine. + * It is used to copy data across the kernel/user boundary. + * + * The source and destination are always on opposite sides of + * the boundary. When reading from user space we must catch + * faults on loads. When writing to user space we must catch + * errors on stores. Note that because of the nature of the copy + * we don't need to worry about overlapping regions.
+ * + * + * Inputs: + * in0 address of destination buffer + * in1 address of source buffer + * in2 number of bytes to copy + * + * Outputs: + * ret0 0 in case of success. The number of bytes NOT copied in + * case of error. + * + * Copyright (C) 2000-2001 Hewlett-Packard Co + * Stephane Eranian + * + * Fixme: + * - handle the case where we have more than 16 bytes and the alignments + * are different. + * - more benchmarking + * - fix extraneous stop bit introduced by the EX() macro. + */ + +#include + +// +// Tuneable parameters +// +#define COPY_BREAK 16 // we do byte copy below (must be >=16) +#define PIPE_DEPTH 21 // pipe depth + +#define EPI p[PIPE_DEPTH-1] + +// +// arguments +// +#define dst in0 +#define src in1 +#define len in2 + +// +// local registers +// +#define t1 r2 // rshift in bytes +#define t2 r3 // lshift in bytes +#define rshift r14 // right shift in bits +#define lshift r15 // left shift in bits +#define word1 r16 +#define word2 r17 +#define cnt r18 +#define len2 r19 +#define saved_lc r20 +#define saved_pr r21 +#define tmp r22 +#define val r23 +#define src1 r24 +#define dst1 r25 +#define src2 r26 +#define dst2 r27 +#define len1 r28 +#define enddst r29 +#define endsrc r30 +#define saved_pfs r31 + +GLOBAL_ENTRY(__copy_user) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7) + + .rotr val1[PIPE_DEPTH],val2[PIPE_DEPTH] + .rotp p[PIPE_DEPTH] + + adds len2=-1,len // br.ctop is repeat/until + mov ret0=r0 + + ;; // RAW of cfm when len=0 + cmp.eq p8,p0=r0,len // check for zero length + .save ar.lc, saved_lc + mov saved_lc=ar.lc // preserve ar.lc (slow) +(p8) br.ret.spnt.many rp // empty memcpy() + ;; + add enddst=dst,len // first byte after end of destination + add endsrc=src,len // first byte after end of source + .save pr, saved_pr + mov saved_pr=pr // preserve predicates + + .body + + mov dst1=dst // copy because of rotation + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are
false + + mov src1=src // copy because of rotation + mov ar.lc=len2 // initialize lc for small count + cmp.lt p10,p7=COPY_BREAK,len // if len > COPY_BREAK then long copy + + xor tmp=src,dst // same alignment test prepare +(p10) br.cond.dptk .long_copy_user + ;; // RAW pr.rot/p16 ? + // + // Now we do the byte by byte loop with software pipeline + // + // p7 is necessarily false by now +1: + EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1) + EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) + br.ctop.dptk.few 1b + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,0xffffffffffff0000 + mov ar.pfs=saved_pfs // restore ar.ec + br.ret.sptk.many rp // end of short memcpy + + // + // Not 8-byte aligned + // +.diff_align_copy_user: + // At this point we know we have more than 16 bytes to copy + // and also that src and dest do _not_ have the same alignment. + and src2=0x7,src1 // src offset + and dst2=0x7,dst1 // dst offset + ;; + // The basic idea is that we copy byte-by-byte at the head so + // that we can reach 8-byte alignment for both src1 and dst1. + // Then copy the body using software pipelined 8-byte copy, + // shifting the two back-to-back words right and left, then copy + // the tail by copying byte-by-byte. + // + // Fault handling. If the byte-by-byte at the head fails on the + // load, then restart and finish the pipleline by copying zeros + // to the dst1. Then copy zeros for the rest of dst1. + // If 8-byte software pipeline fails on the load, do the same as + // failure_in3 does. If the byte-by-byte at the tail fails, it is + // handled simply by failure_in_pipe1. + // + // The case p14 represents the source has more bytes in the + // the first word (by the shifted part), whereas the p15 needs to + // copy some bytes from the 2nd word of the source that has the + // tail of the 1st of the destination. + // + + // + // Optimization. If dst1 is 8-byte aligned (quite common), we don't need + // to copy the head to dst1, to start 8-byte copy software pipeline. 
+ // We know src1 is not 8-byte aligned in this case. + // + cmp.eq p14,p15=r0,dst2 +(p15) br.cond.spnt 1f + ;; + sub t1=8,src2 + mov t2=src2 + ;; + shl rshift=t2,3 + sub len1=len,t1 // set len1 + ;; + sub lshift=64,rshift + ;; + br.cond.spnt .word_copy_user + ;; +1: + cmp.leu p14,p15=src2,dst2 + sub t1=dst2,src2 + ;; + .pred.rel "mutex", p14, p15 +(p14) sub word1=8,src2 // (8 - src offset) +(p15) sub t1=r0,t1 // absolute value +(p15) sub word1=8,dst2 // (8 - dst offset) + ;; + // For the case p14, we don't need to copy the shifted part to + // the 1st word of destination. + sub t2=8,t1 +(p14) sub word1=word1,t1 + ;; + sub len1=len,word1 // resulting len +(p15) shl rshift=t1,3 // in bits +(p14) shl rshift=t2,3 + ;; +(p14) sub len1=len1,t1 + adds cnt=-1,word1 + ;; + sub lshift=64,rshift + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + mov ar.lc=cnt + ;; +2: + EX(.failure_in_pipe2,(p16) ld1 val1[0]=[src1],1) + EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) + br.ctop.dptk.few 2b + ;; + clrrrb + ;; +.word_copy_user: + cmp.gtu p9,p0=16,len1 +(p9) br.cond.spnt 4f // if (16 > len1) skip 8-byte copy + ;; + shr.u cnt=len1,3 // number of 64-bit words + ;; + adds cnt=-1,cnt + ;; + .pred.rel "mutex", p14, p15 +(p14) sub src1=src1,t2 +(p15) sub src1=src1,t1 + // + // Now both src1 and dst1 point to an 8-byte aligned address. And + // we have more than 8 bytes to copy. + // + mov ar.lc=cnt + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + ;; +3: + // + // The pipleline consists of 3 stages: + // 1 (p16): Load a word from src1 + // 2 (EPI_1): Shift right pair, saving to tmp + // 3 (EPI): Store tmp to dst1 + // + // To make it simple, use at least 2 (p16) loops to set up val1[n] + // because we need 2 back-to-back val1[] to get tmp. + // Note that this implies EPI_2 must be p18 or greater. 
+ // + +#define EPI_1 p[PIPE_DEPTH-2] +#define SWITCH(pred, shift) cmp.eq pred,p0=shift,rshift +#define CASE(pred, shift) \ + (pred) br.cond.spnt .copy_user_bit##shift +#define BODY(rshift) \ +.copy_user_bit##rshift: \ +1: \ + EX(.failure_out,(EPI) st8 [dst1]=tmp,8); \ +(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \ + EX(3f,(p16) ld8 val1[1]=[src1],8); \ +(p16) mov val1[0]=r0; \ + br.ctop.dptk 1b; \ + ;; \ + br.cond.sptk.many .diff_align_do_tail; \ +2: \ +(EPI) st8 [dst1]=tmp,8; \ +(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \ +3: \ +(p16) mov val1[1]=r0; \ +(p16) mov val1[0]=r0; \ + br.ctop.dptk 2b; \ + ;; \ + br.cond.sptk.many .failure_in2 + + // + // Since the instruction 'shrp' requires a fixed 128-bit value + // specifying the bits to shift, we need to provide 7 cases + // below. + // + SWITCH(p6, 8) + SWITCH(p7, 16) + SWITCH(p8, 24) + SWITCH(p9, 32) + SWITCH(p10, 40) + SWITCH(p11, 48) + SWITCH(p12, 56) + ;; + CASE(p6, 8) + CASE(p7, 16) + CASE(p8, 24) + CASE(p9, 32) + CASE(p10, 40) + CASE(p11, 48) + CASE(p12, 56) + ;; + BODY(8) + BODY(16) + BODY(24) + BODY(32) + BODY(40) + BODY(48) + BODY(56) + ;; +.diff_align_do_tail: + .pred.rel "mutex", p14, p15 +(p14) sub src1=src1,t1 +(p14) adds dst1=-8,dst1 +(p15) sub dst1=dst1,t1 + ;; +4: + // Tail correction. + // + // The problem with this piplelined loop is that the last word is not + // loaded and thus parf of the last word written is not correct. + // To fix that, we simply copy the tail byte by byte. + + sub len1=endsrc,src1,1 + clrrrb + ;; + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + mov ar.lc=len1 + ;; +5: + EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1) + EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) + br.ctop.dptk.few 5b + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,0xffffffffffff0000 + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + // + // Beginning of long mempcy (i.e. 
> 16 bytes) + // +.long_copy_user: + tbit.nz p6,p7=src1,0 // odd alignment + and tmp=7,tmp + ;; + cmp.eq p10,p8=r0,tmp + mov len1=len // copy because of rotation +(p8) br.cond.dpnt .diff_align_copy_user + ;; + // At this point we know we have more than 16 bytes to copy + // and also that both src and dest have the same alignment + // which may not be the one we want. So for now we must move + // forward slowly until we reach 16byte alignment: no need to + // worry about reaching the end of buffer. + // + EX(.failure_in1,(p6) ld1 val1[0]=[src1],1) // 1-byte aligned +(p6) adds len1=-1,len1;; + tbit.nz p7,p0=src1,1 + ;; + EX(.failure_in1,(p7) ld2 val1[1]=[src1],2) // 2-byte aligned +(p7) adds len1=-2,len1;; + tbit.nz p8,p0=src1,2 + ;; + // + // Stop bit not required after ld4 because if we fail on ld4 + // we have never executed the ld1, therefore st1 is not executed. + // + EX(.failure_in1,(p8) ld4 val2[0]=[src1],4) // 4-byte aligned + ;; + EX(.failure_out,(p6) st1 [dst1]=val1[0],1) + tbit.nz p9,p0=src1,3 + ;; + // + // Stop bit not required after ld8 because if we fail on ld8 + // we have never executed the ld2, therefore st2 is not executed. 
+ // + EX(.failure_in1,(p9) ld8 val2[1]=[src1],8) // 8-byte aligned + EX(.failure_out,(p7) st2 [dst1]=val1[1],2) +(p8) adds len1=-4,len1 + ;; + EX(.failure_out, (p8) st4 [dst1]=val2[0],4) +(p9) adds len1=-8,len1;; + shr.u cnt=len1,4 // number of 128-bit (2x64bit) words + ;; + EX(.failure_out, (p9) st8 [dst1]=val2[1],8) + tbit.nz p6,p0=len1,3 + cmp.eq p7,p0=r0,cnt + adds tmp=-1,cnt // br.ctop is repeat/until +(p7) br.cond.dpnt .dotail // we have less than 16 bytes left + ;; + adds src2=8,src1 + adds dst2=8,dst1 + mov ar.lc=tmp + ;; + // + // 16bytes/iteration + // +2: + EX(.failure_in3,(p16) ld8 val1[0]=[src1],16) +(p16) ld8 val2[0]=[src2],16 + + EX(.failure_out, (EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16) +(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16 + br.ctop.dptk 2b + ;; // RAW on src1 when fall through from loop + // + // Tail correction based on len only + // + // No matter where we come from (loop or test) the src1 pointer + // is 16 byte aligned AND we have less than 16 bytes to copy. + // +.dotail: + EX(.failure_in1,(p6) ld8 val1[0]=[src1],8) // at least 8 bytes + tbit.nz p7,p0=len1,2 + ;; + EX(.failure_in1,(p7) ld4 val1[1]=[src1],4) // at least 4 bytes + tbit.nz p8,p0=len1,1 + ;; + EX(.failure_in1,(p8) ld2 val2[0]=[src1],2) // at least 2 bytes + tbit.nz p9,p0=len1,0 + ;; + EX(.failure_out, (p6) st8 [dst1]=val1[0],8) + ;; + EX(.failure_in1,(p9) ld1 val2[1]=[src1]) // only 1 byte left + mov ar.lc=saved_lc + ;; + EX(.failure_out,(p7) st4 [dst1]=val1[1],4) + mov pr=saved_pr,0xffffffffffff0000 + ;; + EX(.failure_out, (p8) st2 [dst1]=val2[0],2) + mov ar.pfs=saved_pfs + ;; + EX(.failure_out, (p9) st1 [dst1]=val2[1]) + br.ret.sptk.many rp + + + // + // Here we handle the case where the byte by byte copy fails + // on the load. 
+ // Several factors make the zeroing of the rest of the buffer kind of + // tricky: + // - the pipeline: loads/stores are not in sync (pipeline) + // + // In the same loop iteration, the dst1 pointer does not directly + // reflect where the faulty load was. + // + // - pipeline effect + // When you get a fault on load, you may have valid data from + // previous loads not yet store in transit. Such data must be + // store normally before moving onto zeroing the rest. + // + // - single/multi dispersal independence. + // + // solution: + // - we don't disrupt the pipeline, i.e. data in transit in + // the software pipeline will be eventually move to memory. + // We simply replace the load with a simple mov and keep the + // pipeline going. We can't really do this inline because + // p16 is always reset to 1 when lc > 0. + // +.failure_in_pipe1: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied +1: +(p16) mov val1[0]=r0 +(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1 + br.ctop.dptk 1b + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + // + // This is the case where the byte by byte copy fails on the load + // when we copy the head. We need to finish the pipeline and copy + // zeros for the rest of the destination. Since this happens + // at the top we still need to fill the body and tail. +.failure_in_pipe2: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied +2: +(p16) mov val1[0]=r0 +(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1 + br.ctop.dptk 2b + ;; + sub len=enddst,dst1,1 // precompute len + br.cond.dptk.many .failure_in1bis + ;; + + // + // Here we handle the head & tail part when we check for alignment. + // The following code handles only the load failures. The + // main diffculty comes from the fact that loads/stores are + // scheduled. So when you fail on a load, the stores corresponding + // to previous successful loads must be executed. 
+ // + // However some simplifications are possible given the way + // things work. + // + // 1) HEAD + // Theory of operation: + // + // Page A | Page B + // ---------|----- + // 1|8 x + // 1 2|8 x + // 4|8 x + // 1 4|8 x + // 2 4|8 x + // 1 2 4|8 x + // |1 + // |2 x + // |4 x + // + // page_size >= 4k (2^12). (x means 4, 2, 1) + // Here we suppose Page A exists and Page B does not. + // + // As we move towards eight byte alignment we may encounter faults. + // The numbers on each page show the size of the load (current alignment). + // + // Key point: + // - if you fail on 1, 2, 4 then you have never executed any smaller + // size loads, e.g. failing ld4 means no ld1 nor ld2 executed + // before. + // + // This allows us to simplify the cleanup code, because basically you + // only have to worry about "pending" stores in the case of a failing + // ld8(). Given the way the code is written today, this means only + // worry about st2, st4. There we can use the information encapsulated + // into the predicates. + // + // Other key point: + // - if you fail on the ld8 in the head, it means you went straight + // to it, i.e. 8byte alignment within an unexisting page. + // Again this comes from the fact that if you crossed just for the ld8 then + // you are 8byte aligned but also 16byte align, therefore you would + // either go for the 16byte copy loop OR the ld8 in the tail part. + // The combination ld1, ld2, ld4, ld8 where you fail on ld8 is impossible + // because it would mean you had 15bytes to copy in which case you + // would have defaulted to the byte by byte copy. + // + // + // 2) TAIL + // Here we now we have less than 16 bytes AND we are either 8 or 16 byte + // aligned. + // + // Key point: + // This means that we either: + // - are right on a page boundary + // OR + // - are at more than 16 bytes from a page boundary with + // at most 15 bytes to copy: no chance of crossing. 
+ // + // This allows us to assume that if we fail on a load we haven't possibly + // executed any of the previous (tail) ones, so we don't need to do + // any stores. For instance, if we fail on ld2, this means we had + // 2 or 3 bytes left to copy and we did not execute the ld8 nor ld4. + // + // This means that we are in a situation similar the a fault in the + // head part. That's nice! + // +.failure_in1: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied + sub len=endsrc,src1,1 + // + // we know that ret0 can never be zero at this point + // because we failed why trying to do a load, i.e. there is still + // some work to do. + // The failure_in1bis and length problem is taken care of at the + // calling side. + // + ;; +.failure_in1bis: // from (.failure_in3) + mov ar.lc=len // Continue with a stupid byte store. + ;; +5: + st1 [dst1]=r0,1 + br.cloop.dptk 5b + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + // + // Here we simply restart the loop but instead + // of doing loads we fill the pipeline with zeroes + // We can't simply store r0 because we may have valid + // data in transit in the pipeline. + // ar.lc and ar.ec are setup correctly at this point + // + // we MUST use src1/endsrc here and not dst1/enddst because + // of the pipeline effect. + // +.failure_in3: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied + ;; +2: +(p16) mov val1[0]=r0 +(p16) mov val2[0]=r0 +(EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16 +(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16 + br.ctop.dptk 2b + ;; + cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? + sub len=enddst,dst1,1 // precompute len +(p6) br.cond.dptk .failure_in1bis + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + +.failure_in2: + sub ret0=endsrc,src1 + cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? 
+ sub len=enddst,dst1,1 // precompute len +(p6) br.cond.dptk .failure_in1bis + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + // + // handling of failures on stores: that's the easy part + // +.failure_out: + sub ret0=enddst,dst1 + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + + mov ar.pfs=saved_pfs + br.ret.sptk.many rp +END(__copy_user) diff --git a/kernel/arch/ia64/lib/csum_partial_copy.c b/kernel/arch/ia64/lib/csum_partial_copy.c new file mode 100644 index 000000000..118daf5a0 --- /dev/null +++ b/kernel/arch/ia64/lib/csum_partial_copy.c @@ -0,0 +1,140 @@ +/* + * Network Checksum & Copy routine + * + * Copyright (C) 1999, 2003-2004 Hewlett-Packard Co + * Stephane Eranian + * + * Most of the code has been imported from Linux/Alpha + */ + +#include +#include +#include + +#include + +/* + * XXX Fixme: those 2 inlines are meant for debugging and will go away + */ +static inline unsigned +short from64to16(unsigned long x) +{ + /* add up 32-bit words for 33 bits */ + x = (x & 0xffffffff) + (x >> 32); + /* add up 16-bit and 17-bit words for 17+c bits */ + x = (x & 0xffff) + (x >> 16); + /* add up 16-bit and 2-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} +/* do_csum_c(): 16-bit one's-complement sum of buff[0..len) combined with psum; returns psum unchanged when len <= 0. */ +static inline +unsigned long do_csum_c(const unsigned char * buff, int len, unsigned int psum) +{ + int odd, count; + unsigned long result = 0; /* data sum only; psum is folded in at the very end */ + + if (len <= 0) + return psum; /* nothing to sum */ + odd = 1 & (unsigned long) buff; + if (odd) { + result = *buff << 8; + len--; + buff++; + } + count = len >> 1; /* nr of 16-bit words.. */ + if (count) { + if (2 & (unsigned long) buff) { + result += *(unsigned short *) buff; + count--; + len -= 2; + buff += 2; + } + count >>= 1; /* nr of 32-bit words.. */ + if (count) { + if (4 & (unsigned long) buff) { + result += *(unsigned int *) buff; + count--; + len -= 4; + buff += 4; + } + count >>= 1; /* nr of 64-bit words..
*/ + if (count) { + unsigned long carry = 0; + do { + unsigned long w = *(unsigned long *) buff; + count--; + buff += 8; + result += carry; + result += w; + carry = (w > result); + } while (count); + result += carry; + result = (result & 0xffffffff) + (result >> 32); + } + if (len & 4) { + result += *(unsigned int *) buff; + buff += 4; + } + } + if (len & 2) { + result += *(unsigned short *) buff; + buff += 2; + } + } + if (len & 1) + result += *buff; + + result = from64to16(result); + + if (odd) + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); + + /* add psum only now, so the odd-address byte swap above never touches it */ + return from64to16(result + psum); +} + +/* + * XXX Fixme + * + * This is very ugly but temporary. THIS NEEDS SERIOUS ENHANCEMENTS. + * But it's very tricky to get right even in C. + */ +extern unsigned long do_csum(const unsigned char *, long); + +__wsum +csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum psum, int *errp) +{ + unsigned long result; + + /* XXX Fixme + * for now we separate the copy from checksum for obvious + * alignment difficulties. Look at the Alpha code and you'll be + * scared. + */ + + if (__copy_from_user(dst, src, len) != 0 && errp) + *errp = -EFAULT; + + result = do_csum(dst, len); + + /* add in old sum, and carry..
*/ + result += (__force u32)psum; + /* 32+c bits -> 32 bits */ + result = (result & 0xffffffff) + (result >> 32); + return (__force __wsum)result; +} + +EXPORT_SYMBOL(csum_partial_copy_from_user); +/* Variant taking a plain (non-__user) source pointer; it passes errp == NULL, so csum_partial_copy_from_user() does not report a copy fault. */ +__wsum +csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) +{ + return csum_partial_copy_from_user((__force const void __user *)src, + dst, len, sum, NULL); +} + +EXPORT_SYMBOL(csum_partial_copy_nocheck); diff --git a/kernel/arch/ia64/lib/do_csum.S b/kernel/arch/ia64/lib/do_csum.S new file mode 100644 index 000000000..1a431a5cf --- /dev/null +++ b/kernel/arch/ia64/lib/do_csum.S @@ -0,0 +1,323 @@ +/* + * + * Optimized version of the standard do_csum() function + * + * Return: a 64bit quantity containing the 16bit Internet checksum + * + * Inputs: + * in0: address of buffer to checksum (char *) + * in1: length of the buffer (int) + * + * Copyright (C) 1999, 2001-2002 Hewlett-Packard Co + * Stephane Eranian + * + * 02/04/22 Ken Chen + * Data locality study on the checksum buffer. + * More optimization cleanup - remove excessive stop bits. + * 02/04/08 David Mosberger + * More cleanup and tuning. + * 01/04/18 Jun Nakajima + * Clean up and optimize the software pipeline, loading two + * back-to-back 8-byte words per loop. Clean up the initialization + * for the loop. Support the cases where load latency = 1 or 2. + * Set CONFIG_IA64_LOAD_LATENCY to 1 or 2 (default). + */ + +#include + +// +// Theory of operations: +// The goal is to go as quickly as possible to the point where +// we can checksum 16 bytes/loop. Before reaching that point we must +// take care of incorrect alignment of first byte. +// +// The code hereafter also takes care of the "tail" part of the buffer +// before entering the core loop, if any. The checksum is a sum so it +// allows us to commute operations. So we do the "head" and "tail" +// first to finish at full speed in the body.
Once we get the head and +// tail values, we feed them into the pipeline, very handy initialization. +// +// Of course we deal with the special case where the whole buffer fits +// into one 8 byte word. In this case we have only one entry in the pipeline. +// +// We use a (LOAD_LATENCY+2)-stage pipeline in the loop to account for +// possible load latency and also to accommodate for head and tail. +// +// The end of the function deals with folding the checksum from 64bits +// down to 16bits taking care of the carry. +// +// This version avoids synchronization in the core loop by also using a +// pipeline for the accumulation of the checksum in resultx[] (x=1,2). +// +// wordx[] (x=1,2) +// |---| +// | | 0 : new value loaded in pipeline +// |---| +// | | - : in transit data +// |---| +// | | LOAD_LATENCY : current value to add to checksum +// |---| +// | | LOAD_LATENCY+1 : previous value added to checksum +// |---| (previous iteration) +// +// resultx[] (x=1,2) +// |---| +// | | 0 : initial value +// |---| +// | | LOAD_LATENCY-1 : new checksum +// |---| +// | | LOAD_LATENCY : previous value of checksum +// |---| +// | | LOAD_LATENCY+1 : final checksum when out of the loop +// |---| +// +// +// See RFC1071 "Computing the Internet Checksum" for various techniques for +// calculating the Internet checksum. +// +// NOT YET DONE: +// - Maybe another algorithm which would take care of the folding at the +// end in a different manner +// - Work with people more knowledgeable than me on the network stack +// to figure out if we could not split the function depending on the +// type of packet or alignment we get. Like the ip_fast_csum() routine +// where we know we have at least 20bytes worth of data to checksum. +// - Do a better job of handling small packets. +// - Note on prefetching: it was found that under various load, i.e. 
ftp read/write, +// nfs read/write, the L1 cache hit rate is at 60% and L2 cache hit rate is at 99.8% +// on the data that buffer points to (partly because the checksum is often preceded by +// a copy_from_user()). This finding indicates that lfetch will not be beneficial since +// the data is already in the cache. +// + +#define saved_pfs r11 +#define hmask r16 +#define tmask r17 +#define first1 r18 +#define firstval r19 +#define firstoff r20 +#define last r21 +#define lastval r22 +#define lastoff r23 +#define saved_lc r24 +#define saved_pr r25 +#define tmp1 r26 +#define tmp2 r27 +#define tmp3 r28 +#define carry1 r29 +#define carry2 r30 +#define first2 r31 + +#define buf in0 +#define len in1 + +#define LOAD_LATENCY 2 // XXX fix me + +#if (LOAD_LATENCY != 1) && (LOAD_LATENCY != 2) +# error "Only 1 or 2 is supported/tested for LOAD_LATENCY." +#endif + +#define PIPE_DEPTH (LOAD_LATENCY+2) +#define ELD p[LOAD_LATENCY] // end of load +#define ELD_1 p[LOAD_LATENCY+1] // and next stage + +// unsigned long do_csum(unsigned char *buf,long len) + +GLOBAL_ENTRY(do_csum) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,2,16,0,16 + .rotr word1[4], word2[4],result1[LOAD_LATENCY+2],result2[LOAD_LATENCY+2] + .rotp p[PIPE_DEPTH], pC1[2], pC2[2] + mov ret0=r0 // in case we have zero length + cmp.lt p0,p6=r0,len // check for zero length or negative (32bit len) + ;; + add tmp1=buf,len // address one past the last byte + .save pr, saved_pr + mov saved_pr=pr // preserve predicates (rotation) +(p6) br.ret.spnt.many rp // return if zero or negative length + + mov hmask=-1 // initialize head mask + tbit.nz p15,p0=buf,0 // is buf an odd address?
+ and first1=-8,buf // 8-byte align down address of first1 element + + and firstoff=7,buf // how many bytes off for first1 element + mov tmask=-1 // initialize tail mask + + ;; + adds tmp2=-1,tmp1 // last-1 + and lastoff=7,tmp1 // how many bytes off for last element + ;; + sub tmp1=8,lastoff // complement to lastoff + and last=-8,tmp2 // address of word containing last byte + ;; + sub tmp3=last,first1 // tmp3=distance from first1 to last + .save ar.lc, saved_lc + mov saved_lc=ar.lc // save lc + cmp.eq p8,p9=last,first1 // everything fits in one word ? + + ld8 firstval=[first1],8 // load, ahead of time, "first1" word + and tmp1=7, tmp1 // make sure that if tmp1==8 -> tmp1=0 + shl tmp2=firstoff,3 // number of bits + ;; +(p9) ld8 lastval=[last] // load, ahead of time, "last" word, if needed + shl tmp1=tmp1,3 // number of bits +(p9) adds tmp3=-8,tmp3 // effectively loaded + ;; +(p8) mov lastval=r0 // we don't need lastval if first1==last + shl hmask=hmask,tmp2 // build head mask, mask off [0,firstoff[ + shr.u tmask=tmask,tmp1 // build tail mask, mask off ]8,lastoff] + ;; + .body +#define count tmp3 + +(p8) and hmask=hmask,tmask // apply tail mask to head mask if 1 word only +(p9) and word2[0]=lastval,tmask // mask the last word as appropriate + shr.u count=count,3 // how many 8-byte? + ;; + // If count is odd, finish this 8-byte word so that we can + // load two back-to-back 8-byte words per loop thereafter. + and word1[0]=firstval,hmask // and mask it as appropriate + tbit.nz p10,p11=count,0 // if (count is odd) + ;; +(p8) mov result1[0]=word1[0] +(p9) add result1[0]=word1[0],word2[0] + ;; + cmp.ltu p6,p0=result1[0],word1[0] // check the carry + cmp.eq.or.andcm p8,p0=0,count // exit if zero 8-byte + ;; +(p6) adds result1[0]=1,result1[0] +(p8) br.cond.dptk .do_csum_exit // if (within an 8-byte word) +(p11) br.cond.dptk .do_csum16 // if (count is even) + + // Here count is odd.
+ ld8 word1[1]=[first1],8 // load an 8-byte word + cmp.eq p9,p10=1,count // if (count == 1) + adds count=-1,count // loaded an 8-byte word + ;; + add result1[0]=result1[0],word1[1] + ;; + cmp.ltu p6,p0=result1[0],word1[1] + ;; +(p6) adds result1[0]=1,result1[0] +(p9) br.cond.sptk .do_csum_exit // if (count == 1) exit + // Fall through to calculate the checksum, feeding result1[0] as + // the initial value in result1[0]. + // + // Calculate the checksum loading two 8-byte words per loop. + // +.do_csum16: + add first2=8,first1 + shr.u count=count,1 // we do 16 bytes per loop + ;; + adds count=-1,count // ar.lc is loaded with (number of 16-byte iterations) - 1 + mov carry1=r0 + mov carry2=r0 + brp.loop.imp 1f,2f + ;; + mov ar.ec=PIPE_DEPTH + mov ar.lc=count // set lc + mov pr.rot=1<<16 + // result1[0] must be initialized in advance. + mov result2[0]=r0 + ;; + .align 32 +1: +(ELD_1) cmp.ltu pC1[0],p0=result1[LOAD_LATENCY],word1[LOAD_LATENCY+1] +(pC1[1])adds carry1=1,carry1 +(ELD_1) cmp.ltu pC2[0],p0=result2[LOAD_LATENCY],word2[LOAD_LATENCY+1] +(pC2[1])adds carry2=1,carry2 +(ELD) add result1[LOAD_LATENCY-1]=result1[LOAD_LATENCY],word1[LOAD_LATENCY] +(ELD) add result2[LOAD_LATENCY-1]=result2[LOAD_LATENCY],word2[LOAD_LATENCY] +2: +(p[0]) ld8 word1[0]=[first1],16 +(p[0]) ld8 word2[0]=[first2],16 + br.ctop.sptk 1b + ;; + // Since len is a 32-bit value, carry cannot be larger than a 64-bit value.
+(pC1[1])adds carry1=1,carry1 // since we miss the last one +(pC2[1])adds carry2=1,carry2 + ;; + add result1[LOAD_LATENCY+1]=result1[LOAD_LATENCY+1],carry1 + add result2[LOAD_LATENCY+1]=result2[LOAD_LATENCY+1],carry2 + ;; + cmp.ltu p6,p0=result1[LOAD_LATENCY+1],carry1 + cmp.ltu p7,p0=result2[LOAD_LATENCY+1],carry2 + ;; +(p6) adds result1[LOAD_LATENCY+1]=1,result1[LOAD_LATENCY+1] +(p7) adds result2[LOAD_LATENCY+1]=1,result2[LOAD_LATENCY+1] + ;; + add result1[0]=result1[LOAD_LATENCY+1],result2[LOAD_LATENCY+1] + ;; + cmp.ltu p6,p0=result1[0],result2[LOAD_LATENCY+1] + ;; +(p6) adds result1[0]=1,result1[0] + ;; +.do_csum_exit: + // + // now fold 64 into 16 bits taking care of carry + // that's not very good because it has lots of sequentiality + // + mov tmp3=0xffff + zxt4 tmp1=result1[0] // low 32 bits + shr.u tmp2=result1[0],32 // high 32 bits + ;; + add result1[0]=tmp1,tmp2 // 64 -> 33 bits + ;; + and tmp1=result1[0],tmp3 + shr.u tmp2=result1[0],16 + ;; + add result1[0]=tmp1,tmp2 + ;; + and tmp1=result1[0],tmp3 + shr.u tmp2=result1[0],16 + ;; + add result1[0]=tmp1,tmp2 + ;; + and tmp1=result1[0],tmp3 + shr.u tmp2=result1[0],16 + ;; + add ret0=tmp1,tmp2 + mov pr=saved_pr,0xffffffffffff0000 + ;; + // if buf was odd then swap bytes + mov ar.pfs=saved_pfs // restore ar.ec +(p15) mux1 ret0=ret0,@rev // reverse word + ;; + mov ar.lc=saved_lc +(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes + br.ret.sptk.many rp + +// I (Jun Nakajima) wrote an equivalent code (see below), but it was +// not much better than the original. So keep the original there so that +// someone else can challenge.
+// +// shr.u word1[0]=result1[0],32 +// zxt4 result1[0]=result1[0] +// ;; +// add result1[0]=result1[0],word1[0] +// ;; +// zxt2 result2[0]=result1[0] +// extr.u word1[0]=result1[0],16,16 +// shr.u carry1=result1[0],32 +// ;; +// add result2[0]=result2[0],word1[0] +// ;; +// add result2[0]=result2[0],carry1 +// ;; +// extr.u ret0=result2[0],16,16 +// ;; +// add ret0=ret0,result2[0] +// ;; +// zxt2 ret0=ret0 +// mov ar.pfs=saved_pfs // restore ar.ec +// mov pr=saved_pr,0xffffffffffff0000 +// ;; +// // if buf was odd then swap bytes +// mov ar.lc=saved_lc +//(p15) mux1 ret0=ret0,@rev // reverse word +// ;; +//(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes +// br.ret.sptk.many rp + +END(do_csum) diff --git a/kernel/arch/ia64/lib/flush.S b/kernel/arch/ia64/lib/flush.S new file mode 100644 index 000000000..1d8c88860 --- /dev/null +++ b/kernel/arch/ia64/lib/flush.S @@ -0,0 +1,117 @@ +/* + * Cache flushing routines. + * + * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co + * David Mosberger-Tang + * + * 05/28/05 Zoltan Menyhart Dynamic stride size + */ + +#include + + + /* + * flush_icache_range(start,end) + * + * Make i-cache(s) coherent with d-caches. + * + * Must deal with range from start to end-1 but nothing else (need to + * be careful not to touch addresses that may be unmapped). + * + * Note: "in0" and "in1" are preserved for debugging purposes.
+ */ + .section .kprobes.text,"ax" +GLOBAL_ENTRY(flush_icache_range) + + .prologue + alloc r2=ar.pfs,2,0,0,0 + movl r3=ia64_i_cache_stride_shift + mov r21=1 + ;; + ld8 r20=[r3] // r20: stride shift + sub r22=in1,r0,1 // last byte address + ;; + shr.u r23=in0,r20 // start / (stride size) + shr.u r22=r22,r20 // (last byte address) / (stride size) + shl r21=r21,r20 // r21: stride size of the i-cache(s) + ;; + sub r8=r22,r23 // number of strides - 1 (NOTE: assumes end > start; an empty range starting on a stride boundary would make this -1) + shl r24=r23,r20 // r24: addresses for "fc.i" = + // "start" rounded down to stride boundary + .save ar.lc,r3 + mov r3=ar.lc // save ar.lc + ;; + + .body + mov ar.lc=r8 // br.cloop below runs the loop body ar.lc+1 times + ;; + /* + * 32 byte aligned loop, even number of (actually 2) bundles + */ +.Loop: fc.i r24 // issuable on M0 only + add r24=r21,r24 // we flush "stride size" bytes per iteration + nop.i 0 + br.cloop.sptk.few .Loop + ;; + sync.i + ;; + srlz.i + ;; + mov ar.lc=r3 // restore ar.lc + br.ret.sptk.many rp +END(flush_icache_range) + + /* + * clflush_cache_range(start,size) + * + * Flush cache lines from start to start+size-1. + * + * Must deal with range from start to start+size-1 but nothing else + * (need to be careful not to touch addresses that may be + * unmapped). + * + * Note: "in0" and "in1" are preserved for debugging purposes.
+ */ + .section .kprobes.text,"ax" +GLOBAL_ENTRY(clflush_cache_range) + + .prologue + alloc r2=ar.pfs,2,0,0,0 + movl r3=ia64_cache_stride_shift + mov r21=1 + add r22=in1,in0 // r22: start + size (one past the last byte) + ;; + ld8 r20=[r3] // r20: stride shift + sub r22=r22,r0,1 // last byte address + ;; + shr.u r23=in0,r20 // start / (stride size) + shr.u r22=r22,r20 // (last byte address) / (stride size) + shl r21=r21,r20 // r21: stride size of the cache(s) + ;; + sub r8=r22,r23 // number of strides - 1 (NOTE: assumes size > 0; size == 0 on a stride boundary would make this -1) + shl r24=r23,r20 // r24: addresses for "fc" = + // "start" rounded down to stride + // boundary + .save ar.lc,r3 + mov r3=ar.lc // save ar.lc + ;; + + .body + mov ar.lc=r8 // br.cloop below runs the loop body ar.lc+1 times + ;; + /* + * 32 byte aligned loop, even number of (actually 2) bundles + */ +.Loop_fc: + fc r24 // issuable on M0 only + add r24=r21,r24 // we flush "stride size" bytes per iteration + nop.i 0 + br.cloop.sptk.few .Loop_fc + ;; + sync.i + ;; + srlz.i + ;; + mov ar.lc=r3 // restore ar.lc + br.ret.sptk.many rp +END(clflush_cache_range) diff --git a/kernel/arch/ia64/lib/idiv32.S b/kernel/arch/ia64/lib/idiv32.S new file mode 100644 index 000000000..2ac28bf0a --- /dev/null +++ b/kernel/arch/ia64/lib/idiv32.S @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2000 Hewlett-Packard Co + * Copyright (C) 2000 David Mosberger-Tang + * + * 32-bit integer division. + * + * This code is based on the application note entitled "Divide, Square Root + * and Remainder Algorithms for the IA-64 Architecture".
This document + * is available as Intel document number 248725-002 or via the web at + * http://developer.intel.com/software/opensource/numerics/ + * + * For more details on the theory behind these algorithms, see "IA-64 + * and Elementary Functions" by Peter Markstein; HP Professional Books + * (http://www.hp.com/go/retailbooks/) + */ + +#include + +#ifdef MODULO +# define OP mod +#else +# define OP div +#endif + +#ifdef UNSIGNED +# define SGN u +# define EXTEND zxt4 +# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b +# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b +#else +# define SGN +# define EXTEND sxt4 +# define INT_TO_FP(a,b) fcvt.xf a=b +# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b +#endif + +#define PASTE1(a,b) a##b +#define PASTE(a,b) PASTE1(a,b) +#define NAME PASTE(PASTE(__,SGN),PASTE(OP,si3)) + +GLOBAL_ENTRY(NAME) // expands to __divsi3, __udivsi3, __modsi3 or __umodsi3 (per MODULO/UNSIGNED) + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + mov r2 = 0xffdd // r2 = -34 + 65535 (fp reg format bias) + EXTEND in0 = in0 // in0 = a + EXTEND in1 = in1 // in1 = b + ;; + setf.sig f8 = in0 + setf.sig f9 = in1 +#ifdef MODULO + sub in1 = r0, in1 // in1 = -b +#endif + ;; + // Convert the inputs to FP, to avoid FP software-assist faults.
+ INT_TO_FP(f8, f8) + INT_TO_FP(f9, f9) + ;; + setf.exp f7 = r2 // f7 = 2^-34 + frcpa.s1 f6, p6 = f8, f9 // y0 = frcpa(b) + ;; +(p6) fmpy.s1 f8 = f8, f6 // q0 = a*y0 +(p6) fnma.s1 f6 = f9, f6, f1 // e0 = -b*y0 + 1 + ;; +#ifdef MODULO + setf.sig f9 = in1 // f9 = -b +#endif +(p6) fma.s1 f8 = f6, f8, f8 // q1 = e0*q0 + q0 +(p6) fma.s1 f6 = f6, f6, f7 // e1 = e0*e0 + 2^-34 + ;; +#ifdef MODULO + setf.sig f7 = in0 // f7 = a (2^-34 is no longer needed) +#endif +(p6) fma.s1 f6 = f6, f8, f8 // q2 = e1*q1 + q1 + ;; + FP_TO_INT(f6, f6) // q = trunc(q2) + ;; +#ifdef MODULO + xma.l f6 = f6, f9, f7 // r = q*(-b) + a + ;; +#endif + getf.sig r8 = f6 // transfer result to result register + br.ret.sptk.many rp +END(NAME) diff --git a/kernel/arch/ia64/lib/idiv64.S b/kernel/arch/ia64/lib/idiv64.S new file mode 100644 index 000000000..f69bd2b09 --- /dev/null +++ b/kernel/arch/ia64/lib/idiv64.S @@ -0,0 +1,80 @@ +/* + * Copyright (C) 1999-2000 Hewlett-Packard Co + * Copyright (C) 1999-2000 David Mosberger-Tang + * + * 64-bit integer division. + * + * This code is based on the application note entitled "Divide, Square Root + * and Remainder Algorithms for the IA-64 Architecture". This document + * is available as Intel document number 248725-002 or via the web at + * http://developer.intel.com/software/opensource/numerics/ + * + * For more details on the theory behind these algorithms, see "IA-64 + * and Elementary Functions" by Peter Markstein; HP Professional Books + * (http://www.hp.com/go/retailbooks/) + */ + +#include + +#ifdef MODULO +# define OP mod +#else +# define OP div +#endif + +#ifdef UNSIGNED +# define SGN u +# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b +# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b +#else +# define SGN +# define INT_TO_FP(a,b) fcvt.xf a=b +# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b +#endif + +#define PASTE1(a,b) a##b +#define PASTE(a,b) PASTE1(a,b) +#define NAME PASTE(PASTE(__,SGN),PASTE(OP,di3)) + +GLOBAL_ENTRY(NAME) // expands to __divdi3, __udivdi3, __moddi3 or __umoddi3 (per MODULO/UNSIGNED) + .regstk 2,0,0,0 + // Transfer inputs to FP registers.
+ setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, to avoid FP software-assist faults. + INT_TO_FP(f8, f8) + INT_TO_FP(f9, f9) + ;; + frcpa.s1 f11, p6 = f8, f9 // y0 = frcpa(b) + ;; +(p6) fmpy.s1 f7 = f8, f11 // q0 = a*y0 +(p6) fnma.s1 f6 = f9, f11, f1 // e0 = -b*y0 + 1 + ;; +(p6) fma.s1 f10 = f7, f6, f7 // q1 = q0*e0 + q0 +(p6) fmpy.s1 f7 = f6, f6 // e1 = e0*e0 + ;; +#ifdef MODULO + sub in1 = r0, in1 // in1 = -b +#endif +(p6) fma.s1 f10 = f10, f7, f10 // q2 = q1*e1 + q1 +(p6) fma.s1 f6 = f11, f6, f11 // y1 = y0*e0 + y0 + ;; +(p6) fma.s1 f6 = f6, f7, f6 // y2 = y1*e1 + y1 +(p6) fnma.s1 f7 = f9, f10, f8 // r = -b*q2 + a + ;; +#ifdef MODULO + setf.sig f8 = in0 // f8 = a + setf.sig f9 = in1 // f9 = -b +#endif +(p6) fma.s1 f11 = f7, f6, f10 // q3 = r*y2 + q2 + ;; + FP_TO_INT(f11, f11) // q = trunc(q3) + ;; +#ifdef MODULO + xma.l f11 = f11, f9, f8 // r = q*(-b) + a + ;; +#endif + getf.sig r8 = f11 // transfer result to result register + br.ret.sptk.many rp +END(NAME) diff --git a/kernel/arch/ia64/lib/io.c b/kernel/arch/ia64/lib/io.c new file mode 100644 index 000000000..bcd16f8ad --- /dev/null +++ b/kernel/arch/ia64/lib/io.c @@ -0,0 +1,164 @@ +#include +#include + +#include + +/* + * Copy count bytes from IO memory space to "real" memory space, one readb() at a time. + * A count <= 0 copies nothing. This needs to be optimized. + */ +void memcpy_fromio(void *to, const volatile void __iomem *from, long count) +{ + char *dst = to; + + while (count > 0) { + count--; + *dst++ = readb(from++); + } +} +EXPORT_SYMBOL(memcpy_fromio); + +/* + * Copy count bytes from "real" memory space to IO memory space, one writeb() at a time. + * A count <= 0 copies nothing. This needs to be optimized. + */ +void memcpy_toio(volatile void __iomem *to, const void *from, long count) +{ + const char *src = from; + + while (count > 0) { + count--; + writeb(*src++, to++); + } +} +EXPORT_SYMBOL(memcpy_toio); + +/* + * "memset" on IO memory space: writes the low byte of c count times; a count <= 0 writes nothing. + * This needs to be optimized.
+ */ +void memset_io(volatile void __iomem *dst, int c, long count) +{ + unsigned char ch = (char)(c & 0xff); + + while (count > 0) { + count--; + writeb(ch, dst); + dst++; + } +} +EXPORT_SYMBOL(memset_io); + +#ifdef CONFIG_IA64_GENERIC + +#undef __ia64_inb +#undef __ia64_inw +#undef __ia64_inl +#undef __ia64_outb +#undef __ia64_outw +#undef __ia64_outl +#undef __ia64_readb +#undef __ia64_readw +#undef __ia64_readl +#undef __ia64_readq +#undef __ia64_readb_relaxed +#undef __ia64_readw_relaxed +#undef __ia64_readl_relaxed +#undef __ia64_readq_relaxed +#undef __ia64_writeb +#undef __ia64_writew +#undef __ia64_writel +#undef __ia64_writeq +#undef __ia64_mmiowb + +unsigned int +__ia64_inb (unsigned long port) +{ + return ___ia64_inb(port); +} + +unsigned int +__ia64_inw (unsigned long port) +{ + return ___ia64_inw(port); +} + +unsigned int +__ia64_inl (unsigned long port) +{ + return ___ia64_inl(port); +} + +void +__ia64_outb (unsigned char val, unsigned long port) +{ + ___ia64_outb(val, port); +} + +void +__ia64_outw (unsigned short val, unsigned long port) +{ + ___ia64_outw(val, port); +} + +void +__ia64_outl (unsigned int val, unsigned long port) +{ + ___ia64_outl(val, port); +} + +unsigned char +__ia64_readb (void __iomem *addr) +{ + return ___ia64_readb (addr); +} + +unsigned short +__ia64_readw (void __iomem *addr) +{ + return ___ia64_readw (addr); +} + +unsigned int +__ia64_readl (void __iomem *addr) +{ + return ___ia64_readl (addr); +} + +unsigned long +__ia64_readq (void __iomem *addr) +{ + return ___ia64_readq (addr); +} +/* The _relaxed readers below call the same ___ia64_read*() accessors as the ordered ones above. */ +unsigned char +__ia64_readb_relaxed (void __iomem *addr) +{ + return ___ia64_readb (addr); +} + +unsigned short +__ia64_readw_relaxed (void __iomem *addr) +{ + return ___ia64_readw (addr); +} + +unsigned int +__ia64_readl_relaxed (void __iomem *addr) +{ + return ___ia64_readl (addr); +} + +unsigned long +__ia64_readq_relaxed (void __iomem *addr) +{ + return ___ia64_readq (addr); +} + +void +__ia64_mmiowb(void) +{ + ___ia64_mmiowb(); +} +
+#endif /* CONFIG_IA64_GENERIC */ diff --git a/kernel/arch/ia64/lib/ip_fast_csum.S b/kernel/arch/ia64/lib/ip_fast_csum.S new file mode 100644 index 000000000..620d9dc52 --- /dev/null +++ b/kernel/arch/ia64/lib/ip_fast_csum.S @@ -0,0 +1,144 @@ +/* + * Optimized version of the ip_fast_csum() function + * Used for calculating IP header checksum + * + * Return: 16bit checksum, complemented + * + * Inputs: + * in0: address of buffer to checksum (char *) + * in1: length of the buffer in 32-bit words (int) + * + * Copyright (C) 2002, 2006 Intel Corp. + * Copyright (C) 2002, 2006 Ken Chen + */ + +#include + +/* + * Since we know that most likely this function is called with buf aligned + * on 4-byte boundary and 20 bytes in length, we can execute rather quickly + * versus calling generic version of do_csum, which has lots of overhead in + * handling various alignments and sizes. However, due to lack of constraints + * put on the function input argument, cases with alignment not on 4-byte or + * size not equal to 20 bytes will be handled by the generic do_csum function. + */ + +#define in0 r32 +#define in1 r33 +#define in2 r34 +#define in3 r35 +#define in4 r36 +#define ret0 r8 + +GLOBAL_ENTRY(ip_fast_csum) + .prologue + .body + cmp.ne p6,p7=5,in1 // size other than 20 byte? + and r14=3,in0 // is it aligned on 4-byte?
+ add r15=4,in0 // second source pointer + ;; + cmp.ne.or.andcm p6,p7=r14,r0 + ;; +(p7) ld4 r20=[in0],8 +(p7) ld4 r21=[r15],8 +(p6) br.spnt .generic + ;; + ld4 r22=[in0],8 + ld4 r23=[r15],8 + ;; + ld4 r24=[in0] + add r20=r20,r21 + add r22=r22,r23 + ;; + add r20=r20,r22 + ;; + add r20=r20,r24 + ;; + shr.u ret0=r20,16 // now need to add the carry + zxt2 r20=r20 + ;; + add r20=ret0,r20 + ;; + shr.u ret0=r20,16 // add carry again + zxt2 r20=r20 + ;; + add r20=ret0,r20 + ;; + shr.u ret0=r20,16 + zxt2 r20=r20 + ;; + add r20=ret0,r20 + mov r9=0xffff + ;; + andcm ret0=r9,r20 + .restore sp // reset frame state + br.ret.sptk.many b0 + ;; + +.generic: + .prologue + .save ar.pfs, r35 + alloc r35=ar.pfs,2,2,2,0 + .save rp, r34 + mov r34=b0 + .body + dep.z out1=in1,2,30 + mov out0=in0 + ;; + br.call.sptk.many b0=do_csum + ;; + andcm ret0=-1,ret0 + mov ar.pfs=r35 + mov b0=r34 + br.ret.sptk.many b0 +END(ip_fast_csum) + +GLOBAL_ENTRY(csum_ipv6_magic) + ld4 r20=[in0],4 + ld4 r21=[in1],4 + zxt4 in2=in2 + ;; + ld4 r22=[in0],4 + ld4 r23=[in1],4 + dep r15=in3,in2,32,16 + ;; + ld4 r24=[in0],4 + ld4 r25=[in1],4 + mux1 r15=r15,@rev + add r16=r20,r21 + add r17=r22,r23 + zxt4 in4=in4 + ;; + ld4 r26=[in0],4 + ld4 r27=[in1],4 + shr.u r15=r15,16 + add r18=r24,r25 + add r8=r16,r17 + ;; + add r19=r26,r27 + add r8=r8,r18 + ;; + add r8=r8,r19 + add r15=r15,in4 + ;; + add r8=r8,r15 + ;; + shr.u r10=r8,32 // now fold sum into short + zxt4 r11=r8 + ;; + add r8=r10,r11 + ;; + shr.u r10=r8,16 // yeah, keep it rolling + zxt2 r11=r8 + ;; + add r8=r10,r11 + ;; + shr.u r10=r8,16 // three times lucky + zxt2 r11=r8 + ;; + add r8=r10,r11 + mov r9=0xffff + ;; + andcm r8=r9,r8 + br.ret.sptk.many b0 +END(csum_ipv6_magic) diff --git a/kernel/arch/ia64/lib/memcpy.S b/kernel/arch/ia64/lib/memcpy.S new file mode 100644 index 000000000..448908d80 --- /dev/null +++ b/kernel/arch/ia64/lib/memcpy.S @@ -0,0 +1,301 @@ +/* + * + * Optimized version of the standard memcpy() function + * + * Inputs: + * in0: destination 
address + * in1: source address + * in2: number of bytes to copy + * Output: + * no return value + * + * Copyright (C) 2000-2001 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger-Tang + */ +#include + +GLOBAL_ENTRY(memcpy) + +# define MEM_LAT 21 /* latency to memory */ + +# define dst r2 +# define src r3 +# define retval r8 +# define saved_pfs r9 +# define saved_lc r10 +# define saved_pr r11 +# define cnt r16 +# define src2 r17 +# define t0 r18 +# define t1 r19 +# define t2 r20 +# define t3 r21 +# define t4 r22 +# define src_end r23 + +# define N (MEM_LAT + 4) +# define Nrot ((N + 7) & ~7) + + /* + * First, check if everything (src, dst, len) is a multiple of eight. If + * so, we handle everything with no taken branches (other than the loop + * itself) and a small icache footprint. Otherwise, we jump off to + * the more general copy routine handling arbitrary + * sizes/alignment etc. + */ + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,Nrot,0,Nrot + .save ar.lc, saved_lc + mov saved_lc=ar.lc + or t0=in0,in1 + ;; + + or t0=t0,in2 + .save pr, saved_pr + mov saved_pr=pr + + .body + + cmp.eq p6,p0=in2,r0 // zero length? + mov retval=in0 // return dst +(p6) br.ret.spnt.many rp // zero length, return immediately + ;; + + mov dst=in0 // copy because of rotation + shr.u cnt=in2,3 // number of 8-byte words to copy + mov pr.rot=1<<16 + ;; + + adds cnt=-1,cnt // br.ctop is repeat/until + cmp.gtu p7,p0=16,in2 // copying less than 16 bytes? 
+ mov ar.ec=N + ;; + + and t0=0x7,t0 + mov ar.lc=cnt + ;; + cmp.ne p6,p0=t0,r0 + + mov src=in1 // copy because of rotation +(p7) br.cond.spnt.few .memcpy_short +(p6) br.cond.spnt.few .memcpy_long + ;; + nop.m 0 + ;; + nop.m 0 + nop.i 0 + ;; + nop.m 0 + ;; + .rotr val[N] + .rotp p[N] + .align 32 +1: { .mib +(p[0]) ld8 val[0]=[src],8 + nop.i 0 + brp.loop.imp 1b, 2f +} +2: { .mfb +(p[N-1])st8 [dst]=val[N-1],8 + nop.f 0 + br.ctop.dptk.few 1b +} + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,-1 + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + /* + * Small (<16 bytes) unaligned copying is done via a simple byte-at-the-time + * copy loop. This performs relatively poorly on Itanium, but it doesn't + * get used very often (gcc inlines small copies) and due to atomicity + * issues, we want to avoid read-modify-write of entire words. + */ + .align 32 +.memcpy_short: + adds cnt=-1,in2 // br.ctop is repeat/until + mov ar.ec=MEM_LAT + brp.loop.imp 1f, 2f + ;; + mov ar.lc=cnt + ;; + nop.m 0 + ;; + nop.m 0 + nop.i 0 + ;; + nop.m 0 + ;; + nop.m 0 + ;; + /* + * It is faster to put a stop bit in the loop here because it makes + * the pipeline shorter (and latency is what matters on short copies). + */ + .align 32 +1: { .mib +(p[0]) ld1 val[0]=[src],1 + nop.i 0 + brp.loop.imp 1b, 2f +} ;; +2: { .mfb +(p[MEM_LAT-1])st1 [dst]=val[MEM_LAT-1],1 + nop.f 0 + br.ctop.dptk.few 1b +} ;; + mov ar.lc=saved_lc + mov pr=saved_pr,-1 + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + /* + * Large (>= 16 bytes) copying is done in a fancy way. Latency isn't + * an overriding concern here, but throughput is. We first do + * sub-word copying until the destination is aligned, then we check + * if the source is also aligned. If so, we do a simple load/store-loop + * until there are less than 8 bytes left over and then we do the tail, + * by storing the last few bytes using sub-word copying. If the source + * is not aligned, we branch off to the non-congruent loop. 
+ * + * stage: op: + * 0 ld + * : + * MEM_LAT+3 shrp + * MEM_LAT+4 st + * + * On Itanium, the pipeline itself runs without stalls. However, br.ctop + * seems to introduce an unavoidable bubble in the pipeline so the overall + * latency is 2 cycles/iteration. This gives us a _copy_ throughput + * of 4 byte/cycle. Still not bad. + */ +# undef N +# undef Nrot +# define N (MEM_LAT + 5) /* number of stages */ +# define Nrot ((N+1 + 2 + 7) & ~7) /* number of rotating regs */ + +#define LOG_LOOP_SIZE 6 + +.memcpy_long: + alloc t3=ar.pfs,3,Nrot,0,Nrot // resize register frame + and t0=-8,src // t0 = src & ~7 + and t2=7,src // t2 = src & 7 + ;; + ld8 t0=[t0] // t0 = 1st source word + adds src2=7,src // src2 = (src + 7) + sub t4=r0,dst // t4 = -dst + ;; + and src2=-8,src2 // src2 = (src + 7) & ~7 + shl t2=t2,3 // t2 = 8*(src & 7) + shl t4=t4,3 // t4 = 8*(dst & 7) + ;; + ld8 t1=[src2] // t1 = 1st source word if src is 8-byte aligned, 2nd otherwise + sub t3=64,t2 // t3 = 64-8*(src & 7) + shr.u t0=t0,t2 + ;; + add src_end=src,in2 + shl t1=t1,t3 + mov pr=t4,0x38 // (p5,p4,p3)=(dst & 7) + ;; + or t0=t0,t1 + mov cnt=r0 + adds src_end=-1,src_end + ;; +(p3) st1 [dst]=t0,1 +(p3) shr.u t0=t0,8 +(p3) adds cnt=1,cnt + ;; +(p4) st2 [dst]=t0,2 +(p4) shr.u t0=t0,16 +(p4) adds cnt=2,cnt + ;; +(p5) st4 [dst]=t0,4 +(p5) adds cnt=4,cnt + and src_end=-8,src_end // src_end = last word of source buffer + ;; + + // At this point, dst is aligned to 8 bytes and there at least 16-7=9 bytes left to copy: + +1:{ add src=cnt,src // make src point to remainder of source buffer + sub cnt=in2,cnt // cnt = number of bytes left to copy + mov t4=ip + } ;; + and src2=-8,src // align source pointer + adds t4=.memcpy_loops-1b,t4 + mov ar.ec=N + + and t0=7,src // t0 = src & 7 + shr.u t2=cnt,3 // t2 = number of 8-byte words left to copy + shl cnt=cnt,3 // move bits 0-2 to 3-5 + ;; + + .rotr val[N+1], w[2] + .rotp p[N] + + cmp.ne p6,p0=t0,r0 // is src aligned, too? 
+ shl t0=t0,LOG_LOOP_SIZE // t0 = 8*(src & 7) + adds t2=-1,t2 // br.ctop is repeat/until + ;; + add t4=t0,t4 + mov pr=cnt,0x38 // set (p5,p4,p3) to # of bytes last-word bytes to copy + mov ar.lc=t2 + ;; + nop.m 0 + ;; + nop.m 0 + nop.i 0 + ;; + nop.m 0 + ;; +(p6) ld8 val[1]=[src2],8 // prime the pump... + mov b6=t4 + br.sptk.few b6 + ;; + +.memcpy_tail: + // At this point, (p5,p4,p3) are set to the number of bytes left to copy (which is + // less than 8) and t0 contains the last few bytes of the src buffer: +(p5) st4 [dst]=t0,4 +(p5) shr.u t0=t0,32 + mov ar.lc=saved_lc + ;; +(p4) st2 [dst]=t0,2 +(p4) shr.u t0=t0,16 + mov ar.pfs=saved_pfs + ;; +(p3) st1 [dst]=t0 + mov pr=saved_pr,-1 + br.ret.sptk.many rp + +/////////////////////////////////////////////////////// + .align 64 + +#define COPY(shift,index) \ + 1: { .mib \ + (p[0]) ld8 val[0]=[src2],8; \ + (p[MEM_LAT+3]) shrp w[0]=val[MEM_LAT+3],val[MEM_LAT+4-index],shift; \ + brp.loop.imp 1b, 2f \ + }; \ + 2: { .mfb \ + (p[MEM_LAT+4]) st8 [dst]=w[1],8; \ + nop.f 0; \ + br.ctop.dptk.few 1b; \ + }; \ + ;; \ + ld8 val[N-1]=[src_end]; /* load last word (may be same as val[N]) */ \ + ;; \ + shrp t0=val[N-1],val[N-index],shift; \ + br .memcpy_tail +.memcpy_loops: + COPY(0, 1) /* no point special casing this---it doesn't go any faster without shrp */ + COPY(8, 0) + COPY(16, 0) + COPY(24, 0) + COPY(32, 0) + COPY(40, 0) + COPY(48, 0) + COPY(56, 0) + +END(memcpy) diff --git a/kernel/arch/ia64/lib/memcpy_mck.S b/kernel/arch/ia64/lib/memcpy_mck.S new file mode 100644 index 000000000..ab0f87639 --- /dev/null +++ b/kernel/arch/ia64/lib/memcpy_mck.S @@ -0,0 +1,666 @@ +/* + * Itanium 2-optimized version of memcpy and copy_user function + * + * Inputs: + * in0: destination address + * in1: source address + * in2: number of bytes to copy + * Output: + * for memcpy: return dest + * for copy_user: return 0 if success, + * or number of byte NOT copied if error occurred. + * + * Copyright (C) 2002 Intel Corp. 
+ * Copyright (C) 2002 Ken Chen + */ +#include +#include + +#define EK(y...) EX(y) + +/* McKinley specific optimization */ + +#define retval r8 +#define saved_pfs r31 +#define saved_lc r10 +#define saved_pr r11 +#define saved_in0 r14 +#define saved_in1 r15 +#define saved_in2 r16 + +#define src0 r2 +#define src1 r3 +#define dst0 r17 +#define dst1 r18 +#define cnt r9 + +/* r19-r30 are temp for each code section */ +#define PREFETCH_DIST 8 +#define src_pre_mem r19 +#define dst_pre_mem r20 +#define src_pre_l2 r21 +#define dst_pre_l2 r22 +#define t1 r23 +#define t2 r24 +#define t3 r25 +#define t4 r26 +#define t5 t1 // alias! +#define t6 t2 // alias! +#define t7 t3 // alias! +#define n8 r27 +#define t9 t5 // alias! +#define t10 t4 // alias! +#define t11 t7 // alias! +#define t12 t6 // alias! +#define t14 t10 // alias! +#define t13 r28 +#define t15 r29 +#define tmp r30 + +/* defines for long_copy block */ +#define A 0 +#define B (PREFETCH_DIST) +#define C (B + PREFETCH_DIST) +#define D (C + 1) +#define N (D + 1) +#define Nrot ((N + 7) & ~7) + +/* alias */ +#define in0 r32 +#define in1 r33 +#define in2 r34 + +GLOBAL_ENTRY(memcpy) + and r28=0x7,in0 + and r29=0x7,in1 + mov f6=f0 + mov retval=in0 + br.cond.sptk .common_code + ;; +END(memcpy) +GLOBAL_ENTRY(__copy_user) + .prologue +// check dest alignment + and r28=0x7,in0 + and r29=0x7,in1 + mov f6=f1 + mov saved_in0=in0 // save dest pointer + mov saved_in1=in1 // save src pointer + mov retval=r0 // initialize return value + ;; +.common_code: + cmp.gt p15,p0=8,in2 // check for small size + cmp.ne p13,p0=0,r28 // check dest alignment + cmp.ne p14,p0=0,r29 // check src alignment + add src0=0,in1 + sub r30=8,r28 // for .align_dest + mov saved_in2=in2 // save len + ;; + add dst0=0,in0 + add dst1=1,in0 // dest odd index + cmp.le p6,p0 = 1,r30 // for .align_dest +(p15) br.cond.dpnt .memcpy_short +(p13) br.cond.dpnt .align_dest +(p14) br.cond.dpnt .unaligned_src + ;; + +// both dest and src are aligned on 8-byte boundary 
+.aligned_src: + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot + .save pr, saved_pr + mov saved_pr=pr + + shr.u cnt=in2,7 // this much cache line + ;; + cmp.lt p6,p0=2*PREFETCH_DIST,cnt + cmp.lt p7,p8=1,cnt + .save ar.lc, saved_lc + mov saved_lc=ar.lc + .body + add cnt=-1,cnt + add src_pre_mem=0,in1 // prefetch src pointer + add dst_pre_mem=0,in0 // prefetch dest pointer + ;; +(p7) mov ar.lc=cnt // prefetch count +(p8) mov ar.lc=r0 +(p6) br.cond.dpnt .long_copy + ;; + +.prefetch: + lfetch.fault [src_pre_mem], 128 + lfetch.fault.excl [dst_pre_mem], 128 + br.cloop.dptk.few .prefetch + ;; + +.medium_copy: + and tmp=31,in2 // copy length after iteration + shr.u r29=in2,5 // number of 32-byte iteration + add dst1=8,dst0 // 2nd dest pointer + ;; + add cnt=-1,r29 // ctop iteration adjustment + cmp.eq p10,p0=r29,r0 // do we really need to loop? + add src1=8,src0 // 2nd src pointer + cmp.le p6,p0=8,tmp + ;; + cmp.le p7,p0=16,tmp + mov ar.lc=cnt // loop setup + cmp.eq p16,p17 = r0,r0 + mov ar.ec=2 +(p10) br.dpnt.few .aligned_src_tail + ;; + TEXT_ALIGN(32) +1: +EX(.ex_handler, (p16) ld8 r34=[src0],16) +EK(.ex_handler, (p16) ld8 r38=[src1],16) +EX(.ex_handler, (p17) st8 [dst0]=r33,16) +EK(.ex_handler, (p17) st8 [dst1]=r37,16) + ;; +EX(.ex_handler, (p16) ld8 r32=[src0],16) +EK(.ex_handler, (p16) ld8 r36=[src1],16) +EX(.ex_handler, (p16) st8 [dst0]=r34,16) +EK(.ex_handler, (p16) st8 [dst1]=r38,16) + br.ctop.dptk.few 1b + ;; + +.aligned_src_tail: +EX(.ex_handler, (p6) ld8 t1=[src0]) + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs +EX(.ex_hndlr_s, (p7) ld8 t2=[src1],8) + cmp.le p8,p0=24,tmp + and r21=-8,tmp + ;; +EX(.ex_hndlr_s, (p8) ld8 t3=[src1]) +EX(.ex_handler, (p6) st8 [dst0]=t1) // store byte 1 + and in2=7,tmp // remaining length +EX(.ex_hndlr_d, (p7) st8 [dst1]=t2,8) // store byte 2 + add src0=src0,r21 // setting up src pointer + add dst0=dst0,r21 // setting up dest pointer + ;; +EX(.ex_handler, (p8) st8 [dst1]=t3) // store byte 3 + mov pr=saved_pr,-1 + 
br.dptk.many .memcpy_short + ;; + +/* code taken from copy_page_mck */ +.long_copy: + .rotr v[2*PREFETCH_DIST] + .rotp p[N] + + mov src_pre_mem = src0 + mov pr.rot = 0x10000 + mov ar.ec = 1 // special unrolled loop + + mov dst_pre_mem = dst0 + + add src_pre_l2 = 8*8, src0 + add dst_pre_l2 = 8*8, dst0 + ;; + add src0 = 8, src_pre_mem // first t1 src + mov ar.lc = 2*PREFETCH_DIST - 1 + shr.u cnt=in2,7 // number of lines + add src1 = 3*8, src_pre_mem // first t3 src + add dst0 = 8, dst_pre_mem // first t1 dst + add dst1 = 3*8, dst_pre_mem // first t3 dst + ;; + and tmp=127,in2 // remaining bytes after this block + add cnt = -(2*PREFETCH_DIST) - 1, cnt + // same as .line_copy loop, but with all predicated-off instructions removed: +.prefetch_loop: +EX(.ex_hndlr_lcpy_1, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 +EK(.ex_hndlr_lcpy_1, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 + br.ctop.sptk .prefetch_loop + ;; + cmp.eq p16, p0 = r0, r0 // reset p16 to 1 + mov ar.lc = cnt + mov ar.ec = N // # of stages in pipeline + ;; +.line_copy: +EX(.ex_handler, (p[D]) ld8 t2 = [src0], 3*8) // M0 +EK(.ex_handler, (p[D]) ld8 t4 = [src1], 3*8) // M1 +EX(.ex_handler_lcpy, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 prefetch dst from memory +EK(.ex_handler_lcpy, (p[D]) st8 [dst_pre_l2] = n8, 128) // M3 prefetch dst from L2 + ;; +EX(.ex_handler_lcpy, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 prefetch src from memory +EK(.ex_handler_lcpy, (p[C]) ld8 n8 = [src_pre_l2], 128) // M1 prefetch src from L2 +EX(.ex_handler, (p[D]) st8 [dst0] = t1, 8) // M2 +EK(.ex_handler, (p[D]) st8 [dst1] = t3, 8) // M3 + ;; +EX(.ex_handler, (p[D]) ld8 t5 = [src0], 8) +EK(.ex_handler, (p[D]) ld8 t7 = [src1], 3*8) +EX(.ex_handler, (p[D]) st8 [dst0] = t2, 3*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t4, 3*8) + ;; +EX(.ex_handler, (p[D]) ld8 t6 = [src0], 3*8) +EK(.ex_handler, (p[D]) ld8 t10 = [src1], 8) +EX(.ex_handler, (p[D]) st8 [dst0] = t5, 8) +EK(.ex_handler, (p[D]) st8 [dst1] = t7, 3*8) + ;; +EX(.ex_handler, 
(p[D]) ld8 t9 = [src0], 3*8) +EK(.ex_handler, (p[D]) ld8 t11 = [src1], 3*8) +EX(.ex_handler, (p[D]) st8 [dst0] = t6, 3*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t10, 8) + ;; +EX(.ex_handler, (p[D]) ld8 t12 = [src0], 8) +EK(.ex_handler, (p[D]) ld8 t14 = [src1], 8) +EX(.ex_handler, (p[D]) st8 [dst0] = t9, 3*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t11, 3*8) + ;; +EX(.ex_handler, (p[D]) ld8 t13 = [src0], 4*8) +EK(.ex_handler, (p[D]) ld8 t15 = [src1], 4*8) +EX(.ex_handler, (p[D]) st8 [dst0] = t12, 8) +EK(.ex_handler, (p[D]) st8 [dst1] = t14, 8) + ;; +EX(.ex_handler, (p[C]) ld8 t1 = [src0], 8) +EK(.ex_handler, (p[C]) ld8 t3 = [src1], 8) +EX(.ex_handler, (p[D]) st8 [dst0] = t13, 4*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t15, 4*8) + br.ctop.sptk .line_copy + ;; + + add dst0=-8,dst0 + add src0=-8,src0 + mov in2=tmp + .restore sp + br.sptk.many .medium_copy + ;; + +#define BLOCK_SIZE 128*32 +#define blocksize r23 +#define curlen r24 + +// dest is on 8-byte boundary, src is not. We need to do +// ld8-ld8, shrp, then st8. Max 8 byte copy per cycle. 
+.unaligned_src: + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,5,0,8 + .save ar.lc, saved_lc + mov saved_lc=ar.lc + .save pr, saved_pr + mov saved_pr=pr + .body +.4k_block: + mov saved_in0=dst0 // need to save all input arguments + mov saved_in2=in2 + mov blocksize=BLOCK_SIZE + ;; + cmp.lt p6,p7=blocksize,in2 + mov saved_in1=src0 + ;; +(p6) mov in2=blocksize + ;; + shr.u r21=in2,7 // this much cache line + shr.u r22=in2,4 // number of 16-byte iteration + and curlen=15,in2 // copy length after iteration + and r30=7,src0 // source alignment + ;; + cmp.lt p7,p8=1,r21 + add cnt=-1,r21 + ;; + + add src_pre_mem=0,src0 // prefetch src pointer + add dst_pre_mem=0,dst0 // prefetch dest pointer + and src0=-8,src0 // 1st src pointer +(p7) mov ar.lc = cnt +(p8) mov ar.lc = r0 + ;; + TEXT_ALIGN(32) +1: lfetch.fault [src_pre_mem], 128 + lfetch.fault.excl [dst_pre_mem], 128 + br.cloop.dptk.few 1b + ;; + + shladd dst1=r22,3,dst0 // 2nd dest pointer + shladd src1=r22,3,src0 // 2nd src pointer + cmp.eq p8,p9=r22,r0 // do we really need to loop? + cmp.le p6,p7=8,curlen; // have at least 8 byte remaining? + add cnt=-1,r22 // ctop iteration adjustment + ;; +EX(.ex_handler, (p9) ld8 r33=[src0],8) // loop primer +EK(.ex_handler, (p9) ld8 r37=[src1],8) +(p8) br.dpnt.few .noloop + ;; + +// The jump address is calculated based on src alignment. The COPYU +// macro below needs to confine its size to power of two, so an entry +// can be calculated using shl instead of an expensive multiply. The +// size is then hard coded by the following #define to match the +// actual size. This makes it somewhat tedious when COPYU macro gets +// changed and this needs to be adjusted to match.
+#define LOOP_SIZE 6 +1: + mov r29=ip // jmp_table thread + mov ar.lc=cnt + ;; + add r29=.jump_table - 1b - (.jmp1-.jump_table), r29 + shl r28=r30, LOOP_SIZE // jmp_table thread + mov ar.ec=2 // loop setup + ;; + add r29=r29,r28 // jmp_table thread + cmp.eq p16,p17=r0,r0 + ;; + mov b6=r29 // jmp_table thread + ;; + br.cond.sptk.few b6 + +// for 8-15 byte case +// We will skip the loop, but need to replicate the side effect +// that the loop produces. +.noloop: +EX(.ex_handler, (p6) ld8 r37=[src1],8) + add src0=8,src0 +(p6) shl r25=r30,3 + ;; +EX(.ex_handler, (p6) ld8 r27=[src1]) +(p6) shr.u r28=r37,r25 +(p6) sub r26=64,r25 + ;; +(p6) shl r27=r27,r26 + ;; +(p6) or r21=r28,r27 + +.unaligned_src_tail: +/* check if we have more than blocksize to copy, if so go back */ + cmp.gt p8,p0=saved_in2,blocksize + ;; +(p8) add dst0=saved_in0,blocksize +(p8) add src0=saved_in1,blocksize +(p8) sub in2=saved_in2,blocksize +(p8) br.dpnt .4k_block + ;; + +/* we have up to 15 byte to copy in the tail. + * part of work is already done in the jump table code + * we are at the following state. + * src side: + * + * xxxxxx xx <----- r21 has xxxxxxxx already + * -------- -------- -------- + * 0 8 16 + * ^ + * | + * src1 + * + * dst + * -------- -------- -------- + * ^ + * | + * dst1 + */ +EX(.ex_handler, (p6) st8 [dst1]=r21,8) // more than 8 byte to copy +(p6) add curlen=-8,curlen // update length + mov ar.pfs=saved_pfs + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,-1 + mov in2=curlen // remaining length + mov dst0=dst1 // dest pointer + add src0=src1,r30 // forward by src alignment + ;; + +// 7 byte or smaller. 
+.memcpy_short: + cmp.le p8,p9 = 1,in2 + cmp.le p10,p11 = 2,in2 + cmp.le p12,p13 = 3,in2 + cmp.le p14,p15 = 4,in2 + add src1=1,src0 // second src pointer + add dst1=1,dst0 // second dest pointer + ;; + +EX(.ex_handler_short, (p8) ld1 t1=[src0],2) +EK(.ex_handler_short, (p10) ld1 t2=[src1],2) +(p9) br.ret.dpnt rp // 0 byte copy + ;; + +EX(.ex_handler_short, (p8) st1 [dst0]=t1,2) +EK(.ex_handler_short, (p10) st1 [dst1]=t2,2) +(p11) br.ret.dpnt rp // 1 byte copy + +EX(.ex_handler_short, (p12) ld1 t3=[src0],2) +EK(.ex_handler_short, (p14) ld1 t4=[src1],2) +(p13) br.ret.dpnt rp // 2 byte copy + ;; + + cmp.le p6,p7 = 5,in2 + cmp.le p8,p9 = 6,in2 + cmp.le p10,p11 = 7,in2 + +EX(.ex_handler_short, (p12) st1 [dst0]=t3,2) +EK(.ex_handler_short, (p14) st1 [dst1]=t4,2) +(p15) br.ret.dpnt rp // 3 byte copy + ;; + +EX(.ex_handler_short, (p6) ld1 t5=[src0],2) +EK(.ex_handler_short, (p8) ld1 t6=[src1],2) +(p7) br.ret.dpnt rp // 4 byte copy + ;; + +EX(.ex_handler_short, (p6) st1 [dst0]=t5,2) +EK(.ex_handler_short, (p8) st1 [dst1]=t6,2) +(p9) br.ret.dptk rp // 5 byte copy + +EX(.ex_handler_short, (p10) ld1 t7=[src0],2) +(p11) br.ret.dptk rp // 6 byte copy + ;; + +EX(.ex_handler_short, (p10) st1 [dst0]=t7,2) + br.ret.dptk rp // done all cases + + +/* Align dest to nearest 8-byte boundary. We know we have at + * least 7 bytes to copy, enough to crawl to 8-byte boundary. + * Actual number of byte to crawl depend on the dest alignment. 
+ * 7 byte or less is taken care at .memcpy_short + + * src0 - source even index + * src1 - source odd index + * dst0 - dest even index + * dst1 - dest odd index + * r30 - distance to 8-byte boundary + */ + +.align_dest: + add src1=1,in1 // source odd index + cmp.le p7,p0 = 2,r30 // for .align_dest + cmp.le p8,p0 = 3,r30 // for .align_dest +EX(.ex_handler_short, (p6) ld1 t1=[src0],2) + cmp.le p9,p0 = 4,r30 // for .align_dest + cmp.le p10,p0 = 5,r30 + ;; +EX(.ex_handler_short, (p7) ld1 t2=[src1],2) +EK(.ex_handler_short, (p8) ld1 t3=[src0],2) + cmp.le p11,p0 = 6,r30 +EX(.ex_handler_short, (p6) st1 [dst0] = t1,2) + cmp.le p12,p0 = 7,r30 + ;; +EX(.ex_handler_short, (p9) ld1 t4=[src1],2) +EK(.ex_handler_short, (p10) ld1 t5=[src0],2) +EX(.ex_handler_short, (p7) st1 [dst1] = t2,2) +EK(.ex_handler_short, (p8) st1 [dst0] = t3,2) + ;; +EX(.ex_handler_short, (p11) ld1 t6=[src1],2) +EK(.ex_handler_short, (p12) ld1 t7=[src0],2) + cmp.eq p6,p7=r28,r29 +EX(.ex_handler_short, (p9) st1 [dst1] = t4,2) +EK(.ex_handler_short, (p10) st1 [dst0] = t5,2) + sub in2=in2,r30 + ;; +EX(.ex_handler_short, (p11) st1 [dst1] = t6,2) +EK(.ex_handler_short, (p12) st1 [dst0] = t7) + add dst0=in0,r30 // setup arguments + add src0=in1,r30 +(p6) br.cond.dptk .aligned_src +(p7) br.cond.dpnt .unaligned_src + ;; + +/* main loop body in jump table format */ +#define COPYU(shift) \ +1: \ +EX(.ex_handler, (p16) ld8 r32=[src0],8); /* 1 */ \ +EK(.ex_handler, (p16) ld8 r36=[src1],8); \ + (p17) shrp r35=r33,r34,shift;; /* 1 */ \ +EX(.ex_handler, (p6) ld8 r22=[src1]); /* common, prime for tail section */ \ + nop.m 0; \ + (p16) shrp r38=r36,r37,shift; \ +EX(.ex_handler, (p17) st8 [dst0]=r35,8); /* 1 */ \ +EK(.ex_handler, (p17) st8 [dst1]=r39,8); \ + br.ctop.dptk.few 1b;; \ + (p7) add src1=-8,src1; /* back out for <8 byte case */ \ + shrp r21=r22,r38,shift; /* speculative work */ \ + br.sptk.few .unaligned_src_tail /* branch out of jump table */ \ + ;; + TEXT_ALIGN(32) +.jump_table: + COPYU(8) // unaligned cases 
+.jmp1: + COPYU(16) + COPYU(24) + COPYU(32) + COPYU(40) + COPYU(48) + COPYU(56) + +#undef A +#undef B +#undef C +#undef D + +/* + * Due to lack of local tag support in gcc 2.x assembler, it is not clear which + * instruction failed in the bundle. The exception algorithm is that we + * first figure out the faulting address, then detect if there is any + * progress made on the copy, if so, redo the copy from last known copied + * location up to the faulting address (exclusive). In the copy_from_user + * case, remaining byte in kernel buffer will be zeroed. + * + * Take copy_from_user as an example, in the code there are multiple loads + * in a bundle and those multiple loads could span over two pages, the + * faulting address is calculated as page_round_down(max(src0, src1)). + * This is based on knowledge that if we can access one byte in a page, we + * can access any byte in that page. + * + * predicate used in the exception handler: + * p6-p7: direction + * p10-p11: src faulting addr calculation + * p12-p13: dst faulting addr calculation + */ + +#define A r19 +#define B r20 +#define C r21 +#define D r22 +#define F r28 + +#define memset_arg0 r32 +#define memset_arg2 r33 + +#define saved_retval loc0 +#define saved_rtlink loc1 +#define saved_pfs_stack loc2 + +.ex_hndlr_s: + add src0=8,src0 + br.sptk .ex_handler + ;; +.ex_hndlr_d: + add dst0=8,dst0 + br.sptk .ex_handler + ;; +.ex_hndlr_lcpy_1: + mov src1=src_pre_mem + mov dst1=dst_pre_mem + cmp.gtu p10,p11=src_pre_mem,saved_in1 + cmp.gtu p12,p13=dst_pre_mem,saved_in0 + ;; +(p10) add src0=8,saved_in1 +(p11) mov src0=saved_in1 +(p12) add dst0=8,saved_in0 +(p13) mov dst0=saved_in0 + br.sptk .ex_handler +.ex_handler_lcpy: + // in line_copy block, the preload addresses should always ahead + // of the other two src/dst pointers. Furthermore, src1/dst1 should + // always ahead of src0/dst0. 
+ mov src1=src_pre_mem + mov dst1=dst_pre_mem +.ex_handler: + mov pr=saved_pr,-1 // first restore pr, lc, and pfs + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + ;; +.ex_handler_short: // fault occurred in these sections didn't change pr, lc, pfs + cmp.ltu p6,p7=saved_in0, saved_in1 // get the copy direction + cmp.ltu p10,p11=src0,src1 + cmp.ltu p12,p13=dst0,dst1 + fcmp.eq p8,p0=f6,f0 // is it memcpy? + mov tmp = dst0 + ;; +(p11) mov src1 = src0 // pick the larger of the two +(p13) mov dst0 = dst1 // make dst0 the smaller one +(p13) mov dst1 = tmp // and dst1 the larger one + ;; +(p6) dep F = r0,dst1,0,PAGE_SHIFT // usr dst round down to page boundary +(p7) dep F = r0,src1,0,PAGE_SHIFT // usr src round down to page boundary + ;; +(p6) cmp.le p14,p0=dst0,saved_in0 // no progress has been made on store +(p7) cmp.le p14,p0=src0,saved_in1 // no progress has been made on load + mov retval=saved_in2 +(p8) ld1 tmp=[src1] // force an oops for memcpy call +(p8) st1 [dst1]=r0 // force an oops for memcpy call +(p14) br.ret.sptk.many rp + +/* + * The remaining byte to copy is calculated as: + * + * A = (faulting_addr - orig_src) -> len to faulting ld address + * or + * (faulting_addr - orig_dst) -> len to faulting st address + * B = (cur_dst - orig_dst) -> len copied so far + * C = A - B -> len need to be copied + * D = orig_len - A -> len need to be zeroed + */ +(p6) sub A = F, saved_in0 +(p7) sub A = F, saved_in1 + clrrrb + ;; + alloc saved_pfs_stack=ar.pfs,3,3,3,0 + cmp.lt p8,p0=A,r0 + sub B = dst0, saved_in0 // how many byte copied so far + ;; +(p8) mov A = 0; // A shouldn't be negative, cap it + ;; + sub C = A, B + sub D = saved_in2, A + ;; + cmp.gt p8,p0=C,r0 // more than 1 byte? 
+ add memset_arg0=saved_in0, A +(p6) mov memset_arg2=0 // copy_to_user should not call memset +(p7) mov memset_arg2=D // copy_from_user needs to have kbuf zeroed + mov r8=0 + mov saved_retval = D + mov saved_rtlink = b0 + + add out0=saved_in0, B + add out1=saved_in1, B + mov out2=C +(p8) br.call.sptk.few b0=__copy_user // recursive call + ;; + + add saved_retval=saved_retval,r8 // above might return non-zero value + cmp.gt p8,p0=memset_arg2,r0 // more than 1 byte? + mov out0=memset_arg0 // *s + mov out1=r0 // c + mov out2=memset_arg2 // n +(p8) br.call.sptk.few b0=memset + ;; + + mov retval=saved_retval + mov ar.pfs=saved_pfs_stack + mov b0=saved_rtlink + br.ret.sptk.many rp + +/* end of McKinley specific optimization */ +END(__copy_user) diff --git a/kernel/arch/ia64/lib/memset.S b/kernel/arch/ia64/lib/memset.S new file mode 100644 index 000000000..f26c16aef --- /dev/null +++ b/kernel/arch/ia64/lib/memset.S @@ -0,0 +1,362 @@ +/* Optimized version of the standard memset() function. + + Copyright (c) 2002 Hewlett-Packard Co/CERN + Sverre Jarp + + Return: dest + + Inputs: + in0: dest + in1: value + in2: count + + The algorithm is fairly straightforward: set byte by byte until we + get to a 16B-aligned address, then loop on 128 B chunks using an + early store as prefetching, then loop on 32B chunks, then clear remaining + words, finally clear remaining bytes. + Since a stf.spill f0 can store 16B in one go, we use this instruction + to get peak speed when value = 0.
*/ + +#include +#undef ret + +#define dest in0 +#define value in1 +#define cnt in2 + +#define tmp r31 +#define save_lc r30 +#define ptr0 r29 +#define ptr1 r28 +#define ptr2 r27 +#define ptr3 r26 +#define ptr9 r24 +#define loopcnt r23 +#define linecnt r22 +#define bytecnt r21 + +#define fvalue f6 + +// This routine uses only scratch predicate registers (p6 - p15) +#define p_scr p6 // default register for same-cycle branches +#define p_nz p7 +#define p_zr p8 +#define p_unalgn p9 +#define p_y p11 +#define p_n p12 +#define p_yy p13 +#define p_nn p14 + +#define MIN1 15 +#define MIN1P1HALF 8 +#define LINE_SIZE 128 +#define LSIZE_SH 7 // shift amount +#define PREF_AHEAD 8 + +GLOBAL_ENTRY(memset) +{ .mmi + .prologue + alloc tmp = ar.pfs, 3, 0, 0, 0 + lfetch.nt1 [dest] // + .save ar.lc, save_lc + mov.i save_lc = ar.lc + .body +} { .mmi + mov ret0 = dest // return value + cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero + cmp.eq p_scr, p0 = cnt, r0 +;; } +{ .mmi + and ptr2 = -(MIN1+1), dest // aligned address + and tmp = MIN1, dest // prepare to check for correct alignment + tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) +} { .mib + mov ptr1 = dest + mux1 value = value, @brcst // create 8 identical bytes in word +(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 +;; } +{ .mib + cmp.ne p_unalgn, p0 = tmp, r0 // +} { .mib + sub bytecnt = (MIN1+1), tmp // NB: # of bytes to move is 1 higher than loopcnt + cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? +(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) +;; } +{ .mmi +(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment +(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? +;; } +{ .mib +(p_y) add cnt = -8, cnt // +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? 
+} { .mib +(p_y) st8 [ptr2] = value,-4 // +(p_n) add ptr2 = 4, ptr2 // +;; } +{ .mib +(p_yy) add cnt = -4, cnt // +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? +} { .mib +(p_yy) st4 [ptr2] = value,-2 // +(p_nn) add ptr2 = 2, ptr2 // +;; } +{ .mmi + mov tmp = LINE_SIZE+1 // for compare +(p_y) add cnt = -2, cnt // +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? +} { .mmi + setf.sig fvalue=value // transfer value to FLP side +(p_y) st2 [ptr2] = value,-1 // +(p_n) add ptr2 = 1, ptr2 // +;; } + +{ .mmi +(p_yy) st1 [ptr2] = value // + cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? +} { .mbb +(p_yy) add cnt = -1, cnt // +(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few +;; } + +{ .mib + nop.m 0 + shr.u linecnt = cnt, LSIZE_SH +(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill +;; } + + TEXT_ALIGN(32) // --------------------- // L1A: store ahead into cache lines; fill later +{ .mmi + and tmp = -(LINE_SIZE), cnt // compute end of range + mov ptr9 = ptr1 // used for prefetching + and cnt = (LINE_SIZE-1), cnt // remainder +} { .mmi + mov loopcnt = PREF_AHEAD-1 // default prefetch loop + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value +;; } +{ .mmi +(p_scr) add loopcnt = -1, linecnt // + add ptr2 = 8, ptr1 // start of stores (beyond prefetch stores) + add ptr1 = tmp, ptr1 // first address beyond total range +;; } +{ .mmi + add tmp = -1, linecnt // next loop count + mov.i ar.lc = loopcnt // +;; } +.pref_l1a: +{ .mib + stf8 [ptr9] = fvalue, 128 // Do stores one cache line apart + nop.i 0 + br.cloop.dptk.few .pref_l1a +;; } +{ .mmi + add ptr0 = 16, ptr2 // Two stores in parallel + mov.i ar.lc = tmp // +;; } +.l1ax: + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 8 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 24 + stf8 [ptr0] = fvalue, 24 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 8 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 24 + stf8 [ptr0] = fvalue, 24 
+ ;; } + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 8 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 24 + stf8 [ptr0] = fvalue, 24 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 32 + cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + ;; } +{ .mmb + stf8 [ptr2] = fvalue, 24 +(p_scr) stf8 [ptr9] = fvalue, 128 + br.cloop.dptk.few .l1ax +;; } +{ .mbb + cmp.le p_scr, p0 = 8, cnt // just a few bytes left ? +(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2 + br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3 +;; } + + TEXT_ALIGN(32) +.l1b: // ------------------------------------ // L1B: store ahead into cache lines; fill later +{ .mmi + and tmp = -(LINE_SIZE), cnt // compute end of range + mov ptr9 = ptr1 // used for prefetching + and cnt = (LINE_SIZE-1), cnt // remainder +} { .mmi + mov loopcnt = PREF_AHEAD-1 // default prefetch loop + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value +;; } +{ .mmi +(p_scr) add loopcnt = -1, linecnt + add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) + add ptr1 = tmp, ptr1 // first address beyond total range +;; } +{ .mmi + add tmp = -1, linecnt // next loop count + mov.i ar.lc = loopcnt +;; } +.pref_l1b: +{ .mib + stf.spill [ptr9] = f0, 128 // Do stores one cache line apart + nop.i 0 + br.cloop.dptk.few .pref_l1b +;; } +{ .mmi + add ptr0 = 16, ptr2 // Two stores in parallel + mov.i ar.lc = tmp +;; } +.l1bx: + { .mmi + stf.spill [ptr2] = f0, 32 + stf.spill [ptr0] = f0, 32 + ;; } + { .mmi + stf.spill [ptr2] = f0, 32 + stf.spill [ptr0] = f0, 32 + ;; } + { .mmi + stf.spill [ptr2] = f0, 32 + stf.spill [ptr0] = f0, 64 + cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + ;; } +{ .mmb + stf.spill [ptr2] = f0, 32 +(p_scr) stf.spill [ptr9] = f0, 128 + br.cloop.dptk.few .l1bx +;; } +{ .mib + cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? 
+(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // +;; } + +.fraction_of_line: +{ .mib + add ptr2 = 16, ptr1 + shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 +;; } +{ .mib + cmp.eq p_scr, p0 = loopcnt, r0 + add loopcnt = -1, loopcnt +(p_scr) br.cond.dpnt.many .store_words +;; } +{ .mib + and cnt = 0x1f, cnt // compute the remaining cnt + mov.i ar.lc = loopcnt +;; } + TEXT_ALIGN(32) +.l2: // ------------------------------------ // L2A: store 32B in 2 cycles +{ .mmb + stf8 [ptr1] = fvalue, 8 + stf8 [ptr2] = fvalue, 8 +;; } { .mmb + stf8 [ptr1] = fvalue, 24 + stf8 [ptr2] = fvalue, 24 + br.cloop.dptk.many .l2 +;; } +.store_words: +{ .mib + cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? +(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch +;; } + +{ .mmi + stf8 [ptr1] = fvalue, 8 // store + cmp.le p_y, p_n = 16, cnt + add cnt = -8, cnt // subtract +;; } +{ .mmi +(p_y) stf8 [ptr1] = fvalue, 8 // store +(p_y) cmp.le.unc p_yy, p_nn = 16, cnt +(p_y) add cnt = -8, cnt // subtract +;; } +{ .mmi // store +(p_yy) stf8 [ptr1] = fvalue, 8 +(p_yy) add cnt = -8, cnt // subtract +;; } + +.move_bytes_from_alignment: +{ .mib + cmp.eq p_scr, p0 = cnt, r0 + tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? +(p_scr) br.cond.dpnt.few .restore_and_exit +;; } +{ .mib +(p_y) st4 [ptr1] = value,4 + tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? +;; } +{ .mib +(p_yy) st2 [ptr1] = value,2 + tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ? 
+;; } + +{ .mib +(p_y) st1 [ptr1] = value +;; } +.restore_and_exit: +{ .mib + nop.m 0 + mov.i ar.lc = save_lc + br.ret.sptk.many rp +;; } + +.move_bytes_unaligned: +{ .mmi + .pred.rel "mutex",p_y, p_n + .pred.rel "mutex",p_yy, p_nn +(p_n) cmp.le p_yy, p_nn = 4, cnt +(p_y) cmp.le p_yy, p_nn = 5, cnt +(p_n) add ptr2 = 2, ptr1 +} { .mmi +(p_y) add ptr2 = 3, ptr1 +(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte [15, 14 (or less) left] +(p_y) add cnt = -1, cnt +;; } +{ .mmi +(p_yy) cmp.le.unc p_y, p0 = 8, cnt + add ptr3 = ptr1, cnt // prepare last store + mov.i ar.lc = save_lc +} { .mmi +(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes +(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [11, 10 (o less) left] +(p_yy) add cnt = -4, cnt +;; } +{ .mmi +(p_y) cmp.le.unc p_yy, p0 = 8, cnt + add ptr3 = -1, ptr3 // last store + tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? +} { .mmi +(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes +(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [7, 6 (or less) left] +(p_y) add cnt = -4, cnt +;; } +{ .mmi +(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes +(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [3, 2 (or less) left] + tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? 
+} { .mmi +(p_yy) add cnt = -4, cnt +;; } +{ .mmb +(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes +(p_y) st1 [ptr3] = value // fill last byte (using ptr3) + br.ret.sptk.many rp +} +END(memset) diff --git a/kernel/arch/ia64/lib/strlen.S b/kernel/arch/ia64/lib/strlen.S new file mode 100644 index 000000000..e0cdac0a8 --- /dev/null +++ b/kernel/arch/ia64/lib/strlen.S @@ -0,0 +1,192 @@ +/* + * + * Optimized version of the standard strlen() function + * + * + * Inputs: + * in0 address of string + * + * Outputs: + * ret0 the number of characters in the string (0 if empty string) + * does not count the \0 + * + * Copyright (C) 1999, 2001 Hewlett-Packard Co + * Stephane Eranian + * + * 09/24/99 S.Eranian add speculation recovery code + */ + +#include + +// +// +// This is an enhanced version of the basic strlen. it includes a combination +// of compute zero index (czx), parallel comparisons, speculative loads and +// loop unroll using rotating registers. +// +// General Ideas about the algorithm: +// The goal is to look at the string in chunks of 8 bytes. +// so we need to do a few extra checks at the beginning because the +// string may not be 8-byte aligned. In this case we load the 8byte +// quantity which includes the start of the string and mask the unused +// bytes with 0xff to avoid confusing czx. +// We use speculative loads and software pipelining to hide memory +// latency and do read ahead safely. This way we defer any exception. +// +// Because we don't want the kernel to be relying on particular +// settings of the DCR register, we provide recovery code in case +// speculation fails. The recovery code is going to "redo" the work using +// only normal loads. If we still get a fault then we generate a +// kernel panic. Otherwise we return the strlen as usual. +// +// The fact that speculation may fail can be caused, for instance, by +// the DCR.dm bit being set. 
In this case TLB misses are deferred, i.e., +// a NaT bit will be set if the translation is not present. The normal +// load, on the other hand, will cause the translation to be inserted +// if the mapping exists. +// +// It should be noted that we execute recovery code only when we need +// to use the data that has been speculatively loaded: we don't execute +// recovery code on pure read ahead data. +// +// Remarks: +// - the cmp r0,r0 is used as a fast way to initialize a predicate +// register to 1. This is required to make sure that we get the parallel +// compare correct. +// +// - we don't use the epilogue counter to exit the loop but we need to set +// it to zero beforehand. +// +// - after the loop we must test for Nat values because neither the +// czx nor cmp instruction raise a NaT consumption fault. We must be +// careful not to look too far for a Nat for which we don't care. +// For instance we don't need to look at a NaT in val2 if the zero byte +// was in val1. +// +// - Clearly performance tuning is required. 
//
// Register aliases used by strlen() below.
//
#define saved_pfs	r11
#define	tmp		r10
#define base		r16
#define orig		r17
#define saved_pr	r18
#define src		r19
#define mask		r20
#define val		r21
#define val1		r22
#define val2		r23

//
// strlen: in0 = address of string, ret0 = number of bytes before the NUL.
//
// The string is scanned 8 bytes at a time from the 8-byte-aligned address at
// or below in0.  Bytes of the first word that precede in0 are forced to 0xff
// (via "mask") so that czx1.r cannot report a zero byte in front of the
// string.  The main loop uses speculative loads (ld8.s); if a value that is
// actually needed turns out to be NaT, the scan is redone from the start with
// ordinary loads in .recover.
//
GLOBAL_ENTRY(strlen)
	.prologue
	.save ar.pfs, saved_pfs
	alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8

	.rotr v[2], w[2]	// declares our 4 aliases

	extr.u tmp=in0,0,3	// tmp=least significant 3 bits
	mov orig=in0		// keep track of initial byte address
	dep src=0,in0,0,3	// src=8byte-aligned in0 address
	.save pr, saved_pr
	mov saved_pr=pr		// preserve predicates (rotation)
	;;

	.body

	ld8 v[1]=[src],8	// must not speculate: can fail here
	shl tmp=tmp,3		// multiply by 8bits/byte
	mov mask=-1		// our mask
	;;
	ld8.s w[1]=[src],8	// speculatively load next
	cmp.eq p6,p0=r0,r0	// sets p6 to true for cmp.and
	sub tmp=64,tmp		// how many bits to shift our mask on the right
	;;
	shr.u	mask=mask,tmp	// zero enough bits to hold v[1] valuable part
	mov ar.ec=r0		// clear epilogue counter (saved in ar.pfs)
	;;
	add base=-16,src	// keep track of aligned base
	or v[1]=v[1],mask	// now we have a safe initial byte pattern
	;;
1:
	ld8.s v[0]=[src],8	// speculatively load next
	czx1.r val1=v[1]	// search 0 byte from right
	czx1.r val2=w[1]	// search 0 byte from right following 8bytes
	;;
	ld8.s w[0]=[src],8	// speculatively load next to next
	cmp.eq.and p6,p0=8,val1	// p6 = p6 and val1==8
	cmp.eq.and p6,p0=8,val2	// p6 = p6 and val2==8
(p6)	br.wtop.dptk 1b		// loop until p6 == 0
	;;
	//
	// We must try the recovery code iff
	// val1_is_nat || (val1==8 && val2_is_nat)
	//
	// XXX Fixme
	//	- there must be a better way of doing the test
	//
	cmp.eq  p8,p9=8,val1	// p8 = (val1==8), p9 = (val1!=8) (disambiguate)
	tnat.nz p6,p7=val1	// test NaT on val1
(p6)	br.cond.spnt .recover	// jump to recovery if val1 is NaT
	;;
	//
	// if we come here p7 is true, i.e., initialized for the parallel cmp
	//
	cmp.eq.and  p7,p0=8,val1// val1==8?
	tnat.nz.and p7,p0=val2	// test NaT on val2
(p7)	br.cond.spnt .recover	// jump to recovery if val2 is NaT
	;;
(p8)	mov val1=val2		// the other test got us out of the loop
(p8)	adds src=-16,src	// correct position when 3 ahead
(p9)	adds src=-24,src	// correct position when 4 ahead
	;;
	sub ret0=src,orig	// distance from base
	sub tmp=8,val1		// which byte in word
	mov pr=saved_pr,0xffffffffffff0000
	;;
	sub ret0=ret0,tmp	// adjust
	mov ar.pfs=saved_pfs	// because of ar.ec, restore no matter what
	br.ret.sptk.many rp	// end of normal execution

	//
	// Outlined recovery code when speculation failed
	//
	// This time we don't use speculation and rely on the normal exception
	// mechanism. That's why the loop is not as good as the previous one:
	// read ahead is not possible.
	//
	// IMPORTANT:
	// Please note that in the case of strlen() as opposed to strlen_user()
	// we don't use the exception mechanism, as this function is not
	// supposed to fail. If that happens it means we have a bug and the
	// code will cause a kernel fault.
	//
	// XXX Fixme
	//	- today we restart from the beginning of the string instead
	//	  of trying to continue where we left off.
	//
.recover:
	ld8 val=[base],8	// will fail if unrecoverable fault
	;;
	or val=val,mask		// remask first bytes
	cmp.eq p0,p6=r0,r0	// clear p6: nullify first ld8 in loop
	;;
	//
	// ar.ec is still zero here
	//
2:
(p6)	ld8 val=[base],8	// will fail if unrecoverable fault
	;;
	czx1.r val1=val		// search 0 byte from right
	;;
	cmp.eq p6,p0=8,val1	// val1==8 ?
(p6)	br.wtop.dptk 2b		// loop until p6 == 0
	;;			// (avoid WAW on p63)
	sub ret0=base,orig	// distance from base
	sub tmp=8,val1
	mov pr=saved_pr,0xffffffffffff0000
	;;
	sub ret0=ret0,tmp	// length=now - back -1
	mov ar.pfs=saved_pfs	// because of ar.ec, restore no matter what
	br.ret.sptk.many rp	// end of successful recovery code
END(strlen)
The recovery code is going to "redo" the work using +// only normal loads. If we still get a fault then we return an +// error (ret0=0). Otherwise we return the strlen+1 as usual. +// The fact that speculation may fail can be caused, for instance, by +// the DCR.dm bit being set. In this case TLB misses are deferred, i.e., +// a NaT bit will be set if the translation is not present. The normal +// load, on the other hand, will cause the translation to be inserted +// if the mapping exists. +// +// It should be noted that we execute recovery code only when we need +// to use the data that has been speculatively loaded: we don't execute +// recovery code on pure read ahead data. +// +// Remarks: +// - the cmp r0,r0 is used as a fast way to initialize a predicate +// register to 1. This is required to make sure that we get the parallel +// compare correct. +// +// - we don't use the epilogue counter to exit the loop but we need to set +// it to zero beforehand. +// +// - after the loop we must test for Nat values because neither the +// czx nor cmp instruction raise a NaT consumption fault. We must be +// careful not to look too far for a Nat for which we don't care. +// For instance we don't need to look at a NaT in val2 if the zero byte +// was in val1. +// +// - Clearly performance tuning is required. 
//
// Register aliases used by __strlen_user() below.
//
#define saved_pfs	r11
#define	tmp		r10
#define base		r16
#define orig		r17
#define saved_pr	r18
#define src		r19
#define mask		r20
#define val		r21
#define val1		r22
#define val2		r23

//
// __strlen_user: in0 = address of buffer; ret0 = strlen(buffer)+1, or 0 when
// the .Lexit1 path is taken.
//
// Same scheme as the speculative 8-bytes-at-a-time scan: the first aligned
// word is masked with 0xff in the bytes that precede in0, the main loop uses
// ld8.s, and .recover redoes the scan from the start with ordinary loads.
// Unlike the main loop, every load in .recover is wrapped in EX(.Lexit1, ...).
// NOTE(review): EX() is defined outside this file; presumably it registers
// .Lexit1 as the fault fixup for the wrapped load -- confirm.
//
GLOBAL_ENTRY(__strlen_user)
	.prologue
	.save ar.pfs, saved_pfs
	alloc saved_pfs=ar.pfs,11,0,0,8

	.rotr v[2], w[2]	// declares our 4 aliases

	extr.u tmp=in0,0,3	// tmp=least significant 3 bits
	mov orig=in0		// keep track of initial byte address
	dep src=0,in0,0,3	// src=8byte-aligned in0 address
	.save pr, saved_pr
	mov saved_pr=pr		// preserve predicates (rotation)
	;;

	.body

	ld8.s v[1]=[src],8	// load the initial 8bytes (must speculate)
	shl tmp=tmp,3		// multiply by 8bits/byte
	mov mask=-1		// our mask
	;;
	ld8.s w[1]=[src],8	// load next 8 bytes in 2nd pipeline
	cmp.eq p6,p0=r0,r0	// sets p6 (required because of the parallel cmp.and)
	sub tmp=64,tmp		// how many bits to shift our mask on the right
	;;
	shr.u	mask=mask,tmp	// zero enough bits to hold v[1] valuable part
	mov ar.ec=r0		// clear epilogue counter (saved in ar.pfs)
	;;
	add base=-16,src	// keep track of aligned base
	chk.s v[1], .recover	// if already NaT, then directly skip to recover
	or v[1]=v[1],mask	// now we have a safe initial byte pattern
	;;
1:
	ld8.s v[0]=[src],8	// speculatively load next
	czx1.r val1=v[1]	// search 0 byte from right
	czx1.r val2=w[1]	// search 0 byte from right following 8bytes
	;;
	ld8.s w[0]=[src],8	// speculatively load next to next
	cmp.eq.and p6,p0=8,val1	// p6 = p6 and val1==8
	cmp.eq.and p6,p0=8,val2	// p6 = p6 and val2==8
(p6)	br.wtop.dptk.few 1b	// loop until p6 == 0
	;;
	//
	// We must try the recovery code iff
	// val1_is_nat || (val1==8 && val2_is_nat)
	//
	// XXX Fixme
	//	- there must be a better way of doing the test
	//
	cmp.eq  p8,p9=8,val1	// p8 = (val1==8), p9 = (val1!=8) (disambiguate)
	tnat.nz p6,p7=val1	// test NaT on val1
(p6)	br.cond.spnt .recover	// jump to recovery if val1 is NaT
	;;
	//
	// if we come here p7 is true, i.e., initialized for the parallel cmp
	//
	cmp.eq.and  p7,p0=8,val1// val1==8?
	tnat.nz.and p7,p0=val2	// test NaT on val2
(p7)	br.cond.spnt .recover	// jump to recovery if val2 is NaT
	;;
(p8)	mov val1=val2		// val2 contains the value
(p8)	adds src=-16,src	// correct position when 3 ahead
(p9)	adds src=-24,src	// correct position when 4 ahead
	;;
	sub ret0=src,orig	// distance from origin
	sub tmp=7,val1		// 7=8-1 because this strlen returns strlen+1
	mov pr=saved_pr,0xffffffffffff0000
	;;
	sub ret0=ret0,tmp	// length=now - back -1
	mov ar.pfs=saved_pfs	// because of ar.ec, restore no matter what
	br.ret.sptk.many rp	// end of normal execution

	//
	// Outlined recovery code when speculation failed
	//
	// This time we don't use speculation and rely on the normal exception
	// mechanism. That's why the loop is not as good as the previous one:
	// read ahead is not possible.
	//
	// XXX Fixme
	//	- today we restart from the beginning of the string instead
	//	  of trying to continue where we left off.
	//
.recover:
	EX(.Lexit1, ld8 val=[base],8)	// load the initial bytes
	;;
	or val=val,mask			// remask first bytes
	cmp.eq p0,p6=r0,r0		// clear p6: nullify first ld8 in loop
	;;
	//
	// ar.ec is still zero here
	//
2:
	EX(.Lexit1, (p6) ld8 val=[base],8)
	;;
	czx1.r val1=val		// search 0 byte from right
	;;
	cmp.eq p6,p0=8,val1	// val1==8 ?
(p6)	br.wtop.dptk.few 2b	// loop until p6 == 0
	;;
	sub ret0=base,orig	// distance from base
	sub tmp=7,val1		// 7=8-1 because this strlen returns strlen+1
	mov pr=saved_pr,0xffffffffffff0000
	;;
	sub ret0=ret0,tmp	// length=now - back -1
	mov ar.pfs=saved_pfs	// because of ar.ec, restore no matter what
	br.ret.sptk.many rp	// end of successful recovery code

	//
	// We failed even on the normal load (called from exception handler):
	// return 0 after restoring the predicates and ar.pfs.
	//
.Lexit1:
	mov ret0=0
	mov pr=saved_pr,0xffffffffffff0000
	mov ar.pfs=saved_pfs	// because of ar.ec, restore no matter what
	br.ret.sptk.many rp
END(__strlen_user)
//
// __strncpy_from_user: byte-by-byte copy of a NUL-terminated string.
//
//	in0: destination buffer
//	in1: source string
//	in2: size of the destination buffer in bytes
//	r8:  result (see below)
//
// Register use inside the routine:
//	r9  = original value of in1 (start of the source)
//	r10 = in1 + in2 (first source address past the allowed window)
//
// Result when no fault is taken:
//	- in2 == 0:			r8 = 0, nothing is copied
//	- NUL copied before the limit:	r8 = bytes copied, excluding the NUL
//	- limit reached without a NUL:	r8 = in2
//
// NOTE(review): EX() is defined outside this file; presumably it registers
// .Lexit as the fault fixup for the wrapped load/store.  The file header says
// -EFAULT is returned in r8 in that case -- confirm against the EX() and
// exception-table implementation.
//
GLOBAL_ENTRY(__strncpy_from_user)
	alloc r2=ar.pfs,3,0,0,0
	mov r8=0			// default result for the in2 == 0 early return
	mov r9=in1			// remember start of source
	;;
	add r10=in1,in2			// end of the source window
	cmp.eq p6,p0=r0,in2		// zero-length buffer?
(p6)	br.ret.spnt.many rp

	// XXX braindead copy loop---this needs to be optimized
.Loop1:
	EX(.Lexit, ld1 r8=[in1],1)	// fetch next source byte, advance in1
	;;
	EX(.Lexit, st1 [in0]=r8,1)	// store it, advance in0
	cmp.ne p6,p7=r8,r0		// p6: byte was non-NUL, p7: byte was NUL
	;;
(p6)	cmp.ne.unc p8,p0=in1,r10	// p8: non-NUL byte and window not exhausted
(p8)	br.cond.dpnt.few .Loop1
	;;
(p6)	mov r8=in2		// buffer filled up---return buffer length
(p7)	sub r8=in1,r9,1		// return string length (excluding NUL character)
[.Lexit:]
	br.ret.sptk.many rp
END(__strncpy_from_user)
//
// xor_ia64_2: XOR two buffers, 8 bytes per iteration, result written to the
// first buffer.
//
//	in0: byte count (shifted right by 3 to get the number of 8-byte words)
//	in1: first source; also the destination (r8 is initialised from in1)
//	in2: second source
//
// The loop is software-pipelined with rotating registers and predicates
// (.rotr/.rotp, br.ctop): stage p[0] issues the loads, stage p[6] does the
// xor, stage p[6+1] stores the result.  ar.lc = (in0 >> 3) - 1 and
// ar.ec = 6 + 2 drive the loop and its drain.
// NOTE(review): in0 is assumed to be a non-zero multiple of 8; nothing here
// checks it -- confirm against callers.
//
GLOBAL_ENTRY(xor_ia64_2)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 3, 0, 13, 16
	.save ar.lc, r30
	mov r30 = ar.lc			// preserve loop counter
	.save pr, r29
	mov r29 = pr			// preserve predicates (rotation)
	;;
	.body
	mov r8 = in1			// store pointer (destination)
	mov ar.ec = 6 + 2		// epilogue count = number of pipeline stages
	shr in0 = in0, 3		// bytes -> 8-byte words
	;;
	adds in0 = -1, in0		// ar.lc wants (iterations - 1)
	mov r16 = in1			// load pointer for source 1
	mov r17 = in2			// load pointer for source 2
	;;
	mov ar.lc = in0
	mov pr.rot = 1 << 16		// only p16 (= p[0]) set: start first stage
	;;
	.rotr s1[6+1], s2[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
(p[6+1])st8.nta [r8] = d[1], 8
	nop.f 0
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30			// restore loop counter
	mov pr = r29, -1		// restore all predicates
	br.ret.sptk.few rp
END(xor_ia64_2)
//
// xor_ia64_5: XOR five buffers, 8 bytes per iteration, result written to the
// first buffer.
//
//	in0:       byte count (shifted right by 3 to get 8-byte words)
//	in1:       first source; also the destination (r8 is initialised from in1)
//	in2..in5:  remaining four sources
//
// The loop is software-pipelined with rotating registers and predicates
// (.rotr/.rotp, br.ctop): stage p[0] issues the five loads, stage p[6]
// combines them (r21 holds the s3^s4 partial result), stage p[6+1] stores.
// ar.lc = (in0 >> 3) - 1 and ar.ec = 6 + 2 drive the loop and its drain.
// NOTE(review): in0 is assumed to be a non-zero multiple of 8; nothing here
// checks it -- confirm against callers.
//
GLOBAL_ENTRY(xor_ia64_5)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 6, 0, 34, 40
	.save ar.lc, r30
	mov r30 = ar.lc			// preserve loop counter
	.save pr, r29
	mov r29 = pr			// preserve predicates (rotation)
	;;
	.body
	mov r8 = in1			// store pointer (destination)
	mov ar.ec = 6 + 2		// epilogue count = number of pipeline stages
	shr in0 = in0, 3		// bytes -> 8-byte words
	;;
	adds in0 = -1, in0		// ar.lc wants (iterations - 1)
	mov r16 = in1			// load pointers, one per source
	mov r17 = in2
	;;
	mov r18 = in3
	mov ar.lc = in0
	mov pr.rot = 1 << 16		// only p16 (= p[0]) set: start first stage
	mov r19 = in4
	mov r20 = in5
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r21 = s3[6], s4[6]
	;;
(p[0])	ld8.nta s5[0] = [r20], 8
(p[6+1])st8.nta [r8] = d[1], 8
(p[6])	xor d[0] = d[0], r21
	;;
(p[6])	xor d[0] = d[0], s5[6]
	nop.f 0
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30			// restore loop counter
	mov pr = r29, -1		// restore all predicates
	br.ret.sptk.few rp
END(xor_ia64_5)
+# + +obj-y := init.o fault.o tlb.o extable.o ioremap.o + +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_NUMA) += numa.o +obj-$(CONFIG_DISCONTIGMEM) += discontig.o +obj-$(CONFIG_SPARSEMEM) += discontig.o +obj-$(CONFIG_FLATMEM) += contig.o diff --git a/kernel/arch/ia64/mm/contig.c b/kernel/arch/ia64/mm/contig.c new file mode 100644 index 000000000..52715a71a --- /dev/null +++ b/kernel/arch/ia64/mm/contig.c @@ -0,0 +1,278 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 2000, Rohit Seth + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved. + * + * Routines used by ia64 machines with contiguous (or virtually contiguous) + * memory. + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef CONFIG_VIRTUAL_MEM_MAP +static unsigned long max_gap; +#endif + +/* physical address where the bootmem map is located */ +unsigned long bootmap_start; + +/** + * find_bootmap_location - callback to find a memory area for the bootmap + * @start: start of region + * @end: end of region + * @arg: unused callback data + * + * Find a place to put the bootmap and return its starting address in + * bootmap_start. This address must be page-aligned. 
+ */ +static int __init +find_bootmap_location (u64 start, u64 end, void *arg) +{ + u64 needed = *(unsigned long *)arg; + u64 range_start, range_end, free_start; + int i; + +#if IGNORE_PFN0 + if (start == PAGE_OFFSET) { + start += PAGE_SIZE; + if (start >= end) + return 0; + } +#endif + + free_start = PAGE_OFFSET; + + for (i = 0; i < num_rsvd_regions; i++) { + range_start = max(start, free_start); + range_end = min(end, rsvd_region[i].start & PAGE_MASK); + + free_start = PAGE_ALIGN(rsvd_region[i].end); + + if (range_end <= range_start) + continue; /* skip over empty range */ + + if (range_end - range_start >= needed) { + bootmap_start = __pa(range_start); + return -1; /* done */ + } + + /* nothing more available in this segment */ + if (range_end == end) + return 0; + } + return 0; +} + +#ifdef CONFIG_SMP +static void *cpu_data; +/** + * per_cpu_init - setup per-cpu variables + * + * Allocate and setup per-cpu data areas. + */ +void *per_cpu_init(void) +{ + static bool first_time = true; + void *cpu0_data = __cpu0_per_cpu; + unsigned int cpu; + + if (!first_time) + goto skip; + first_time = false; + + /* + * get_free_pages() cannot be used before cpu_init() done. + * BSP allocates PERCPU_PAGE_SIZE bytes for all possible CPUs + * to avoid that AP calls get_zeroed_page(). + */ + for_each_possible_cpu(cpu) { + void *src = cpu == 0 ? cpu0_data : __phys_per_cpu_start; + + memcpy(cpu_data, src, __per_cpu_end - __per_cpu_start); + __per_cpu_offset[cpu] = (char *)cpu_data - __per_cpu_start; + per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; + + /* + * percpu area for cpu0 is moved from the __init area + * which is setup by head.S and used till this point. + * Update ar.k3. This move is ensures that percpu + * area for cpu0 is on the correct node and its + * virtual address isn't insanely far from other + * percpu areas which is important for congruent + * percpu allocator. 
+ */ + if (cpu == 0) + ia64_set_kr(IA64_KR_PER_CPU_DATA, __pa(cpu_data) - + (unsigned long)__per_cpu_start); + + cpu_data += PERCPU_PAGE_SIZE; + } +skip: + return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; +} + +static inline void +alloc_per_cpu_data(void) +{ + cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * num_possible_cpus(), + PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); +} + +/** + * setup_per_cpu_areas - setup percpu areas + * + * Arch code has already allocated and initialized percpu areas. All + * this function has to do is to teach the determined layout to the + * dynamic percpu allocator, which happens to be more complex than + * creating whole new ones using helpers. + */ +void __init +setup_per_cpu_areas(void) +{ + struct pcpu_alloc_info *ai; + struct pcpu_group_info *gi; + unsigned int cpu; + ssize_t static_size, reserved_size, dyn_size; + int rc; + + ai = pcpu_alloc_alloc_info(1, num_possible_cpus()); + if (!ai) + panic("failed to allocate pcpu_alloc_info"); + gi = &ai->groups[0]; + + /* units are assigned consecutively to possible cpus */ + for_each_possible_cpu(cpu) + gi->cpu_map[gi->nr_units++] = cpu; + + /* set parameters */ + static_size = __per_cpu_end - __per_cpu_start; + reserved_size = PERCPU_MODULE_RESERVE; + dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size; + if (dyn_size < 0) + panic("percpu area overflow static=%zd reserved=%zd\n", + static_size, reserved_size); + + ai->static_size = static_size; + ai->reserved_size = reserved_size; + ai->dyn_size = dyn_size; + ai->unit_size = PERCPU_PAGE_SIZE; + ai->atom_size = PAGE_SIZE; + ai->alloc_size = PERCPU_PAGE_SIZE; + + rc = pcpu_setup_first_chunk(ai, __per_cpu_start + __per_cpu_offset[0]); + if (rc) + panic("failed to setup percpu area (err=%d)", rc); + + pcpu_free_alloc_info(ai); +} +#else +#define alloc_per_cpu_data() do { } while (0) +#endif /* CONFIG_SMP */ + +/** + * find_memory - setup memory map + * + * Walk the EFI memory map and find usable memory for the system, 
taking + * into account reserved areas. + */ +void __init +find_memory (void) +{ + unsigned long bootmap_size; + + reserve_memory(); + + /* first find highest page frame number */ + min_low_pfn = ~0UL; + max_low_pfn = 0; + efi_memmap_walk(find_max_min_low_pfn, NULL); + max_pfn = max_low_pfn; + /* how many bytes to cover all the pages */ + bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT; + + /* look for a location to hold the bootmap */ + bootmap_start = ~0UL; + efi_memmap_walk(find_bootmap_location, &bootmap_size); + if (bootmap_start == ~0UL) + panic("Cannot find %ld bytes for bootmap\n", bootmap_size); + + bootmap_size = init_bootmem_node(NODE_DATA(0), + (bootmap_start >> PAGE_SHIFT), 0, max_pfn); + + /* Free all available memory, then mark bootmem-map as being in use. */ + efi_memmap_walk(filter_rsvd_memory, free_bootmem); + reserve_bootmem(bootmap_start, bootmap_size, BOOTMEM_DEFAULT); + + find_initrd(); + + alloc_per_cpu_data(); +} + +/* + * Set up the page tables. + */ + +void __init +paging_init (void) +{ + unsigned long max_dma; + unsigned long max_zone_pfns[MAX_NR_ZONES]; + + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); +#ifdef CONFIG_ZONE_DMA + max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_zone_pfns[ZONE_DMA] = max_dma; +#endif + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + +#ifdef CONFIG_VIRTUAL_MEM_MAP + efi_memmap_walk(filter_memory, register_active_ranges); + efi_memmap_walk(find_largest_hole, (u64 *)&max_gap); + if (max_gap < LARGE_GAP) { + vmem_map = (struct page *) 0; + free_area_init_nodes(max_zone_pfns); + } else { + unsigned long map_size; + + /* allocate virtual_mem_map */ + + map_size = PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) * + sizeof(struct page)); + VMALLOC_END -= map_size; + vmem_map = (struct page *) VMALLOC_END; + efi_memmap_walk(create_mem_map_page_table, NULL); + + /* + * alloc_node_mem_map makes an adjustment for mem_map + * which isn't compatible with vmem_map. 
+ */ + NODE_DATA(0)->node_mem_map = vmem_map + + find_min_pfn_with_active_regions(); + free_area_init_nodes(max_zone_pfns); + + printk("Virtual mem_map starts at 0x%p\n", mem_map); + } +#else /* !CONFIG_VIRTUAL_MEM_MAP */ + memblock_add_node(0, PFN_PHYS(max_low_pfn), 0); + free_area_init_nodes(max_zone_pfns); +#endif /* !CONFIG_VIRTUAL_MEM_MAP */ + zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); +} diff --git a/kernel/arch/ia64/mm/discontig.c b/kernel/arch/ia64/mm/discontig.c new file mode 100644 index 000000000..878626805 --- /dev/null +++ b/kernel/arch/ia64/mm/discontig.c @@ -0,0 +1,764 @@ +/* + * Copyright (c) 2000, 2003 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) 2001 Intel Corp. + * Copyright (c) 2001 Tony Luck + * Copyright (c) 2002 NEC Corp. + * Copyright (c) 2002 Kimio Suganuma + * Copyright (c) 2004 Silicon Graphics, Inc + * Russ Anderson + * Jesse Barnes + * Jack Steiner + */ + +/* + * Platform initialization for Discontig Memory + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Track per-node information needed to setup the boot memory allocator, the + * per-node areas, and the real VM. + */ +struct early_node_data { + struct ia64_node_data *node_data; + unsigned long pernode_addr; + unsigned long pernode_size; +#ifdef CONFIG_ZONE_DMA + unsigned long num_dma_physpages; +#endif + unsigned long min_pfn; + unsigned long max_pfn; +}; + +static struct early_node_data mem_data[MAX_NUMNODES] __initdata; +static nodemask_t memory_less_mask __initdata; + +pg_data_t *pgdat_list[MAX_NUMNODES]; + +/* + * To prevent cache aliasing effects, align per-node structures so that they + * start at addresses that are strided by node number. 
+ */ +#define MAX_NODE_ALIGN_OFFSET (32 * 1024 * 1024) +#define NODEDATA_ALIGN(addr, node) \ + ((((addr) + 1024*1024-1) & ~(1024*1024-1)) + \ + (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1))) + +/** + * build_node_maps - callback to setup bootmem structs for each node + * @start: physical start of range + * @len: length of range + * @node: node where this range resides + * + * We allocate a struct bootmem_data for each piece of memory that we wish to + * treat as a virtually contiguous block (i.e. each node). Each such block + * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down + * if necessary. Any non-existent pages will simply be part of the virtual + * memmap. We also update min_low_pfn and max_low_pfn here as we receive + * memory ranges from the caller. + */ +static int __init build_node_maps(unsigned long start, unsigned long len, + int node) +{ + unsigned long spfn, epfn, end = start + len; + struct bootmem_data *bdp = &bootmem_node_data[node]; + + epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT; + spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT; + + if (!bdp->node_low_pfn) { + bdp->node_min_pfn = spfn; + bdp->node_low_pfn = epfn; + } else { + bdp->node_min_pfn = min(spfn, bdp->node_min_pfn); + bdp->node_low_pfn = max(epfn, bdp->node_low_pfn); + } + + return 0; +} + +/** + * early_nr_cpus_node - return number of cpus on a given node + * @node: node to check + * + * Count the number of cpus on @node. We can't use nr_cpus_node() yet because + * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been + * called yet. Note that node 0 will also count all non-existent cpus. + */ +static int __meminit early_nr_cpus_node(int node) +{ + int cpu, n = 0; + + for_each_possible_early_cpu(cpu) + if (node == node_cpuid[cpu].nid) + n++; + + return n; +} + +/** + * compute_pernodesize - compute size of pernode data + * @node: the node id. 
+ */ +static unsigned long __meminit compute_pernodesize(int node) +{ + unsigned long pernodesize = 0, cpus; + + cpus = early_nr_cpus_node(node); + pernodesize += PERCPU_PAGE_SIZE * cpus; + pernodesize += node * L1_CACHE_BYTES; + pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); + pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); + pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); + pernodesize = PAGE_ALIGN(pernodesize); + return pernodesize; +} + +/** + * per_cpu_node_setup - setup per-cpu areas on each node + * @cpu_data: per-cpu area on this node + * @node: node to setup + * + * Copy the static per-cpu data into the region we just set aside and then + * setup __per_cpu_offset for each CPU on this node. Return a pointer to + * the end of the area. + */ +static void *per_cpu_node_setup(void *cpu_data, int node) +{ +#ifdef CONFIG_SMP + int cpu; + + for_each_possible_early_cpu(cpu) { + void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start; + + if (node != node_cpuid[cpu].nid) + continue; + + memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start); + __per_cpu_offset[cpu] = (char *)__va(cpu_data) - + __per_cpu_start; + + /* + * percpu area for cpu0 is moved from the __init area + * which is setup by head.S and used till this point. + * Update ar.k3. This move ensures that percpu + * area for cpu0 is on the correct node and its + * virtual address isn't insanely far from other + * percpu areas which is important for congruent + * percpu allocator. + */ + if (cpu == 0) + ia64_set_kr(IA64_KR_PER_CPU_DATA, + (unsigned long)cpu_data - + (unsigned long)__per_cpu_start); + + cpu_data += PERCPU_PAGE_SIZE; + } +#endif + return cpu_data; +} + +#ifdef CONFIG_SMP +/** + * setup_per_cpu_areas - setup percpu areas + * + * Arch code has already allocated and initialized percpu areas.
All + * this function has to do is to teach the determined layout to the + * dynamic percpu allocator, which happens to be more complex than + * creating whole new ones using helpers. + */ +void __init setup_per_cpu_areas(void) +{ + struct pcpu_alloc_info *ai; + struct pcpu_group_info *uninitialized_var(gi); + unsigned int *cpu_map; + void *base; + unsigned long base_offset; + unsigned int cpu; + ssize_t static_size, reserved_size, dyn_size; + int node, prev_node, unit, nr_units, rc; + + ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids); + if (!ai) + panic("failed to allocate pcpu_alloc_info"); + cpu_map = ai->groups[0].cpu_map; + + /* determine base */ + base = (void *)ULONG_MAX; + for_each_possible_cpu(cpu) + base = min(base, + (void *)(__per_cpu_offset[cpu] + __per_cpu_start)); + base_offset = (void *)__per_cpu_start - base; + + /* build cpu_map, units are grouped by node */ + unit = 0; + for_each_node(node) + for_each_possible_cpu(cpu) + if (node == node_cpuid[cpu].nid) + cpu_map[unit++] = cpu; + nr_units = unit; + + /* set basic parameters */ + static_size = __per_cpu_end - __per_cpu_start; + reserved_size = PERCPU_MODULE_RESERVE; + dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size; + if (dyn_size < 0) + panic("percpu area overflow static=%zd reserved=%zd\n", + static_size, reserved_size); + + ai->static_size = static_size; + ai->reserved_size = reserved_size; + ai->dyn_size = dyn_size; + ai->unit_size = PERCPU_PAGE_SIZE; + ai->atom_size = PAGE_SIZE; + ai->alloc_size = PERCPU_PAGE_SIZE; + + /* + * CPUs are put into groups according to node. Walk cpu_map + * and create new groups at node boundaries. 
+ */ + prev_node = -1; + ai->nr_groups = 0; + for (unit = 0; unit < nr_units; unit++) { + cpu = cpu_map[unit]; + node = node_cpuid[cpu].nid; + + if (node == prev_node) { + gi->nr_units++; + continue; + } + prev_node = node; + + gi = &ai->groups[ai->nr_groups++]; + gi->nr_units = 1; + gi->base_offset = __per_cpu_offset[cpu] + base_offset; + gi->cpu_map = &cpu_map[unit]; + } + + rc = pcpu_setup_first_chunk(ai, base); + if (rc) + panic("failed to setup percpu area (err=%d)", rc); + + pcpu_free_alloc_info(ai); +} +#endif + +/** + * fill_pernode - initialize pernode data. + * @node: the node id. + * @pernode: physical address of pernode data + * @pernodesize: size of the pernode data + */ +static void __init fill_pernode(int node, unsigned long pernode, + unsigned long pernodesize) +{ + void *cpu_data; + int cpus = early_nr_cpus_node(node); + struct bootmem_data *bdp = &bootmem_node_data[node]; + + mem_data[node].pernode_addr = pernode; + mem_data[node].pernode_size = pernodesize; + memset(__va(pernode), 0, pernodesize); + + cpu_data = (void *)pernode; + pernode += PERCPU_PAGE_SIZE * cpus; + pernode += node * L1_CACHE_BYTES; + + pgdat_list[node] = __va(pernode); + pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); + + mem_data[node].node_data = __va(pernode); + pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); + + pgdat_list[node]->bdata = bdp; + pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); + + cpu_data = per_cpu_node_setup(cpu_data, node); + + return; +} + +/** + * find_pernode_space - allocate memory for memory map and per-node structures + * @start: physical start of range + * @len: length of range + * @node: node where this range resides + * + * This routine reserves space for the per-cpu data struct, the list of + * pg_data_ts and the per-node data struct. Each node will have something like + * the following in the first chunk of addr. space large enough to hold it. 
+ * + * ________________________ + * | | + * |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first + * | PERCPU_PAGE_SIZE * | start and length big enough + * | cpus_on_this_node | Node 0 will also have entries for all non-existent cpus. + * |------------------------| + * | local pg_data_t * | + * |------------------------| + * | local ia64_node_data | + * |------------------------| + * | ??? | + * |________________________| + * + * Once this space has been set aside, the bootmem maps are initialized. We + * could probably move the allocation of the per-cpu and ia64_node_data space + * outside of this function and use alloc_bootmem_node(), but doing it here + * is straightforward and we get the alignments we want so... + */ +static int __init find_pernode_space(unsigned long start, unsigned long len, + int node) +{ + unsigned long spfn, epfn; + unsigned long pernodesize = 0, pernode, pages, mapsize; + struct bootmem_data *bdp = &bootmem_node_data[node]; + + spfn = start >> PAGE_SHIFT; + epfn = (start + len) >> PAGE_SHIFT; + + pages = bdp->node_low_pfn - bdp->node_min_pfn; + mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT; + + /* + * Make sure this memory falls within this node's usable memory + * since we may have thrown some away in build_node_maps(). + */ + if (spfn < bdp->node_min_pfn || epfn > bdp->node_low_pfn) + return 0; + + /* Don't setup this node's local space twice... */ + if (mem_data[node].pernode_addr) + return 0; + + /* + * Calculate total size needed, incl. what's necessary + * for good alignment and alias prevention. + */ + pernodesize = compute_pernodesize(node); + pernode = NODEDATA_ALIGN(start, node); + + /* Is this range big enough for what we want to store here?
*/ + if (start + len > (pernode + pernodesize + mapsize)) + fill_pernode(node, pernode, pernodesize); + + return 0; +} + +/** + * free_node_bootmem - free bootmem allocator memory for use + * @start: physical start of range + * @len: length of range + * @node: node where this range resides + * + * Simply calls the bootmem allocator to free the specified range from + * the given pg_data_t's bdata struct. After this function has been called + * for all the entries in the EFI memory map, the bootmem allocator will + * be ready to service allocation requests. + */ +static int __init free_node_bootmem(unsigned long start, unsigned long len, + int node) +{ + free_bootmem_node(pgdat_list[node], start, len); + + return 0; +} + +/** + * reserve_pernode_space - reserve memory for per-node space + * + * Reserve the space used by the bootmem maps & per-node space in the boot + * allocator so that when we actually create the real mem maps we don't + * use their memory. + */ +static void __init reserve_pernode_space(void) +{ + unsigned long base, size, pages; + struct bootmem_data *bdp; + int node; + + for_each_online_node(node) { + pg_data_t *pdp = pgdat_list[node]; + + if (node_isset(node, memory_less_mask)) + continue; + + bdp = pdp->bdata; + + /* First the bootmem_map itself */ + pages = bdp->node_low_pfn - bdp->node_min_pfn; + size = bootmem_bootmap_pages(pages) << PAGE_SHIFT; + base = __pa(bdp->node_bootmem_map); + reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT); + + /* Now the per-node space */ + size = mem_data[node].pernode_size; + base = __pa(mem_data[node].pernode_addr); + reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT); + } +} + +static void __meminit scatter_node_data(void) +{ + pg_data_t **dst; + int node; + + /* + * for_each_online_node() can't be used here. + * node_online_map is not set for hot-added nodes at this time, + * because we are halfway through initialization of the new node's + * structures.
If for_each_online_node() is used, a new node's + * pg_data_ptrs will not be initialized. Instead of using it, + * pgdat_list[] is checked. + */ + for_each_node(node) { + if (pgdat_list[node]) { + dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs; + memcpy(dst, pgdat_list, sizeof(pgdat_list)); + } + } +} + +/** + * initialize_pernode_data - fixup per-cpu & per-node pointers + * + * Each node's per-node area has a copy of the global pg_data_t list, so + * we copy that to each node here, as well as setting the per-cpu pointer + * to the local node data structure. The active_cpus field of the per-node + * structure gets setup by the platform_cpu_init() function later. + */ +static void __init initialize_pernode_data(void) +{ + int cpu, node; + + scatter_node_data(); + +#ifdef CONFIG_SMP + /* Set the node_data pointer for each per-cpu struct */ + for_each_possible_early_cpu(cpu) { + node = node_cpuid[cpu].nid; + per_cpu(ia64_cpu_info, cpu).node_data = + mem_data[node].node_data; + } +#else + { + struct cpuinfo_ia64 *cpu0_cpu_info; + cpu = 0; + node = node_cpuid[cpu].nid; + cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start + + ((char *)&ia64_cpu_info - __per_cpu_start)); + cpu0_cpu_info->node_data = mem_data[node].node_data; + } +#endif /* CONFIG_SMP */ +} + +/** + * memory_less_node_alloc - * attempt to allocate memory on the best NUMA slit + * node but fall back to any other node when __alloc_bootmem_node fails + * for best.
+ * @nid: node id + * @pernodesize: size of this node's pernode data + */ +static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize) +{ + void *ptr = NULL; + u8 best = 0xff; + int bestnode = -1, node, anynode = 0; + + for_each_online_node(node) { + if (node_isset(node, memory_less_mask)) + continue; + else if (node_distance(nid, node) < best) { + best = node_distance(nid, node); + bestnode = node; + } + anynode = node; + } + + if (bestnode == -1) + bestnode = anynode; + + ptr = __alloc_bootmem_node(pgdat_list[bestnode], pernodesize, + PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + + return ptr; +} + +/** + * memory_less_nodes - allocate and initialize CPU only nodes pernode + * information. + */ +static void __init memory_less_nodes(void) +{ + unsigned long pernodesize; + void *pernode; + int node; + + for_each_node_mask(node, memory_less_mask) { + pernodesize = compute_pernodesize(node); + pernode = memory_less_node_alloc(node, pernodesize); + fill_pernode(node, __pa(pernode), pernodesize); + } + + return; +} + +/** + * find_memory - walk the EFI memory map and setup the bootmem allocator + * + * Called early in boot to setup the bootmem allocator, and to + * allocate the per-cpu and per-node structures. 
+ */ +void __init find_memory(void) +{ + int node; + + reserve_memory(); + + if (num_online_nodes() == 0) { + printk(KERN_ERR "node info missing!\n"); + node_set_online(0); + } + + nodes_or(memory_less_mask, memory_less_mask, node_online_map); + min_low_pfn = -1; + max_low_pfn = 0; + + /* These actually end up getting called by call_pernode_memory() */ + efi_memmap_walk(filter_rsvd_memory, build_node_maps); + efi_memmap_walk(filter_rsvd_memory, find_pernode_space); + efi_memmap_walk(find_max_min_low_pfn, NULL); + + for_each_online_node(node) + if (bootmem_node_data[node].node_low_pfn) { + node_clear(node, memory_less_mask); + mem_data[node].min_pfn = ~0UL; + } + + efi_memmap_walk(filter_memory, register_active_ranges); + + /* + * Initialize the boot memory maps in reverse order since that's + * what the bootmem allocator expects + */ + for (node = MAX_NUMNODES - 1; node >= 0; node--) { + unsigned long pernode, pernodesize, map; + struct bootmem_data *bdp; + + if (!node_online(node)) + continue; + else if (node_isset(node, memory_less_mask)) + continue; + + bdp = &bootmem_node_data[node]; + pernode = mem_data[node].pernode_addr; + pernodesize = mem_data[node].pernode_size; + map = pernode + pernodesize; + + init_bootmem_node(pgdat_list[node], + map>>PAGE_SHIFT, + bdp->node_min_pfn, + bdp->node_low_pfn); + } + + efi_memmap_walk(filter_rsvd_memory, free_node_bootmem); + + reserve_pernode_space(); + memory_less_nodes(); + initialize_pernode_data(); + + max_pfn = max_low_pfn; + + find_initrd(); +} + +#ifdef CONFIG_SMP +/** + * per_cpu_init - setup per-cpu variables + * + * find_pernode_space() does most of this already, we just need to set + * local_per_cpu_offset + */ +void *per_cpu_init(void) +{ + int cpu; + static int first_time = 1; + + if (first_time) { + first_time = 0; + for_each_possible_early_cpu(cpu) + per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; + } + + return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; +} +#endif /* CONFIG_SMP */ + 
+/** + * call_pernode_memory - use SRAT to call callback functions with node info + * @start: physical start of range + * @len: length of range + * @arg: function to call for each range + * + * efi_memmap_walk() knows nothing about layout of memory across nodes. Find + * out to which node a block of memory belongs. Ignore memory that we cannot + * identify, and split blocks that run across multiple nodes. + * + * Take this opportunity to round the start address up and the end address + * down to page boundaries. + */ +void call_pernode_memory(unsigned long start, unsigned long len, void *arg) +{ + unsigned long rs, re, end = start + len; + void (*func)(unsigned long, unsigned long, int); + int i; + + start = PAGE_ALIGN(start); + end &= PAGE_MASK; + if (start >= end) + return; + + func = arg; + + if (!num_node_memblks) { + /* No SRAT table, so assume one node (node 0) */ + if (start < end) + (*func)(start, end - start, 0); + return; + } + + for (i = 0; i < num_node_memblks; i++) { + rs = max(start, node_memblk[i].start_paddr); + re = min(end, node_memblk[i].start_paddr + + node_memblk[i].size); + + if (rs < re) + (*func)(rs, re - rs, node_memblk[i].nid); + + if (re == end) + break; + } +} + +/** + * count_node_pages - callback to build per-node memory info structures + * @start: physical start of range + * @len: length of range + * @node: node where this range resides + * + * Each node has its own number of physical pages, DMAable pages, start, and + * end page frame number. This routine will be called by call_pernode_memory() + * for each piece of usable memory and will setup these values for each node. + * Very similar to build_node_maps().
+ */ +static __init int count_node_pages(unsigned long start, unsigned long len, int node) +{ + unsigned long end = start + len; + +#ifdef CONFIG_ZONE_DMA + if (start <= __pa(MAX_DMA_ADDRESS)) + mem_data[node].num_dma_physpages += + (min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT; +#endif + start = GRANULEROUNDDOWN(start); + end = GRANULEROUNDUP(end); + mem_data[node].max_pfn = max(mem_data[node].max_pfn, + end >> PAGE_SHIFT); + mem_data[node].min_pfn = min(mem_data[node].min_pfn, + start >> PAGE_SHIFT); + + return 0; +} + +/** + * paging_init - setup page tables + * + * paging_init() sets up the page tables for each node of the system and frees + * the bootmem allocator memory for general use. + */ +void __init paging_init(void) +{ + unsigned long max_dma; + unsigned long pfn_offset = 0; + unsigned long max_pfn = 0; + int node; + unsigned long max_zone_pfns[MAX_NR_ZONES]; + + max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; + + efi_memmap_walk(filter_rsvd_memory, count_node_pages); + + sparse_memory_present_with_active_regions(MAX_NUMNODES); + sparse_init(); + +#ifdef CONFIG_VIRTUAL_MEM_MAP + VMALLOC_END -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) * + sizeof(struct page)); + vmem_map = (struct page *) VMALLOC_END; + efi_memmap_walk(create_mem_map_page_table, NULL); + printk("Virtual mem_map starts at 0x%p\n", vmem_map); +#endif + + for_each_online_node(node) { + pfn_offset = mem_data[node].min_pfn; + +#ifdef CONFIG_VIRTUAL_MEM_MAP + NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset; +#endif + if (mem_data[node].max_pfn > max_pfn) + max_pfn = mem_data[node].max_pfn; + } + + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); +#ifdef CONFIG_ZONE_DMA + max_zone_pfns[ZONE_DMA] = max_dma; +#endif + max_zone_pfns[ZONE_NORMAL] = max_pfn; + free_area_init_nodes(max_zone_pfns); + + zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); +} + +#ifdef CONFIG_MEMORY_HOTPLUG +pg_data_t *arch_alloc_nodedata(int nid) +{ + unsigned long 
size = compute_pernodesize(nid); + + return kzalloc(size, GFP_KERNEL); +} + +void arch_free_nodedata(pg_data_t *pgdat) +{ + kfree(pgdat); +} + +void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) +{ + pgdat_list[update_node] = update_pgdat; + scatter_node_data(); +} +#endif + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +{ + return vmemmap_populate_basepages(start, end, node); +} + +void vmemmap_free(unsigned long start, unsigned long end) +{ +} +#endif diff --git a/kernel/arch/ia64/mm/extable.c b/kernel/arch/ia64/mm/extable.c new file mode 100644 index 000000000..c99a41e29 --- /dev/null +++ b/kernel/arch/ia64/mm/extable.c @@ -0,0 +1,115 @@ +/* + * Kernel exception handling table support. Derived from arch/alpha/mm/extable.c. + * + * Copyright (C) 1998, 1999, 2001-2002, 2004 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include + +#include +#include + +static int cmp_ex(const void *a, const void *b) +{ + const struct exception_table_entry *l = a, *r = b; + u64 lip = (u64) &l->addr + l->addr; + u64 rip = (u64) &r->addr + r->addr; + + /* avoid overflow */ + if (lip > rip) + return 1; + if (lip < rip) + return -1; + return 0; +} + +static void swap_ex(void *a, void *b, int size) +{ + struct exception_table_entry *l = a, *r = b, tmp; + u64 delta = (u64) r - (u64) l; + + tmp = *l; + l->addr = r->addr + delta; + l->cont = r->cont + delta; + r->addr = tmp.addr - delta; + r->cont = tmp.cont - delta; +} + +/* + * Sort the exception table. It's usually already sorted, but there + * may be unordered entries due to multiple text sections (such as the + * .init text section). 
Note that the exception-table-entries contain + * location-relative addresses, which requires a bit of care during + * sorting to avoid overflows in the offset members (e.g., it would + * not be safe to make a temporary copy of an exception-table entry on + * the stack, because the stack may be more than 2GB away from the + * exception-table). + */ +void sort_extable (struct exception_table_entry *start, + struct exception_table_entry *finish) +{ + sort(start, finish - start, sizeof(struct exception_table_entry), + cmp_ex, swap_ex); +} + +static inline unsigned long ex_to_addr(const struct exception_table_entry *x) +{ + return (unsigned long)&x->addr + x->addr; +} + +#ifdef CONFIG_MODULES +/* + * Any entry referring to the module init will be at the beginning or + * the end. + */ +void trim_init_extable(struct module *m) +{ + /*trim the beginning*/ + while (m->num_exentries && + within_module_init(ex_to_addr(&m->extable[0]), m)) { + m->extable++; + m->num_exentries--; + } + /*trim the end*/ + while (m->num_exentries && + within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]), + m)) + m->num_exentries--; +} +#endif /* CONFIG_MODULES */ + +const struct exception_table_entry * +search_extable (const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long ip) +{ + const struct exception_table_entry *mid; + unsigned long mid_ip; + long diff; + + while (first <= last) { + mid = &first[(last - first)/2]; + mid_ip = (u64) &mid->addr + mid->addr; + diff = mid_ip - ip; + if (diff == 0) + return mid; + else if (diff < 0) + first = mid + 1; + else + last = mid - 1; + } + return NULL; +} + +void +ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e) +{ + long fix = (u64) &e->cont + e->cont; + + regs->r8 = -EFAULT; + if (fix & 4) + regs->r9 = 0; + regs->cr_iip = fix & ~0xf; + ia64_psr(regs)->ri = fix & 0x3; /* set continuation slot number */ +} diff --git a/kernel/arch/ia64/mm/fault.c 
b/kernel/arch/ia64/mm/fault.c new file mode 100644 index 000000000..70b40d120 --- /dev/null +++ b/kernel/arch/ia64/mm/fault.c @@ -0,0 +1,308 @@ +/* + * MMU fault handling support. + * + * Copyright (C) 1998-2002 Hewlett-Packard Co + * David Mosberger-Tang + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +extern int die(char *, struct pt_regs *, long); + +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs, int trap) +{ + int ret = 0; + + if (!user_mode(regs)) { + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, trap)) + ret = 1; + preempt_enable(); + } + + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs, int trap) +{ + return 0; +} +#endif + +/* + * Return TRUE if ADDRESS points at a page in the kernel's mapped segment + * (inside region 5, on ia64) and that page is present. + */ +static int +mapped_kernel_page_is_present (unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep, pte; + + pgd = pgd_offset_k(address); + if (pgd_none(*pgd) || pgd_bad(*pgd)) + return 0; + + pud = pud_offset(pgd, address); + if (pud_none(*pud) || pud_bad(*pud)) + return 0; + + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd) || pmd_bad(*pmd)) + return 0; + + ptep = pte_offset_kernel(pmd, address); + if (!ptep) + return 0; + + pte = *ptep; + return pte_present(pte); +} + +# define VM_READ_BIT 0 +# define VM_WRITE_BIT 1 +# define VM_EXEC_BIT 2 + +void __kprobes +ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs) +{ + int signal = SIGSEGV, code = SEGV_MAPERR; + struct vm_area_struct *vma, *prev_vma; + struct mm_struct *mm = current->mm; + struct siginfo si; + unsigned long mask; + int fault; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) + | (((isr >> 
IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)); + + /* mmap_sem is performance critical.... */ + prefetchw(&mm->mmap_sem); + + /* + * If we're in an interrupt or have no user context, we must not take the fault.. + */ + if (faulthandler_disabled() || !mm) + goto no_context; + +#ifdef CONFIG_VIRTUAL_MEM_MAP + /* + * If fault is in region 5 and we are in the kernel, we may already + * have the mmap_sem (pfn_valid macro is called during mmap). There + * is no vma for region 5 addr's anyway, so skip getting the semaphore + * and go directly to the exception handling code. + */ + + if ((REGION_NUMBER(address) == 5) && !user_mode(regs)) + goto bad_area_no_up; +#endif + + /* + * This is to handle the kprobes on user space access instructions + */ + if (notify_page_fault(regs, TRAP_BRKPT)) + return; + + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (mask & VM_WRITE) + flags |= FAULT_FLAG_WRITE; +retry: + down_read(&mm->mmap_sem); + + vma = find_vma_prev(mm, address, &prev_vma); + if (!vma && !prev_vma ) + goto bad_area; + + /* + * find_vma_prev() returns vma such that address < vma->vm_end or NULL + * + * May find no vma, but could be that the last vm area is the + * register backing store that needs to expand upwards, in + * this case vma will be null, but prev_vma will be non-null + */ + if (( !vma && prev_vma ) || (address < vma->vm_start) ) + goto check_expansion; + + good_area: + code = SEGV_ACCERR; + + /* OK, we've got a good vm_area for this memory area. Check the access permissions: */ + +# if (((1 << VM_READ_BIT) != VM_READ || (1 << VM_WRITE_BIT) != VM_WRITE) \ + || (1 << VM_EXEC_BIT) != VM_EXEC) +# error File is out of sync with . Please update. +# endif + + if (((isr >> IA64_ISR_R_BIT) & 1UL) && (!(vma->vm_flags & (VM_READ | VM_WRITE)))) + goto bad_area; + + if ((vma->vm_flags & mask) != mask) + goto bad_area; + + /* + * If for any reason at all we couldn't handle the fault, make + * sure we exit gracefully rather than endlessly redo the + * fault.
+ */ + fault = handle_mm_fault(mm, vma, address, flags); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return; + + if (unlikely(fault & VM_FAULT_ERROR)) { + /* + * We ran out of memory, or some other thing happened + * to us that made us unable to handle the page fault + * gracefully. + */ + if (fault & VM_FAULT_OOM) { + goto out_of_memory; + } else if (fault & VM_FAULT_SIGSEGV) { + goto bad_area; + } else if (fault & VM_FAULT_SIGBUS) { + signal = SIGBUS; + goto bad_area; + } + BUG(); + } + + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + + /* No need to up_read(&mm->mmap_sem) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + + goto retry; + } + } + + up_read(&mm->mmap_sem); + return; + + check_expansion: + if (!(prev_vma && (prev_vma->vm_flags & VM_GROWSUP) && (address == prev_vma->vm_end))) { + if (!vma) + goto bad_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start) + || REGION_OFFSET(address) >= RGN_MAP_LIMIT) + goto bad_area; + if (expand_stack(vma, address)) + goto bad_area; + } else { + vma = prev_vma; + if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start) + || REGION_OFFSET(address) >= RGN_MAP_LIMIT) + goto bad_area; + /* + * Since the register backing store is accessed sequentially, + * we disallow growing it by more than a page at a time. 
+ */ + if (address > vma->vm_end + PAGE_SIZE - sizeof(long)) + goto bad_area; + if (expand_upwards(vma, address)) + goto bad_area; + } + goto good_area; + + bad_area: + up_read(&mm->mmap_sem); +#ifdef CONFIG_VIRTUAL_MEM_MAP + bad_area_no_up: +#endif + if ((isr & IA64_ISR_SP) + || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) + { + /* + * This fault was due to a speculative load or lfetch.fault, set the "ed" + * bit in the psr to ensure forward progress. (Target register will get a + * NaT for ld.s, lfetch will be canceled.) + */ + ia64_psr(regs)->ed = 1; + return; + } + if (user_mode(regs)) { + si.si_signo = signal; + si.si_errno = 0; + si.si_code = code; + si.si_addr = (void __user *) address; + si.si_isr = isr; + si.si_flags = __ISR_VALID; + force_sig_info(signal, &si, current); + return; + } + + no_context: + if ((isr & IA64_ISR_SP) + || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) + { + /* + * This fault was due to a speculative load or lfetch.fault, set the "ed" + * bit in the psr to ensure forward progress. (Target register will get a + * NaT for ld.s, lfetch will be canceled.) + */ + ia64_psr(regs)->ed = 1; + return; + } + + /* + * Since we have no vma's for region 5, we might get here even if the address is + * valid, due to the VHPT walker inserting a non present translation that becomes + * stale. If that happens, the non present fault handler already purged the stale + * translation, which fixed the problem. So, we check to see if the translation is + * valid, and return if it is. + */ + if (REGION_NUMBER(address) == 5 && mapped_kernel_page_is_present(address)) + return; + + if (ia64_done_with_exception(regs)) + return; + + /* + * Oops. The kernel tried to access some bad page. We'll have to terminate things + * with extreme prejudice. 
+ */ + bust_spinlocks(1); + + if (address < PAGE_SIZE) + printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference (address %016lx)\n", address); + else + printk(KERN_ALERT "Unable to handle kernel paging request at " + "virtual address %016lx\n", address); + if (die("Oops", regs, isr)) + regs = NULL; + bust_spinlocks(0); + if (regs) + do_exit(SIGKILL); + return; + + out_of_memory: + up_read(&mm->mmap_sem); + if (!user_mode(regs)) + goto no_context; + pagefault_out_of_memory(); +} diff --git a/kernel/arch/ia64/mm/hugetlbpage.c b/kernel/arch/ia64/mm/hugetlbpage.c new file mode 100644 index 000000000..52b7604b5 --- /dev/null +++ b/kernel/arch/ia64/mm/hugetlbpage.c @@ -0,0 +1,199 @@ +/* + * IA-64 Huge TLB Page Support for Kernel. + * + * Copyright (C) 2002-2004 Rohit Seth + * Copyright (C) 2003-2004 Ken Chen + * + * Sep, 2003: add numa support + * Feb, 2004: dynamic hugetlb page size via boot parameter + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned int hpage_shift = HPAGE_SHIFT_DEFAULT; +EXPORT_SYMBOL(hpage_shift); + +pte_t * +huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) +{ + unsigned long taddr = htlbpage_to_page(addr); + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, taddr); + pud = pud_alloc(mm, pgd, taddr); + if (pud) { + pmd = pmd_alloc(mm, pud, taddr); + if (pmd) + pte = pte_alloc_map(mm, NULL, pmd, taddr); + } + return pte; +} + +pte_t * +huge_pte_offset (struct mm_struct *mm, unsigned long addr) +{ + unsigned long taddr = htlbpage_to_page(addr); + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, taddr); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, taddr); + if (pud_present(*pud)) { + pmd = pmd_offset(pud, taddr); + if (pmd_present(*pmd)) + pte = pte_offset_map(pmd, taddr); + } + } + + return pte; +} + +int huge_pmd_unshare(struct mm_struct *mm, 
unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+
+#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
+
+/*
+ * Don't actually need to do any preparation, but need to make sure
+ * the address is in the right region.
+ *
+ * Returns -EINVAL when @len or @addr has bits set outside HPAGE_MASK or
+ * when @addr is not in region RGN_HPAGE, and 0 otherwise.  @file is unused.
+ */
+int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
+{
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+	if (addr & ~HPAGE_MASK)
+		return -EINVAL;
+	if (REGION_NUMBER(addr) != RGN_HPAGE)
+		return -EINVAL;
+
+	return 0;
+}
+/*
+ * Return the struct page that backs @addr.  Yields ERR_PTR(-EINVAL) when
+ * @addr is outside RGN_HPAGE and NULL when no pte exists or the pte is
+ * pte_none().  Otherwise the result is pte_page() advanced by
+ * (addr & ~HPAGE_MASK) >> PAGE_SHIFT pages.  @write is not consulted.
+ */
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write)
+{
+	struct page *page;
+	pte_t *ptep;
+
+	if (REGION_NUMBER(addr) != RGN_HPAGE)
+		return ERR_PTR(-EINVAL);
+
+	ptep = huge_pte_offset(mm, addr);
+	if (!ptep || pte_none(*ptep))
+		return NULL;
+	page = pte_page(*ptep);
+	page += ((addr & ~HPAGE_MASK) >> PAGE_SHIFT);
+	return page;
+}
+int pmd_huge(pmd_t pmd)	/* unconditionally reports 0 */
+{
+	return 0;
+}
+/* Unconditionally reports 0, like pmd_huge(). */
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
+void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+			unsigned long addr, unsigned long end,
+			unsigned long floor, unsigned long ceiling)
+{
+	/*
+	 * This is called to free hugetlb page tables.
+	 *
+	 * The offset of these addresses from the base of the hugetlb
+	 * region must be scaled down by HPAGE_SIZE/PAGE_SIZE so that
+	 * the standard free_pgd_range will free the right page tables.
+	 *
+	 * If floor and ceiling are also in the hugetlb region, they
+	 * must likewise be scaled down; but if outside, left unchanged.
+	 */
+
+	addr = htlbpage_to_page(addr);
+	end  = htlbpage_to_page(end);
+	if (REGION_NUMBER(floor) == RGN_HPAGE)
+		floor = htlbpage_to_page(floor);
+	if (REGION_NUMBER(ceiling) == RGN_HPAGE)
+		ceiling = htlbpage_to_page(ceiling);
+
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+/*
+ * Choose the address for a huge-page mapping of @len bytes.
+ *
+ * Returns -ENOMEM when @len exceeds RGN_MAP_LIMIT and -EINVAL when @len has
+ * bits outside HPAGE_MASK or a MAP_FIXED request fails
+ * prepare_hugepage_range().  A valid MAP_FIXED request gets @addr back
+ * unchanged.  Otherwise vm_unmapped_area() searches from @addr up to
+ * HPAGE_REGION_BASE + RGN_MAP_LIMIT; the search starts at HPAGE_REGION_BASE
+ * instead when @addr is outside RGN_HPAGE or not HPAGE_SIZE-aligned.
+ * @pgoff is unused.
+ */
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+		unsigned long pgoff, unsigned long flags)
+{
+	struct vm_unmapped_area_info info;
+
+	if (len > RGN_MAP_LIMIT)
+		return -ENOMEM;
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+
+	/* Handle MAP_FIXED */
+	if (flags & MAP_FIXED) {
+		if (prepare_hugepage_range(file, addr, len))
+			return -EINVAL;
+		return addr;
+	}
+
+	/* This code assumes that RGN_HPAGE != 0. */
+	if ((REGION_NUMBER(addr) != RGN_HPAGE) || (addr & (HPAGE_SIZE - 1)))
+		addr = HPAGE_REGION_BASE;
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = addr;
+	info.high_limit = HPAGE_REGION_BASE + RGN_MAP_LIMIT;
+	info.align_mask = PAGE_MASK & (HPAGE_SIZE - 1);
+	info.align_offset = 0;
+	return vm_unmapped_area(&info);
+}
+/*
+ * Parse a huge page size from @str with memparse().  The value is rejected
+ * (warning printed, 1 returned) when trailing characters remain, when it is
+ * not a power of two, when its bit is not set in the tr_pages mask, when it
+ * is <= PAGE_SIZE, or when it is >= PAGE_SIZE << MAX_ORDER.  On success
+ * hpage_shift becomes __ffs(size) and the region register for
+ * HPAGE_REGION_BASE is reprogrammed.
+ */
+static int __init hugetlb_setup_sz(char *str)
+{
+	u64 tr_pages;
+	unsigned long long size;
+
+	if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0)
+		/*
+		 * shouldn't happen, but just in case.
+		 */
+		tr_pages = 0x15557000UL;
+
+	size = memparse(str, &str);
+	if (*str || !is_power_of_2(size) || !(tr_pages & size) ||
+		size <= PAGE_SIZE ||
+		size >= (1UL << PAGE_SHIFT << MAX_ORDER)) {
+		printk(KERN_WARNING "Invalid huge page size specified\n");
+		return 1;
+	}
+
+	hpage_shift = __ffs(size);
+	/*
+	 * boot cpu already executed ia64_mmu_init, and has HPAGE_SHIFT_DEFAULT
+	 * override here with new page shift.
+	 */
+	ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2);
+	return 0;
+}
+early_param("hugepagesz", hugetlb_setup_sz);
diff --git a/kernel/arch/ia64/mm/init.c b/kernel/arch/ia64/mm/init.c
new file mode 100644
index 000000000..a9b65cf7b
--- /dev/null
+++ b/kernel/arch/ia64/mm/init.c
@@ -0,0 +1,741 @@
+/*
+ * Initialize MMU support.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang
+ */
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+extern void ia64_tlb_init (void);
+
+unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+unsigned long VMALLOC_END = VMALLOC_END_INIT;
+EXPORT_SYMBOL(VMALLOC_END);
+struct page *vmem_map;
+EXPORT_SYMBOL(vmem_map);
+#endif
+
+struct page *zero_page_memmap_ptr;	/* map entry for zero page */
+EXPORT_SYMBOL(zero_page_memmap_ptr);
+/*
+ * Make the i-cache coherent for the page mapped by @pte.  PG_arch_1 in
+ * page->flags acts as the "already clean" marker.  When it is not set,
+ * PAGE_SIZE << compound_order(page) bytes starting at the page address are
+ * flushed and the bit is then set.
+ */
+void
+__ia64_sync_icache_dcache (pte_t pte)
+{
+	unsigned long addr;
+	struct page *page;
+
+	page = pte_page(pte);
+	addr = (unsigned long) page_address(page);
+
+	if (test_bit(PG_arch_1, &page->flags))
+		return;				/* i-cache is already coherent with d-cache */
+
+	flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page)));
+	set_bit(PG_arch_1, &page->flags);	/* mark page as clean */
+}
+
+/*
+ * Since DMA is i-cache coherent, any (complete) pages that were written via
+ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
+ * flush them when they get mapped into an executable vm-area.
+ */
+void
+dma_mark_clean(void *addr, size_t size)
+{
+	unsigned long pg_addr, end;
+
+	pg_addr = PAGE_ALIGN((unsigned long) addr);	/* a leading partial page is skipped */
+	end = (unsigned long) addr + size;
+	while (pg_addr + PAGE_SIZE <= end) {		/* only pages lying wholly below end */
+		struct page *page = virt_to_page(pg_addr);
+		set_bit(PG_arch_1, &page->flags);
+		pg_addr += PAGE_SIZE;
+	}
+}
+/*
+ * Set current->thread.rbs_bot to the page-aligned address that lies
+ * stack_size bytes below current->mm->start_stack.  stack_size is
+ * rlimit_max(RLIMIT_STACK) rounded down to a multiple of 16 and capped at
+ * MAX_USER_STACK_SIZE.
+ */
+inline void
+ia64_set_rbs_bot (void)
+{
+	unsigned long stack_size = rlimit_max(RLIMIT_STACK) & -16;
+
+	if (stack_size > MAX_USER_STACK_SIZE)
+		stack_size = MAX_USER_STACK_SIZE;
+	current->thread.rbs_bot = PAGE_ALIGN(current->mm->start_stack - stack_size);
+}
+
+/*
+ * This performs some platform-dependent address space initialization.
+ * On IA-64, we want to setup the VM area for the register backing
+ * store (which grows upwards) and install the gateway page which is
+ * used for signal trampolines, etc.
+ */
+void
+ia64_init_addr_space (void)
+{
+	struct vm_area_struct *vma;
+
+	ia64_set_rbs_bot();
+
+	/*
+	 * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
+	 * the problem.  When the process attempts to write to the register backing store
+	 * for the first time, it will get a SEGFAULT in this case.
+ */ + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + if (vma) { + INIT_LIST_HEAD(&vma->anon_vma_chain); + vma->vm_mm = current->mm; + vma->vm_start = current->thread.rbs_bot & PAGE_MASK; + vma->vm_end = vma->vm_start + PAGE_SIZE; + vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT; + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + down_write(¤t->mm->mmap_sem); + if (insert_vm_struct(current->mm, vma)) { + up_write(¤t->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return; + } + up_write(¤t->mm->mmap_sem); + } + + /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ + if (!(current->personality & MMAP_PAGE_ZERO)) { + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + if (vma) { + INIT_LIST_HEAD(&vma->anon_vma_chain); + vma->vm_mm = current->mm; + vma->vm_end = PAGE_SIZE; + vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); + vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | + VM_DONTEXPAND | VM_DONTDUMP; + down_write(¤t->mm->mmap_sem); + if (insert_vm_struct(current->mm, vma)) { + up_write(¤t->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return; + } + up_write(¤t->mm->mmap_sem); + } + } +} + +void +free_initmem (void) +{ + free_reserved_area(ia64_imva(__init_begin), ia64_imva(__init_end), + -1, "unused kernel"); +} + +void __init +free_initrd_mem (unsigned long start, unsigned long end) +{ + /* + * EFI uses 4KB pages while the kernel can use 4KB or bigger. + * Thus EFI and the kernel may have different page sizes. It is + * therefore possible to have the initrd share the same page as + * the end of the kernel (given current setup). 
+ * + * To avoid freeing/using the wrong page (kernel sized) we: + * - align up the beginning of initrd + * - align down the end of initrd + * + * | | + * |=============| a000 + * | | + * | | + * | | 9000 + * |/////////////| + * |/////////////| + * |=============| 8000 + * |///INITRD////| + * |/////////////| + * |/////////////| 7000 + * | | + * |KKKKKKKKKKKKK| + * |=============| 6000 + * |KKKKKKKKKKKKK| + * |KKKKKKKKKKKKK| + * K=kernel using 8KB pages + * + * In this example, we must free page 8000 ONLY. So we must align up + * initrd_start and keep initrd_end as is. + */ + start = PAGE_ALIGN(start); + end = end & PAGE_MASK; + + if (start < end) + printk(KERN_INFO "Freeing initrd memory: %ldkB freed\n", (end - start) >> 10); + + for (; start < end; start += PAGE_SIZE) { + if (!virt_addr_valid(start)) + continue; + free_reserved_page(virt_to_page(start)); + } +} + +/* + * This installs a clean page in the kernel's page table. + * + * Nothing is mapped when a pud, pmd or pte level cannot be allocated or + * when the pte for @address is already populated. @page is returned in + * every case, so callers cannot tell from the result whether it was mapped. + * A page that is not PageReserved() only triggers a KERN_ERR message. + */ +static struct page * __init +put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if (!PageReserved(page)) + printk(KERN_ERR "put_kernel_page: page at 0x%p not in reserved memory\n", + page_address(page)); + + pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */ + + { + pud = pud_alloc(&init_mm, pgd, address); + if (!pud) + goto out; + pmd = pmd_alloc(&init_mm, pud, address); + if (!pmd) + goto out; + pte = pte_alloc_kernel(pmd, address); + if (!pte) + goto out; + if (!pte_none(*pte)) + goto out; + set_pte(pte, mk_pte(page, pgprot)); + } + out: + /* no need for flush_tlb */ + return page; +} +/* Install the gate page mappings at GATE_ADDR through put_kernel_page(). */ +static void __init +setup_gate (void) +{ + void *gate_section; + struct page *page; + + /* + * Map the gate page twice: once read-only to export the ELF + * headers etc.
and once execute-only page to enable
+	 * privilege-promotion via "epc":
+	 */
+	gate_section = paravirt_get_gate_section();
+	page = virt_to_page(ia64_imva(gate_section));
+	put_kernel_page(page, GATE_ADDR, PAGE_READONLY);
+#ifdef HAVE_BUGGY_SEGREL
+	page = virt_to_page(ia64_imva(gate_section + PAGE_SIZE));
+	put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE);
+#else
+	put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE);
+	/* Fill in the holes (if any) with read-only zero pages: */
+	{
+		unsigned long addr;
+
+		for (addr = GATE_ADDR + PAGE_SIZE;
+		     addr < GATE_ADDR + PERCPU_PAGE_SIZE;
+		     addr += PAGE_SIZE)
+		{
+			put_kernel_page(ZERO_PAGE(0), addr,
+					PAGE_READONLY);
+			put_kernel_page(ZERO_PAGE(0), addr + PERCPU_PAGE_SIZE,
+					PAGE_READONLY);
+		}
+	}
+#endif
+	ia64_patch_gate();
+}
+
+static struct vm_area_struct gate_vma;
+/*
+ * Initcall: describe [FIXADDR_USER_START, FIXADDR_USER_END) as a
+ * readable and executable vma that belongs to no mm.
+ */
+static int __init gate_vma_init(void)
+{
+	gate_vma.vm_mm = NULL;
+	gate_vma.vm_start = FIXADDR_USER_START;
+	gate_vma.vm_end = FIXADDR_USER_END;
+	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+	gate_vma.vm_page_prot = __P101;
+
+	return 0;
+}
+__initcall(gate_vma_init);
+/* Every caller gets the single static gate_vma; @mm is ignored. */
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+	return &gate_vma;
+}
+/* Returns 1 when @addr is in [FIXADDR_USER_START, FIXADDR_USER_END), else 0. */
+int in_gate_area_no_mm(unsigned long addr)
+{
+	if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
+		return 1;
+	return 0;
+}
+/* Same test as in_gate_area_no_mm(); @mm is ignored. */
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+	return in_gate_area_no_mm(addr);
+}
+/*
+ * Check that the virtually mapped linear page table fits the implemented
+ * address bits, program its address with ia64_set_pta(), call
+ * ia64_tlb_init() and, under CONFIG_HUGETLB_PAGE, set the region register
+ * for HPAGE_REGION_BASE.  @my_cpu_data is not referenced in the body.
+ */
+void ia64_mmu_init(void *my_cpu_data)
+{
+	unsigned long pta, impl_va_bits;
+	extern void tlb_init(void);
+
+#ifdef CONFIG_DISABLE_VHPT
+#	define VHPT_ENABLE_BIT	0
+#else
+#	define VHPT_ENABLE_BIT	1
+#endif
+
+	/*
+	 * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
+	 * address space.
The IA-64 architecture guarantees that at least 50 bits of
+	 * virtual address space are implemented but if we pick a large enough page size
+	 * (e.g., 64KB), the mapped address space is big enough that it will overlap with
+	 * VMLPT.  I assume that once we run on machines big enough to warrant 64KB pages,
+	 * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a
+	 * problem in practice.  Alternatively, we could truncate the top of the mapped
+	 * address space to not permit mappings that would overlap with the VMLPT.
+	 * --davidm 00/12/06
+	 */
+#	define pte_bits			3
+#	define mapped_space_bits	(3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
+	/*
+	 * The virtual page table has to cover the entire implemented address space within
+	 * a region even though not all of this space may be mappable.  The reason for
+	 * this is that the Access bit and Dirty bit fault handlers perform
+	 * non-speculative accesses to the virtual page table, so the address range of the
+	 * virtual page table itself needs to be covered by virtual page table.
+	 */
+#	define vmlpt_bits		(impl_va_bits - PAGE_SHIFT + pte_bits)
+#	define POW2(n)			(1ULL << (n))
+	/* ffz(~x) is the index of the lowest set bit of x; OR-ing in bits 61-63 caps the result at 61. */
+	impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
+
+	if (impl_va_bits < 51 || impl_va_bits > 61)
+		panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
+	/*
+	 * mapped_space_bits - PAGE_SHIFT is the total number of ptes we need,
+	 * which must fit into "vmlpt_bits - pte_bits" slots. Second half of
+	 * the test makes sure that our mapped space doesn't overlap the
+	 * unimplemented hole in the middle of the region.
+	 */
+	if ((mapped_space_bits - PAGE_SHIFT > vmlpt_bits - pte_bits) ||
+	    (mapped_space_bits > impl_va_bits - 1))
+		panic("Cannot build a big enough virtual-linear page table"
+		      " to cover mapped address space.\n"
+		      " Try using a smaller page size.\n");
+
+
+	/* place the VMLPT at the end of each page-table mapped region: */
+	pta = POW2(61) - POW2(vmlpt_bits);
+
+	/*
+	 * Set the (virtually mapped linear) page table address.  Bit
+	 * 8 selects between the short and long format, bits 2-7 the
+	 * size of the table, and bit 0 whether the VHPT walker is
+	 * enabled.
+	 */
+	ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);
+
+	ia64_tlb_init();
+
+#ifdef CONFIG_HUGETLB_PAGE
+	ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
+	ia64_srlz_d();
+#endif
+}
+/*
+ * vmemmap_find_next_valid_pfn(): start at the vmem_map entry for pfn
+ * node_start_pfn + @i of @node and walk the kernel page tables forward.
+ * Unpopulated pgd/pud/pmd/pte ranges are skipped until a populated pte is
+ * found or the vmem_map entry for the node's end pfn is reached.  The
+ * resulting address is converted back to a pfn and returned relative to
+ * node_start_pfn.
+ */
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+int vmemmap_find_next_valid_pfn(int node, int i)
+{
+	unsigned long end_address, hole_next_pfn;
+	unsigned long stop_address;
+	pg_data_t *pgdat = NODE_DATA(node);
+
+	end_address = (unsigned long) &vmem_map[pgdat->node_start_pfn + i];
+	end_address = PAGE_ALIGN(end_address);
+	stop_address = (unsigned long) &vmem_map[pgdat_end_pfn(pgdat)];
+
+	do {
+		pgd_t *pgd;
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *pte;
+
+		pgd = pgd_offset_k(end_address);
+		if (pgd_none(*pgd)) {
+			end_address += PGDIR_SIZE;
+			continue;
+		}
+
+		pud = pud_offset(pgd, end_address);
+		if (pud_none(*pud)) {
+			end_address += PUD_SIZE;
+			continue;
+		}
+
+		pmd = pmd_offset(pud, end_address);
+		if (pmd_none(*pmd)) {
+			end_address += PMD_SIZE;
+			continue;
+		}
+
+		pte = pte_offset_kernel(pmd, end_address);
+retry_pte:
+		if (pte_none(*pte)) {
+			end_address += PAGE_SIZE;
+			pte++;
+			/* stay in this pte page until the next PMD-size boundary */
+			if ((end_address < stop_address) &&
+			    (end_address != ALIGN(end_address, 1UL << PMD_SHIFT)))
+				goto retry_pte;
+			continue;
+		}
+		/* Found next valid vmem_map page */
+		break;
+	} while (end_address < stop_address);
+
+	end_address = min(end_address, stop_address);
+	end_address = end_address - (unsigned long) vmem_map + sizeof(struct page)
- 1;
+	hole_next_pfn = end_address / sizeof(struct page);
+	return hole_next_pfn - pgdat->node_start_pfn;
+}
+/*
+ * Back the vmem_map entries that describe [start, end) with memory.  For
+ * each page of that vmem_map range, every missing pgd/pud/pmd level and the
+ * final pte are populated with bootmem pages allocated on the node that
+ * owns @start.  Always returns 0; @arg is unused.
+ */
+int __init create_mem_map_page_table(u64 start, u64 end, void *arg)
+{
+	unsigned long address, start_page, end_page;
+	struct page *map_start, *map_end;
+	int node;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
+	map_end   = vmem_map + (__pa(end) >> PAGE_SHIFT);
+
+	start_page = (unsigned long) map_start & PAGE_MASK;
+	end_page = PAGE_ALIGN((unsigned long) map_end);
+	node = paddr_to_nid(__pa(start));
+
+	for (address = start_page; address < end_page; address += PAGE_SIZE) {
+		pgd = pgd_offset_k(address);
+		if (pgd_none(*pgd))
+			pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+		pud = pud_offset(pgd, address);
+
+		if (pud_none(*pud))
+			pud_populate(&init_mm, pud, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+		pmd = pmd_offset(pud, address);
+
+		if (pmd_none(*pmd))
+			pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+		pte = pte_offset_kernel(pmd, address);
+
+		if (pte_none(*pte))
+			set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> PAGE_SHIFT,
+					     PAGE_KERNEL));
+	}
+	return 0;
+}
+/* Arguments that memmap_init() passes to virtual_memmap_init() via efi_memmap_walk(). */
+struct memmap_init_callback_data {
+	struct page *start;
+	struct page *end;
+	int nid;
+	unsigned long zone;
+};
+/*
+ * Walk callback: clamp the vmem_map range for [start, end) to
+ * [args->start, args->end), widen it as described in the body, and
+ * initialize it with memmap_init_zone().  Always returns 0.
+ */
+static int __meminit
+virtual_memmap_init(u64 start, u64 end, void *arg)
+{
+	struct memmap_init_callback_data *args;
+	struct page *map_start, *map_end;
+
+	args = (struct memmap_init_callback_data *) arg;
+	map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
+	map_end   = vmem_map + (__pa(end) >> PAGE_SHIFT);
+
+	if (map_start < args->start)
+		map_start = args->start;
+	if (map_end > args->end)
+		map_end = args->end;
+
+	/*
+	 * We have to initialize "out of bounds" struct page elements that fit completely
+	 * on the same pages that were allocated for the "in bounds" elements because
they
+	 * may be referenced later (and found to be "reserved").
+	 */
+	map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page);
+	map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) map_end)
+		    / sizeof(struct page));
+
+	if (map_start < map_end)
+		memmap_init_zone((unsigned long)(map_end - map_start),
+				 args->nid, args->zone, page_to_pfn(map_start),
+				 MEMMAP_EARLY);
+	return 0;
+}
+/*
+ * Without a vmem_map the zone memmap is initialized directly.  Otherwise
+ * the EFI memory map is walked and virtual_memmap_init() initializes the
+ * parts that fall inside [start_pfn, start_pfn + size).
+ */
+void __meminit
+memmap_init (unsigned long size, int nid, unsigned long zone,
+	     unsigned long start_pfn)
+{
+	if (!vmem_map)
+		memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY);
+	else {
+		struct page *start;
+		struct memmap_init_callback_data args;
+
+		start = pfn_to_page(start_pfn);
+		args.start = start;
+		args.end = start + size;
+		args.nid = nid;
+		args.zone = zone;
+
+		efi_memmap_walk(virtual_memmap_init, &args);
+	}
+}
+/*
+ * A pfn counts as valid when the first byte of its struct page can be read
+ * with __get_user() and, if the struct page straddles a page boundary, its
+ * last byte can be read as well.
+ */
+int
+ia64_pfn_valid (unsigned long pfn)
+{
+	char byte;
+	struct page *pg = pfn_to_page(pfn);
+
+	return     (__get_user(byte, (char __user *) pg) == 0)
+		&& ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK))
+			|| (__get_user(byte, (char __user *) (pg + 1) - 1) == 0));
+}
+EXPORT_SYMBOL(ia64_pfn_valid);
+/*
+ * Walk callback: keep in *arg the largest gap seen between the end of the
+ * previous range and @start.  The previous end is held in a function-local
+ * static that starts at PAGE_OFFSET.  Always returns 0.
+ */
+int __init find_largest_hole(u64 start, u64 end, void *arg)
+{
+	u64 *max_gap = arg;
+
+	static u64 last_end = PAGE_OFFSET;
+
+	/* NOTE: this algorithm assumes efi memmap table is ordered */
+
+	if (*max_gap < (start - last_end))
+		*max_gap = start - last_end;
+	last_end = end;
+	return 0;
+}
+
+#endif /* CONFIG_VIRTUAL_MEM_MAP */
+/*
+ * Add [start, start + len) to memblock for node @nid.  Under CONFIG_KEXEC
+ * a start that lies strictly inside crashk_res is moved to crashk_res.end
+ * and an end strictly inside it is moved to crashk_res.start.  Nothing is
+ * added when the range ends up empty.  Always returns 0.
+ */
+int __init register_active_ranges(u64 start, u64 len, int nid)
+{
+	u64 end = start + len;
+
+#ifdef CONFIG_KEXEC
+	if (start > crashk_res.start && start < crashk_res.end)
+		start = crashk_res.end;
+	if (end > crashk_res.start && end < crashk_res.end)
+		end = crashk_res.start;
+#endif
+
+	if (start < end)
+		memblock_add_node(__pa(start), end - start, nid);
+	return 0;
+}
+/* Fold the pfn span of [start, end) into the globals min_low_pfn and max_low_pfn; @arg is unused. */
+int
+find_max_min_low_pfn (u64 start, u64 end, void *arg)
+{
+	unsigned long pfn_start, pfn_end;
+#ifdef
CONFIG_FLATMEM
+	pfn_start = (PAGE_ALIGN(__pa(start))) >> PAGE_SHIFT;
+	pfn_end = (PAGE_ALIGN(__pa(end - 1))) >> PAGE_SHIFT;
+#else
+	pfn_start = GRANULEROUNDDOWN(__pa(start)) >> PAGE_SHIFT;
+	pfn_end = GRANULEROUNDUP(__pa(end - 1)) >> PAGE_SHIFT;
+#endif
+	min_low_pfn = min(min_low_pfn, pfn_start);
+	max_low_pfn = max(max_low_pfn, pfn_end);
+	return 0;
+}
+
+/*
+ * Boot command-line option "nolwsys" can be used to disable the use of any light-weight
+ * system call handler.  When this option is in effect, all fsyscalls will end up bubbling
+ * down into the kernel and calling the normal (heavy-weight) syscall handler.  This is
+ * useful for performance testing, but conceivably could also come in handy for debugging
+ * purposes.
+ */
+
+static int nolwsys __initdata;
+/* __setup handler for "nolwsys": sets the flag; @s is ignored. */
+static int __init
+nolwsys_setup (char *s)
+{
+	nolwsys = 1;
+	return 1;
+}
+
+__setup("nolwsys", nolwsys_setup);
+/*
+ * Checks that each page-table level fills exactly one page, runs
+ * platform_dma_init() under CONFIG_PCI, derives max_mapnr and high_memory
+ * from max_low_pfn, calls free_all_bootmem(), then fills the fsyscall table
+ * and sets up the gate page.
+ */
+void __init
+mem_init (void)
+{
+	int i;
+
+	BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE);
+	BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE);
+	BUG_ON(PTRS_PER_PTE * sizeof(pte_t) != PAGE_SIZE);
+
+#ifdef CONFIG_PCI
+	/*
+	 * This needs to be called _after_ the command line has been parsed but _before_
+	 * any drivers that may need the PCI DMA interface are initialized or bootmem has
+	 * been freed.
+	 */
+	platform_dma_init();
+#endif
+
+#ifdef CONFIG_FLATMEM
+	BUG_ON(!mem_map);
+#endif
+
+	set_max_mapnr(max_low_pfn);
+	high_memory = __va(max_low_pfn * PAGE_SIZE);
+	free_all_bootmem();
+	mem_init_print_info(NULL);
+
+	/*
+	 * For fsyscall entrpoints with no light-weight handler, use the ordinary
+	 * (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry
+	 * code can tell them apart.
+	 */
+	for (i = 0; i < NR_syscalls; ++i) {
+		extern unsigned long sys_call_table[NR_syscalls];
+		unsigned long *fsyscall_table = paravirt_get_fsyscall_table();
+
+		/* with "nolwsys" every slot is redirected, not only the empty ones */
+		if (!fsyscall_table[i] || nolwsys)
+			fsyscall_table[i] = sys_call_table[i] | 1;
+	}
+	setup_gate();
+}
+/*
+ * arch_add_memory(): convert [start, start + size) to pfns and pass them to
+ * __add_pages() for the zone of node @nid picked by
+ * zone_for_memory(..., ZONE_NORMAL).  A non-zero result is logged and
+ * returned.
+ */
+#ifdef CONFIG_MEMORY_HOTPLUG
+int arch_add_memory(int nid, u64 start, u64 size)
+{
+	pg_data_t *pgdat;
+	struct zone *zone;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	int ret;
+
+	pgdat = NODE_DATA(nid);
+
+	zone = pgdat->node_zones +
+		zone_for_memory(nid, start, size, ZONE_NORMAL);
+	ret = __add_pages(nid, zone, start_pfn, nr_pages);
+
+	if (ret)
+		printk("%s: Problem encountered in __add_pages() as ret=%d\n",
+		       __func__,  ret);
+
+	return ret;
+}
+/*
+ * arch_remove_memory(): pass the pfn range of [start, start + size) to
+ * __remove_pages() for the zone that holds the first pfn.  A non-zero
+ * result is logged with pr_warn() and returned.
+ */
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	struct zone *zone;
+	int ret;
+
+	zone = page_zone(pfn_to_page(start_pfn));
+	ret = __remove_pages(zone, start_pfn, nr_pages);
+	if (ret)
+		pr_warn("%s: Problem encountered in __remove_pages() as"
+			" ret=%d\n", __func__,  ret);
+
+	return ret;
+}
+#endif
+#endif
+
+/**
+ * show_mem - give short summary of memory stats
+ *
+ * Shows a simple page count of reserved and used pages in the system.
+ * For discontig machines, it does this on a per-pgdat basis.
+ */
+void show_mem(unsigned int filter)
+{
+	int total_reserved = 0;
+	unsigned long total_present = 0;
+	pg_data_t *pgdat;
+
+	printk(KERN_INFO "Mem-info:\n");
+	show_free_areas(filter);
+	printk(KERN_INFO "Node memory in pages:\n");
+	for_each_online_pgdat(pgdat) {
+		unsigned long present;
+		unsigned long flags;
+		int reserved = 0;
+		int nid = pgdat->node_id;
+		int zoneid;
+
+		if (skip_free_areas_node(filter, nid))
+			continue;
+		pgdat_resize_lock(pgdat, &flags);	/* counters are read under the pgdat resize lock */
+
+		for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
+			struct zone *zone = &pgdat->node_zones[zoneid];
+			if (!populated_zone(zone))
+				continue;
+
+			/* "reserved" = pages present in the zone but not managed */
+			reserved += zone->present_pages - zone->managed_pages;
+		}
+		present = pgdat->node_present_pages;
+
+		pgdat_resize_unlock(pgdat, &flags);
+		total_present += present;
+		total_reserved += reserved;
+		/* NOTE(review): this per-node message has no trailing newline */
+		printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, ",
+		       nid, present, reserved);
+	}
+	printk(KERN_INFO "%ld pages of RAM\n", total_present);
+	printk(KERN_INFO "%d reserved pages\n", total_reserved);
+	printk(KERN_INFO "Total of %ld pages in page table cache\n",
+	       quicklist_total_size());
+	printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
+}
diff --git a/kernel/arch/ia64/mm/ioremap.c b/kernel/arch/ia64/mm/ioremap.c
new file mode 100644
index 000000000..43964cde6
--- /dev/null
+++ b/kernel/arch/ia64/mm/ioremap.c
@@ -0,0 +1,125 @@
+/*
+ * (c) Copyright 2006, 2007 Hewlett-Packard Development Company, L.P.
+ *	Bjorn Helgaas
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+/* Form the uncached address by OR-ing @phys_addr into __IA64_UNCACHED_OFFSET. */
+static inline void __iomem *
+__ioremap_uc(unsigned long phys_addr)
+{
+	return (void __iomem *) (__IA64_UNCACHED_OFFSET | phys_addr);
+}
+/*
+ * Return the phys_to_virt() address when kern_mem_attribute() reports
+ * EFI_MEMORY_WB for the range, and the uncached address otherwise.
+ */
+void __iomem *
+early_ioremap (unsigned long phys_addr, unsigned long size)
+{
+	u64 attr;
+	attr = kern_mem_attribute(phys_addr, size);
+	if (attr & EFI_MEMORY_WB)
+		return (void __iomem *) phys_to_virt(phys_addr);
+	return __ioremap_uc(phys_addr);
+}
+/*
+ * Map @size bytes at @phys_addr, choosing the mapping in this order:
+ *  1. the attribute the kernel already uses for the range: WB gives the
+ *     phys_to_virt() address, UC gives the uncached address;
+ *  2. the phys_to_virt() address when EFI reports WB for the whole granule;
+ *  3. a page-table mapping in a VM_IOREMAP area when EFI reports WB for the
+ *     page-aligned range (NULL if the area or the mapping cannot be set up);
+ *  4. the uncached address.
+ */
+void __iomem *
+ioremap (unsigned long phys_addr, unsigned long size)
+{
+	void __iomem *addr;
+	struct vm_struct *area;
+	unsigned long offset;
+	pgprot_t prot;
+	u64 attr;
+	unsigned long gran_base, gran_size;
+	unsigned long page_base;
+
+	/*
+	 * For things in kern_memmap, we must use the same attribute
+	 * as the rest of the kernel.  For more details, see
+	 * Documentation/ia64/aliasing.txt.
+	 */
+	attr = kern_mem_attribute(phys_addr, size);
+	if (attr & EFI_MEMORY_WB)
+		return (void __iomem *) phys_to_virt(phys_addr);
+	else if (attr & EFI_MEMORY_UC)
+		return __ioremap_uc(phys_addr);
+
+	/*
+	 * Some chipsets don't support UC access to memory.  If
+	 * WB is supported for the whole granule, we prefer that.
+	 */
+	gran_base = GRANULEROUNDDOWN(phys_addr);
+	gran_size = GRANULEROUNDUP(phys_addr + size) - gran_base;
+	if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB)
+		return (void __iomem *) phys_to_virt(phys_addr);
+
+	/*
+	 * WB is not supported for the whole granule, so we can't use
+	 * the region 7 identity mapping.  If we can safely cover the
+	 * area with kernel page table mappings, we can use those
+	 * instead.
+	 */
+	page_base = phys_addr & PAGE_MASK;
+	size = PAGE_ALIGN(phys_addr + size) - page_base;
+	if (efi_mem_attribute(page_base, size) & EFI_MEMORY_WB) {
+		prot = PAGE_KERNEL;
+
+		/*
+		 * Mappings have to be page-aligned
+		 */
+		offset = phys_addr & ~PAGE_MASK;
+		phys_addr &= PAGE_MASK;
+
+		/*
+		 * Ok, go for it..
+		 */
+		area = get_vm_area(size, VM_IOREMAP);
+		if (!area)
+			return NULL;
+
+		area->phys_addr = phys_addr;
+		addr = (void __iomem *) area->addr;
+		if (ioremap_page_range((unsigned long) addr,
+				(unsigned long) addr + size, phys_addr, prot)) {
+			vunmap((void __force *) addr);
+			return NULL;
+		}
+
+		/* re-apply the sub-page offset that was masked off above */
+		return (void __iomem *) (offset + (char __iomem *)addr);
+	}
+
+	return __ioremap_uc(phys_addr);
+}
+EXPORT_SYMBOL(ioremap);
+/*
+ * Return the uncached address for @phys_addr, or NULL when
+ * kern_mem_attribute() reports EFI_MEMORY_WB for the range.
+ */
+void __iomem *
+ioremap_nocache (unsigned long phys_addr, unsigned long size)
+{
+	if (kern_mem_attribute(phys_addr, size) & EFI_MEMORY_WB)
+		return NULL;
+
+	return __ioremap_uc(phys_addr);
+}
+EXPORT_SYMBOL(ioremap_nocache);
+/* Empty body: nothing is undone for early_ioremap() mappings. */
+void
+early_iounmap (volatile void __iomem *addr, unsigned long size)
+{
+}
+/*
+ * Only addresses in region RGN_GATE are handed to vunmap(), page-aligned
+ * first; for every other address this is a no-op.
+ */
+void
+iounmap (volatile void __iomem *addr)
+{
+	if (REGION_NUMBER(addr) == RGN_GATE)
+		vunmap((void *) ((unsigned long) addr & PAGE_MASK));
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/kernel/arch/ia64/mm/numa.c b/kernel/arch/ia64/mm/numa.c
new file mode 100644
index 000000000..ea21d4cad
--- /dev/null
+++ b/kernel/arch/ia64/mm/numa.c
@@ -0,0 +1,110 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * This file contains NUMA specific variables and functions which can
+ * be split away from DISCONTIGMEM and are used on NUMA machines with
+ * contiguous memory.
+ *
+ *                         2002/08/07 Erich Focht
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+/*
+ * The following structures are usually initialized by ACPI or
+ * similar mechanisms and describe the NUMA characteristics of the machine.
+ */
+int num_node_memblks;
+struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
+struct node_cpuid_s node_cpuid[NR_CPUS] =
+	{ [0 ...
NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } };
+
+/*
+ * This is a matrix with "distances" between nodes, they should be
+ * proportional to the memory access latency ratios.
+ */
+u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES];
+
+/*
+ * Identify which cnode a physical address resides on.
+ *
+ * Returns the nid of the first node_memblk[] entry whose
+ * [start_paddr, start_paddr + size) range contains @paddr.  When no entry
+ * matches, the result is -1 if any memblks are registered and 0 if none are.
+ */
+int
+paddr_to_nid(unsigned long paddr)
+{
+	int	i;
+
+	for (i = 0; i < num_node_memblks; i++)
+		if (paddr >= node_memblk[i].start_paddr &&
+		    paddr < node_memblk[i].start_paddr + node_memblk[i].size)
+			break;
+
+	return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0);
+}
+
+#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_NUMA)
+/*
+ * Because of holes evaluate on section limits.
+ * If the section of memory exists, then return the node where the section
+ * resides.  Otherwise return -1.  This is used by
+ * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where
+ * the section resides.
+ *
+ * The most recent hit (section range and nid) is cached in function-local
+ * statics and checked first.
+ */
+int __meminit __early_pfn_to_nid(unsigned long pfn)
+{
+	int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;
+	/*
+	 * NOTE: The following SMP-unsafe globals are only used early in boot
+	 * when the kernel is running single-threaded.
+	 */
+	static int __meminitdata last_ssec, last_esec;
+	static int __meminitdata last_nid;
+
+	if (section >= last_ssec && section < last_esec)
+		return last_nid;
+
+	for (i = 0; i < num_node_memblks; i++) {
+		ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT;
+		/* end section, rounded up to a whole section */
+		esec = (node_memblk[i].start_paddr + node_memblk[i].size +
+			((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT;
+		if (section >= ssec && section < esec) {
+			last_ssec = ssec;
+			last_esec = esec;
+			last_nid = node_memblk[i].nid;
+			return node_memblk[i].nid;
+		}
+	}
+
+	return -1;
+}
+/* Calls unmap_cpu_from_node(@cpu, NUMA_NO_NODE). */
+void numa_clear_node(int cpu)
+{
+	unmap_cpu_from_node(cpu, NUMA_NO_NODE);
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ *  SRAT information is stored in node_memblk[], then we can use SRAT
+ *  information at memory-hot-add if necessary.
+ */
+/* Node id for @addr from paddr_to_nid(); a negative result is mapped to node 0. */
+int memory_add_physaddr_to_nid(u64 addr)
+{
+	int nid = paddr_to_nid(addr);
+	if (nid < 0)
+		return 0;
+	return nid;
+}
+
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+#endif
+#endif
diff --git a/kernel/arch/ia64/mm/tlb.c b/kernel/arch/ia64/mm/tlb.c
new file mode 100644
index 000000000..ed6129768
--- /dev/null
+++ b/kernel/arch/ia64/mm/tlb.c
@@ -0,0 +1,561 @@
+/*
+ * TLB support routines.
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ *	David Mosberger-Tang
+ *
+ * 08/02/00 A. Mallick
+ *		Modified RID allocation for SMP
+ *          Goutham Rao
+ *              IPI based ptc implementation and A-step IPI implementation.
+ * Rohit Seth
+ * Ken Chen
+ * Christophe de Dinechin : Avoid ptc.e on memory allocation
+ * Copyright (C) 2007 Intel Corp
+ *	Fenghua Yu
+ *	Add multiple ptc.g/ptc.ga instruction support in global tlb purge.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static struct {
+	u64 mask;		/* mask of supported purge page-sizes */
+	unsigned long max_bits;	/* log2 of largest supported purge page-size */
+} purge;
+
+struct ia64_ctx ia64_ctx = {
+	.lock =	__SPIN_LOCK_UNLOCKED(ia64_ctx.lock),
+	.next =	1,
+	.max_ctx = ~0U
+};
+
+DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
+DEFINE_PER_CPU(u8, ia64_tr_num);  /* Number of TR slots in current processor */
+DEFINE_PER_CPU(u8, ia64_tr_used); /* Max slot number used by kernel */
+
+struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
+
+/*
+ * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
+ * Called after cpu_init() has setup ia64_ctx.max_ctx based on
+ * maximum RID that is supported by boot CPU.
+ *
+ * ia64_ctx.flushmap is allocated with the same size; both allocations are
+ * (max_ctx+1)>>3 bytes, i.e. one bit per context number.
+ */
+void __init
+mmu_context_init (void)
+{
+	ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+	ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+}
+
+/*
+ * Acquire the ia64_ctx.lock before calling this function!
+ */
+void
+wrap_mmu_context (struct mm_struct *mm)
+{
+	int i, cpu;
+	unsigned long flush_bit;
+
+	/*
+	 * Atomically fetch-and-zero each flushmap word and toggle the same
+	 * bits in bitmap (presumably freeing released context numbers;
+	 * confirm against the code that sets flushmap bits).
+	 */
+	for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
+		flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
+		ia64_ctx.bitmap[i] ^= flush_bit;
+	}
+
+	/* use offset at 300 to skip daemons */
+	ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+				ia64_ctx.max_ctx, 300);
+	ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+				ia64_ctx.max_ctx, ia64_ctx.next);
+
+	/*
+	 * can't call flush_tlb_all() here because of race condition
+	 * with O(1) scheduler [EF]
+	 */
+	cpu = get_cpu(); /* prevent preemption/migration */
+	for_each_online_cpu(i)
+		if (i != cpu)
+			per_cpu(ia64_need_tlb_flush, i) = 1;	/* other CPUs are only flagged for a later flush */
+	put_cpu();
+	local_flush_tlb_all();
+}
+
+/*
+ * Implement "spinaphores" ... like counting semaphores, but they
+ * spin instead of sleeping.  If there are ever any other users for
+ * this primitive it can be moved up to a spinaphore.h header.
+ *
+ * @ticket is advanced by every down_spin() caller and @serve by every
+ * up_spin(); a caller proceeds once its ticket is time_before() @serve.
+ */
+struct spinaphore {
+	unsigned long	ticket;
+	unsigned long	serve;
+};
+/* Reset the ticket counter to 0 and the serve counter to @val. */
+static inline void spinaphore_init(struct spinaphore *ss, int val)
+{
+	ss->ticket = 0;
+	ss->serve = val;
+}
+/* Take a ticket, then spin with cpu_relax() until it is time_before() the serve counter. */
+static inline void down_spin(struct spinaphore *ss)
+{
+	unsigned long t = ia64_fetchadd(1, &ss->ticket, acq), serve;
+
+	if (time_before(t, ss->serve))
+		return;
+
+	ia64_invala();
+
+	for (;;) {
+		asm volatile ("ld8.c.nc %0=[%1]" : "=r"(serve) : "r"(&ss->serve) : "memory");
+		if (time_before(t, serve))
+			return;
+		cpu_relax();
+	}
+}
+/* Advance the serve counter by one. */
+static inline void up_spin(struct spinaphore *ss)
+{
+	ia64_fetchadd(1, &ss->serve, rel);
+}
+
+static struct spinaphore ptcg_sem;
+static u16 nptcg = 1;
+static int need_ptcg_sem = 1;
+static int toolatetochangeptcgsem = 0;
+
+/*
+ * Kernel parameter "nptcg=" overrides max number of concurrent global TLB
+ * purges which is reported from either PAL or SAL PALO.
+ *
+ * We don't have sanity checking for nptcg value. It's the user's responsibility
+ * for valid nptcg value on the platform.
Otherwise, kernel may hang in some + * cases. + */ +static int __init +set_nptcg(char *str) +{ + int value = 0; + + get_option(&str, &value); + setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER); + + return 1; +} + +__setup("nptcg=", set_nptcg); + +/* + * Maximum number of simultaneous ptc.g purges in the system can + * be defined by PAL_VM_SUMMARY (in which case we should take + * the smallest value for any cpu in the system) or by the PAL + * override table (in which case we should ignore the value from + * PAL_VM_SUMMARY). + * + * Kernel parameter "nptcg=" overrides maximum number of simultanesous ptc.g + * purges defined in either PAL_VM_SUMMARY or PAL override table. In this case, + * we should ignore the value from either PAL_VM_SUMMARY or PAL override table. + * + * Complicating the logic here is the fact that num_possible_cpus() + * isn't fully setup until we start bringing cpus online. + */ +void +setup_ptcg_sem(int max_purges, int nptcg_from) +{ + static int kp_override; + static int palo_override; + static int firstcpu = 1; + + if (toolatetochangeptcgsem) { + if (nptcg_from == NPTCG_FROM_PAL && max_purges == 0) + BUG_ON(1 < nptcg); + else + BUG_ON(max_purges < nptcg); + return; + } + + if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) { + kp_override = 1; + nptcg = max_purges; + goto resetsema; + } + if (kp_override) { + need_ptcg_sem = num_possible_cpus() > nptcg; + return; + } + + if (nptcg_from == NPTCG_FROM_PALO) { + palo_override = 1; + + /* In PALO max_purges == 0 really means it! */ + if (max_purges == 0) + panic("Whoa! 
Platform does not support global TLB purges.\n"); + nptcg = max_purges; + if (nptcg == PALO_MAX_TLB_PURGES) { + need_ptcg_sem = 0; + return; + } + goto resetsema; + } + if (palo_override) { + if (nptcg != PALO_MAX_TLB_PURGES) + need_ptcg_sem = (num_possible_cpus() > nptcg); + return; + } + + /* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */ + if (max_purges == 0) max_purges = 1; + + if (firstcpu) { + nptcg = max_purges; + firstcpu = 0; + } + if (max_purges < nptcg) + nptcg = max_purges; + if (nptcg == PAL_MAX_PURGES) { + need_ptcg_sem = 0; + return; + } else + need_ptcg_sem = (num_possible_cpus() > nptcg); + +resetsema: + spinaphore_init(&ptcg_sem, max_purges); +} + +void +ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned long nbits) +{ + struct mm_struct *active_mm = current->active_mm; + + toolatetochangeptcgsem = 1; + + if (mm != active_mm) { + /* Restore region IDs for mm */ + if (mm && active_mm) { + activate_context(mm); + } else { + flush_tlb_all(); + return; + } + } + + if (need_ptcg_sem) + down_spin(&ptcg_sem); + + do { + /* + * Flush ALAT entries also. 
+ */ + ia64_ptcga(start, (nbits << 2)); + ia64_srlz_i(); + start += (1UL << nbits); + } while (start < end); + + if (need_ptcg_sem) + up_spin(&ptcg_sem); + + if (mm != active_mm) { + activate_context(active_mm); + } +} + +void +local_flush_tlb_all (void) +{ + unsigned long i, j, flags, count0, count1, stride0, stride1, addr; + + addr = local_cpu_data->ptce_base; + count0 = local_cpu_data->ptce_count[0]; + count1 = local_cpu_data->ptce_count[1]; + stride0 = local_cpu_data->ptce_stride[0]; + stride1 = local_cpu_data->ptce_stride[1]; + + local_irq_save(flags); + for (i = 0; i < count0; ++i) { + for (j = 0; j < count1; ++j) { + ia64_ptce(addr); + addr += stride1; + } + addr += stride0; + } + local_irq_restore(flags); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +void +flush_tlb_range (struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long size = end - start; + unsigned long nbits; + +#ifndef CONFIG_SMP + if (mm != current->active_mm) { + mm->context = 0; + return; + } +#endif + + nbits = ia64_fls(size + 0xfff); + while (unlikely (((1UL << nbits) & purge.mask) == 0) && + (nbits < purge.max_bits)) + ++nbits; + if (nbits > purge.max_bits) + nbits = purge.max_bits; + start &= ~((1UL << nbits) - 1); + + preempt_disable(); +#ifdef CONFIG_SMP + if (mm != current->active_mm || cpumask_weight(mm_cpumask(mm)) != 1) { + platform_global_tlb_purge(mm, start, end, nbits); + preempt_enable(); + return; + } +#endif + do { + ia64_ptcl(start, (nbits<<2)); + start += (1UL << nbits); + } while (start < end); + preempt_enable(); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} +EXPORT_SYMBOL(flush_tlb_range); + +void ia64_tlb_init(void) +{ + ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */ + u64 tr_pgbits; + long status; + pal_vm_info_1_u_t vm_info_1; + pal_vm_info_2_u_t vm_info_2; + int cpu = smp_processor_id(); + + if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) { + 
printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; " + "defaulting to architected purge page-sizes.\n", status); + purge.mask = 0x115557000UL; + } + purge.max_bits = ia64_fls(purge.mask); + + ia64_get_ptce(&ptce_info); + local_cpu_data->ptce_base = ptce_info.base; + local_cpu_data->ptce_count[0] = ptce_info.count[0]; + local_cpu_data->ptce_count[1] = ptce_info.count[1]; + local_cpu_data->ptce_stride[0] = ptce_info.stride[0]; + local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; + + local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ + status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2); + + if (status) { + printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); + per_cpu(ia64_tr_num, cpu) = 8; + return; + } + per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1; + if (per_cpu(ia64_tr_num, cpu) > + (vm_info_1.pal_vm_info_1_s.max_dtr_entry+1)) + per_cpu(ia64_tr_num, cpu) = + vm_info_1.pal_vm_info_1_s.max_dtr_entry+1; + if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) { + static int justonce = 1; + per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX; + if (justonce) { + justonce = 0; + printk(KERN_DEBUG "TR register number exceeds " + "IA64_TR_ALLOC_MAX!\n"); + } + } +} + +/* + * is_tr_overlap + * + * Check overlap with inserted TRs. + */ +static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size) +{ + u64 tr_log_size; + u64 tr_end; + u64 va_rr = ia64_get_rr(va); + u64 va_rid = RR_TO_RID(va_rr); + u64 va_end = va + (1<rr)) + return 0; + tr_log_size = (p->itir & 0xff) >> 2; + tr_end = p->ifa + (1< tr_end || p->ifa > va_end) + return 0; + return 1; + +} + +/* + * ia64_insert_tr in virtual mode. Allocate a TR slot + * + * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr + * + * va : virtual address. + * pte : pte entries inserted. + * log_size: range to be covered. + * + * Return value: <0 : error No. + * + * >=0 : slot number allocated for TR. + * Must be called with preemption disabled. 
+ */ +int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size) +{ + int i, r; + unsigned long psr; + struct ia64_tr_entry *p; + int cpu = smp_processor_id(); + + if (!ia64_idtrs[cpu]) { + ia64_idtrs[cpu] = kmalloc(2 * IA64_TR_ALLOC_MAX * + sizeof (struct ia64_tr_entry), GFP_KERNEL); + if (!ia64_idtrs[cpu]) + return -ENOMEM; + } + r = -EINVAL; + /*Check overlap with existing TR entries*/ + if (target_mask & 0x1) { + p = ia64_idtrs[cpu]; + for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); + i++, p++) { + if (p->pte & 0x1) + if (is_tr_overlap(p, va, log_size)) { + printk(KERN_DEBUG "Overlapped Entry" + "Inserted for TR Reigster!!\n"); + goto out; + } + } + } + if (target_mask & 0x2) { + p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX; + for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); + i++, p++) { + if (p->pte & 0x1) + if (is_tr_overlap(p, va, log_size)) { + printk(KERN_DEBUG "Overlapped Entry" + "Inserted for TR Reigster!!\n"); + goto out; + } + } + } + + for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) { + switch (target_mask & 0x3) { + case 1: + if (!((ia64_idtrs[cpu] + i)->pte & 0x1)) + goto found; + continue; + case 2: + if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) + goto found; + continue; + case 3: + if (!((ia64_idtrs[cpu] + i)->pte & 0x1) && + !((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) + goto found; + continue; + default: + r = -EINVAL; + goto out; + } + } +found: + if (i >= per_cpu(ia64_tr_num, cpu)) + return -EBUSY; + + /*Record tr info for mca hander use!*/ + if (i > per_cpu(ia64_tr_used, cpu)) + per_cpu(ia64_tr_used, cpu) = i; + + psr = ia64_clear_ic(); + if (target_mask & 0x1) { + ia64_itr(0x1, i, va, pte, log_size); + ia64_srlz_i(); + p = ia64_idtrs[cpu] + i; + p->ifa = va; + p->pte = pte; + p->itir = log_size << 2; + p->rr = ia64_get_rr(va); + } + if (target_mask & 0x2) { + ia64_itr(0x2, i, va, pte, log_size); + ia64_srlz_i(); + p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i; + 
p->ifa = va; + p->pte = pte; + p->itir = log_size << 2; + p->rr = ia64_get_rr(va); + } + ia64_set_psr(psr); + r = i; +out: + return r; +} +EXPORT_SYMBOL_GPL(ia64_itr_entry); + +/* + * ia64_purge_tr + * + * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr. + * slot: slot number to be freed. + * + * Must be called with preemption disabled. + */ +void ia64_ptr_entry(u64 target_mask, int slot) +{ + int cpu = smp_processor_id(); + int i; + struct ia64_tr_entry *p; + + if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu)) + return; + + if (target_mask & 0x1) { + p = ia64_idtrs[cpu] + slot; + if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { + p->pte = 0; + ia64_ptr(0x1, p->ifa, p->itir>>2); + ia64_srlz_i(); + } + } + + if (target_mask & 0x2) { + p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot; + if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { + p->pte = 0; + ia64_ptr(0x2, p->ifa, p->itir>>2); + ia64_srlz_i(); + } + } + + for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) { + if (((ia64_idtrs[cpu] + i)->pte & 0x1) || + ((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) + break; + } + per_cpu(ia64_tr_used, cpu) = i; +} +EXPORT_SYMBOL_GPL(ia64_ptr_entry); diff --git a/kernel/arch/ia64/module.lds b/kernel/arch/ia64/module.lds new file mode 100644 index 000000000..6481f42fb --- /dev/null +++ b/kernel/arch/ia64/module.lds @@ -0,0 +1,13 @@ +SECTIONS { + /* Group unwind sections into a single section: */ + .IA_64.unwind_info : { *(.IA_64.unwind_info*) } + .IA_64.unwind : { *(.IA_64.unwind*) } + /* + * Create place-holder sections to hold the PLTs, GOT, and + * official procedure-descriptors (.opd). 
+ */ + .core.plt : { BYTE(0) } + .init.plt : { BYTE(0) } + .got : { BYTE(0) } + .opd : { BYTE(0) } +} diff --git a/kernel/arch/ia64/oprofile/Makefile b/kernel/arch/ia64/oprofile/Makefile new file mode 100644 index 000000000..aad27a718 --- /dev/null +++ b/kernel/arch/ia64/oprofile/Makefile @@ -0,0 +1,10 @@ +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-$(CONFIG_PERFMON) += perfmon.o diff --git a/kernel/arch/ia64/oprofile/backtrace.c b/kernel/arch/ia64/oprofile/backtrace.c new file mode 100644 index 000000000..6a219a946 --- /dev/null +++ b/kernel/arch/ia64/oprofile/backtrace.c @@ -0,0 +1,131 @@ +/** + * @file backtrace.c + * + * @remark Copyright 2004 Silicon Graphics Inc. All Rights Reserved. + * @remark Read the file COPYING + * + * @author Greg Banks + * @author Keith Owens + * Based on work done for the ia64 port of the SGI kernprof patch, which is + * Copyright (c) 2003-2004 Silicon Graphics Inc. All Rights Reserved. + */ + +#include +#include +#include +#include + +/* + * For IA64 we need to perform a complex little dance to get both + * the struct pt_regs and a synthetic struct switch_stack in place + * to allow the unwind code to work. This dance requires our unwind + * using code to be called from a function called from unw_init_running(). + * There we only get a single void* data pointer, so use this struct + * to hold all the data we need during the unwind. 
+ */ +typedef struct +{ + unsigned int depth; + struct pt_regs *regs; + struct unw_frame_info frame; + unsigned long *prev_pfs_loc; /* state for WAR for old spinlock ool code */ +} ia64_backtrace_t; + +/* Returns non-zero if the PC is in the Interrupt Vector Table */ +static __inline__ int in_ivt_code(unsigned long pc) +{ + extern char ia64_ivt[]; + return (pc >= (u_long)ia64_ivt && pc < (u_long)ia64_ivt+32768); +} + +/* + * Unwind to next stack frame. + */ +static __inline__ int next_frame(ia64_backtrace_t *bt) +{ + /* + * Avoid unsightly console message from unw_unwind() when attempting + * to unwind through the Interrupt Vector Table which has no unwind + * information. + */ + if (in_ivt_code(bt->frame.ip)) + return 0; + + /* + * WAR for spinlock contention from leaf functions. ia64_spinlock_contention_pre3_4 + * has ar.pfs == r0. Leaf functions do not modify ar.pfs so ar.pfs remains + * as 0, stopping the backtrace. Record the previous ar.pfs when the current + * IP is in ia64_spinlock_contention_pre3_4 then unwind, if pfs_loc has not changed + * after unwind then use pt_regs.ar_pfs which is where the real ar.pfs is for + * leaf functions. 
+ */ + if (bt->prev_pfs_loc && bt->regs && bt->frame.pfs_loc == bt->prev_pfs_loc) + bt->frame.pfs_loc = &bt->regs->ar_pfs; + bt->prev_pfs_loc = NULL; + + return unw_unwind(&bt->frame) == 0; +} + + +static void do_ia64_backtrace(struct unw_frame_info *info, void *vdata) +{ + ia64_backtrace_t *bt = vdata; + struct switch_stack *sw; + int count = 0; + u_long pc, sp; + + sw = (struct switch_stack *)(info+1); + /* padding from unw_init_running */ + sw = (struct switch_stack *)(((unsigned long)sw + 15) & ~15); + + unw_init_frame_info(&bt->frame, current, sw); + + /* skip over interrupt frame and oprofile calls */ + do { + unw_get_sp(&bt->frame, &sp); + if (sp >= (u_long)bt->regs) + break; + if (!next_frame(bt)) + return; + } while (count++ < 200); + + /* finally, grab the actual sample */ + while (bt->depth-- && next_frame(bt)) { + unw_get_ip(&bt->frame, &pc); + oprofile_add_trace(pc); + if (unw_is_intr_frame(&bt->frame)) { + /* + * Interrupt received on kernel stack; this can + * happen when timer interrupt fires while processing + * a softirq from the tail end of a hardware interrupt + * which interrupted a system call. Don't laugh, it + * happens! Splice the backtrace into two parts to + * avoid spurious cycles in the gprof output. + */ + /* TODO: split rather than drop the 2nd half */ + break; + } + } +} + +void +ia64_backtrace(struct pt_regs * const regs, unsigned int depth) +{ + ia64_backtrace_t bt; + unsigned long flags; + + /* + * On IA64 there is little hope of getting backtraces from + * user space programs -- the problems of getting the unwind + * information from arbitrary user programs are extreme. 
+ */ + if (user_mode(regs)) + return; + + bt.depth = depth; + bt.regs = regs; + bt.prev_pfs_loc = NULL; + local_irq_save(flags); + unw_init_running(do_ia64_backtrace, &bt); + local_irq_restore(flags); +} diff --git a/kernel/arch/ia64/oprofile/init.c b/kernel/arch/ia64/oprofile/init.c new file mode 100644 index 000000000..31b545c35 --- /dev/null +++ b/kernel/arch/ia64/oprofile/init.c @@ -0,0 +1,38 @@ +/** + * @file init.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + */ + +#include +#include +#include +#include + +extern int perfmon_init(struct oprofile_operations *ops); +extern void perfmon_exit(void); +extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth); + +int __init oprofile_arch_init(struct oprofile_operations *ops) +{ + int ret = -ENODEV; + +#ifdef CONFIG_PERFMON + /* perfmon_init() can fail, but we have no way to report it */ + ret = perfmon_init(ops); +#endif + ops->backtrace = ia64_backtrace; + + return ret; +} + + +void oprofile_arch_exit(void) +{ +#ifdef CONFIG_PERFMON + perfmon_exit(); +#endif +} diff --git a/kernel/arch/ia64/oprofile/perfmon.c b/kernel/arch/ia64/oprofile/perfmon.c new file mode 100644 index 000000000..192d3e8e1 --- /dev/null +++ b/kernel/arch/ia64/oprofile/perfmon.c @@ -0,0 +1,99 @@ +/** + * @file perfmon.c + * + * @remark Copyright 2003 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + */ + +#include +#include +#include +#include +#include +#include + +static int allow_ints; + +static int +perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, + struct pt_regs *regs, unsigned long stamp) +{ + int event = arg->pmd_eventid; + + arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; + + /* the owner of the oprofile event buffer may have exited + * without perfmon being shutdown (e.g. 
SIGSEGV) + */ + if (allow_ints) + oprofile_add_sample(regs, event); + return 0; +} + + +static int perfmon_start(void) +{ + allow_ints = 1; + return 0; +} + + +static void perfmon_stop(void) +{ + allow_ints = 0; +} + + +#define OPROFILE_FMT_UUID { \ + 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c } + +static pfm_buffer_fmt_t oprofile_fmt = { + .fmt_name = "oprofile_format", + .fmt_uuid = OPROFILE_FMT_UUID, + .fmt_handler = perfmon_handler, +}; + + +static char *get_cpu_type(void) +{ + __u8 family = local_cpu_data->family; + + switch (family) { + case 0x07: + return "ia64/itanium"; + case 0x1f: + return "ia64/itanium2"; + default: + return "ia64/ia64"; + } +} + + +/* all the ops are handled via userspace for IA64 perfmon */ + +static int using_perfmon; + +int perfmon_init(struct oprofile_operations *ops) +{ + int ret = pfm_register_buffer_fmt(&oprofile_fmt); + if (ret) + return -ENODEV; + + ops->cpu_type = get_cpu_type(); + ops->start = perfmon_start; + ops->stop = perfmon_stop; + using_perfmon = 1; + printk(KERN_INFO "oprofile: using perfmon.\n"); + return 0; +} + + +void perfmon_exit(void) +{ + if (!using_perfmon) + return; + + pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid); +} diff --git a/kernel/arch/ia64/pci/Makefile b/kernel/arch/ia64/pci/Makefile new file mode 100644 index 000000000..fb14dc520 --- /dev/null +++ b/kernel/arch/ia64/pci/Makefile @@ -0,0 +1,4 @@ +# +# Makefile for the ia64-specific parts of the pci bus +# +obj-y := pci.o fixup.o diff --git a/kernel/arch/ia64/pci/fixup.c b/kernel/arch/ia64/pci/fixup.c new file mode 100644 index 000000000..fc505d58f --- /dev/null +++ b/kernel/arch/ia64/pci/fixup.c @@ -0,0 +1,70 @@ +/* + * Exceptions for specific devices. Usually work-arounds for fatal design flaws. + * Derived from fixup.c of i386 tree. 
+ */ + +#include +#include +#include +#include + +#include + +/* + * Fixup to mark boot BIOS video selected by BIOS before it changes + * + * From information provided by "Jon Smirl" + * + * The standard boot ROM sequence for an x86 machine uses the BIOS + * to select an initial video card for boot display. This boot video + * card will have it's BIOS copied to C0000 in system RAM. + * IORESOURCE_ROM_SHADOW is used to associate the boot video + * card with this copy. On laptops this copy has to be used since + * the main ROM may be compressed or combined with another image. + * See pci_map_rom() for use of this flag. Before marking the device + * with IORESOURCE_ROM_SHADOW check if a vga_default_device is already set + * by either arch cde or vga-arbitration, if so only apply the fixup to this + * already determined primary video card. + */ + +static void pci_fixup_video(struct pci_dev *pdev) +{ + struct pci_dev *bridge; + struct pci_bus *bus; + u16 config; + + if ((strcmp(ia64_platform_name, "dig") != 0) + && (strcmp(ia64_platform_name, "hpzx1") != 0)) + return; + /* Maybe, this machine supports legacy memory map. */ + + /* Is VGA routed to us? */ + bus = pdev->bus; + while (bus) { + bridge = bus->self; + + /* + * From information provided by + * "David Miller" + * The bridge control register is valid for PCI header + * type BRIDGE, or CARDBUS. Host to PCI controllers use + * PCI header type NORMAL. 
/*
 * Build the SAL configuration-space address used with access mode 0:
 * segment in bits 24 and up, bus in bits 16-23, devfn in bits 8-15 and
 * the register offset in the low byte.
 *
 * Every argument is parenthesised so that an expression argument such as
 * "a | b" is shifted as a whole.
 */
#define PCI_SAL_ADDRESS(seg, bus, devfn, reg)		\
	(((u64) (seg) << 24) | ((bus) << 16) | ((devfn) << 8) | (reg))
*/ + +#define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg) \ + (((u64) seg << 28) | (bus << 20) | (devfn << 12) | (reg)) + +int raw_pci_read(unsigned int seg, unsigned int bus, unsigned int devfn, + int reg, int len, u32 *value) +{ + u64 addr, data = 0; + int mode, result; + + if (!value || (seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095)) + return -EINVAL; + + if ((seg | reg) <= 255) { + addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg); + mode = 0; + } else if (sal_revision >= SAL_VERSION_CODE(3,2)) { + addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg); + mode = 1; + } else { + return -EINVAL; + } + + result = ia64_sal_pci_config_read(addr, mode, len, &data); + if (result != 0) + return -EINVAL; + + *value = (u32) data; + return 0; +} + +int raw_pci_write(unsigned int seg, unsigned int bus, unsigned int devfn, + int reg, int len, u32 value) +{ + u64 addr; + int mode, result; + + if ((seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095)) + return -EINVAL; + + if ((seg | reg) <= 255) { + addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg); + mode = 0; + } else if (sal_revision >= SAL_VERSION_CODE(3,2)) { + addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg); + mode = 1; + } else { + return -EINVAL; + } + result = ia64_sal_pci_config_write(addr, mode, len, value); + if (result != 0) + return -EINVAL; + return 0; +} + +static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + return raw_pci_read(pci_domain_nr(bus), bus->number, + devfn, where, size, value); +} + +static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + return raw_pci_write(pci_domain_nr(bus), bus->number, + devfn, where, size, value); +} + +struct pci_ops pci_root_ops = { + .read = pci_read, + .write = pci_write, +}; + +/* Called by ACPI when it finds a new root bus. 
*/ + +static struct pci_controller *alloc_pci_controller(int seg) +{ + struct pci_controller *controller; + + controller = kzalloc(sizeof(*controller), GFP_KERNEL); + if (!controller) + return NULL; + + controller->segment = seg; + return controller; +} + +struct pci_root_info { + struct acpi_device *bridge; + struct pci_controller *controller; + struct list_head resources; + struct resource *res; + resource_size_t *res_offset; + unsigned int res_num; + struct list_head io_resources; + char *name; +}; + +static unsigned int +new_space (u64 phys_base, int sparse) +{ + u64 mmio_base; + int i; + + if (phys_base == 0) + return 0; /* legacy I/O port space */ + + mmio_base = (u64) ioremap(phys_base, 0); + for (i = 0; i < num_io_spaces; i++) + if (io_space[i].mmio_base == mmio_base && + io_space[i].sparse == sparse) + return i; + + if (num_io_spaces == MAX_IO_SPACES) { + pr_err("PCI: Too many IO port spaces " + "(MAX_IO_SPACES=%lu)\n", MAX_IO_SPACES); + return ~0; + } + + i = num_io_spaces++; + io_space[i].mmio_base = mmio_base; + io_space[i].sparse = sparse; + + return i; +} + +static u64 add_io_space(struct pci_root_info *info, + struct acpi_resource_address64 *addr) +{ + struct iospace_resource *iospace; + struct resource *resource; + char *name; + unsigned long base, min, max, base_port; + unsigned int sparse = 0, space_nr, len; + + len = strlen(info->name) + 32; + iospace = kzalloc(sizeof(*iospace) + len, GFP_KERNEL); + if (!iospace) { + dev_err(&info->bridge->dev, + "PCI: No memory for %s I/O port space\n", + info->name); + goto out; + } + + name = (char *)(iospace + 1); + + min = addr->address.minimum; + max = min + addr->address.address_length - 1; + if (addr->info.io.translation_type == ACPI_SPARSE_TRANSLATION) + sparse = 1; + + space_nr = new_space(addr->address.translation_offset, sparse); + if (space_nr == ~0) + goto free_resource; + + base = __pa(io_space[space_nr].mmio_base); + base_port = IO_SPACE_BASE(space_nr); + snprintf(name, len, "%s I/O Ports 
%08lx-%08lx", info->name, + base_port + min, base_port + max); + + /* + * The SDM guarantees the legacy 0-64K space is sparse, but if the + * mapping is done by the processor (not the bridge), ACPI may not + * mark it as sparse. + */ + if (space_nr == 0) + sparse = 1; + + resource = &iospace->res; + resource->name = name; + resource->flags = IORESOURCE_MEM; + resource->start = base + (sparse ? IO_SPACE_SPARSE_ENCODING(min) : min); + resource->end = base + (sparse ? IO_SPACE_SPARSE_ENCODING(max) : max); + if (insert_resource(&iomem_resource, resource)) { + dev_err(&info->bridge->dev, + "can't allocate host bridge io space resource %pR\n", + resource); + goto free_resource; + } + + list_add_tail(&iospace->list, &info->io_resources); + return base_port; + +free_resource: + kfree(iospace); +out: + return ~0; +} + +static acpi_status resource_to_window(struct acpi_resource *resource, + struct acpi_resource_address64 *addr) +{ + acpi_status status; + + /* + * We're only interested in _CRS descriptors that are + * - address space descriptors for memory or I/O space + * - non-zero size + */ + status = acpi_resource_to_address64(resource, addr); + if (ACPI_SUCCESS(status) && + (addr->resource_type == ACPI_MEMORY_RANGE || + addr->resource_type == ACPI_IO_RANGE) && + addr->address.address_length) + return AE_OK; + + return AE_ERROR; +} + +static acpi_status count_window(struct acpi_resource *resource, void *data) +{ + unsigned int *windows = (unsigned int *) data; + struct acpi_resource_address64 addr; + acpi_status status; + + status = resource_to_window(resource, &addr); + if (ACPI_SUCCESS(status)) + (*windows)++; + + return AE_OK; +} + +static acpi_status add_window(struct acpi_resource *res, void *data) +{ + struct pci_root_info *info = data; + struct resource *resource; + struct acpi_resource_address64 addr; + acpi_status status; + unsigned long flags, offset = 0; + struct resource *root; + + /* Return AE_OK for non-window resources to keep scanning for more */ + status 
= resource_to_window(res, &addr); + if (!ACPI_SUCCESS(status)) + return AE_OK; + + if (addr.resource_type == ACPI_MEMORY_RANGE) { + flags = IORESOURCE_MEM; + root = &iomem_resource; + offset = addr.address.translation_offset; + } else if (addr.resource_type == ACPI_IO_RANGE) { + flags = IORESOURCE_IO; + root = &ioport_resource; + offset = add_io_space(info, &addr); + if (offset == ~0) + return AE_OK; + } else + return AE_OK; + + resource = &info->res[info->res_num]; + resource->name = info->name; + resource->flags = flags; + resource->start = addr.address.minimum + offset; + resource->end = resource->start + addr.address.address_length - 1; + info->res_offset[info->res_num] = offset; + + if (insert_resource(root, resource)) { + dev_err(&info->bridge->dev, + "can't allocate host bridge window %pR\n", + resource); + } else { + if (offset) + dev_info(&info->bridge->dev, "host bridge window %pR " + "(PCI address [%#llx-%#llx])\n", + resource, + resource->start - offset, + resource->end - offset); + else + dev_info(&info->bridge->dev, + "host bridge window %pR\n", resource); + } + /* HP's firmware has a hack to work around a Windows bug. 
+ * Ignore these tiny memory ranges */ + if (!((resource->flags & IORESOURCE_MEM) && + (resource->end - resource->start < 16))) + pci_add_resource_offset(&info->resources, resource, + info->res_offset[info->res_num]); + + info->res_num++; + return AE_OK; +} + +static void free_pci_root_info_res(struct pci_root_info *info) +{ + struct iospace_resource *iospace, *tmp; + + list_for_each_entry_safe(iospace, tmp, &info->io_resources, list) + kfree(iospace); + + kfree(info->name); + kfree(info->res); + info->res = NULL; + kfree(info->res_offset); + info->res_offset = NULL; + info->res_num = 0; + kfree(info->controller); + info->controller = NULL; +} + +static void __release_pci_root_info(struct pci_root_info *info) +{ + int i; + struct resource *res; + struct iospace_resource *iospace; + + list_for_each_entry(iospace, &info->io_resources, list) + release_resource(&iospace->res); + + for (i = 0; i < info->res_num; i++) { + res = &info->res[i]; + + if (!res->parent) + continue; + + if (!(res->flags & (IORESOURCE_MEM | IORESOURCE_IO))) + continue; + + release_resource(res); + } + + free_pci_root_info_res(info); + kfree(info); +} + +static void release_pci_root_info(struct pci_host_bridge *bridge) +{ + struct pci_root_info *info = bridge->release_data; + + __release_pci_root_info(info); +} + +static int +probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, + int busnum, int domain) +{ + char *name; + + name = kmalloc(16, GFP_KERNEL); + if (!name) + return -ENOMEM; + + sprintf(name, "PCI Bus %04x:%02x", domain, busnum); + info->bridge = device; + info->name = name; + + acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window, + &info->res_num); + if (info->res_num) { + info->res = + kzalloc_node(sizeof(*info->res) * info->res_num, + GFP_KERNEL, info->controller->node); + if (!info->res) { + kfree(name); + return -ENOMEM; + } + + info->res_offset = + kzalloc_node(sizeof(*info->res_offset) * info->res_num, + GFP_KERNEL, info->controller->node); 
+ if (!info->res_offset) { + kfree(name); + kfree(info->res); + info->res = NULL; + return -ENOMEM; + } + + info->res_num = 0; + acpi_walk_resources(device->handle, METHOD_NAME__CRS, + add_window, info); + } else + kfree(name); + + return 0; +} + +struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) +{ + struct acpi_device *device = root->device; + int domain = root->segment; + int bus = root->secondary.start; + struct pci_controller *controller; + struct pci_root_info *info = NULL; + int busnum = root->secondary.start; + struct pci_bus *pbus; + int ret; + + controller = alloc_pci_controller(domain); + if (!controller) + return NULL; + + controller->companion = device; + controller->node = acpi_get_node(device->handle); + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + dev_err(&device->dev, + "pci_bus %04x:%02x: ignored (out of memory)\n", + domain, busnum); + kfree(controller); + return NULL; + } + + info->controller = controller; + INIT_LIST_HEAD(&info->io_resources); + INIT_LIST_HEAD(&info->resources); + + ret = probe_pci_root_info(info, device, busnum, domain); + if (ret) { + kfree(info->controller); + kfree(info); + return NULL; + } + /* insert busn resource at first */ + pci_add_resource(&info->resources, &root->secondary); + /* + * See arch/x86/pci/acpi.c. + * The desired pci bus might already be scanned in a quirk. We + * should handle the case here, but it appears that IA64 hasn't + * such quirk. So we just ignore the case now. 
+ */ + pbus = pci_create_root_bus(NULL, bus, &pci_root_ops, controller, + &info->resources); + if (!pbus) { + pci_free_resource_list(&info->resources); + __release_pci_root_info(info); + return NULL; + } + + pci_set_host_bridge_release(to_pci_host_bridge(pbus->bridge), + release_pci_root_info, info); + pci_scan_child_bus(pbus); + return pbus; +} + +int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) +{ + /* + * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL + * here, pci_create_root_bus() has been called by someone else and + * sysdata is likely to be different from what we expect. Let it go in + * that case. + */ + if (!bridge->dev.parent) { + struct pci_controller *controller = bridge->bus->sysdata; + ACPI_COMPANION_SET(&bridge->dev, controller->companion); + } + return 0; +} + +void pcibios_fixup_device_resources(struct pci_dev *dev) +{ + int idx; + + if (!dev->bus) + return; + + for (idx = 0; idx < PCI_BRIDGE_RESOURCES; idx++) { + struct resource *r = &dev->resource[idx]; + + if (!r->flags || r->parent || !r->start) + continue; + + pci_claim_resource(dev, idx); + } +} +EXPORT_SYMBOL_GPL(pcibios_fixup_device_resources); + +static void pcibios_fixup_bridge_resources(struct pci_dev *dev) +{ + int idx; + + if (!dev->bus) + return; + + for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) { + struct resource *r = &dev->resource[idx]; + + if (!r->flags || r->parent || !r->start) + continue; + + pci_claim_bridge_resource(dev, idx); + } +} + +/* + * Called after each bus is probed, but before its children are examined. 
+ */ +void pcibios_fixup_bus(struct pci_bus *b) +{ + struct pci_dev *dev; + + if (b->self) { + pci_read_bridge_bases(b); + pcibios_fixup_bridge_resources(b->self); + } + list_for_each_entry(dev, &b->devices, bus_list) + pcibios_fixup_device_resources(dev); + platform_pci_fixup_bus(b); +} + +void pcibios_add_bus(struct pci_bus *bus) +{ + acpi_pci_add_bus(bus); +} + +void pcibios_remove_bus(struct pci_bus *bus) +{ + acpi_pci_remove_bus(bus); +} + +void pcibios_set_master (struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} + +int +pcibios_enable_device (struct pci_dev *dev, int mask) +{ + int ret; + + ret = pci_enable_resources(dev, mask); + if (ret < 0) + return ret; + + if (!dev->msi_enabled) + return acpi_pci_irq_enable(dev); + return 0; +} + +void +pcibios_disable_device (struct pci_dev *dev) +{ + BUG_ON(atomic_read(&dev->enable_cnt)); + if (!dev->msi_enabled) + acpi_pci_irq_disable(dev); +} + +resource_size_t +pcibios_align_resource (void *data, const struct resource *res, + resource_size_t size, resource_size_t align) +{ + return res->start; +} + +int +pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine) +{ + unsigned long size = vma->vm_end - vma->vm_start; + pgprot_t prot; + + /* + * I/O space cannot be accessed via normal processor loads and + * stores on this platform. + */ + if (mmap_state == pci_mmap_io) + /* + * XXX we could relax this for I/O spaces for which ACPI + * indicates that the space is 1-to-1 mapped. But at the + * moment, we don't support multiple PCI address spaces and + * the legacy I/O space is not 1-to-1 mapped, so this is moot. + */ + return -EINVAL; + + if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) + return -EINVAL; + + prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size, + vma->vm_page_prot); + + /* + * If the user requested WC, the kernel uses UC or WC for this region, + * and the chipset supports WC, we can use WC. 
Otherwise, we have to + * use the same attribute the kernel uses. + */ + if (write_combine && + ((pgprot_val(prot) & _PAGE_MA_MASK) == _PAGE_MA_UC || + (pgprot_val(prot) & _PAGE_MA_MASK) == _PAGE_MA_WC) && + efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start)) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + else + vma->vm_page_prot = prot; + + if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +/** + * ia64_pci_get_legacy_mem - generic legacy mem routine + * @bus: bus to get legacy memory base address for + * + * Find the base of legacy memory for @bus. This is typically the first + * megabyte of bus address space for @bus or is simply 0 on platforms whose + * chipsets support legacy I/O and memory routing. Returns the base address + * or an error pointer if an error occurred. + * + * This is the ia64 generic version of this routine. Other platforms + * are free to override it with a machine vector. + */ +char *ia64_pci_get_legacy_mem(struct pci_bus *bus) +{ + return (char *)__IA64_UNCACHED_OFFSET; +} + +/** + * pci_mmap_legacy_page_range - map legacy memory space to userland + * @bus: bus whose legacy space we're mapping + * @vma: vma passed in by mmap + * + * Map legacy memory space for this device back to userspace using a machine + * vector to get the base address. + */ +int +pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state) +{ + unsigned long size = vma->vm_end - vma->vm_start; + pgprot_t prot; + char *addr; + + /* We only support mmap'ing of legacy memory space */ + if (mmap_state != pci_mmap_mem) + return -ENOSYS; + + /* + * Avoid attribute aliasing. See Documentation/ia64/aliasing.txt + * for more details. 
+ */ + if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) + return -EINVAL; + prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size, + vma->vm_page_prot); + + addr = pci_get_legacy_mem(bus); + if (IS_ERR(addr)) + return PTR_ERR(addr); + + vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT; + vma->vm_page_prot = prot; + + if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + size, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +/** + * ia64_pci_legacy_read - read from legacy I/O space + * @bus: bus to read + * @port: legacy port value + * @val: caller allocated storage for returned value + * @size: number of bytes to read + * + * Simply reads @size bytes from @port and puts the result in @val. + * + * Again, this (and the write routine) are generic versions that can be + * overridden by the platform. This is necessary on platforms that don't + * support legacy I/O routing or that hard fail on legacy I/O timeouts. + */ +int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size) +{ + int ret = size; + + switch (size) { + case 1: + *val = inb(port); + break; + case 2: + *val = inw(port); + break; + case 4: + *val = inl(port); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +/** + * ia64_pci_legacy_write - perform a legacy I/O write + * @bus: bus pointer + * @port: port to write + * @val: value to write + * @size: number of bytes to write from @val + * + * Simply writes @size bytes of @val to @port. + */ +int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) +{ + int ret = size; + + switch (size) { + case 1: + outb(val, port); + break; + case 2: + outw(val, port); + break; + case 4: + outl(val, port); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +/** + * set_pci_cacheline_size - determine cacheline size for PCI devices + * + * We want to use the line-size of the outer-most cache. We assume + * that this line-size is the same for all CPUs. 
+ * + * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info(). + */ +static void __init set_pci_dfl_cacheline_size(void) +{ + unsigned long levels, unique_caches; + long status; + pal_cache_config_info_t cci; + + status = ia64_pal_cache_summary(&levels, &unique_caches); + if (status != 0) { + pr_err("%s: ia64_pal_cache_summary() failed " + "(status=%ld)\n", __func__, status); + return; + } + + status = ia64_pal_cache_config_info(levels - 1, + /* cache_type (data_or_unified)= */ 2, &cci); + if (status != 0) { + pr_err("%s: ia64_pal_cache_config_info() failed " + "(status=%ld)\n", __func__, status); + return; + } + pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4; +} + +u64 ia64_dma_get_required_mask(struct device *dev) +{ + u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); + u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT)); + u64 mask; + + if (!high_totalram) { + /* convert to mask just covering totalram */ + low_totalram = (1 << (fls(low_totalram) - 1)); + low_totalram += low_totalram - 1; + mask = low_totalram; + } else { + high_totalram = (1 << (fls(high_totalram) - 1)); + high_totalram += high_totalram - 1; + mask = (((u64)high_totalram) << 32) + 0xffffffff; + } + return mask; +} +EXPORT_SYMBOL_GPL(ia64_dma_get_required_mask); + +u64 dma_get_required_mask(struct device *dev) +{ + return platform_dma_get_required_mask(dev); +} +EXPORT_SYMBOL_GPL(dma_get_required_mask); + +static int __init pcibios_init(void) +{ + set_pci_dfl_cacheline_size(); + return 0; +} + +subsys_initcall(pcibios_init); diff --git a/kernel/arch/ia64/scripts/check-gas b/kernel/arch/ia64/scripts/check-gas new file mode 100755 index 000000000..2499e0b22 --- /dev/null +++ b/kernel/arch/ia64/scripts/check-gas @@ -0,0 +1,15 @@ +#!/bin/sh +dir=$(dirname $0) +CC=$1 +OBJDUMP=$2 +tmp=${TMPDIR:-/tmp} +out=$tmp/out$$.o +$CC -c $dir/check-gas-asm.S -o $out +res=$($OBJDUMP -r --section .data $out | fgrep 00004 | tr -s ' ' |cut -f3 -d' ') +rm -f $out +if [ $res != ".text" ]; then + 
echo buggy +else + echo good +fi +exit 0 diff --git a/kernel/arch/ia64/scripts/check-gas-asm.S b/kernel/arch/ia64/scripts/check-gas-asm.S new file mode 100644 index 000000000..010e1d227 --- /dev/null +++ b/kernel/arch/ia64/scripts/check-gas-asm.S @@ -0,0 +1,2 @@ +[1:] nop 0 + .xdata4 ".data", 0, 1b-. diff --git a/kernel/arch/ia64/scripts/check-model.c b/kernel/arch/ia64/scripts/check-model.c new file mode 100644 index 000000000..e1d4e86e3 --- /dev/null +++ b/kernel/arch/ia64/scripts/check-model.c @@ -0,0 +1 @@ +int __attribute__ ((__model__ (__small__))) x; diff --git a/kernel/arch/ia64/scripts/check-segrel.S b/kernel/arch/ia64/scripts/check-segrel.S new file mode 100644 index 000000000..3be4e3dbe --- /dev/null +++ b/kernel/arch/ia64/scripts/check-segrel.S @@ -0,0 +1,4 @@ + .rodata + data4 @segrel(start) + .data +start: diff --git a/kernel/arch/ia64/scripts/check-segrel.lds b/kernel/arch/ia64/scripts/check-segrel.lds new file mode 100644 index 000000000..85a0d54fb --- /dev/null +++ b/kernel/arch/ia64/scripts/check-segrel.lds @@ -0,0 +1,12 @@ +SECTIONS { + . = SIZEOF_HEADERS; + .rodata : { *(.rodata) } :ro + .note : { *(.note*) } + . 
= 0xa0000; + .data : { *(.data) } :dat + /DISCARD/ : { *(*) } +} +PHDRS { + ro PT_LOAD FILEHDR PHDRS; + dat PT_LOAD; +} diff --git a/kernel/arch/ia64/scripts/check-serialize.S b/kernel/arch/ia64/scripts/check-serialize.S new file mode 100644 index 000000000..0400c1068 --- /dev/null +++ b/kernel/arch/ia64/scripts/check-serialize.S @@ -0,0 +1,2 @@ + .serialize.data + .serialize.instruction diff --git a/kernel/arch/ia64/scripts/check-text-align.S b/kernel/arch/ia64/scripts/check-text-align.S new file mode 100644 index 000000000..03f586abb --- /dev/null +++ b/kernel/arch/ia64/scripts/check-text-align.S @@ -0,0 +1,6 @@ + .proc foo + .prologue +foo: .save rp, r2 + nop 0 + .align 64 + .endp foo diff --git a/kernel/arch/ia64/scripts/pvcheck.sed b/kernel/arch/ia64/scripts/pvcheck.sed new file mode 100644 index 000000000..e59809a3f --- /dev/null +++ b/kernel/arch/ia64/scripts/pvcheck.sed @@ -0,0 +1,33 @@ +# +# Checker for paravirtualizations of privileged operations. +# +s/ssm.*psr\.ic.*/.warning \"ssm psr.ic should not be used directly\"/g +s/rsm.*psr\.ic.*/.warning \"rsm psr.ic should not be used directly\"/g +s/ssm.*psr\.i.*/.warning \"ssm psr.i should not be used directly\"/g +s/rsm.*psr\.i.*/.warning \"rsm psr.i should not be used directly\"/g +s/ssm.*psr\.dt.*/.warning \"ssm psr.dt should not be used directly\"/g +s/rsm.*psr\.dt.*/.warning \"rsm psr.dt should not be used directly\"/g +s/mov.*=.*cr\.ifa/.warning \"cr.ifa should not used directly\"/g +s/mov.*=.*cr\.itir/.warning \"cr.itir should not used directly\"/g +s/mov.*=.*cr\.isr/.warning \"cr.isr should not used directly\"/g +s/mov.*=.*cr\.iha/.warning \"cr.iha should not used directly\"/g +s/mov.*=.*cr\.ipsr/.warning \"cr.ipsr should not used directly\"/g +s/mov.*=.*cr\.iim/.warning \"cr.iim should not used directly\"/g +s/mov.*=.*cr\.iip/.warning \"cr.iip should not used directly\"/g +s/mov.*=.*cr\.ivr/.warning \"cr.ivr should not used directly\"/g +s/mov.*=[^\.]*psr/.warning \"psr should not used directly\"/g # 
avoid ar.fpsr +s/mov.*=.*ar\.eflags/.warning \"ar.eflags should not used directly\"/g +s/mov.*=.*ar\.itc.*/.warning \"ar.itc should not used directly\"/g +s/mov.*cr\.ifa.*=.*/.warning \"cr.ifa should not used directly\"/g +s/mov.*cr\.itir.*=.*/.warning \"cr.itir should not used directly\"/g +s/mov.*cr\.iha.*=.*/.warning \"cr.iha should not used directly\"/g +s/mov.*cr\.ipsr.*=.*/.warning \"cr.ipsr should not used directly\"/g +s/mov.*cr\.ifs.*=.*/.warning \"cr.ifs should not used directly\"/g +s/mov.*cr\.iip.*=.*/.warning \"cr.iip should not used directly\"/g +s/mov.*cr\.kr.*=.*/.warning \"cr.kr should not used directly\"/g +s/mov.*ar\.eflags.*=.*/.warning \"ar.eflags should not used directly\"/g +s/itc\.i.*/.warning \"itc.i should not be used directly.\"/g +s/itc\.d.*/.warning \"itc.d should not be used directly.\"/g +s/bsw\.0/.warning \"bsw.0 should not be used directly.\"/g +s/bsw\.1/.warning \"bsw.1 should not be used directly.\"/g +s/ptc\.ga.*/.warning \"ptc.ga should not be used directly.\"/g diff --git a/kernel/arch/ia64/scripts/toolchain-flags b/kernel/arch/ia64/scripts/toolchain-flags new file mode 100755 index 000000000..3f0c2adac --- /dev/null +++ b/kernel/arch/ia64/scripts/toolchain-flags @@ -0,0 +1,53 @@ +#!/bin/sh +# +# Check whether linker can handle cross-segment @segrel(): +# +CPPFLAGS="" +CC=$1 +OBJDUMP=$2 +READELF=$3 +dir=$(dirname $0) +tmp=${TMPDIR:-/tmp} +out=$tmp/out$$ + +# Check whether cross-segment segment-relative relocs work fine. We need +# that for building the gate DSO: + +$CC -nostdlib -static -Wl,-T$dir/check-segrel.lds $dir/check-segrel.S -o $out +res=$($OBJDUMP --full --section .rodata $out | fgrep 000 | cut -f3 -d' ') +rm -f $out +if [ $res != 00000a00 ]; then + CPPFLAGS="$CPPFLAGS -DHAVE_BUGGY_SEGREL" + cat >&2 <&1 | grep __model__ | grep -q attrib +then + CPPFLAGS="$CPPFLAGS -DHAVE_MODEL_SMALL_ATTRIBUTE" +fi +rm -f $out + +# Check whether assembler supports .serialize.{data,instruction} directive. 
#!/usr/bin/python
#
# Usage: unwcheck.py FILE
#
# This script checks the unwind info of each function in file FILE
# and verifies that the sum of the region-lengths matches the total
# length of the function.
#
# Based on a shell/awk script originally written by Harish Patil,
# which was converted to Perl by Matthew Chapman, which was converted
# to Python by David Mosberger.
#
# Written to run unchanged under both Python 2 and Python 3.
#
import os
import re
import sys

if len(sys.argv) != 2:
    print("Usage: %s FILE" % sys.argv[0])
    sys.exit(2)

readelf = os.getenv("READELF", "readelf")

# Raw strings keep the regex escapes intact without relying on Python
# passing unknown string escapes through.
start_pattern = re.compile(r"<([^>]*)>: \[0x([0-9a-f]+)-0x([0-9a-f]+)\]")
rlen_pattern = re.compile(r".*rlen=([0-9]+)")

def check_func (func, slots, rlen_sum):
    """Report an error if the slot count and summed region length differ."""
    if slots != rlen_sum:
        global num_errors
        num_errors += 1
        if not func: func = "[%#x-%#x]" % (start, end)
        print("ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum))
    return

num_funcs = 0
num_errors = 0
func = False
# Defined up front so check_func() can format a range even if an rlen
# line shows up before the first function header.
start = 0
end = 0
slots = 0
rlen_sum = 0
for line in os.popen("%s -u %s" % (readelf, sys.argv[1])):
    m = start_pattern.match(line)
    if m:
        check_func(func, slots, rlen_sum)

        func  = m.group(1)
        start = int(m.group(2), 16)
        end   = int(m.group(3), 16)
        # Three instruction slots per 16-byte bundle; keep it an integer.
        slots = 3 * (end - start) // 16
        rlen_sum = 0
        num_funcs += 1
    else:
        m = rlen_pattern.match(line)
        if m:
            rlen_sum += int(m.group(1))
check_func(func, slots, rlen_sum)

if num_errors == 0:
    print("No errors detected in %u functions." % num_funcs)
else:
    if num_errors > 1:
        err="errors"
    else:
        err="error"
    print("%u %s detected in %u functions." % (num_errors, err, num_funcs))
    sys.exit(1)
 * A big concern is the potential stack usage (and overflow) if each layer
 * defines these structures on the stack (assuming we don't want to kmalloc).
/*
 * TIO ITTE (translation table entry) field layout.
 *
 * NOTE(review): the text between "((1<" and "> TIO_ITTE_WIDGET_SHIFT" was
 * lost when this patch was extracted.  The widget/valid field widths and
 * shifts below are restored from the upstream Linux header -- confirm
 * against arch/ia64/sn/include/tio.h before relying on them.
 */
#define TIO_ITTE_OFFSET_BITS    8       /* size of offset field */
#define TIO_ITTE_OFFSET_MASK    ((1<<TIO_ITTE_OFFSET_BITS)-1)
#define TIO_ITTE_OFFSET_SHIFT   0

#define TIO_ITTE_WIDGET_BITS    2       /* size of widget field */
#define TIO_ITTE_WIDGET_MASK    ((1<<TIO_ITTE_WIDGET_BITS)-1)
#define TIO_ITTE_WIDGET_SHIFT   12
#define TIO_ITTE_VALID_MASK	1
#define TIO_ITTE_VALID_SHIFT	16

/* Extract the widget number from a raw ITTE value. */
#define TIO_ITTE_WIDGET(itte) \
	(((itte) >> TIO_ITTE_WIDGET_SHIFT) & TIO_ITTE_WIDGET_MASK)
/* Extract the valid bit from a raw ITTE value. */
#define TIO_ITTE_VALID(itte) \
	(((itte) >> TIO_ITTE_VALID_SHIFT) & TIO_ITTE_VALID_MASK)
/*
 * IIO ITTE widget field.
 *
 * NOTE(review): the text between "((1<" and "> IIO_ITTE_WIDGET_SHIFT" was
 * lost when this patch was extracted.  The mask expression and shift value
 * are restored from the upstream Linux header -- confirm against
 * arch/ia64/sn/include/xtalk/hubdev.h.
 */
#define IIO_ITTE_WIDGET_BITS    4       /* size of widget field */
#define IIO_ITTE_WIDGET_MASK    ((1<<IIO_ITTE_WIDGET_BITS)-1)
#define IIO_ITTE_WIDGET_SHIFT   8

/* Extract the widget number from a raw ITTE value. */
#define IIO_ITTE_WIDGET(itte)	\
	(((itte) >> IIO_ITTE_WIDGET_SHIFT) & IIO_ITTE_WIDGET_MASK)
+ */ +struct sn_flush_nasid_entry { + struct sn_flush_device_kernel **widget_p; // Used as an array of wid_num + u64 iio_itte[8]; +}; + +struct hubdev_info { + geoid_t hdi_geoid; + short hdi_nasid; + short hdi_peer_nasid; /* Dual Porting Peer */ + + struct sn_flush_nasid_entry hdi_flush_nasid_list; + struct xwidget_info hdi_xwidget_info[HUB_WIDGET_ID_MAX + 1]; + + + void *hdi_nodepda; + void *hdi_node_vertex; + u32 max_segment_number; + u32 max_pcibus_number; +}; + +extern void hubdev_init_node(nodepda_t *, cnodeid_t); +extern void hub_error_init(struct hubdev_info *); +extern void ice_error_init(struct hubdev_info *); + + +#endif /* _ASM_IA64_SN_XTALK_HUBDEV_H */ diff --git a/kernel/arch/ia64/sn/include/xtalk/xbow.h b/kernel/arch/ia64/sn/include/xtalk/xbow.h new file mode 100644 index 000000000..90f37a413 --- /dev/null +++ b/kernel/arch/ia64/sn/include/xtalk/xbow.h @@ -0,0 +1,301 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992-1997,2000-2006 Silicon Graphics, Inc. All Rights + * Reserved. + */ +#ifndef _ASM_IA64_SN_XTALK_XBOW_H +#define _ASM_IA64_SN_XTALK_XBOW_H + +#define XBOW_PORT_8 0x8 +#define XBOW_PORT_C 0xc +#define XBOW_PORT_F 0xf + +#define MAX_XBOW_PORTS 8 /* number of ports on xbow chip */ +#define BASE_XBOW_PORT XBOW_PORT_8 /* Lowest external port */ + +#define XBOW_CREDIT 4 + +#define MAX_XBOW_NAME 16 + +/* Register set for each xbow link */ +typedef volatile struct xb_linkregs_s { +/* + * we access these through synergy unswizzled space, so the address + * gets twiddled (i.e. references to 0x4 actually go to 0x0 and vv.) + * That's why we put the register first and filler second. 
+ */ + u32 link_ibf; + u32 filler0; /* filler for proper alignment */ + u32 link_control; + u32 filler1; + u32 link_status; + u32 filler2; + u32 link_arb_upper; + u32 filler3; + u32 link_arb_lower; + u32 filler4; + u32 link_status_clr; + u32 filler5; + u32 link_reset; + u32 filler6; + u32 link_aux_status; + u32 filler7; +} xb_linkregs_t; + +typedef volatile struct xbow_s { + /* standard widget configuration 0x000000-0x000057 */ + struct widget_cfg xb_widget; /* 0x000000 */ + + /* helper fieldnames for accessing bridge widget */ + +#define xb_wid_id xb_widget.w_id +#define xb_wid_stat xb_widget.w_status +#define xb_wid_err_upper xb_widget.w_err_upper_addr +#define xb_wid_err_lower xb_widget.w_err_lower_addr +#define xb_wid_control xb_widget.w_control +#define xb_wid_req_timeout xb_widget.w_req_timeout +#define xb_wid_int_upper xb_widget.w_intdest_upper_addr +#define xb_wid_int_lower xb_widget.w_intdest_lower_addr +#define xb_wid_err_cmdword xb_widget.w_err_cmd_word +#define xb_wid_llp xb_widget.w_llp_cfg +#define xb_wid_stat_clr xb_widget.w_tflush + +/* + * we access these through synergy unswizzled space, so the address + * gets twiddled (i.e. references to 0x4 actually go to 0x0 and vv.) + * That's why we put the register first and filler second. 
/*
 * Whether to use the upper or lower arbitration register, given the source
 * widget id.  Ports 0x8..0xb use the upper register, 0xc..0xf the lower one.
 * The upper bound is written as "< XBOW_PORT_C" because this header defines
 * no XBOW_PORT_B constant.
 */
#define XBOW_ARB_IS_UPPER(wid)	((wid) >= XBOW_PORT_8 && (wid) < XBOW_PORT_C)
#define XBOW_ARB_IS_LOWER(wid)	((wid) >= XBOW_PORT_C && (wid) <= XBOW_PORT_F)
/* Xbridge only */
#define XBOW_W0_RST_FNC		0x00007C
#define XBOW_L8_RST_FNC		0x000084
#define XBOW_L9_RST_FNC		0x00008c
#define XBOW_LA_RST_FNC		0x000094
#define XBOW_LB_RST_FNC		0x00009c
#define XBOW_LC_RST_FNC		0x0000a4
#define XBOW_LD_RST_FNC		0x0000ac
#define XBOW_LE_RST_FNC		0x0000b4
#define XBOW_LF_RST_FNC		0x0000bc
/*
 * Offset of the reset-fence register for widget/link (x): links 0x8..0xf
 * map to XBOW_L8_RST_FNC..XBOW_LF_RST_FNC, widget 0 maps to
 * XBOW_W0_RST_FNC, anything else yields 0.  The whole conditional is
 * parenthesized so the macro can be used inside larger expressions.
 */
#define XBOW_RESET_FENCE(x) \
	(((x) > 7 && (x) < 16) ? \
		(XBOW_W0_RST_FNC + ((x) - 7) * 8) : \
		(((x) == 0) ? XBOW_W0_RST_FNC : 0))
#define XBOW_LOCK		0x0000c4
#define XBOW_LOCK_CLR		0x0000cc
/* End of Xbridge only */
/* XBOW_WID_STAT */
#define XB_WID_STAT_LINK_INTR_SHFT	(24)
/*
 * Unsigned constants: these shifts reach bit 31, which a signed int
 * cannot represent (and which would sign-extend when widened).
 */
#define XB_WID_STAT_LINK_INTR_MASK	(0xFFU << XB_WID_STAT_LINK_INTR_SHFT)
#define XB_WID_STAT_LINK_INTR(x)	\
	(0x1U << (((x)&7) + XB_WID_STAT_LINK_INTR_SHFT))
#define XB_WID_STAT_WIDGET0_INTR	0x00800000
XB_WID_STAT_SRCID_MASK 0x000003c0 /* Xbridge only */ +#define XB_WID_STAT_REG_ACC_ERR 0x00000020 +#define XB_WID_STAT_RECV_TOUT 0x00000010 /* Xbridge only */ +#define XB_WID_STAT_ARB_TOUT 0x00000008 /* Xbridge only */ +#define XB_WID_STAT_XTALK_ERR 0x00000004 +#define XB_WID_STAT_DST_TOUT 0x00000002 /* Xbridge only */ +#define XB_WID_STAT_MULTI_ERR 0x00000001 + +#define XB_WID_STAT_SRCID_SHFT 6 + +/* XBOW_WID_CONTROL */ +#define XB_WID_CTRL_REG_ACC_IE XB_WID_STAT_REG_ACC_ERR +#define XB_WID_CTRL_RECV_TOUT XB_WID_STAT_RECV_TOUT +#define XB_WID_CTRL_ARB_TOUT XB_WID_STAT_ARB_TOUT +#define XB_WID_CTRL_XTALK_IE XB_WID_STAT_XTALK_ERR + +/* XBOW_WID_INT_UPPER */ +/* defined in xwidget.h for WIDGET_INTDEST_UPPER_ADDR */ + +/* XBOW WIDGET part number, in the ID register */ +#define XBOW_WIDGET_PART_NUM 0x0 /* crossbow */ +#define XXBOW_WIDGET_PART_NUM 0xd000 /* Xbridge */ +#define XBOW_WIDGET_MFGR_NUM 0x0 +#define XXBOW_WIDGET_MFGR_NUM 0x0 +#define PXBOW_WIDGET_PART_NUM 0xd100 /* PIC */ + +#define XBOW_REV_1_0 0x1 /* xbow rev 1.0 is "1" */ +#define XBOW_REV_1_1 0x2 /* xbow rev 1.1 is "2" */ +#define XBOW_REV_1_2 0x3 /* xbow rev 1.2 is "3" */ +#define XBOW_REV_1_3 0x4 /* xbow rev 1.3 is "4" */ +#define XBOW_REV_2_0 0x5 /* xbow rev 2.0 is "5" */ + +#define XXBOW_PART_REV_1_0 (XXBOW_WIDGET_PART_NUM << 4 | 0x1 ) +#define XXBOW_PART_REV_2_0 (XXBOW_WIDGET_PART_NUM << 4 | 0x2 ) + +/* XBOW_WID_ARB_RELOAD */ +#define XBOW_WID_ARB_RELOAD_INT 0x3f /* GBR reload interval */ + +#define IS_XBRIDGE_XBOW(wid) \ + (XWIDGET_PART_NUM(wid) == XXBOW_WIDGET_PART_NUM && \ + XWIDGET_MFG_NUM(wid) == XXBOW_WIDGET_MFGR_NUM) + +#define IS_PIC_XBOW(wid) \ + (XWIDGET_PART_NUM(wid) == PXBOW_WIDGET_PART_NUM && \ + XWIDGET_MFG_NUM(wid) == XXBOW_WIDGET_MFGR_NUM) + +#define XBOW_WAR_ENABLED(pv, widid) ((1 << XWIDGET_REV_NUM(widid)) & pv) + +#endif /* _ASM_IA64_SN_XTALK_XBOW_H */ diff --git a/kernel/arch/ia64/sn/include/xtalk/xwidgetdev.h b/kernel/arch/ia64/sn/include/xtalk/xwidgetdev.h new file mode 100644 
index 000000000..2800eda0f --- /dev/null +++ b/kernel/arch/ia64/sn/include/xtalk/xwidgetdev.h @@ -0,0 +1,70 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992-1997,2000-2003 Silicon Graphics, Inc. All Rights Reserved. + */ +#ifndef _ASM_IA64_SN_XTALK_XWIDGET_H +#define _ASM_IA64_SN_XTALK_XWIDGET_H + +/* WIDGET_ID */ +#define WIDGET_REV_NUM 0xf0000000 +#define WIDGET_PART_NUM 0x0ffff000 +#define WIDGET_MFG_NUM 0x00000ffe +#define WIDGET_REV_NUM_SHFT 28 +#define WIDGET_PART_NUM_SHFT 12 +#define WIDGET_MFG_NUM_SHFT 1 + +#define XWIDGET_PART_NUM(widgetid) (((widgetid) & WIDGET_PART_NUM) >> WIDGET_PART_NUM_SHFT) +#define XWIDGET_REV_NUM(widgetid) (((widgetid) & WIDGET_REV_NUM) >> WIDGET_REV_NUM_SHFT) +#define XWIDGET_MFG_NUM(widgetid) (((widgetid) & WIDGET_MFG_NUM) >> WIDGET_MFG_NUM_SHFT) +#define XWIDGET_PART_REV_NUM(widgetid) ((XWIDGET_PART_NUM(widgetid) << 4) | \ + XWIDGET_REV_NUM(widgetid)) +#define XWIDGET_PART_REV_NUM_REV(partrev) (partrev & 0xf) + +/* widget configuration registers */ +struct widget_cfg{ + u32 w_id; /* 0x04 */ + u32 w_pad_0; /* 0x00 */ + u32 w_status; /* 0x0c */ + u32 w_pad_1; /* 0x08 */ + u32 w_err_upper_addr; /* 0x14 */ + u32 w_pad_2; /* 0x10 */ + u32 w_err_lower_addr; /* 0x1c */ + u32 w_pad_3; /* 0x18 */ + u32 w_control; /* 0x24 */ + u32 w_pad_4; /* 0x20 */ + u32 w_req_timeout; /* 0x2c */ + u32 w_pad_5; /* 0x28 */ + u32 w_intdest_upper_addr; /* 0x34 */ + u32 w_pad_6; /* 0x30 */ + u32 w_intdest_lower_addr; /* 0x3c */ + u32 w_pad_7; /* 0x38 */ + u32 w_err_cmd_word; /* 0x44 */ + u32 w_pad_8; /* 0x40 */ + u32 w_llp_cfg; /* 0x4c */ + u32 w_pad_9; /* 0x48 */ + u32 w_tflush; /* 0x54 */ + u32 w_pad_10; /* 0x50 */ +}; + +/* + * Crosstalk Widget Hardware Identification, as defined in the Crosstalk spec. 
+ */ +struct xwidget_hwid{ + int mfg_num; + int rev_num; + int part_num; +}; + +struct xwidget_info{ + + struct xwidget_hwid xwi_hwid; /* Widget Identification */ + char xwi_masterxid; /* Hub's Widget Port Number */ + void *xwi_hubinfo; /* Hub's provider private info */ + u64 *xwi_hub_provider; /* prom provider functions */ + void *xwi_vertex; +}; + +#endif /* _ASM_IA64_SN_XTALK_XWIDGET_H */ diff --git a/kernel/arch/ia64/sn/kernel/Makefile b/kernel/arch/ia64/sn/kernel/Makefile new file mode 100644 index 000000000..d27df1d45 --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/Makefile @@ -0,0 +1,18 @@ +# arch/ia64/sn/kernel/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1999,2001-2006,2008 Silicon Graphics, Inc. All Rights Reserved. +# + +ccflags-y := -Iarch/ia64/sn/include + +obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ + huberror.o io_acpi_init.o io_common.o \ + io_init.o iomv.o klconflib.o pio_phys.o \ + sn2/ +obj-$(CONFIG_IA64_GENERIC) += machvec.o +obj-$(CONFIG_SGI_TIOCX) += tiocx.o +obj-$(CONFIG_PCI_MSI) += msi_sn.o diff --git a/kernel/arch/ia64/sn/kernel/bte.c b/kernel/arch/ia64/sn/kernel/bte.c new file mode 100644 index 000000000..b2eb48490 --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/bte.c @@ -0,0 +1,471 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2007 Silicon Graphics, Inc. All Rights Reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#ifndef L1_CACHE_MASK +#define L1_CACHE_MASK (L1_CACHE_BYTES - 1) +#endif + +/* two interfaces on two btes */ +#define MAX_INTERFACES_TO_TRY 4 +#define MAX_NODES_TO_TRY 2 + +static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface) +{ + nodepda_t *tmp_nodepda; + + if (nasid_to_cnodeid(nasid) == -1) + return (struct bteinfo_s *)NULL; + + tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid)); + return &tmp_nodepda->bte_if[interface]; + +} + +static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode) +{ + if (is_shub2()) { + BTE_CTRL_STORE(bte, (IBLS_BUSY | ((len) | (mode) << 24))); + } else { + BTE_LNSTAT_STORE(bte, len); + BTE_CTRL_STORE(bte, mode); + } +} + +/************************************************************************ + * Block Transfer Engine copy related functions. + * + ***********************************************************************/ + +/* + * bte_copy(src, dest, len, mode, notification) + * + * Use the block transfer engine to move kernel memory from src to dest + * using the assigned mode. + * + * Parameters: + * src - physical address of the transfer source. + * dest - physical address of the transfer destination. + * len - number of bytes to transfer from source to dest. + * mode - hardware defined. See reference information + * for IBCT0/1 in the SHUB Programmers Reference + * notification - kernel virtual address of the notification cache + * line. If NULL, the default is used and + * the bte_copy is synchronous. + * + * NOTE: This function requires src, dest, and len to + * be cacheline aligned. 
+ */ +bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) +{ + u64 transfer_size; + u64 transfer_stat; + u64 notif_phys_addr; + struct bteinfo_s *bte; + bte_result_t bte_status; + unsigned long irq_flags; + unsigned long itc_end = 0; + int nasid_to_try[MAX_NODES_TO_TRY]; + int my_nasid = cpuid_to_nasid(raw_smp_processor_id()); + int bte_if_index, nasid_index; + int bte_first, btes_per_node = BTES_PER_NODE; + + BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n", + src, dest, len, mode, notification)); + + if (len == 0) { + return BTE_SUCCESS; + } + + BUG_ON(len & L1_CACHE_MASK); + BUG_ON(src & L1_CACHE_MASK); + BUG_ON(dest & L1_CACHE_MASK); + BUG_ON(len > BTE_MAX_XFER); + + /* + * Start with interface corresponding to cpu number + */ + bte_first = raw_smp_processor_id() % btes_per_node; + + if (mode & BTE_USE_DEST) { + /* try remote then local */ + nasid_to_try[0] = NASID_GET(dest); + if (mode & BTE_USE_ANY) { + nasid_to_try[1] = my_nasid; + } else { + nasid_to_try[1] = 0; + } + } else { + /* try local then remote */ + nasid_to_try[0] = my_nasid; + if (mode & BTE_USE_ANY) { + nasid_to_try[1] = NASID_GET(dest); + } else { + nasid_to_try[1] = 0; + } + } + +retry_bteop: + do { + local_irq_save(irq_flags); + + bte_if_index = bte_first; + nasid_index = 0; + + /* Attempt to lock one of the BTE interfaces. 
*/ + while (nasid_index < MAX_NODES_TO_TRY) { + bte = bte_if_on_node(nasid_to_try[nasid_index],bte_if_index); + + if (bte == NULL) { + nasid_index++; + continue; + } + + if (spin_trylock(&bte->spinlock)) { + if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) || + (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) { + /* Got the lock but BTE still busy */ + spin_unlock(&bte->spinlock); + } else { + /* we got the lock and it's not busy */ + break; + } + } + + bte_if_index = (bte_if_index + 1) % btes_per_node; /* Next interface */ + if (bte_if_index == bte_first) { + /* + * We've tried all interfaces on this node + */ + nasid_index++; + } + + bte = NULL; + } + + if (bte != NULL) { + break; + } + + local_irq_restore(irq_flags); + + if (!(mode & BTE_WACQUIRE)) { + return BTEFAIL_NOTAVAIL; + } + } while (1); + + if (notification == NULL) { + /* User does not want to be notified. */ + bte->most_rcnt_na = &bte->notify; + } else { + bte->most_rcnt_na = notification; + } + + /* Calculate the number of cache lines to transfer. */ + transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK); + + /* Initialize the notification to a known value. 
*/ + *bte->most_rcnt_na = BTE_WORD_BUSY; + notif_phys_addr = (u64)bte->most_rcnt_na; + + /* Set the source and destination registers */ + BTE_PRINTKV(("IBSA = 0x%lx)\n", src)); + BTE_SRC_STORE(bte, src); + BTE_PRINTKV(("IBDA = 0x%lx)\n", dest)); + BTE_DEST_STORE(bte, dest); + + /* Set the notification register */ + BTE_PRINTKV(("IBNA = 0x%lx)\n", notif_phys_addr)); + BTE_NOTIF_STORE(bte, notif_phys_addr); + + /* Initiate the transfer */ + BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode))); + bte_start_transfer(bte, transfer_size, BTE_VALID_MODE(mode)); + + itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec); + + spin_unlock_irqrestore(&bte->spinlock, irq_flags); + + if (notification != NULL) { + return BTE_SUCCESS; + } + + while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) { + cpu_relax(); + if (ia64_get_itc() > itc_end) { + BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n", + NASID_GET(bte->bte_base_addr), bte->bte_num, + BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na) ); + bte->bte_error_count++; + bte->bh_error = IBLS_ERROR; + bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode)); + *bte->most_rcnt_na = BTE_WORD_AVAILABLE; + goto retry_bteop; + } + } + + BTE_PRINTKV((" Delay Done. IBLS = 0x%lx, most_rcnt_na = 0x%lx\n", + BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na)); + + if (transfer_stat & IBLS_ERROR) { + bte_status = BTE_GET_ERROR_STATUS(transfer_stat); + } else { + bte_status = BTE_SUCCESS; + } + *bte->most_rcnt_na = BTE_WORD_AVAILABLE; + + BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n", + BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na)); + + return bte_status; +} + +EXPORT_SYMBOL(bte_copy); + +/* + * bte_unaligned_copy(src, dest, len, mode) + * + * use the block transfer engine to move kernel + * memory from src to dest using the assigned mode. + * + * Parameters: + * src - physical address of the transfer source. + * dest - physical address of the transfer destination. 
+ * len - number of bytes to transfer from source to dest. + * mode - hardware defined. See reference information + * for IBCT0/1 in the SGI documentation. + * + * NOTE: If the source, dest, and len are all cache line aligned, + * then it would be _FAR_ preferable to use bte_copy instead. + */ +bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode) +{ + int destFirstCacheOffset; + u64 headBteSource; + u64 headBteLen; + u64 headBcopySrcOffset; + u64 headBcopyDest; + u64 headBcopyLen; + u64 footBteSource; + u64 footBteLen; + u64 footBcopyDest; + u64 footBcopyLen; + bte_result_t rv; + char *bteBlock, *bteBlock_unaligned; + + if (len == 0) { + return BTE_SUCCESS; + } + + /* temporary buffer used during unaligned transfers */ + bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, GFP_KERNEL); + if (bteBlock_unaligned == NULL) { + return BTEFAIL_NOTAVAIL; + } + bteBlock = (char *)L1_CACHE_ALIGN((u64) bteBlock_unaligned); + + headBcopySrcOffset = src & L1_CACHE_MASK; + destFirstCacheOffset = dest & L1_CACHE_MASK; + + /* + * At this point, the transfer is broken into + * (up to) three sections. The first section is + * from the start address to the first physical + * cache line, the second is from the first physical + * cache line to the last complete cache line, + * and the third is from the last cache line to the + * end of the buffer. The first and third sections + * are handled by bte copying into a temporary buffer + * and then bcopy'ing the necessary section into the + * final location. The middle section is handled with + * a standard bte copy. + * + * One nasty exception to the above rule is when the + * source and destination are not symmetrically + * mis-aligned. If the source offset from the first + * cache line is different from the destination offset, + * we make the first section be the entire transfer + * and the bcopy the entire block into place. 
+ */ + if (headBcopySrcOffset == destFirstCacheOffset) { + + /* + * Both the source and destination are the same + * distance from a cache line boundary so we can + * use the bte to transfer the bulk of the + * data. + */ + headBteSource = src & ~L1_CACHE_MASK; + headBcopyDest = dest; + if (headBcopySrcOffset) { + headBcopyLen = + (len > + (L1_CACHE_BYTES - + headBcopySrcOffset) ? L1_CACHE_BYTES + - headBcopySrcOffset : len); + headBteLen = L1_CACHE_BYTES; + } else { + headBcopyLen = 0; + headBteLen = 0; + } + + if (len > headBcopyLen) { + footBcopyLen = (len - headBcopyLen) & L1_CACHE_MASK; + footBteLen = L1_CACHE_BYTES; + + footBteSource = src + len - footBcopyLen; + footBcopyDest = dest + len - footBcopyLen; + + if (footBcopyDest == (headBcopyDest + headBcopyLen)) { + /* + * We have two contiguous bcopy + * blocks. Merge them. + */ + headBcopyLen += footBcopyLen; + headBteLen += footBteLen; + } else if (footBcopyLen > 0) { + rv = bte_copy(footBteSource, + ia64_tpa((unsigned long)bteBlock), + footBteLen, mode, NULL); + if (rv != BTE_SUCCESS) { + kfree(bteBlock_unaligned); + return rv; + } + + memcpy(__va(footBcopyDest), + (char *)bteBlock, footBcopyLen); + } + } else { + footBcopyLen = 0; + footBteLen = 0; + } + + if (len > (headBcopyLen + footBcopyLen)) { + /* now transfer the middle. */ + rv = bte_copy((src + headBcopyLen), + (dest + + headBcopyLen), + (len - headBcopyLen - + footBcopyLen), mode, NULL); + if (rv != BTE_SUCCESS) { + kfree(bteBlock_unaligned); + return rv; + } + + } + } else { + + /* + * The transfer is not symmetric, we will + * allocate a buffer large enough for all the + * data, bte_copy into that buffer and then + * bcopy to the destination. 
+ */ + + headBcopySrcOffset = src & L1_CACHE_MASK; + headBcopyDest = dest; + headBcopyLen = len; + + headBteSource = src - headBcopySrcOffset; + /* Add the leading and trailing bytes from source */ + headBteLen = L1_CACHE_ALIGN(len + headBcopySrcOffset); + } + + if (headBcopyLen > 0) { + rv = bte_copy(headBteSource, + ia64_tpa((unsigned long)bteBlock), headBteLen, + mode, NULL); + if (rv != BTE_SUCCESS) { + kfree(bteBlock_unaligned); + return rv; + } + + memcpy(__va(headBcopyDest), ((char *)bteBlock + + headBcopySrcOffset), headBcopyLen); + } + kfree(bteBlock_unaligned); + return BTE_SUCCESS; +} + +EXPORT_SYMBOL(bte_unaligned_copy); + +/************************************************************************ + * Block Transfer Engine initialization functions. + * + ***********************************************************************/ + +/* + * bte_init_node(nodepda, cnode) + * + * Initialize the nodepda structure with BTE base addresses and + * spinlocks. + */ +void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode) +{ + int i; + + /* + * Indicate that all the block transfer engines on this node + * are available. + */ + + /* + * Allocate one bte_recover_t structure per node. It holds + * the recovery lock for node. All the bte interface structures + * will point at this one bte_recover structure to get the lock. + */ + spin_lock_init(&mynodepda->bte_recovery_lock); + init_timer(&mynodepda->bte_recovery_timer); + mynodepda->bte_recovery_timer.function = bte_error_handler; + mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda; + + for (i = 0; i < BTES_PER_NODE; i++) { + u64 *base_addr; + + /* Which link status register should we use? 
*/ + base_addr = (u64 *) + REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), BTE_BASE_ADDR(i)); + mynodepda->bte_if[i].bte_base_addr = base_addr; + mynodepda->bte_if[i].bte_source_addr = BTE_SOURCE_ADDR(base_addr); + mynodepda->bte_if[i].bte_destination_addr = BTE_DEST_ADDR(base_addr); + mynodepda->bte_if[i].bte_control_addr = BTE_CTRL_ADDR(base_addr); + mynodepda->bte_if[i].bte_notify_addr = BTE_NOTIF_ADDR(base_addr); + + /* + * Initialize the notification and spinlock + * so the first transfer can occur. + */ + mynodepda->bte_if[i].most_rcnt_na = + &(mynodepda->bte_if[i].notify); + mynodepda->bte_if[i].notify = BTE_WORD_AVAILABLE; + spin_lock_init(&mynodepda->bte_if[i].spinlock); + + mynodepda->bte_if[i].bte_cnode = cnode; + mynodepda->bte_if[i].bte_error_count = 0; + mynodepda->bte_if[i].bte_num = i; + mynodepda->bte_if[i].cleanup_active = 0; + mynodepda->bte_if[i].bh_error = 0; + } + +} diff --git a/kernel/arch/ia64/sn/kernel/bte_error.c b/kernel/arch/ia64/sn/kernel/bte_error.c new file mode 100644 index 000000000..4cb09f3f1 --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/bte_error.c @@ -0,0 +1,260 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2007 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include +#include +#include "ioerror.h" +#include +#include +#include +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" +#include +#include + +/* + * Bte error handling is done in two parts. The first captures + * any crb related errors. Since there can be multiple crbs per + * interface and multiple interfaces active, we need to wait until + * all active crbs are completed. This is the first job of the + * second part error handler. When all bte related CRBs are cleanly + * completed, it resets the interfaces and gets them ready for new + * transfers to be queued. 
+ */ + +void bte_error_handler(unsigned long); + +/* + * Wait until all BTE related CRBs are completed + * and then reset the interfaces. + */ +int shub1_bte_error_handler(unsigned long _nodepda) +{ + struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; + struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; + nasid_t nasid; + int i; + int valid_crbs; + ii_imem_u_t imem; /* II IMEM Register */ + ii_icrb0_d_u_t icrbd; /* II CRB Register D */ + ii_ibcr_u_t ibcr; + ii_icmr_u_t icmr; + ii_ieclr_u_t ieclr; + + BTE_PRINTK(("shub1_bte_error_handler(%p) - %d\n", err_nodepda, + smp_processor_id())); + + if ((err_nodepda->bte_if[0].bh_error == BTE_SUCCESS) && + (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) { + BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda, + smp_processor_id())); + return 1; + } + + /* Determine information about our hub */ + nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode); + + /* + * A BTE transfer can use multiple CRBs. We need to make sure + * that all the BTE CRBs are complete (or timed out) before + * attempting to clean up the error. Resetting the BTE while + * there are still BTE CRBs active will hang the BTE. + * We should look at all the CRBs to see if they are allocated + * to the BTE and see if they are still active. When none + * are active, we can continue with the cleanup. + * + * We also want to make sure that the local NI port is up. + * When a router resets the NI port can go down, while it + * goes through the LLP handshake, but then comes back up. 
+ */ + icmr.ii_icmr_regval = REMOTE_HUB_L(nasid, IIO_ICMR); + if (icmr.ii_icmr_fld_s.i_crb_mark != 0) { + /* + * There are errors which still need to be cleaned up by + * hubiio_crb_error_handler + */ + mod_timer(recovery_timer, jiffies + (HZ * 5)); + BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda, + smp_processor_id())); + return 1; + } + if (icmr.ii_icmr_fld_s.i_crb_vld != 0) { + + valid_crbs = icmr.ii_icmr_fld_s.i_crb_vld; + + for (i = 0; i < IIO_NUM_CRBS; i++) { + if (!((1 << i) & valid_crbs)) { + /* This crb was not marked as valid, ignore */ + continue; + } + icrbd.ii_icrb0_d_regval = + REMOTE_HUB_L(nasid, IIO_ICRB_D(i)); + if (icrbd.d_bteop) { + mod_timer(recovery_timer, jiffies + (HZ * 5)); + BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n", + err_nodepda, smp_processor_id(), + i)); + return 1; + } + } + } + + BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda, smp_processor_id())); + /* Re-enable both bte interfaces */ + imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM); + imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1; + REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval); + + /* Clear BTE0/1 error bits */ + ieclr.ii_ieclr_regval = 0; + if (err_nodepda->bte_if[0].bh_error != BTE_SUCCESS) + ieclr.ii_ieclr_fld_s.i_e_bte_0 = 1; + if (err_nodepda->bte_if[1].bh_error != BTE_SUCCESS) + ieclr.ii_ieclr_fld_s.i_e_bte_1 = 1; + REMOTE_HUB_S(nasid, IIO_IECLR, ieclr.ii_ieclr_regval); + + /* Reinitialize both BTE state machines. */ + ibcr.ii_ibcr_regval = REMOTE_HUB_L(nasid, IIO_IBCR); + ibcr.ii_ibcr_fld_s.i_soft_reset = 1; + REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval); + + del_timer(recovery_timer); + return 0; +} + +/* + * Wait until all BTE related CRBs are completed + * and then reset the interfaces. 
+ */ +int shub2_bte_error_handler(unsigned long _nodepda) +{ + struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; + struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; + struct bteinfo_s *bte; + nasid_t nasid; + u64 status; + int i; + + nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode); + + /* + * Verify that all the BTEs are complete + */ + for (i = 0; i < BTES_PER_NODE; i++) { + bte = &err_nodepda->bte_if[i]; + status = BTE_LNSTAT_LOAD(bte); + if (status & IBLS_ERROR) { + bte->bh_error = BTE_SHUB2_ERROR(status); + continue; + } + if (!(status & IBLS_BUSY)) + continue; + mod_timer(recovery_timer, jiffies + (HZ * 5)); + BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda, + smp_processor_id())); + return 1; + } + if (ia64_sn_bte_recovery(nasid)) + panic("bte_error_handler(): Fatal BTE Error"); + + del_timer(recovery_timer); + return 0; +} + +/* + * Lock every BTE interface on this node and run the shub1/shub2 recovery; on + * success post any saved error to most_rcnt_na and unlock the interfaces. + */ +void bte_error_handler(unsigned long _nodepda) +{ + struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; + spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock; + int i; + unsigned long irq_flags; + volatile u64 *notify; + bte_result_t bh_error; + + BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda, + smp_processor_id())); + + spin_lock_irqsave(recovery_lock, irq_flags); + + /* + * Lock all interfaces on this node to prevent new transfers + * from being queued.
+ */ + for (i = 0; i < BTES_PER_NODE; i++) { + if (err_nodepda->bte_if[i].cleanup_active) { + continue; + } + spin_lock(&err_nodepda->bte_if[i].spinlock); + BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda, + smp_processor_id(), i)); + err_nodepda->bte_if[i].cleanup_active = 1; + } + + if (is_shub1()) { + if (shub1_bte_error_handler(_nodepda)) { + spin_unlock_irqrestore(recovery_lock, irq_flags); + return; + } + } else { + if (shub2_bte_error_handler(_nodepda)) { + spin_unlock_irqrestore(recovery_lock, irq_flags); + return; + } + } + + for (i = 0; i < BTES_PER_NODE; i++) { + bh_error = err_nodepda->bte_if[i].bh_error; + if (bh_error != BTE_SUCCESS) { + /* There is an error which needs to be notified */ + notify = err_nodepda->bte_if[i].most_rcnt_na; + BTE_PRINTK(("cnode %d bte %d error=0x%lx\n", + err_nodepda->bte_if[i].bte_cnode, + err_nodepda->bte_if[i].bte_num, + IBLS_ERROR | (u64) bh_error)); + *notify = IBLS_ERROR | bh_error; + err_nodepda->bte_if[i].bh_error = BTE_SUCCESS; + } + + err_nodepda->bte_if[i].cleanup_active = 0; + BTE_PRINTK(("eh:%p:%d Unlocked %d\n", err_nodepda, + smp_processor_id(), i)); + spin_unlock(&err_nodepda->bte_if[i].spinlock); + } + + spin_unlock_irqrestore(recovery_lock, irq_flags); +} + +/* + * First part error handler. This is called whenever any error CRB interrupt + * is generated by the II. + */ +void +bte_crb_error_handler(cnodeid_t cnode, int btenum, + int crbnum, ioerror_t * ioe, int bteop) +{ + struct bteinfo_s *bte; + + + bte = &(NODEPDA(cnode)->bte_if[btenum]); + + /* + * The caller has already figured out the error type, we save that + * in the bte handle structure for the thread exercising the + * interface to consume. 
+ */ + bte->bh_error = ioe->ie_errortype + BTEFAIL_OFFSET; + bte->bte_error_count++; + + BTE_PRINTK(("Got an error on cnode %d bte %d: HW error type 0x%x\n", + bte->bte_cnode, bte->bte_num, ioe->ie_errortype)); + bte_error_handler((unsigned long) NODEPDA(cnode)); +} + diff --git a/kernel/arch/ia64/sn/kernel/huberror.c b/kernel/arch/ia64/sn/kernel/huberror.c new file mode 100644 index 000000000..f925dec2d --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/huberror.c @@ -0,0 +1,220 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000,2002-2007 Silicon Graphics, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include "ioerror.h" +#include +#include +#include +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" +#include + +void hubiio_crb_error_handler(struct hubdev_info *hubdev_info); +extern void bte_crb_error_handler(cnodeid_t, int, int, ioerror_t *, + int); +static irqreturn_t hub_eint_handler(int irq, void *arg) +{ + struct hubdev_info *hubdev_info; + struct ia64_sal_retval ret_stuff; + nasid_t nasid; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + hubdev_info = (struct hubdev_info *)arg; + nasid = hubdev_info->hdi_nasid; + + if (is_shub1()) { + SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT, + (u64) nasid, 0, 0, 0, 0, 0, 0); + + if ((int)ret_stuff.v0) + panic("%s: Fatal %s Error", __func__, + ((nasid & 1) ? 
"TIO" : "HUBII")); + + if (!(nasid & 1)) /* Not a TIO, handle CRB errors */ + (void)hubiio_crb_error_handler(hubdev_info); + } else + if (nasid & 1) { /* TIO errors */ + SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT, + (u64) nasid, 0, 0, 0, 0, 0, 0); + + if ((int)ret_stuff.v0) + panic("%s: Fatal TIO Error", __func__); + } else + bte_error_handler((unsigned long)NODEPDA(nasid_to_cnodeid(nasid))); + + return IRQ_HANDLED; +} + +/* + * Free the hub CRB "crbnum" which encountered an error. + * Assumption is, error handling was successfully done, + * and we now want to return the CRB back to Hub for normal usage. + * + * In order to free the CRB, all that's needed is to de-allocate it + * + * Assumption: + * No other processor is mucking around with the hub control register. + * So, upper layer has to single thread this. + */ +void hubiio_crb_free(struct hubdev_info *hubdev_info, int crbnum) +{ + ii_icrb0_b_u_t icrbb; + + /* + * The hardware does NOT clear the mark bit, so it must get cleared + * here to be sure the error is not processed twice. + */ + icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(hubdev_info->hdi_nasid, + IIO_ICRB_B(crbnum)); + icrbb.b_mark = 0; + REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICRB_B(crbnum), + icrbb.ii_icrb0_b_regval); + /* + * Deallocate the register wait till hub indicates it's done. + */ + REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICDR, (IIO_ICDR_PND | crbnum)); + while (REMOTE_HUB_L(hubdev_info->hdi_nasid, IIO_ICDR) & IIO_ICDR_PND) + cpu_relax(); + +} + +/* + * hubiio_crb_error_handler + * + * This routine gets invoked when a hub gets an error + * interrupt. So, the routine is running in interrupt context + * at error interrupt level. + * Action: + * It's responsible for identifying ALL the CRBs that are marked + * with error, and process them. + * + * If you find the CRB that's marked with error, map this to the + * reason it caused error, and invoke appropriate error handler. 
+ * + * XXX Be aware of the information in the context register. + * + * NOTE: + * Use REMOTE_HUB_* macro instead of LOCAL_HUB_* so that the interrupt + * handler can be run on any node. (not necessarily the node + * corresponding to the hub that encountered error). + */ + +void hubiio_crb_error_handler(struct hubdev_info *hubdev_info) +{ + nasid_t nasid; + ii_icrb0_a_u_t icrba; /* II CRB Register A */ + ii_icrb0_b_u_t icrbb; /* II CRB Register B */ + ii_icrb0_c_u_t icrbc; /* II CRB Register C */ + ii_icrb0_d_u_t icrbd; /* II CRB Register D */ + ii_icrb0_e_u_t icrbe; /* II CRB Register E */ + int i; + int num_errors = 0; /* Num of errors handled */ + ioerror_t ioerror; + + nasid = hubdev_info->hdi_nasid; + + /* + * XXX - Add locking for any recovery actions + */ + /* + * Scan through all CRBs in the Hub, and handle the errors + * in any of the CRBs marked. + */ + for (i = 0; i < IIO_NUM_CRBS; i++) { + /* Check this crb entry to see if it is in error. */ + icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i)); + + if (icrbb.b_mark == 0) { + continue; + } + + icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i)); + + IOERROR_INIT(&ioerror); + + /* read other CRB error registers. */ + icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i)); + icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i)); + icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i)); + + IOERROR_SETVALUE(&ioerror, errortype, icrbb.b_ecode); + + /* Check if this error is due to BTE operation, + * and handle it separately.
+ */ + if (icrbd.d_bteop || + ((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 || + icrbb.b_initiator == IIO_ICRB_INIT_BTE1) && + (icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE || + icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))) { + + int bte_num; + + if (icrbd.d_bteop) + bte_num = icrbc.c_btenum; + else /* b_initiator bit 2 gives BTE number */ + bte_num = (icrbb.b_initiator & 0x4) >> 2; + + hubiio_crb_free(hubdev_info, i); + + bte_crb_error_handler(nasid_to_cnodeid(nasid), bte_num, + i, &ioerror, icrbd.d_bteop); + num_errors++; + continue; + } + } +} + +/* + * Function : hub_error_init + * Purpose : initialize the error handling requirements for a given hub. + * Parameters : hubdev_info, passed to request_irq() as the handler argument. + * Assumptions : Called only once per hub, either by a local cpu or by a + * remote cpu when this hub is headless (cpuless). + * Returns : None + */ +void hub_error_init(struct hubdev_info *hubdev_info) +{ + + if (request_irq(SGI_II_ERROR, hub_eint_handler, IRQF_SHARED, + "SN_hub_error", hubdev_info)) { + printk(KERN_ERR "hub_error_init: Failed to request_irq for 0x%p\n", + hubdev_info); + return; + } + irq_set_handler(SGI_II_ERROR, handle_level_irq); + sn_set_err_irq_affinity(SGI_II_ERROR); +} + + +/* + * Function : ice_error_init + * Purpose : initialize the error handling requirements for a given tio. + * Parameters : cnode, the compact nodeid. + * Assumptions : Called only once per tio.
+ * Returns : None + */ +void ice_error_init(struct hubdev_info *hubdev_info) +{ + + if (request_irq + (SGI_TIO_ERROR, (void *)hub_eint_handler, IRQF_SHARED, "SN_TIO_error", + (void *)hubdev_info)) { + printk("ice_error_init: request_irq() error hubdev_info 0x%p\n", + hubdev_info); + return; + } + irq_set_handler(SGI_TIO_ERROR, handle_level_irq); + sn_set_err_irq_affinity(SGI_TIO_ERROR); +} + diff --git a/kernel/arch/ia64/sn/kernel/idle.c b/kernel/arch/ia64/sn/kernel/idle.c new file mode 100644 index 000000000..49d178f02 --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/idle.c @@ -0,0 +1,30 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2001-2004 Silicon Graphics, Inc. All rights reserved. + */ + +#include + +void snidle(int state) +{ + if (state) { + if (pda->idle_flag == 0) { + /* + * Turn the activity LED off. + */ + set_led_bits(0, LED_CPU_ACTIVITY); + } + + pda->idle_flag = 1; + } else { + /* + * Turn the activity LED on. + */ + set_led_bits(LED_CPU_ACTIVITY, LED_CPU_ACTIVITY); + + pda->idle_flag = 0; + } +} diff --git a/kernel/arch/ia64/sn/kernel/io_acpi_init.c b/kernel/arch/ia64/sn/kernel/io_acpi_init.c new file mode 100644 index 000000000..0640739cc --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/io_acpi_init.c @@ -0,0 +1,510 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include "xtalk/hubdev.h" +#include +#include +#include + + +/* + * The code in this file will only be executed when running with + * a PROM that has ACPI IO support. 
(i.e., SN_ACPI_BASE_SUPPORT() == 1) + */ + + +/* + * This value must match the UUID the PROM uses + * (io/acpi/defblk.c) when building a vendor descriptor. + */ +struct acpi_vendor_uuid sn_uuid = { + .subtype = 0, + .data = { 0x2c, 0xc6, 0xa6, 0xfe, 0x9c, 0x44, 0xda, 0x11, + 0xa2, 0x7c, 0x08, 0x00, 0x69, 0x13, 0xea, 0x51 }, +}; + +struct sn_pcidev_match { + u8 bus; + unsigned int devfn; + acpi_handle handle; +}; + +/* + * Perform the early IO init in PROM. + */ +static long +sal_ioif_init(u64 *result) +{ + struct ia64_sal_retval isrv = {0,0,0,0}; + + SAL_CALL_NOLOCK(isrv, + SN_SAL_IOIF_INIT, 0, 0, 0, 0, 0, 0, 0); + *result = isrv.v0; + return isrv.status; +} + +/* + * sn_acpi_hubdev_init() - This function is called by acpi_ns_get_device_callback() + * for all SGIHUB and SGITIO acpi devices defined in the + * DSDT. It obtains the hubdev_info pointer from the + * ACPI vendor resource, which the PROM setup, and sets up the + * hubdev_info in the pda. + */ + +static acpi_status __init +sn_acpi_hubdev_init(acpi_handle handle, u32 depth, void *context, void **ret) +{ + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + u64 addr; + struct hubdev_info *hubdev; + struct hubdev_info *hubdev_ptr; + int i; + u64 nasid; + struct acpi_resource *resource; + acpi_status status; + struct acpi_resource_vendor_typed *vendor; + extern void sn_common_hubdev_init(struct hubdev_info *); + + status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS, + &sn_uuid, &buffer); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "sn_acpi_hubdev_init: acpi_get_vendor_resource() " + "(0x%x) failed for: %s\n", status, + (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return AE_OK; /* Continue walking namespace */ + } + + resource = buffer.pointer; + vendor = &resource->data.vendor_typed; + if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) != 
+ sizeof(struct hubdev_info *)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "sn_acpi_hubdev_init: Invalid vendor data length: " + "%d for: %s\n", + vendor->byte_length, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + goto exit; + } + + memcpy(&addr, vendor->byte_data, sizeof(struct hubdev_info *)); + hubdev_ptr = __va((struct hubdev_info *) addr); + + nasid = hubdev_ptr->hdi_nasid; + i = nasid_to_cnodeid(nasid); + hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo); + *hubdev = *hubdev_ptr; + sn_common_hubdev_init(hubdev); + +exit: + kfree(buffer.pointer); + return AE_OK; /* Continue walking namespace */ +} + +/* + * sn_get_bussoft_ptr() - The pcibus_bussoft pointer is found in + * the ACPI Vendor resource for this bus. + */ +static struct pcibus_bussoft * +sn_get_bussoft_ptr(struct pci_bus *bus) +{ + u64 addr; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + acpi_handle handle; + struct pcibus_bussoft *prom_bussoft_ptr; + struct acpi_resource *resource; + acpi_status status; + struct acpi_resource_vendor_typed *vendor; + + + handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion); + status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS, + &sn_uuid, &buffer); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR "%s: " + "acpi_get_vendor_resource() failed (0x%x) for: %s\n", + __func__, status, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return NULL; + } + resource = buffer.pointer; + vendor = &resource->data.vendor_typed; + + if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) != + sizeof(struct pcibus_bussoft *)) { + printk(KERN_ERR + "%s: Invalid vendor data length %d\n", + __func__, vendor->byte_length); + kfree(buffer.pointer); + return NULL; + } + memcpy(&addr, vendor->byte_data, sizeof(struct pcibus_bussoft *)); + prom_bussoft_ptr = 
__va((struct pcibus_bussoft *) addr); + kfree(buffer.pointer); + + return prom_bussoft_ptr; +} + +/* + * sn_extract_device_info - Extract the pcidev_info and the sn_irq_info + * pointers from the vendor resource using the + * provided acpi handle, and copy the structures + * into the argument buffers. + */ +static int +sn_extract_device_info(acpi_handle handle, struct pcidev_info **pcidev_info, + struct sn_irq_info **sn_irq_info) +{ + u64 addr; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct sn_irq_info *irq_info, *irq_info_prom; + struct pcidev_info *pcidev_ptr, *pcidev_prom_ptr; + struct acpi_resource *resource; + int ret = 0; + acpi_status status; + struct acpi_resource_vendor_typed *vendor; + + /* + * The pointer to this device's pcidev_info structure in + * the PROM, is in the vendor resource. + */ + status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS, + &sn_uuid, &buffer); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "%s: acpi_get_vendor_resource() failed (0x%x) for: %s\n", + __func__, status, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return 1; + } + + resource = buffer.pointer; + vendor = &resource->data.vendor_typed; + if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) != + sizeof(struct pci_devdev_info *)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "%s: Invalid vendor data length: %d for: %s\n", + __func__, vendor->byte_length, + (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + ret = 1; + goto exit; + } + + pcidev_ptr = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL); + if (!pcidev_ptr) + panic("%s: Unable to alloc memory for pcidev_info", __func__); + + memcpy(&addr, vendor->byte_data, sizeof(struct pcidev_info *)); + pcidev_prom_ptr = __va(addr); + memcpy(pcidev_ptr, pcidev_prom_ptr, sizeof(struct pcidev_info)); + + 
/* Get the IRQ info */ + irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL); + if (!irq_info) + panic("%s: Unable to alloc memory for sn_irq_info", __func__); + + if (pcidev_ptr->pdi_sn_irq_info) { + irq_info_prom = __va(pcidev_ptr->pdi_sn_irq_info); + memcpy(irq_info, irq_info_prom, sizeof(struct sn_irq_info)); + } + + *pcidev_info = pcidev_ptr; + *sn_irq_info = irq_info; + +exit: + kfree(buffer.pointer); + return ret; +} + +static unsigned int +get_host_devfn(acpi_handle device_handle, acpi_handle rootbus_handle) +{ + unsigned long long adr; + acpi_handle child; + unsigned int devfn; + int function; + acpi_handle parent; + int slot; + acpi_status status; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + acpi_get_name(device_handle, ACPI_FULL_PATHNAME, &name_buffer); + + /* + * Do an upward search to find the root bus device, and + * obtain the host devfn from the previous child device. + */ + child = device_handle; + while (child) { + status = acpi_get_parent(child, &parent); + if (ACPI_FAILURE(status)) { + printk(KERN_ERR "%s: acpi_get_parent() failed " + "(0x%x) for: %s\n", __func__, status, + (char *)name_buffer.pointer); + panic("%s: Unable to find host devfn\n", __func__); + } + if (parent == rootbus_handle) + break; + child = parent; + } + if (!child) { + printk(KERN_ERR "%s: Unable to find root bus for: %s\n", + __func__, (char *)name_buffer.pointer); + BUG(); + } + + status = acpi_evaluate_integer(child, METHOD_NAME__ADR, NULL, &adr); + if (ACPI_FAILURE(status)) { + printk(KERN_ERR "%s: Unable to get _ADR (0x%x) for: %s\n", + __func__, status, (char *)name_buffer.pointer); + panic("%s: Unable to find host devfn\n", __func__); + } + + kfree(name_buffer.pointer); + + slot = (adr >> 16) & 0xffff; + function = adr & 0xffff; + devfn = PCI_DEVFN(slot, function); + return devfn; +} + +/* + * find_matching_device - Callback routine to find the ACPI device + * that matches up with our pci_dev device. 
+ * Matching is done on bus number and devfn. + * To find the bus number for a particular + * ACPI device, we must look at the _BBN method + * of its parent. + */ +static acpi_status +find_matching_device(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + unsigned long long bbn = -1; + unsigned long long adr; + acpi_handle parent = NULL; + acpi_status status; + unsigned int devfn; + int function; + int slot; + struct sn_pcidev_match *info = context; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, + &adr); + if (ACPI_SUCCESS(status)) { + status = acpi_get_parent(handle, &parent); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "%s: acpi_get_parent() failed (0x%x) for: %s\n", + __func__, status, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return AE_OK; + } + status = acpi_evaluate_integer(parent, METHOD_NAME__BBN, + NULL, &bbn); + if (ACPI_FAILURE(status)) { + acpi_get_name(handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR + "%s: Failed to find _BBN in parent of: %s\n", + __func__, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return AE_OK; + } + + slot = (adr >> 16) & 0xffff; + function = adr & 0xffff; + devfn = PCI_DEVFN(slot, function); + if ((info->devfn == devfn) && (info->bus == bbn)) { + /* We have a match! */ + info->handle = handle; + return 1; + } + } + return AE_OK; +} + +/* + * sn_acpi_get_pcidev_info - Search ACPI namespace for the acpi + * device matching the specified pci_dev, + * and return the pcidev info and irq info. 
+ */ +int +sn_acpi_get_pcidev_info(struct pci_dev *dev, struct pcidev_info **pcidev_info, + struct sn_irq_info **sn_irq_info) +{ + unsigned int host_devfn; + struct sn_pcidev_match pcidev_match; + acpi_handle rootbus_handle; + unsigned long long segment; + acpi_status status; + struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + rootbus_handle = acpi_device_handle(PCI_CONTROLLER(dev)->companion); + status = acpi_evaluate_integer(rootbus_handle, METHOD_NAME__SEG, NULL, + &segment); + if (ACPI_SUCCESS(status)) { + if (segment != pci_domain_nr(dev)) { + acpi_get_name(rootbus_handle, ACPI_FULL_PATHNAME, + &name_buffer); + printk(KERN_ERR + "%s: Segment number mismatch, 0x%llx vs 0x%x for: %s\n", + __func__, segment, pci_domain_nr(dev), + (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return 1; + } + } else { + acpi_get_name(rootbus_handle, ACPI_FULL_PATHNAME, &name_buffer); + printk(KERN_ERR "%s: Unable to get __SEG from: %s\n", + __func__, (char *)name_buffer.pointer); + kfree(name_buffer.pointer); + return 1; + } + + /* + * We want to search all devices in this segment/domain + * of the ACPI namespace for the matching ACPI device, + * which holds the pcidev_info pointer in its vendor resource. 
+ */ + pcidev_match.bus = dev->bus->number; + pcidev_match.devfn = dev->devfn; + pcidev_match.handle = NULL; + + acpi_walk_namespace(ACPI_TYPE_DEVICE, rootbus_handle, ACPI_UINT32_MAX, + find_matching_device, NULL, &pcidev_match, NULL); + + if (!pcidev_match.handle) { + printk(KERN_ERR + "%s: Could not find matching ACPI device for %s.\n", + __func__, pci_name(dev)); + return 1; + } + + if (sn_extract_device_info(pcidev_match.handle, pcidev_info, sn_irq_info)) + return 1; + + /* Build up the pcidev_info.pdi_slot_host_handle */ + host_devfn = get_host_devfn(pcidev_match.handle, rootbus_handle); + (*pcidev_info)->pdi_slot_host_handle = + ((unsigned long) pci_domain_nr(dev) << 40) | + /* bus == 0 */ + host_devfn; + return 0; +} + +/* + * sn_acpi_slot_fixup - Obtain the pcidev_info and sn_irq_info. + * Perform any SN specific slot fixup. + * At present there does not appear to be + * any generic way to handle a ROM image + * that has been shadowed by the PROM, so + * we pass a pointer to it within the + * pcidev_info structure. + */ + +void +sn_acpi_slot_fixup(struct pci_dev *dev) +{ + void __iomem *addr; + struct pcidev_info *pcidev_info = NULL; + struct sn_irq_info *sn_irq_info = NULL; + size_t image_size, size; + + if (sn_acpi_get_pcidev_info(dev, &pcidev_info, &sn_irq_info)) { + panic("%s: Failure obtaining pcidev_info for %s\n", + __func__, pci_name(dev)); + } + + if (pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE]) { + /* + * A valid ROM image exists and has been shadowed by the + * PROM. Setup the pci_dev ROM resource with the address + * of the shadowed copy, and the actual length of the ROM image. 
+ */ + size = pci_resource_len(dev, PCI_ROM_RESOURCE); + addr = ioremap(pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE], + size); + image_size = pci_get_rom_size(dev, addr, size); + dev->resource[PCI_ROM_RESOURCE].start = (unsigned long) addr; + dev->resource[PCI_ROM_RESOURCE].end = + (unsigned long) addr + image_size - 1; + dev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_BIOS_COPY; + } + sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info); +} + +EXPORT_SYMBOL(sn_acpi_slot_fixup); + + +/* + * sn_acpi_bus_fixup - Perform SN specific setup of software structs + * (pcibus_bussoft, pcidev_info) and hardware + * registers, for the specified bus and devices under it. + */ +void +sn_acpi_bus_fixup(struct pci_bus *bus) +{ + struct pci_dev *pci_dev = NULL; + struct pcibus_bussoft *prom_bussoft_ptr; + + if (!bus->parent) { /* If root bus */ + prom_bussoft_ptr = sn_get_bussoft_ptr(bus); + if (prom_bussoft_ptr == NULL) { + printk(KERN_ERR + "%s: 0x%04x:0x%02x Unable to " + "obtain prom_bussoft_ptr\n", + __func__, pci_domain_nr(bus), bus->number); + return; + } + sn_common_bus_fixup(bus, prom_bussoft_ptr); + } + list_for_each_entry(pci_dev, &bus->devices, bus_list) { + sn_acpi_slot_fixup(pci_dev); + } +} + +/* + * sn_io_acpi_init - PROM has ACPI support for IO, defining at a minimum the + * nodes and root buses in the DSDT. As a result, bus scanning + * will be initiated by the Linux ACPI code. 
+ */ + +void __init +sn_io_acpi_init(void) +{ + u64 result; + long status; + + /* SN Altix does not follow the IOSAPIC IRQ routing model */ + acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM; + + /* Setup hubdev_info for all SGIHUB/SGITIO devices */ + acpi_get_devices("SGIHUB", sn_acpi_hubdev_init, NULL, NULL); + acpi_get_devices("SGITIO", sn_acpi_hubdev_init, NULL, NULL); + + status = sal_ioif_init(&result); + if (status || result) + panic("sal_ioif_init failed: [%lx] %s\n", + status, ia64_sal_strerror(status)); +} diff --git a/kernel/arch/ia64/sn/kernel/io_common.c b/kernel/arch/ia64/sn/kernel/io_common.c new file mode 100644 index 000000000..11f227557 --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/io_common.c @@ -0,0 +1,564 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "xtalk/hubdev.h" +#include "xtalk/xwidgetdev.h" +#include +#include +#include + +extern void sn_init_cpei_timer(void); +extern void register_sn_procfs(void); +extern void sn_io_acpi_init(void); +extern void sn_io_init(void); + + +static struct list_head sn_sysdata_list; + +/* sysdata list struct */ +struct sysdata_el { + struct list_head entry; + void *sysdata; +}; + +int sn_ioif_inited; /* SN I/O infrastructure initialized? 
*/ + +int sn_acpi_rev; /* SN ACPI revision */ +EXPORT_SYMBOL_GPL(sn_acpi_rev); + +struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES]; /* indexed by asic type */ + +/* + * Hooks and struct for unsupported pci providers + */ + +static dma_addr_t +sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size, int type) +{ + return 0; +} + +static void +sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction) +{ + return; +} + +static void * +sn_default_pci_bus_fixup(struct pcibus_bussoft *soft, struct pci_controller *controller) +{ + return NULL; +} + +static struct sn_pcibus_provider sn_pci_default_provider = { + .dma_map = sn_default_pci_map, + .dma_map_consistent = sn_default_pci_map, + .dma_unmap = sn_default_pci_unmap, + .bus_fixup = sn_default_pci_bus_fixup, +}; + +/* + * Retrieve the DMA Flush List given nasid, widget, and device. + * This list is needed to implement the WAR - Flush DMA data on PIO Reads. + */ +static inline u64 +sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, + u64 address) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST, + (u64) nasid, (u64) widget_num, + (u64) device_num, (u64) address, 0, 0, 0); + return ret_stuff.status; +} + +/* + * sn_pcidev_info_get() - Retrieve the pcidev_info struct for the specified + * device. + */ +inline struct pcidev_info * +sn_pcidev_info_get(struct pci_dev *dev) +{ + struct pcidev_info *pcidev; + + list_for_each_entry(pcidev, + &(SN_PLATFORM_DATA(dev)->pcidev_info), pdi_list) { + if (pcidev->pdi_linux_pcidev == dev) + return pcidev; + } + return NULL; +} + +/* Older PROM flush WAR + * + * 01/16/06 -- This war will be in place until a new official PROM is released. 
+ * Additionally note that the struct sn_flush_device_war also has to be + * removed from arch/ia64/sn/include/xtalk/hubdev.h + */ + +static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device, + struct sn_flush_device_common *common) +{ + struct sn_flush_device_war *war_list; + struct sn_flush_device_war *dev_entry; + struct ia64_sal_retval isrv = {0,0,0,0}; + + printk_once(KERN_WARNING + "PROM version < 4.50 -- implementing old PROM flush WAR\n"); + + war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL); + BUG_ON(!war_list); + + SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST, + nasid, widget, __pa(war_list), 0, 0, 0 ,0); + if (isrv.status) + panic("sn_device_fixup_war failed: %s\n", + ia64_sal_strerror(isrv.status)); + + dev_entry = war_list + device; + memcpy(common,dev_entry, sizeof(*common)); + kfree(war_list); + + return isrv.status; +} + +/* + * sn_common_hubdev_init() - This routine is called to initialize the HUB data + * structure for each node in the system. 
+ */ +void __init +sn_common_hubdev_init(struct hubdev_info *hubdev) +{ + + struct sn_flush_device_kernel *sn_flush_device_kernel; + struct sn_flush_device_kernel *dev_entry; + s64 status; + int widget, device, size; + + /* Attach the error interrupt handlers */ + if (hubdev->hdi_nasid & 1) /* If TIO */ + ice_error_init(hubdev); + else + hub_error_init(hubdev); + + for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) + hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev; + + if (!hubdev->hdi_flush_nasid_list.widget_p) + return; + + size = (HUB_WIDGET_ID_MAX + 1) * + sizeof(struct sn_flush_device_kernel *); + hubdev->hdi_flush_nasid_list.widget_p = + kzalloc(size, GFP_KERNEL); + BUG_ON(!hubdev->hdi_flush_nasid_list.widget_p); + + for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) { + size = DEV_PER_WIDGET * + sizeof(struct sn_flush_device_kernel); + sn_flush_device_kernel = kzalloc(size, GFP_KERNEL); + BUG_ON(!sn_flush_device_kernel); + + dev_entry = sn_flush_device_kernel; + for (device = 0; device < DEV_PER_WIDGET; + device++, dev_entry++) { + size = sizeof(struct sn_flush_device_common); + dev_entry->common = kzalloc(size, GFP_KERNEL); + BUG_ON(!dev_entry->common); + if (sn_prom_feature_available(PRF_DEVICE_FLUSH_LIST)) + status = sal_get_device_dmaflush_list( + hubdev->hdi_nasid, widget, device, + (u64)(dev_entry->common)); + else + status = sn_device_fixup_war(hubdev->hdi_nasid, + widget, device, + dev_entry->common); + if (status != SALRET_OK) + panic("SAL call failed: %s\n", + ia64_sal_strerror(status)); + + spin_lock_init(&dev_entry->sfdl_flush_lock); + } + + if (sn_flush_device_kernel) + hubdev->hdi_flush_nasid_list.widget_p[widget] = + sn_flush_device_kernel; + } +} + +void sn_pci_unfixup_slot(struct pci_dev *dev) +{ + struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev; + + sn_irq_unfixup(dev); + pci_dev_put(host_pci_dev); + pci_dev_put(dev); +} + +/* + * sn_pci_fixup_slot() + */ +void sn_pci_fixup_slot(struct pci_dev *dev, struct 
pcidev_info *pcidev_info, + struct sn_irq_info *sn_irq_info) +{ + int segment = pci_domain_nr(dev->bus); + struct pcibus_bussoft *bs; + struct pci_dev *host_pci_dev; + unsigned int bus_no, devfn; + + pci_dev_get(dev); /* for the sysdata pointer */ + + /* Add pcidev_info to list in pci_controller.platform_data */ + list_add_tail(&pcidev_info->pdi_list, + &(SN_PLATFORM_DATA(dev->bus)->pcidev_info)); + /* + * Using the PROMs values for the PCI host bus, get the Linux + * PCI host_pci_dev struct and set up host bus linkages + */ + + bus_no = (pcidev_info->pdi_slot_host_handle >> 32) & 0xff; + devfn = pcidev_info->pdi_slot_host_handle & 0xffffffff; + host_pci_dev = pci_get_domain_bus_and_slot(segment, bus_no, devfn); + + pcidev_info->host_pci_dev = host_pci_dev; + pcidev_info->pdi_linux_pcidev = dev; + pcidev_info->pdi_host_pcidev_info = SN_PCIDEV_INFO(host_pci_dev); + bs = SN_PCIBUS_BUSSOFT(dev->bus); + pcidev_info->pdi_pcibus_info = bs; + + if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) { + SN_PCIDEV_BUSPROVIDER(dev) = sn_pci_provider[bs->bs_asic_type]; + } else { + SN_PCIDEV_BUSPROVIDER(dev) = &sn_pci_default_provider; + } + + /* Only set up IRQ stuff if this device has a host bus context */ + if (bs && sn_irq_info->irq_irq) { + pcidev_info->pdi_sn_irq_info = sn_irq_info; + dev->irq = pcidev_info->pdi_sn_irq_info->irq_irq; + sn_irq_fixup(dev, sn_irq_info); + } else { + pcidev_info->pdi_sn_irq_info = NULL; + kfree(sn_irq_info); + } +} + +/* + * sn_common_bus_fixup - Perform platform specific bus fixup. + * Execute the ASIC specific fixup routine + * for this bus. + */ +void +sn_common_bus_fixup(struct pci_bus *bus, + struct pcibus_bussoft *prom_bussoft_ptr) +{ + int cnode; + struct pci_controller *controller; + struct hubdev_info *hubdev_info; + int nasid; + void *provider_soft; + struct sn_pcibus_provider *provider; + struct sn_platform_data *sn_platform_data; + + controller = PCI_CONTROLLER(bus); + /* + * Per-provider fixup. 
Copies the bus soft structure from prom + * to local area and links SN_PCIBUS_BUSSOFT(). + */ + + if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) { + printk(KERN_WARNING "sn_common_bus_fixup: Unsupported asic type, %d", + prom_bussoft_ptr->bs_asic_type); + return; + } + + if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) + return; /* no further fixup necessary */ + + provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type]; + if (provider == NULL) + panic("sn_common_bus_fixup: No provider registered for this asic type, %d", + prom_bussoft_ptr->bs_asic_type); + + if (provider->bus_fixup) + provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, + controller); + else + provider_soft = NULL; + + /* + * Generic bus fixup goes here. Don't reference prom_bussoft_ptr + * after this point. + */ + controller->platform_data = kzalloc(sizeof(struct sn_platform_data), + GFP_KERNEL); + BUG_ON(controller->platform_data == NULL); + sn_platform_data = + (struct sn_platform_data *) controller->platform_data; + sn_platform_data->provider_soft = provider_soft; + INIT_LIST_HEAD(&((struct sn_platform_data *) + controller->platform_data)->pcidev_info); + nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base); + cnode = nasid_to_cnodeid(nasid); + hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); + SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info = + &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]); + + /* + * If the node information we obtained during the fixup phase is + * invalid then set controller->node to -1 (undetermined) + */ + if (controller->node >= num_online_nodes()) { + struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus); + + printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u " + "L_IO=%llx L_MEM=%llx BASE=%llx\n", + b->bs_asic_type, b->bs_xid, b->bs_persist_busnum, + b->bs_legacy_io, b->bs_legacy_mem, b->bs_base); + printk(KERN_WARNING "on node %d but only %d nodes online." 
+ "Association set to undetermined.\n", + controller->node, num_online_nodes()); + controller->node = -1; + } +} + +void sn_bus_store_sysdata(struct pci_dev *dev) +{ + struct sysdata_el *element; + + element = kzalloc(sizeof(struct sysdata_el), GFP_KERNEL); + if (!element) { + dev_dbg(&dev->dev, "%s: out of memory!\n", __func__); + return; + } + element->sysdata = SN_PCIDEV_INFO(dev); + list_add(&element->entry, &sn_sysdata_list); +} + +void sn_bus_free_sysdata(void) +{ + struct sysdata_el *element; + struct list_head *list, *safe; + + list_for_each_safe(list, safe, &sn_sysdata_list) { + element = list_entry(list, struct sysdata_el, entry); + list_del(&element->entry); + list_del(&(((struct pcidev_info *) + (element->sysdata))->pdi_list)); + kfree(element->sysdata); + kfree(element); + } + return; +} + +/* + * hubdev_init_node() - Creates the HUB data structure and link them to it's + * own NODE specific data area. + */ +void __init hubdev_init_node(nodepda_t * npda, cnodeid_t node) +{ + struct hubdev_info *hubdev_info; + int size; + pg_data_t *pg; + + size = sizeof(struct hubdev_info); + + if (node >= num_online_nodes()) /* Headless/memless IO nodes */ + pg = NODE_DATA(0); + else + pg = NODE_DATA(node); + + hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size); + + npda->pdinfo = (void *)hubdev_info; +} + +geoid_t +cnodeid_get_geoid(cnodeid_t cnode) +{ + struct hubdev_info *hubdev; + + hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); + return hubdev->hdi_geoid; +} + +void sn_generate_path(struct pci_bus *pci_bus, char *address) +{ + nasid_t nasid; + cnodeid_t cnode; + geoid_t geoid; + moduleid_t moduleid; + u16 bricktype; + + nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base); + cnode = nasid_to_cnodeid(nasid); + geoid = cnodeid_get_geoid(cnode); + moduleid = geo_module(geoid); + + sprintf(address, "module_%c%c%c%c%.2d", + '0'+RACK_GET_CLASS(MODULE_GET_RACK(moduleid)), + '0'+RACK_GET_GROUP(MODULE_GET_RACK(moduleid)), + 
'0'+RACK_GET_NUM(MODULE_GET_RACK(moduleid)), + MODULE_GET_BTCHAR(moduleid), MODULE_GET_BPOS(moduleid)); + + /* Tollhouse requires slot id to be displayed */ + bricktype = MODULE_GET_BTYPE(moduleid); + if ((bricktype == L1_BRICKTYPE_191010) || + (bricktype == L1_BRICKTYPE_1932)) + sprintf(address + strlen(address), "^%d", + geo_slot(geoid)); +} + +void sn_pci_fixup_bus(struct pci_bus *bus) +{ + + if (SN_ACPI_BASE_SUPPORT()) + sn_acpi_bus_fixup(bus); + else + sn_bus_fixup(bus); +} + +/* + * sn_io_early_init - Perform early IO (and some non-IO) initialization. + * In particular, setup the sn_pci_provider[] array. + * This needs to be done prior to any bus scanning + * (acpi_scan_init()) in the ACPI case, as the SN + * bus fixup code will reference the array. + */ +static int __init +sn_io_early_init(void) +{ + int i; + + if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM()) + return 0; + + /* we set the acpi revision to that of the DSDT table OEM rev. */ + { + struct acpi_table_header *header = NULL; + + acpi_get_table(ACPI_SIG_DSDT, 1, &header); + BUG_ON(header == NULL); + sn_acpi_rev = header->oem_revision; + } + + /* + * prime sn_pci_provider[]. Individual provider init routines will + * override their respective default entries. 
+ */ + + for (i = 0; i < PCIIO_ASIC_MAX_TYPES; i++) + sn_pci_provider[i] = &sn_pci_default_provider; + + pcibr_init_provider(); + tioca_init_provider(); + tioce_init_provider(); + + /* + * This is needed to avoid bounce limit checks in the blk layer + */ + ia64_max_iommu_merge_mask = ~PAGE_MASK; + + sn_irq_lh_init(); + INIT_LIST_HEAD(&sn_sysdata_list); + sn_init_cpei_timer(); + +#ifdef CONFIG_PROC_FS + register_sn_procfs(); +#endif + + { + struct acpi_table_header *header; + (void)acpi_get_table(ACPI_SIG_DSDT, 1, &header); + printk(KERN_INFO "ACPI DSDT OEM Rev 0x%x\n", + header->oem_revision); + } + if (SN_ACPI_BASE_SUPPORT()) + sn_io_acpi_init(); + else + sn_io_init(); + return 0; +} + +arch_initcall(sn_io_early_init); + +/* + * sn_io_late_init() - Perform any final platform specific IO initialization. + */ + +int __init +sn_io_late_init(void) +{ + struct pci_bus *bus; + struct pcibus_bussoft *bussoft; + cnodeid_t cnode; + nasid_t nasid; + cnodeid_t near_cnode; + + if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM()) + return 0; + + /* + * Setup closest node in pci_controller->node for + * PIC, TIOCP, TIOCE (TIOCA does it during bus fixup using + * info from the PROM). 
+ */ + bus = NULL; + while ((bus = pci_find_next_bus(bus)) != NULL) { + bussoft = SN_PCIBUS_BUSSOFT(bus); + nasid = NASID_GET(bussoft->bs_base); + cnode = nasid_to_cnodeid(nasid); + if ((bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) || + (bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCE) || + (bussoft->bs_asic_type == PCIIO_ASIC_TYPE_PIC)) { + /* PCI Bridge: find nearest node with CPUs */ + int e = sn_hwperf_get_nearest_node(cnode, NULL, + &near_cnode); + if (e < 0) { + near_cnode = (cnodeid_t)-1; /* use any node */ + printk(KERN_WARNING "sn_io_late_init: failed " + "to find near node with CPUs for " + "node %d, err=%d\n", cnode, e); + } + PCI_CONTROLLER(bus)->node = near_cnode; + } + } + + sn_ioif_inited = 1; /* SN I/O infrastructure now initialized */ + + return 0; +} + +fs_initcall(sn_io_late_init); + +EXPORT_SYMBOL(sn_pci_unfixup_slot); +EXPORT_SYMBOL(sn_bus_store_sysdata); +EXPORT_SYMBOL(sn_bus_free_sysdata); +EXPORT_SYMBOL(sn_generate_path); + diff --git a/kernel/arch/ia64/sn/kernel/io_init.c b/kernel/arch/ia64/sn/kernel/io_init.c new file mode 100644 index 000000000..1be65eb07 --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/io_init.c @@ -0,0 +1,321 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "xtalk/hubdev.h" + +/* + * The code in this file will only be executed when running with + * a PROM that does _not_ have base ACPI IO support. + * (i.e., SN_ACPI_BASE_SUPPORT() == 0) + */ + +static int max_segment_number; /* Default highest segment number */ +static int max_pcibus_number = 255; /* Default highest pci bus number */ + + +/* + * Retrieve the hub device info structure for the given nasid. 
+ */
+static inline u64 sal_get_hubdev_info(u64 handle, u64 address)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	/* 'address' is passed to SAL unchanged; the caller below passes __pa(hubdev). */
+	SAL_CALL_NOLOCK(ret_stuff,
+			(u64) SN_SAL_IOIF_GET_HUBDEV_INFO,
+			(u64) handle, (u64) address, 0, 0, 0, 0, 0);
+	/* Only v0 is reported to the caller; ret_stuff.status is not examined. */
+	return ret_stuff.v0;
+}
+
+/*
+ * Retrieve the pci bus information given the bus number.
+ * Returns the v0 word of the SAL result.
+ */
+static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff,
+			(u64) SN_SAL_IOIF_GET_PCIBUS_INFO,
+			(u64) segment, (u64) busnum, (u64) address, 0, 0, 0, 0);
+	return ret_stuff.v0;
+}
+
+/*
+ * Retrieve the pci device information given the bus and device|function number.
+ * pci_dev and sn_irq_info are forwarded to SAL as plain u64 values (the
+ * caller in this file passes physical addresses obtained with __pa()).
+ * Returns the v0 word of the SAL result.
+ */
+static inline u64
+sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
+		    u64 sn_irq_info)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff,
+			(u64) SN_SAL_IOIF_GET_PCIDEV_INFO,
+			(u64) segment, (u64) bus_number, (u64) devfn,
+			(u64) pci_dev,
+			sn_irq_info, 0, 0);
+	return ret_stuff.v0;
+}
+
+
+/*
+ * sn_fixup_ionodes() - This routine initializes the HUB data structure for
+ *			each node in the system. This function is only
+ *			executed when running with a non-ACPI capable PROM.
+ */
+static void __init sn_fixup_ionodes(void)
+{
+
+	struct hubdev_info *hubdev;
+	u64 status;
+	u64 nasid;
+	int i;
+	extern void sn_common_hubdev_init(struct hubdev_info *);
+
+	/*
+	 * Get SGI Specific HUB chipset information.
+	 * Inform Prom that this kernel can support domain bus numbering.
+	 */
+	for (i = 0; i < num_cnodes; i++) {
+		hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo);
+		nasid = cnodeid_to_nasid(i);
+		/* Pre-load both limits before the SAL call fills in hubdev. */
+		hubdev->max_segment_number = 0xffffffff;
+		hubdev->max_pcibus_number = 0xff;
+		status = sal_get_hubdev_info(nasid, (u64) __pa(hubdev));
+		/* Nonzero status: skip this node without initializing it. */
+		if (status)
+			continue;
+
+		/*
+		 * Save the largest Domain and pcibus numbers found.
+		 * NOTE(review): the assignments below overwrite the file-scope
+		 * values on every node rather than keeping a running maximum,
+		 * so the last node reporting a nonzero segment number wins --
+		 * confirm the PROM reports system-wide values.
+		 */
+		if (hubdev->max_segment_number) {
+			/*
+			 * Dealing with a Prom that supports segments.
+			 */
+			max_segment_number = hubdev->max_segment_number;
+			max_pcibus_number = hubdev->max_pcibus_number;
+		}
+		sn_common_hubdev_init(hubdev);
+	}
+}
+
+/*
+ * sn_legacy_pci_window_fixup - Setup PCI resources for
+ *				legacy IO and MEM space. This needs to
+ *				be done here, as the PROM does not have
+ *				ACPI support defining the root buses
+ *				and their resources (_CRS).
+ *
+ * res must point at two struct resource entries: res[0] becomes a 64KB
+ * IORESOURCE_IO window starting at legacy_io, res[1] a 1MB IORESOURCE_MEM
+ * window starting at legacy_mem.
+ */
+static void
+sn_legacy_pci_window_fixup(struct resource *res,
+		u64 legacy_io, u64 legacy_mem)
+{
+		res[0].name = "legacy_io";
+		res[0].flags = IORESOURCE_IO;
+		res[0].start = legacy_io;
+		res[0].end = res[0].start + 0xffff;
+		res[0].parent = &ioport_resource;
+		res[1].name = "legacy_mem";
+		res[1].flags = IORESOURCE_MEM;
+		res[1].start = legacy_mem;
+		res[1].end = res[1].start + (1024 * 1024) - 1;
+		res[1].parent = &iomem_resource;
+}
+
+/*
+ * sn_io_slot_fixup() -   We are not running with an ACPI capable PROM,
+ *			  and need to convert the pci_dev->resource
+ *			  'start' and 'end' addresses to mapped addresses,
+ *			  and setup the pci_controller->window array entries.
+ *
+ * Allocates a pcidev_info and an sn_irq_info (panics on allocation failure),
+ * has the PROM fill them in, rewrites every resource for which the PROM
+ * supplied a PIO mapped address, and finally hands both structures to
+ * sn_pci_fixup_slot().
+ */
+void
+sn_io_slot_fixup(struct pci_dev *dev)
+{
+	int idx;
+	unsigned long addr, end, size, start;
+	struct pcidev_info *pcidev_info;
+	struct sn_irq_info *sn_irq_info;
+	int status;
+
+	pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL);
+	if (!pcidev_info)
+		panic("%s: Unable to alloc memory for pcidev_info", __func__);
+
+	sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
+	if (!sn_irq_info)
+		panic("%s: Unable to alloc memory for sn_irq_info", __func__);
+
+	/* Call to retrieve pci device information needed by kernel. */
+	status = sal_get_pcidev_info((u64) pci_domain_nr(dev),
+		(u64) dev->bus->number,
+		dev->devfn,
+		(u64) __pa(pcidev_info),
+		(u64) __pa(sn_irq_info));
+
+	BUG_ON(status); /* Cannot get platform pci device information */
+
+
+	/* Copy over PIO Mapped Addresses */
+	for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
+
+		/* No PROM mapping for this BAR: leave the resource untouched. */
+		if (!pcidev_info->pdi_pio_mapped_addr[idx]) {
+			continue;
+		}
+
+		start = dev->resource[idx].start;
+		end = dev->resource[idx].end;
+		/* 'size' is end - start, i.e. one less than the byte length. */
+		size = end - start;
+		if (size == 0) {
+			continue;
+		}
+		addr = pcidev_info->pdi_pio_mapped_addr[idx];
+		/* Clear the top four address bits, then OR in the uncached offset. */
+		addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET;
+		dev->resource[idx].start = addr;
+		dev->resource[idx].end = addr + size;
+
+		/*
+		 * if it's already in the device structure, remove it before
+		 * inserting
+		 */
+		if (dev->resource[idx].parent && dev->resource[idx].parent->child)
+			release_resource(&dev->resource[idx]);
+
+		if (dev->resource[idx].flags & IORESOURCE_IO)
+			insert_resource(&ioport_resource, &dev->resource[idx]);
+		else
+			insert_resource(&iomem_resource, &dev->resource[idx]);
+		/*
+		 * If ROM, set the actual ROM image size, and mark as
+		 * shadowed in PROM.
+		 */
+		if (idx == PCI_ROM_RESOURCE) {
+			size_t image_size;
+			void __iomem *rom;
+
+			/*
+			 * size + 1 converts back to a byte length.
+			 * NOTE(review): the ioremap() result is neither
+			 * checked nor unmapped in this function.
+			 */
+			rom = ioremap(pci_resource_start(dev, PCI_ROM_RESOURCE),
+				      size + 1);
+			image_size = pci_get_rom_size(dev, rom, size + 1);
+			dev->resource[PCI_ROM_RESOURCE].end =
+				dev->resource[PCI_ROM_RESOURCE].start +
+				image_size - 1;
+			dev->resource[PCI_ROM_RESOURCE].flags |=
+						 IORESOURCE_ROM_BIOS_COPY;
+		}
+	}
+
+	sn_pci_fixup_slot(dev, pcidev_info, sn_irq_info);
+}
+
+EXPORT_SYMBOL(sn_io_slot_fixup);
+
+/*
+ * sn_pci_controller_fixup() - This routine sets up a bus's resources
+ *			       consistent with the Linux PCI abstraction layer.
+ */
+/*
+ * segment/busnum identify the root bus to query in the PROM.  The 'bus'
+ * argument is not read; it is only reused as a local for the scan result.
+ * Returns silently when the PROM reports (status > 0) that the bus does
+ * not exist, or when the root bus scan fails.
+ */
+static void __init
+sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
+{
+	s64 status = 0;
+	struct pci_controller *controller;
+	struct pcibus_bussoft *prom_bussoft_ptr;
+	struct resource *res;
+	LIST_HEAD(resources);
+
+	/* The PROM stores a physical pointer into prom_bussoft_ptr. */
+	status = sal_get_pcibus_info((u64) segment, (u64) busnum,
+				     (u64) ia64_tpa(&prom_bussoft_ptr));
+	if (status > 0)
+		return;		/*bus # does not exist */
+	prom_bussoft_ptr = __va(prom_bussoft_ptr);
+
+	controller = kzalloc(sizeof(*controller), GFP_KERNEL);
+	BUG_ON(!controller);
+	controller->segment = segment;
+
+	res = kcalloc(2, sizeof(struct resource), GFP_KERNEL);
+	BUG_ON(!res);
+
+	/*
+	 * Temporarily save the prom_bussoft_ptr for use by sn_bus_fixup().
+	 * (platform_data will be overwritten later in sn_common_bus_fixup())
+	 */
+	controller->platform_data = prom_bussoft_ptr;
+
+	sn_legacy_pci_window_fixup(res,
+			prom_bussoft_ptr->bs_legacy_io,
+			prom_bussoft_ptr->bs_legacy_mem);
+	pci_add_resource_offset(&resources,	&res[0],
+			prom_bussoft_ptr->bs_legacy_io);
+	pci_add_resource_offset(&resources,	&res[1],
+			prom_bussoft_ptr->bs_legacy_mem);
+
+	bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, controller,
+				&resources);
+	if (bus == NULL) {
+		/*
+		 * The list entries allocated by pci_add_resource_offset()
+		 * are still on our local list when the scan fails; release
+		 * them before dropping the resources they point at.
+		 */
+		pci_free_resource_list(&resources);
+		kfree(res);
+		kfree(controller);
+		return;
+	}
+	pci_bus_add_devices(bus);
+}
+
+/*
+ * sn_bus_fixup - for a root bus, hand the PROM bussoft pointer saved in
+ *		  platform_data to sn_common_bus_fixup(); then run
+ *		  sn_io_slot_fixup() on every device on the bus.
+ */
+void
+sn_bus_fixup(struct pci_bus *bus)
+{
+	struct pci_dev *pci_dev = NULL;
+	struct pcibus_bussoft *prom_bussoft_ptr;
+
+	if (!bus->parent) {  /* If root bus */
+		prom_bussoft_ptr = PCI_CONTROLLER(bus)->platform_data;
+		if (prom_bussoft_ptr == NULL) {
+			printk(KERN_ERR
+			       "sn_bus_fixup: 0x%04x:0x%02x Unable to "
+			       "obtain prom_bussoft_ptr\n",
+			       pci_domain_nr(bus), bus->number);
+			return;
+		}
+		sn_common_bus_fixup(bus, prom_bussoft_ptr);
+        }
+        list_for_each_entry(pci_dev, &bus->devices, bus_list) {
+                sn_io_slot_fixup(pci_dev);
+        }
+
+}
+
+/*
+ * sn_io_init - PROM does not have ACPI support to define nodes or root buses,
+ *		so we need to do things the hard way, including
initiating the
+ *		bus scanning ourselves.
+ */
+
+void __init sn_io_init(void)
+{
+	int i, j;
+
+	/* Sets max_segment_number / max_pcibus_number used by the loops below. */
+	sn_fixup_ionodes();
+
+	/* busses are not known yet ... */
+	/* Probe every (segment, bus) pair, both bounds inclusive. */
+	for (i = 0; i <= max_segment_number; i++)
+		for (j = 0; j <= max_pcibus_number; j++)
+			sn_pci_controller_fixup(i, j, NULL);
+}
diff --git a/kernel/arch/ia64/sn/kernel/iomv.c b/kernel/arch/ia64/sn/kernel/iomv.c
new file mode 100644
index 000000000..c77ebdf98
--- /dev/null
+++ b/kernel/arch/ia64/sn/kernel/iomv.c
@@ -0,0 +1,82 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2003, 2006 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* True for ports 0x3b0-0x3bb and 0x3c0-0x3df. */
+#define IS_LEGACY_VGA_IOPORT(p) \
+	(((p) >= 0x3b0 && (p) <= 0x3bb) || ((p) >= 0x3c0 && (p) <= 0x3df))
+
+/**
+ * sn_io_addr - convert an in/out port to an i/o address
+ * @port: port to convert
+ *
+ * Legacy in/out instructions are converted to ld/st instructions
+ * on IA64.  This routine will convert a port number into a valid
+ * SN i/o address.  Used by sn_in*() and sn_out*().
+ *
+ * Returns NULL on real hardware for non-VGA ports below 64K.
+ */
+
+void *sn_io_addr(unsigned long port)
+{
+	if (!IS_RUNNING_ON_SIMULATOR()) {
+		if (IS_LEGACY_VGA_IOPORT(port))
+			return (__ia64_mk_io_addr(port));
+		/* On sn2, legacy I/O ports don't point at anything */
+		if (port < (64 * 1024))
+			return NULL;
+		if (SN_ACPI_BASE_SUPPORT())
+			return (__ia64_mk_io_addr(port));
+		else
+			return ((void *)(port | __IA64_UNCACHED_OFFSET));
+	} else {
+		/* but the simulator uses them... */
+		unsigned long addr;
+
+		/*
+		 * word align port, but need more than 10 bits
+		 * for accessing registers in bedrock local block
+		 * (so we don't do port&0xfff)
+		 */
+		addr = (is_shub2() ? 0xc00000028c000000UL : 0xc0000087cc000000UL) | ((port >> 2) << 12);
+		/* For ports 0x1f0-0x1f7, 0x3f6 and 0x3f7 the raw port is OR-ed in too. */
+		if ((port >= 0x1f0 && port <= 0x1f7) || port == 0x3f6 || port == 0x3f7)
+			addr |= port;
+		return (void *)addr;
+	}
+}
+
+EXPORT_SYMBOL(sn_io_addr);
+
+/**
+ * __sn_mmiowb - I/O space memory barrier
+ *
+ * See arch/ia64/include/asm/io.h and Documentation/DocBook/deviceiobook.tmpl
+ * for details.
+ *
+ * On SN2, we wait for the PIO_WRITE_STATUS SHub register to clear.
+ * See PV 871084 for details about the WAR about zero value.
+ *
+ */
+void __sn_mmiowb(void)
+{
+	volatile unsigned long *adr = pda->pio_write_status_addr;
+	unsigned long val = pda->pio_write_status_val;
+
+	/* Spin until the masked pending-write count equals the per-CPU idle value. */
+	while ((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != val)
+		cpu_relax();
+}
+
+EXPORT_SYMBOL(__sn_mmiowb);
diff --git a/kernel/arch/ia64/sn/kernel/irq.c b/kernel/arch/ia64/sn/kernel/irq.c
new file mode 100644
index 000000000..85d095154
--- /dev/null
+++ b/kernel/arch/ia64/sn/kernel/irq.c
@@ -0,0 +1,488 @@
+/*
+ * Platform dependent support for SGI SN
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static void register_intr_pda(struct sn_irq_info *sn_irq_info);
+static void unregister_intr_pda(struct sn_irq_info *sn_irq_info);
+
+extern int sn_ioif_inited;
+/* Array of per-IRQ list heads (allocated in sn_irq_lh_init()). */
+struct list_head **sn_irq_lh;
+/* Serializes writers of the sn_irq_lh[] lists; readers use RCU. */
+static DEFINE_SPINLOCK(sn_irq_info_lock); /* non-IRQ lock */
+
+/*
+ * Ask the PROM (SAL_INTR_ALLOC) to allocate an interrupt.  The physical
+ * address of sn_irq_info is passed to SAL.  Returns the SAL status word;
+ * callers in this file treat nonzero as failure.
+ */
+u64 sn_intr_alloc(nasid_t local_nasid, int local_widget,
+				     struct sn_irq_info *sn_irq_info,
+				     int req_irq, nasid_t req_nasid,
+				     int req_slice)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
+			(u64) SAL_INTR_ALLOC, (u64) local_nasid,
+			(u64) local_widget, __pa(sn_irq_info), (u64) req_irq,
+			(u64) req_nasid, (u64) req_slice);
+
+	return ret_stuff.status;
+}
+
+/*
+ * Release a PROM interrupt (SAL_INTR_FREE), identified by the irq number
+ * and cookie stored in sn_irq_info.  The SAL result is discarded.
+ */
+void sn_intr_free(nasid_t local_nasid, int local_widget,
+				struct sn_irq_info *sn_irq_info)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
+			(u64) SAL_INTR_FREE, (u64) local_nasid,
+			(u64) local_widget, (u64) sn_irq_info->irq_irq,
+			(u64) sn_irq_info->irq_cookie, 0, 0);
+}
+
+/*
+ * Ask the PROM (SAL_INTR_REDIRECT) to retarget an existing interrupt at
+ * req_nasid/req_slice.  Returns the SAL status word.
+ */
+u64 sn_intr_redirect(nasid_t local_nasid, int local_widget,
+		      struct sn_irq_info *sn_irq_info,
+		      nasid_t req_nasid, int req_slice)
+{
+	struct ia64_sal_retval ret_stuff;
+	ret_stuff.status = 0;
+	ret_stuff.v0 = 0;
+
+	SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
+			(u64) SAL_INTR_REDIRECT, (u64) local_nasid,
+			(u64) local_widget, __pa(sn_irq_info),
+			(u64) req_nasid, (u64) req_slice, 0);
+
+	return ret_stuff.status;
+}
+
+/* irq_chip startup hook: nothing to do, always returns 0. */
+static unsigned int sn_startup_irq(struct irq_data *data)
+{
+	return 0;
+}
+
+/* irq_chip shutdown hook: intentionally empty. */
+static void sn_shutdown_irq(struct irq_data *data)
+{
+}
+
+extern void ia64_mca_register_cpev(int);
+
+/* Only the CPE vector needs work: deregister it by passing vector 0. */
+static void sn_disable_irq(struct irq_data *data)
+{
+	if (data->irq == local_vector_to_irq(IA64_CPE_VECTOR))
+		ia64_mca_register_cpev(0);
+}
+
+/* Only the CPE vector needs work: register it with the MCA code. */
+static void sn_enable_irq(struct irq_data *data)
+{
+	if (data->irq == local_vector_to_irq(IA64_CPE_VECTOR))
+		ia64_mca_register_cpev(data->irq);
+}
+
+/*
+ * Ack: write the currently pending interrupt bits back through the
+ * SH_EVENT_OCCURRED alias register, mark the vector as in service in the
+ * per-CPU pda, then let genirq complete any pending affinity move.
+ */
+static void sn_ack_irq(struct irq_data *data)
+{
+	u64 event_occurred, mask;
+	unsigned int irq = data->irq & 0xff;
+
+	event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED));
+	mask = event_occurred & SH_ALL_INT_MASK;
+	HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), mask);
+	__set_bit(irq, (volatile void *)pda->sn_in_service_ivecs);
+
+	irq_move_irq(data);
+}
+
+/*
+ * Retarget the interrupt described by sn_irq_info at nasid/slice.
+ *
+ * Returns the sn_irq_info now describing the interrupt (the same pointer
+ * when the PROM redirect succeeded, otherwise a new copy that replaces the
+ * old one in the RCU list), or NULL when the irq has no bridge, the copy
+ * could not be allocated, or the PROM re-allocation failed.
+ */
+struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info,
+				       nasid_t nasid, int slice)
+{
+	int vector;
+	int cpuid;
+#ifdef CONFIG_SMP
+	int cpuphys;
+#endif
+	int64_t bridge;
+	int local_widget, status;
+	nasid_t local_nasid;
+	struct sn_irq_info *new_irq_info;
+	struct sn_pcibus_provider *pci_provider;
+
+	bridge = (u64) sn_irq_info->irq_bridge;
+	if (!bridge) {
+		return NULL; /* irq is not a device interrupt */
+	}
+
+	local_nasid = NASID_GET(bridge);
+
+	/* Odd nasids use the TIO widget decoding. */
+	if (local_nasid & 1)
+		local_widget = TIO_SWIN_WIDGETNUM(bridge);
+	else
+		local_widget = SWIN_WIDGETNUM(bridge);
+	vector = sn_irq_info->irq_irq;
+
+	/* Make use of SAL_INTR_REDIRECT if PROM supports it */
+	status = sn_intr_redirect(local_nasid, local_widget, sn_irq_info, nasid, slice);
+	if (!status) {
+		new_irq_info = sn_irq_info;
+		goto finish_up;
+	}
+
+	/*
+	 * PROM does not support SAL_INTR_REDIRECT, or it failed.
+	 * Revert to old method.
+	 */
+	new_irq_info = kmemdup(sn_irq_info, sizeof(struct sn_irq_info),
+			       GFP_ATOMIC);
+	if (new_irq_info == NULL)
+		return NULL;
+
+	/* Free the old PROM new_irq_info structure */
+	sn_intr_free(local_nasid, local_widget, new_irq_info);
+	unregister_intr_pda(new_irq_info);
+
+	/* allocate a new PROM new_irq_info struct */
+	status = sn_intr_alloc(local_nasid, local_widget,
+			       new_irq_info, vector,
+			       nasid, slice);
+
+	/* SAL call failed */
+	if (status) {
+		kfree(new_irq_info);
+		return NULL;
+	}
+
+	register_intr_pda(new_irq_info);
+	spin_lock(&sn_irq_info_lock);
+	list_replace_rcu(&sn_irq_info->list, &new_irq_info->list);
+	spin_unlock(&sn_irq_info_lock);
+	kfree_rcu(sn_irq_info, rcu);
+
+
+finish_up:
+	/* Update kernels new_irq_info with new target info */
+	cpuid = nasid_slice_to_cpuid(new_irq_info->irq_nasid,
+				     new_irq_info->irq_slice);
+	new_irq_info->irq_cpuid = cpuid;
+
+	pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type];
+
+	/*
+	 * If this represents a line interrupt, target it.  If it's
+	 * an msi (irq_int_bit < 0), it's already targeted.
+	 */
+	if (new_irq_info->irq_int_bit >= 0 &&
+	    pci_provider && pci_provider->target_interrupt)
+		(pci_provider->target_interrupt)(new_irq_info);
+
+#ifdef CONFIG_SMP
+	cpuphys = cpu_physical_id(cpuid);
+	set_irq_affinity_info((vector & 0xff), cpuphys, 0);
+#endif
+
+	return new_irq_info;
+}
+
+/*
+ * irq_chip affinity hook: retarget every sn_irq_info sharing this irq at
+ * the first online CPU in @mask.
+ *
+ * Returns 0 on success, -EINVAL when @mask contains no online CPU.
+ */
+static int sn_set_affinity_irq(struct irq_data *data,
+			       const struct cpumask *mask, bool force)
+{
+	struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
+	unsigned int irq = data->irq;
+	unsigned int cpu;
+	nasid_t nasid;
+	int slice;
+
+	/*
+	 * Pick the target once so nasid and slice always describe the same
+	 * CPU, and refuse an empty intersection instead of indexing the
+	 * per-CPU tables with an out-of-range id.
+	 */
+	cpu = cpumask_first_and(mask, cpu_online_mask);
+	if (cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	nasid = cpuid_to_nasid(cpu);
+	slice = cpuid_to_slice(cpu);
+
+	/* _safe iteration: sn_retarget_vector() may replace the entry. */
+	list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
+				 sn_irq_lh[irq], list)
+		(void)sn_retarget_vector(sn_irq_info, nasid, slice);
+
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+void sn_set_err_irq_affinity(unsigned int irq)
+{
+        /*
+         * On systems which support CPU disabling (SHub2), all error interrupts
+         * are targeted at the boot CPU.
+         */
+        if (is_shub2() && sn_prom_feature_available(PRF_CPU_DISABLE_SUPPORT))
+                set_irq_affinity_info(irq, cpu_physical_id(0), 0);
+}
+#else
+void sn_set_err_irq_affinity(unsigned int irq) { }
+#endif
+
+/* irq_chip mask hook: intentionally empty. */
+static void
+sn_mask_irq(struct irq_data *data)
+{
+}
+
+/* irq_chip unmask hook: intentionally empty. */
+static void
+sn_unmask_irq(struct irq_data *data)
+{
+}
+
+struct irq_chip irq_type_sn = {
+	.name			= "SN hub",
+	.irq_startup		= sn_startup_irq,
+	.irq_shutdown		= sn_shutdown_irq,
+	.irq_enable		= sn_enable_irq,
+	.irq_disable		= sn_disable_irq,
+	.irq_ack		= sn_ack_irq,
+	.irq_mask		= sn_mask_irq,
+	.irq_unmask		= sn_unmask_irq,
+	.irq_set_affinity	= sn_set_affinity_irq
+};
+
+/* Identity mapping for valid irqs; anything >= IA64_NUM_VECTORS maps to 0. */
+ia64_vector sn_irq_to_vector(int irq)
+{
+	if (irq >= IA64_NUM_VECTORS)
+		return 0;
+	return (ia64_vector)irq;
+}
+
+unsigned int sn_local_vector_to_irq(u8 vector)
+{
+	return (CPU_VECTOR_TO_IRQ(smp_processor_id(), vector));
+}
+
+/*
+ * Set the SN2 device vector range and install irq_type_sn on every irq
+ * that still has the default no_irq_chip.
+ */
+void sn_irq_init(void)
+{
+	int i;
+
+	ia64_first_device_vector = IA64_SN2_FIRST_DEVICE_VECTOR;
+	ia64_last_device_vector = IA64_SN2_LAST_DEVICE_VECTOR;
+
+	for (i = 0; i < NR_IRQS; i++) {
+		if (irq_get_chip(i) == &no_irq_chip)
+			irq_set_chip(i, &irq_type_sn);
+	}
+}
+
+/*
+ * Widen the target CPU's [sn_first_irq, sn_last_irq] window so that it
+ * includes this interrupt.  sn_first_irq == 0 means "no irq recorded yet".
+ */
+static void register_intr_pda(struct sn_irq_info *sn_irq_info)
+{
+	int irq = sn_irq_info->irq_irq;
+	int cpu = sn_irq_info->irq_cpuid;
+
+	if (pdacpu(cpu)->sn_last_irq < irq) {
+		pdacpu(cpu)->sn_last_irq = irq;
+	}
+
+	if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq)
+		pdacpu(cpu)->sn_first_irq = irq;
+}
+
+/*
+ * Return nonzero when any sn_irq_info on sn_irq_lh[irq] targets @cpu.
+ * Must be called inside an RCU read-side critical section.
+ */
+static int sn_irq_targets_cpu(int irq, int cpu)
+{
+	struct sn_irq_info *tmp_irq_info;
+
+	list_for_each_entry_rcu(tmp_irq_info, sn_irq_lh[irq], list) {
+		if (tmp_irq_info->irq_cpuid == cpu)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Shrink the target CPU's [sn_first_irq, sn_last_irq] window after the
+ * interrupt described by sn_irq_info goes away: when it was the window's
+ * upper (lower) bound, the bound moves to the nearest remaining irq that
+ * still targets the CPU, or to 0 when none is left.
+ *
+ * The scans stop exactly on the matching irq.  Letting the loop's step
+ * expression run once more after a match would store a bound one past the
+ * match and make sn_lb_int_war_check() skip that irq.
+ */
+static void unregister_intr_pda(struct sn_irq_info *sn_irq_info)
+{
+	int irq = sn_irq_info->irq_irq;
+	int cpu = sn_irq_info->irq_cpuid;
+	int i;
+
+	rcu_read_lock();
+	if (pdacpu(cpu)->sn_last_irq == irq) {
+		/* Scan downwards; i ends at the match, or at 0 if none. */
+		for (i = pdacpu(cpu)->sn_last_irq - 1; i > 0; i--) {
+			if (sn_irq_targets_cpu(i, cpu))
+				break;
+		}
+		pdacpu(cpu)->sn_last_irq = i;
+	}
+
+	if (pdacpu(cpu)->sn_first_irq == irq) {
+		/* Scan upwards; i ends at the match, or at NR_IRQS if none. */
+		for (i = pdacpu(cpu)->sn_first_irq + 1; i < NR_IRQS; i++) {
+			if (sn_irq_targets_cpu(i, cpu))
+				break;
+		}
+		pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 0 : i);
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Attach a PROM-described interrupt to its PCI device: takes a reference
+ * on pci_dev, records the target CPU, links sn_irq_info into
+ * sn_irq_lh[irq], reserves the vector and updates the per-CPU irq window.
+ */
+void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info)
+{
+	nasid_t nasid = sn_irq_info->irq_nasid;
+	int slice = sn_irq_info->irq_slice;
+	int cpu = nasid_slice_to_cpuid(nasid, slice);
+#ifdef CONFIG_SMP
+	int cpuphys;
+#endif
+
+	pci_dev_get(pci_dev);
+	sn_irq_info->irq_cpuid = cpu;
+	sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev);
+
+	/* link it into the sn_irq[irq] list */
+	spin_lock(&sn_irq_info_lock);
+	list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]);
+	reserve_irq_vector(sn_irq_info->irq_irq);
+	/* irq_int_bit == -1 marks an MSI; only line interrupts get the level handler. */
+	if (sn_irq_info->irq_int_bit != -1)
+		irq_set_handler(sn_irq_info->irq_irq, handle_level_irq);
+	spin_unlock(&sn_irq_info_lock);
+
+	register_intr_pda(sn_irq_info);
+#ifdef CONFIG_SMP
+	cpuphys = cpu_physical_id(cpu);
+	set_irq_affinity_info(sn_irq_info->irq_irq, cpuphys, 0);
+	/*
+	 * Affinity was set by the PROM, prevent it from
+	 * being reset by the request_irq() path.
+	 */
+	irqd_mark_affinity_was_set(irq_get_irq_data(sn_irq_info->irq_irq));
+#endif
+}
+
+/*
+ * Undo sn_irq_fixup() for the device's pdi_sn_irq_info: unlink it from the
+ * RCU list, free the vector when no other user remains, free the structure
+ * after a grace period and drop the device reference.
+ */
+void sn_irq_unfixup(struct pci_dev *pci_dev)
+{
+	struct sn_irq_info *sn_irq_info;
+
+	/* Only cleanup IRQ stuff if this device has a host bus context */
+	if (!SN_PCIDEV_BUSSOFT(pci_dev))
+		return;
+
+	sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info;
+	if (!sn_irq_info)
+		return;
+	/* irq_irq == 0: the structure is freed directly without unlinking. */
+	if (!sn_irq_info->irq_irq) {
+		kfree(sn_irq_info);
+		return;
+	}
+
+	unregister_intr_pda(sn_irq_info);
+	spin_lock(&sn_irq_info_lock);
+	list_del_rcu(&sn_irq_info->list);
+	spin_unlock(&sn_irq_info_lock);
+	if (list_empty(sn_irq_lh[sn_irq_info->irq_irq]))
+		free_irq_vector(sn_irq_info->irq_irq);
+	kfree_rcu(sn_irq_info, rcu);
+	pci_dev_put(pci_dev);
+
+}
+
+/* Invoke the bus provider's force_interrupt hook unless the irq is disabled. */
+static inline void
+sn_call_force_intr_provider(struct sn_irq_info *sn_irq_info)
+{
+	struct sn_pcibus_provider *pci_provider;
+
+	pci_provider = sn_pci_provider[sn_irq_info->irq_bridge_type];
+
+	/* Don't force an interrupt if the irq has been disabled */
+	if (!irqd_irq_disabled(irq_get_irq_data(sn_irq_info->irq_irq)) &&
+	    pci_provider && pci_provider->force_interrupt)
+		(*pci_provider->force_interrupt)(sn_irq_info);
+}
+
+/*
+ * Check for lost interrupts.  If the PIC int_status reg. says that
+ * an interrupt has been sent, but not handled, and the interrupt
+ * is not pending in either the cpu irr regs or in the soft irr regs,
+ * and the interrupt is not in service, then the interrupt may have
+ * been lost.  Force an interrupt on that pin.  It is possible that
+ * the interrupt is in flight, so we may generate a spurious interrupt,
+ * but we should never miss a real lost interrupt.
+ */
+static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
+{
+	u64 regval;
+	struct pcidev_info *pcidev_info;
+	struct pcibus_info *pcibus_info;
+
+	/*
+	 * Bridge types attached to TIO (anything but PIC) do not need this WAR
+	 * since they do not target Shub II interrupt registers.  If that
+	 * ever changes, this check needs to accommodate.
+	 */
+	if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_PIC)
+		return;
+
+	pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	if (!pcidev_info)
+		return;
+
+	pcibus_info =
+	    (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info->
+	    pdi_pcibus_info;
+	regval = pcireg_intr_status_get(pcibus_info);
+
+	if (!ia64_get_irr(irq_to_vector(irq))) {
+		if (!test_bit(irq, pda->sn_in_service_ivecs)) {
+			regval &= 0xff;
+			/* Bit set now and also in the previous sample. */
+			if (sn_irq_info->irq_int_bit & regval &
+			    sn_irq_info->irq_last_intr) {
+				regval &= ~(sn_irq_info->irq_int_bit & regval);
+				sn_call_force_intr_provider(sn_irq_info);
+			}
+		}
+	}
+	/* Remember this sample for the next check. */
+	sn_irq_info->irq_last_intr = regval;
+}
+
+/*
+ * Run sn_check_intr() over every interrupt in the current CPU's
+ * [sn_first_irq, sn_last_irq] window.  Does nothing until the SN I/O
+ * infrastructure is initialized or while the CPU has no irq recorded.
+ */
+void sn_lb_int_war_check(void)
+{
+	struct sn_irq_info *sn_irq_info;
+	int i;
+
+	if (!sn_ioif_inited || pda->sn_first_irq == 0)
+		return;
+
+	rcu_read_lock();
+	for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) {
+		list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[i], list) {
+			sn_check_intr(i, sn_irq_info);
+		}
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Allocate the sn_irq_lh[] table: NR_IRQS pointers, each pointing at its
+ * own separately allocated, initialized list head.  Panics on any
+ * allocation failure.
+ */
+void __init sn_irq_lh_init(void)
+{
+	int i;
+
+	sn_irq_lh = kmalloc(sizeof(struct list_head *) * NR_IRQS, GFP_KERNEL);
+	if (!sn_irq_lh)
+		panic("SN PCI INIT: Failed to allocate memory for PCI init\n");
+
+	for (i = 0; i < NR_IRQS; i++) {
+		sn_irq_lh[i] = kmalloc(sizeof(struct list_head), GFP_KERNEL);
+		if (!sn_irq_lh[i])
+			panic("SN PCI INIT: Failed IRQ memory allocation\n");
+
+		INIT_LIST_HEAD(sn_irq_lh[i]);
+	}
+}
diff --git a/kernel/arch/ia64/sn/kernel/klconflib.c b/kernel/arch/ia64/sn/kernel/klconflib.c
new file mode 100644
index 000000000..87682b48e
--- /dev/null
+++ b/kernel/arch/ia64/sn/kernel/klconflib.c
@@ -0,0 +1,107 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+char brick_types[MAX_BRICK_TYPES + 1] = "cri.xdpn%#=vo^kjbf890123456789...";
+/*
+ * Format a module id for printing.
+ *
+ * There are three possible formats:
+ *
+ *   MODULE_FORMAT_BRIEF	is the brief 6-character format, including
+ *				the actual brick-type as recorded in the
+ *				moduleid_t, eg. 002c15 for a C-brick, or
+ *				101#17 for a PX-brick.
+ *
+ *   MODULE_FORMAT_LONG		is the hwgraph format, eg. rack/002/bay/15
+ *				or rack/101/bay/17 (note that the brick
+ *				type does not appear in this format).
+ *
+ *   MODULE_FORMAT_LCD		is like MODULE_FORMAT_BRIEF, except that it
+ *				ensures that the module id provided appears
+ *				exactly as it would on the LCD display of
+ *				the corresponding brick, eg. still 002c15
+ *				for a C-brick, but 101p17 for a PX-brick.
+ *
+ * maule (9/13/04):  Removed top-level check for (fmt == MODULE_FORMAT_LCD)
+ * making MODULE_FORMAT_LCD equivalent to MODULE_FORMAT_BRIEF.  It was
+ * decided that all callers should assume the returned string should be what
+ * is displayed on the brick L1 LCD.
+ *
+ * The output is written with unbounded strcpy()/sprintf(); the caller must
+ * supply a buffer large enough for the chosen format plus the terminating
+ * NUL.  For any other fmt value only the bay position is written.
+ */
+void
+format_module_id(char *buffer, moduleid_t m, int fmt)
+{
+	int rack, position;
+	unsigned char brickchar;
+
+	rack = MODULE_GET_RACK(m);
+	brickchar = MODULE_GET_BTCHAR(m);
+
+	/* Be sure we use the same brick type character as displayed
+	 * on the brick's LCD
+	 */
+	switch (brickchar)
+	{
+	case L1_BRICKTYPE_GA:
+	case L1_BRICKTYPE_OPUS_TIO:
+		brickchar = L1_BRICKTYPE_C;
+		break;
+
+	case L1_BRICKTYPE_PX:
+	case L1_BRICKTYPE_PE:
+	case L1_BRICKTYPE_PA:
+	case L1_BRICKTYPE_SA: /* we can move this to the "I's" later
+			       * if that makes more sense
+			       */
+		brickchar = L1_BRICKTYPE_P;
+		break;
+
+	case L1_BRICKTYPE_IX:
+	case L1_BRICKTYPE_IA:
+
+		brickchar = L1_BRICKTYPE_I;
+		break;
+	}
+
+	position = MODULE_GET_BPOS(m);
+
+	if ((fmt == MODULE_FORMAT_BRIEF) || (fmt == MODULE_FORMAT_LCD)) {
+		/* Brief module number format, eg. 002c15 */
+
+		/* Decompress the rack number */
+		*buffer++ = '0' + RACK_GET_CLASS(rack);
+		*buffer++ = '0' + RACK_GET_GROUP(rack);
+		*buffer++ = '0' + RACK_GET_NUM(rack);
+
+		/* Add the brick type */
+		*buffer++ = brickchar;
+	}
+	else if (fmt == MODULE_FORMAT_LONG) {
+		/* Fuller hwgraph format, eg. rack/002/bay/15 */
+
+		strcpy(buffer, "rack" "/");  buffer += strlen(buffer);
+
+		*buffer++ = '0' + RACK_GET_CLASS(rack);
+		*buffer++ = '0' + RACK_GET_GROUP(rack);
+		*buffer++ = '0' + RACK_GET_NUM(rack);
+
+		strcpy(buffer, "/" "bay" "/");  buffer += strlen(buffer);
+	}
+
+	/* Add the bay position, using at least two digits */
+	if (position < 10)
+		*buffer++ = '0';
+	/* sprintf() also writes the terminating NUL. */
+	sprintf(buffer, "%d", position);
+}
diff --git a/kernel/arch/ia64/sn/kernel/machvec.c b/kernel/arch/ia64/sn/kernel/machvec.c
new file mode 100644
index 000000000..02bb91558
--- /dev/null
+++ b/kernel/arch/ia64/sn/kernel/machvec.c
@@ -0,0 +1,11 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.
See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2002-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#define MACHVEC_PLATFORM_NAME		sn2
+#define MACHVEC_PLATFORM_HEADER
+#include
diff --git a/kernel/arch/ia64/sn/kernel/mca.c b/kernel/arch/ia64/sn/kernel/mca.c
new file mode 100644
index 000000000..27793f7aa
--- /dev/null
+++ b/kernel/arch/ia64/sn/kernel/mca.c
@@ -0,0 +1,146 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Interval for calling SAL to poll for errors that do NOT cause error
+ * interrupts. SAL will raise a CPEI if any errors are present that
+ * need to be logged.
+ */
+#define CPEI_INTERVAL	(5*HZ)
+
+struct timer_list sn_cpei_timer;
+void sn_init_cpei_timer(void);
+
+/* Printing oemdata from mca uses data that is not passed through SAL, it is
+ * global.  Only one user at a time.
+ */
+static DEFINE_MUTEX(sn_oemdata_mutex);
+static u8 **sn_oemdata;
+static u64 *sn_oemdata_size, sn_oemdata_bufsize;
+
+/*
+ * print_hook
+ *
+ * This function is the callback routine that SAL calls to log error
+ * info for platform errors.  buf is appended to sn_oemdata, resizing as
+ * required.
+ * Note: this is a SAL to OS callback, running under the same rules as the SAL
+ * code.  SAL calls are run with preempt disabled so this routine must not
+ * sleep.  vmalloc can sleep so print_hook cannot resize the output buffer
+ * itself, instead it must set the required size and return to let the caller
+ * resize the buffer then redrive the SAL call.
+ */
+static int print_hook(const char *fmt, ...)
+{
+	char buf[400];
+	int len;
+	va_list args;
+	va_start(args, fmt);
+	/* Each call is formatted into (and truncated to) the 400-byte buf. */
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	len = strlen(buf);
+	/* Copy only when it fits; the size is advanced either way so the
+	 * caller can see how much room is needed.
+	 */
+	if (*sn_oemdata_size + len <= sn_oemdata_bufsize)
+		memcpy(*sn_oemdata + *sn_oemdata_size, buf, len);
+	*sn_oemdata_size += len;
+	return 0;
+}
+
+/* All three arguments are unused; the timer handler passes (-1, NULL, NULL). */
+static void sn_cpei_handler(int irq, void *devid, struct pt_regs *regs)
+{
+	/*
+	 * this function's sole purpose is to call SAL when we receive
+	 * a CE interrupt from SHUB or when the timer routine decides
+	 * we need to call SAL to check for CEs.
+	 */
+
+	/* CALL SAL_LOG_CE */
+
+	ia64_sn_plat_cpei_handler();
+}
+
+/* Timer callback: poll SAL once, then re-arm for another CPEI_INTERVAL. */
+static void sn_cpei_timer_handler(unsigned long dummy)
+{
+	sn_cpei_handler(-1, NULL, NULL);
+	mod_timer(&sn_cpei_timer, jiffies + CPEI_INTERVAL);
+}
+
+/* Start the periodic CPEI poll; first expiry is CPEI_INTERVAL from now. */
+void sn_init_cpei_timer(void)
+{
+	init_timer(&sn_cpei_timer);
+	sn_cpei_timer.expires = jiffies + CPEI_INTERVAL;
+	sn_cpei_timer.function = sn_cpei_timer_handler;
+	add_timer(&sn_cpei_timer);
+}
+
+/*
+ * Have SAL print the platform specific error section into *oemdata via
+ * print_hook(), growing the vmalloc'ed buffer and redriving the SAL call
+ * until the output fits.  On return *oemdata_size holds the number of
+ * bytes written.
+ *
+ * Returns 0 on success, 1 when the buffer could not be allocated.
+ */
+static int
+sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata,
+				    u64 * oemdata_size)
+{
+	mutex_lock(&sn_oemdata_mutex);
+	/* Publish the output location for print_hook() (file-scope state). */
+	sn_oemdata = oemdata;
+	sn_oemdata_size = oemdata_size;
+	sn_oemdata_bufsize = 0;
+	*sn_oemdata_size = PAGE_SIZE;	/* first guess at how much data will be generated */
+	/* Loop while the size print_hook() asked for exceeds the buffer. */
+	while (*sn_oemdata_size > sn_oemdata_bufsize) {
+		u8 *newbuf = vmalloc(*sn_oemdata_size);
+		if (!newbuf) {
+			mutex_unlock(&sn_oemdata_mutex);
+			printk(KERN_ERR "%s: unable to extend sn_oemdata\n",
+			       __func__);
+			return 1;
+		}
+		/* Old contents are not copied; SAL regenerates everything. */
+		vfree(*sn_oemdata);
+		*sn_oemdata = newbuf;
+		sn_oemdata_bufsize = *sn_oemdata_size;
+		*sn_oemdata_size = 0;
+		ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header);
+	}
+	mutex_unlock(&sn_oemdata_mutex);
+	return 0;
+}
+
+/* Callback when userspace salinfo wants to decode oem data via the platform
+ * kernel and/or prom.
+ */
+int sn_salinfo_platform_oemdata(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size)
+{
+	efi_guid_t sect_guid = *(efi_guid_t *)sect_header;
+	int has_oemdata;
+
+	/* Always start from an empty result, releasing any previous buffer. */
+	*oemdata_size = 0;
+	vfree(*oemdata);
+	*oemdata = NULL;
+
+	/* Only two section types carry OEM data; read their valid bit. */
+	if (!efi_guidcmp(sect_guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID))
+		has_oemdata = ((sal_log_plat_specific_err_info_t *)sect_header)->valid.oem_data;
+	else if (!efi_guidcmp(sect_guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID))
+		has_oemdata = ((sal_log_mem_dev_err_info_t *)sect_header)->valid.oem_data;
+	else
+		has_oemdata = 0;
+
+	if (!has_oemdata)
+		return 0;
+
+	return sn_platform_plat_specific_err_print(sect_header, oemdata, oemdata_size);
+}
+
+/* Install the OEM-data decoder, but only when running on the sn2 platform. */
+static int __init sn_salinfo_init(void)
+{
+	if (!ia64_platform_is("sn2"))
+		return 0;
+
+	salinfo_platform_oemdata = &sn_salinfo_platform_oemdata;
+	return 0;
+}
+
+module_init(sn_salinfo_init)
diff --git a/kernel/arch/ia64/sn/kernel/msi_sn.c b/kernel/arch/ia64/sn/kernel/msi_sn.c
new file mode 100644
index 000000000..a0eb27b66
--- /dev/null
+++ b/kernel/arch/ia64/sn/kernel/msi_sn.c
@@ -0,0 +1,238 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+/* Per-irq MSI bookkeeping: the mapped bus address and the PROM irq info. */
+struct sn_msi_info {
+	u64 pci_addr;
+	struct sn_irq_info *sn_irq_info;
+};
+
+static struct sn_msi_info sn_msi_info[NR_IRQS];
+
+static struct irq_chip sn_msi_chip;
+
+/*
+ * Tear down an MSI set up by sn_setup_msi_irq(): unmap the MSI address,
+ * release the PROM interrupt and destroy the irq.  Does nothing when the
+ * irq has no MSI info or is a line interrupt (irq_int_bit >= 0).
+ *
+ * NOTE(review): the sn_irq_info structure is neither freed nor unlinked
+ * here -- confirm who owns it after teardown.
+ */
+void sn_teardown_msi_irq(unsigned int irq)
+{
+	nasid_t nasid;
+	int widget;
+	struct pci_dev *pdev;
+	struct pcidev_info *sn_pdev;
+	struct sn_irq_info *sn_irq_info;
+	struct pcibus_bussoft *bussoft;
+	struct sn_pcibus_provider *provider;
+
+	sn_irq_info = sn_msi_info[irq].sn_irq_info;
+	if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
+		return;
+
+	sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	pdev = sn_pdev->pdi_linux_pcidev;
+	provider = SN_PCIDEV_BUSPROVIDER(pdev);
+
+	(*provider->dma_unmap)(pdev,
+			       sn_msi_info[irq].pci_addr,
+			       PCI_DMA_FROMDEVICE);
+	sn_msi_info[irq].pci_addr = 0;
+
+	bussoft = SN_PCIDEV_BUSSOFT(pdev);
+	nasid = NASID_GET(bussoft->bs_base);
+	widget = (nasid & 1) ?
+			TIO_SWIN_WIDGETNUM(bussoft->bs_base) :
+			SWIN_WIDGETNUM(bussoft->bs_base);
+
+	sn_intr_free(nasid, widget, sn_irq_info);
+	sn_msi_info[irq].sn_irq_info = NULL;
+
+	destroy_irq(irq);
+}
+
+/*
+ * Set up one MSI for pdev.
+ *
+ * Returns 0 on success; -EINVAL when the MSI is not 64-bit capable or the
+ * device has no SN bus context / consistent-DMA provider; the negative
+ * create_irq() result when no irq is available; -ENOMEM when allocation,
+ * the PROM interrupt request or the address mapping fails.
+ */
+int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry)
+{
+	struct msi_msg msg;
+	int widget;
+	int status;
+	nasid_t nasid;
+	u64 bus_addr;
+	struct sn_irq_info *sn_irq_info;
+	struct pcibus_bussoft *bussoft = SN_PCIDEV_BUSSOFT(pdev);
+	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
+	int irq;
+
+	if (!entry->msi_attrib.is_64)
+		return -EINVAL;
+
+	if (bussoft == NULL)
+		return -EINVAL;
+
+	if (provider == NULL || provider->dma_map_consistent == NULL)
+		return -EINVAL;
+
+	irq = create_irq();
+	if (irq < 0)
+		return irq;
+
+	/*
+	 * Set up the vector plumbing.  Let the prom (via sn_intr_alloc)
+	 * decide which cpu to direct this msi at by default.
+	 */
+
+	nasid = NASID_GET(bussoft->bs_base);
+	widget = (nasid & 1) ?
+			TIO_SWIN_WIDGETNUM(bussoft->bs_base) :
+			SWIN_WIDGETNUM(bussoft->bs_base);
+
+	sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
+	if (! sn_irq_info) {
+		destroy_irq(irq);
+		return -ENOMEM;
+	}
+
+	status = sn_intr_alloc(nasid, widget, sn_irq_info, irq, -1, -1);
+	if (status) {
+		kfree(sn_irq_info);
+		destroy_irq(irq);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Map the xio address into bus space.
+	 *
+	 * This is done before sn_irq_fixup(): once sn_irq_fixup() has run,
+	 * sn_irq_info is linked into the RCU-protected per-irq list and must
+	 * no longer be freed directly, so every failure that frees it has to
+	 * happen first.
+	 */
+	bus_addr = (*provider->dma_map_consistent)(pdev,
+					sn_irq_info->irq_xtalkaddr,
+					sizeof(sn_irq_info->irq_xtalkaddr),
+					SN_DMA_MSI|SN_DMA_ADDR_XIO);
+	if (! bus_addr) {
+		sn_intr_free(nasid, widget, sn_irq_info);
+		kfree(sn_irq_info);
+		destroy_irq(irq);
+		return -ENOMEM;
+	}
+
+	sn_irq_info->irq_int_bit = -1;		/* mark this as an MSI irq */
+	sn_irq_fixup(pdev, sn_irq_info);
+
+	/* Prom probably should fill these in, but doesn't ... */
+	sn_irq_info->irq_bridge_type = bussoft->bs_asic_type;
+	sn_irq_info->irq_bridge = (void *)bussoft->bs_base;
+
+	sn_msi_info[irq].sn_irq_info = sn_irq_info;
+	sn_msi_info[irq].pci_addr = bus_addr;
+
+	msg.address_hi = (u32)(bus_addr >> 32);
+	msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff);
+
+	/*
+	 * In the SN platform, bit 16 is a "send vector" bit which
+	 * must be present in order to move the vector through the system.
+	 * NOTE(review): the value added below is 0x100, i.e. bit 8 -- confirm
+	 * which bit numbering the sentence above refers to.
+	 */
+	msg.data = 0x100 + irq;
+
+	irq_set_msi_desc(irq, entry);
+	pci_write_msi_msg(irq, &msg);
+	irq_set_chip_and_handler(irq, &sn_msi_chip, handle_edge_irq);
+
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Retarget an MSI at the first online CPU in cpu_mask: unmap the old MSI
+ * address, retarget the vector through sn_retarget_vector(), map the new
+ * xtalk address and rewrite the MSI message.
+ *
+ * Returns 0 on success, -1 when the irq is not an MSI known to this file
+ * or the retarget fails.
+ *
+ * NOTE(review): the result of the second dma_map_consistent() call is not
+ * checked before it is written into the MSI message.
+ */
+static int sn_set_msi_irq_affinity(struct irq_data *data,
+				   const struct cpumask *cpu_mask, bool force)
+{
+	struct msi_msg msg;
+	int slice;
+	nasid_t nasid;
+	u64 bus_addr;
+	struct pci_dev *pdev;
+	struct pcidev_info *sn_pdev;
+	struct sn_irq_info *sn_irq_info;
+	struct sn_irq_info *new_irq_info;
+	struct sn_pcibus_provider *provider;
+	unsigned int cpu, irq = data->irq;
+
+	cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
+	sn_irq_info = sn_msi_info[irq].sn_irq_info;
+	if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
+		return -1;
+
+	/*
+	 * Release XIO resources for the old MSI PCI address
+	 */
+
+	__get_cached_msi_msg(data->msi_desc, &msg);
+	sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	pdev = sn_pdev->pdi_linux_pcidev;
+	provider = SN_PCIDEV_BUSPROVIDER(pdev);
+
+	bus_addr = (u64)(msg.address_hi) << 32 | (u64)(msg.address_lo);
+	(*provider->dma_unmap)(pdev, bus_addr, PCI_DMA_FROMDEVICE);
+	sn_msi_info[irq].pci_addr = 0;
+
+	nasid = cpuid_to_nasid(cpu);
+	slice = cpuid_to_slice(cpu);
+
+	new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice);
+	sn_msi_info[irq].sn_irq_info = new_irq_info;
+	if (new_irq_info == NULL)
+		return -1;
+
+	/*
+	 * Map the xio address into bus space
+	 */
+
+	bus_addr = (*provider->dma_map_consistent)(pdev,
+					new_irq_info->irq_xtalkaddr,
+					sizeof(new_irq_info->irq_xtalkaddr),
+					SN_DMA_MSI|SN_DMA_ADDR_XIO);
+
+	sn_msi_info[irq].pci_addr = bus_addr;
+	msg.address_hi = (u32)(bus_addr >> 32);
+	msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff);
+
+	pci_write_msi_msg(irq, &msg);
+	cpumask_copy(data->affinity, cpu_mask);
+
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+/* Ack: complete any pending affinity move, then signal end-of-interrupt. */
+static void sn_ack_msi_irq(struct irq_data *data)
+{
+	irq_move_irq(data);
+	ia64_eoi();
+}
+
+/* Retrigger by resending the irq number as the vector; always returns 1. */
+static int sn_msi_retrigger_irq(struct irq_data *data)
+{
+	unsigned int vector = data->irq;
+	ia64_resend_irq(vector);
+
+	return 1;
+}
+
+static struct irq_chip sn_msi_chip = {
+	.name			= "PCI-MSI",
+	.irq_mask		= pci_msi_mask_irq,
+	.irq_unmask		= pci_msi_unmask_irq,
+	.irq_ack		= sn_ack_msi_irq,
+#ifdef CONFIG_SMP
+	.irq_set_affinity	= sn_set_msi_irq_affinity,
+#endif
+	.irq_retrigger		= sn_msi_retrigger_irq,
+};
diff --git a/kernel/arch/ia64/sn/kernel/pio_phys.S b/kernel/arch/ia64/sn/kernel/pio_phys.S
new file mode 100644
index 000000000..3c7d48d6e
--- /dev/null
+++ b/kernel/arch/ia64/sn/kernel/pio_phys.S
@@ -0,0 +1,71 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ *
+ * This file contains macros used to access MMR registers via
+ * uncached physical addresses.
+ *      pio_phys_read_mmr  - read an MMR
+ *      pio_phys_write_mmr - write an MMR
+ *      pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0
+ *              Second MMR will be skipped if address is NULL
+ *
+ * Addresses passed to these routines should be uncached physical addresses
+ *	ie., 0x80000....
+ */ + + + +#include +#include + +GLOBAL_ENTRY(pio_phys_read_mmr) + .prologue + .regstk 1,0,0,0 + .body + mov r2=psr + rsm psr.i | psr.dt + ;; + srlz.d + ld8.acq r8=[r32] + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_phys_read_mmr) + +GLOBAL_ENTRY(pio_phys_write_mmr) + .prologue + .regstk 2,0,0,0 + .body + mov r2=psr + rsm psr.i | psr.dt + ;; + srlz.d + st8.rel [r32]=r33 + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_phys_write_mmr) + +GLOBAL_ENTRY(pio_atomic_phys_write_mmrs) + .prologue + .regstk 4,0,0,0 + .body + mov r2=psr + cmp.ne p9,p0=r34,r0; + rsm psr.i | psr.dt | psr.ic + ;; + srlz.d + st8.rel [r32]=r33 +(p9) st8.rel [r34]=r35 + ;; + mov psr.l=r2;; + srlz.d + br.ret.sptk.many rp +END(pio_atomic_phys_write_mmrs) + + diff --git a/kernel/arch/ia64/sn/kernel/setup.c b/kernel/arch/ia64/sn/kernel/setup.c new file mode 100644 index 000000000..5f6b6b48c --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/setup.c @@ -0,0 +1,775 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" +#include + + +DEFINE_PER_CPU(struct pda_s, pda_percpu); + +#define MAX_PHYS_MEMORY (1UL << IA64_MAX_PHYS_BITS) /* Max physical address supported */ + +extern void bte_init_node(nodepda_t *, cnodeid_t); + +extern void sn_timer_init(void); +extern unsigned long last_time_offset; +extern void (*ia64_mark_idle) (int); +extern void snidle(int); + +unsigned long sn_rtc_cycles_per_second; +EXPORT_SYMBOL(sn_rtc_cycles_per_second); + +DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); +EXPORT_PER_CPU_SYMBOL(__sn_hub_info); + +DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); +EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); + +DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); +EXPORT_PER_CPU_SYMBOL(__sn_nodepda); + +char sn_system_serial_number_string[128]; +EXPORT_SYMBOL(sn_system_serial_number_string); +u64 sn_partition_serial_number; +EXPORT_SYMBOL(sn_partition_serial_number); +u8 sn_partition_id; +EXPORT_SYMBOL(sn_partition_id); +u8 sn_system_size; +EXPORT_SYMBOL(sn_system_size); +u8 sn_sharing_domain_size; +EXPORT_SYMBOL(sn_sharing_domain_size); +u8 sn_coherency_id; +EXPORT_SYMBOL(sn_coherency_id); +u8 sn_region_size; +EXPORT_SYMBOL(sn_region_size); +int sn_prom_type; /* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */ + +short physical_node_map[MAX_NUMALINK_NODES]; +static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS]; + +EXPORT_SYMBOL(physical_node_map); + +int num_cnodes; + +static void sn_init_pdas(char **); +static void build_cnode_tables(void); + +static 
nodepda_t *nodepdaindr[MAX_COMPACT_NODES]; + +/* + * The format of "screen_info" is strange, and due to early i386-setup + * code. This is just enough to make the console code think we're on a + * VGA color display. + */ +struct screen_info sn_screen_info = { + .orig_x = 0, + .orig_y = 0, + .orig_video_mode = 3, + .orig_video_cols = 80, + .orig_video_ega_bx = 3, + .orig_video_lines = 25, + .orig_video_isVGA = 1, + .orig_video_points = 16 +}; + +/* + * This routine can only be used during init, since + * smp_boot_data is an init data structure. + * We have to use smp_boot_data.cpu_phys_id to find + * the physical id of the processor because the normal + * cpu_physical_id() relies on data structures that + * may not be initialized yet. + */ + +static int __init pxm_to_nasid(int pxm) +{ + int i; + int nid; + + nid = pxm_to_node(pxm); + for (i = 0; i < num_node_memblks; i++) { + if (node_memblk[i].nid == nid) { + return NASID_GET(node_memblk[i].start_paddr); + } + } + return -1; +} + +/** + * early_sn_setup - early setup routine for SN platforms + * + * Sets up an initial console to aid debugging. Intended primarily + * for bringup. See start_kernel() in init/main.c. + */ + +void __init early_sn_setup(void) +{ + efi_system_table_t *efi_systab; + efi_config_table_t *config_tables; + struct ia64_sal_systab *sal_systab; + struct ia64_sal_desc_entry_point *ep; + char *p; + int i, j; + + /* + * Parse enough of the SAL tables to locate the SAL entry point. Since, console + * IO on SN2 is done via SAL calls, early_printk won't work without this. + * + * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c. + * Any changes to those file may have to be made here as well. 
+ */ + efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab); + config_tables = __va(efi_systab->tables); + for (i = 0; i < efi_systab->nr_tables; i++) { + if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == + 0) { + sal_systab = __va(config_tables[i].table); + p = (char *)(sal_systab + 1); + for (j = 0; j < sal_systab->entry_count; j++) { + if (*p == SAL_DESC_ENTRY_POINT) { + ep = (struct ia64_sal_desc_entry_point + *)p; + ia64_sal_handler_init(__va + (ep->sal_proc), + __va(ep->gp)); + return; + } + p += SAL_DESC_SIZE(*p); + } + } + } + /* Uh-oh, SAL not available?? */ + printk(KERN_ERR "failed to find SAL entry point\n"); +} + +extern int platform_intr_list[]; +static int shub_1_1_found; + +/* + * sn_check_for_wars + * + * Set flag for enabling shub specific wars + */ + +static inline int is_shub_1_1(int nasid) +{ + unsigned long id; + int rev; + + if (is_shub2()) + return 0; + id = REMOTE_HUB_L(nasid, SH1_SHUB_ID); + rev = (id & SH1_SHUB_ID_REVISION_MASK) >> SH1_SHUB_ID_REVISION_SHFT; + return rev <= 2; +} + +static void sn_check_for_wars(void) +{ + int cnode; + + if (is_shub2()) { + /* none yet */ + } else { + for_each_online_node(cnode) { + if (is_shub_1_1(cnodeid_to_nasid(cnode))) + shub_1_1_found = 1; + } + } +} + +/* + * Scan the EFI PCDP table (if it exists) for an acceptable VGA console + * output device. If one exists, pick it and set sn_legacy_{io,mem} to + * reflect the bus offsets needed to address it. + * + * Since pcdp support in SN is not supported in the 2.4 kernel (or at least + * the one lbs is based on) just declare the needed structs here. + * + * Reference spec http://www.dig64.org/specifications/DIG64_PCDPv20.pdf + * + * Returns 0 if no acceptable vga is found, !0 otherwise. + * + * Note: This stuff is duped here because Altix requires the PCDP to + * locate a usable VGA device due to lack of proper ACPI support. 
#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
/*
 * sn_scan_pcdp - look for the primary VGA console in the EFI PCDP table
 *
 * Walks the device descriptors that follow the UART entries of the
 * HCDP/PCDP table.  The first descriptor that is flagged as primary
 * console, is of VGA type and sits behind a PCI interconnect sets
 * vga_console_iobase and/or vga_console_membase from its translation
 * fields, after which the scan stops.
 *
 * The table comes from firmware, so the walk is bounded: it stops when
 * a descriptor header or its PCI interface structure would extend past
 * pcdp->length, or when a descriptor claims a length smaller than its
 * own header (a zero length would otherwise never advance).
 */
static void
sn_scan_pcdp(void)
{
	u8 *bp;
	u8 *end;
	struct pcdp *pcdp;
	struct pcdp_device_desc device;
	struct pcdp_interface_pci if_pci;
	extern struct efi efi;

	if (efi.hcdp == EFI_INVALID_TABLE_ADDR)
		return;		/* no hcdp/pcdp table */

	pcdp = __va(efi.hcdp);

	if (pcdp->rev < 3)
		return;		/* only support PCDP (rev >= 3) */

	end = (u8 *)pcdp + pcdp->length;

	for (bp = (u8 *)&pcdp->uart[pcdp->num_type0];
	     bp < end && (size_t)(end - bp) >= sizeof(device);
	     bp += device.length) {
		memcpy(&device, bp, sizeof(device));

		/* malformed descriptor: cannot advance past its own header */
		if (device.length < sizeof(device))
			break;

		if (! (device.primary & PCDP_PRIMARY_CONSOLE))
			continue;	/* not primary console */

		if (device.type != PCDP_CONSOLE_VGA)
			continue;	/* not VGA descriptor */

		/* interconnect structure must lie inside the table */
		if ((size_t)(end - bp) < sizeof(device) + sizeof(if_pci))
			break;

		memcpy(&if_pci, bp+sizeof(device), sizeof(if_pci));
		if (if_pci.type != PCDP_IF_PCI)
			continue;	/* not PCI interconnect */

		if (if_pci.translation & PCDP_PCI_TRANS_IOPORT)
			vga_console_iobase = if_pci.ioport_tra;

		if (if_pci.translation & PCDP_PCI_TRANS_MMIO)
			vga_console_membase =
				if_pci.mmio_tra | __IA64_UNCACHED_OFFSET;

		break; /* once we find the primary, we're done */
	}
}
#endif
+ * + * SN systems do not have enough ACPI table information + * being passed from prom to identify VGA adapters and the legacy + * addresses to access them. Until that is done, SN systems rely + * on the PCDP table to identify the primary VGA console if one + * exists. + * + * However, kernel PCDP support is optional, and even if it is built + * into the kernel, it will not be used if the boot cmdline contains + * console= directives. + * + * So, to work around this mess, we duplicate some of the PCDP code + * here so that the primary VGA console (as defined by PCDP) will + * work on SN systems even if a different console (e.g. serial) is + * selected on the boot line (or CONFIG_EFI_PCDP is off). + */ + + if (! vga_console_membase) + sn_scan_pcdp(); + + /* + * Setup legacy IO space. + * vga_console_iobase maps to PCI IO Space address 0 on the + * bus containing the VGA console. + */ + if (vga_console_iobase) { + io_space[0].mmio_base = + (unsigned long) ioremap(vga_console_iobase, 0); + io_space[0].sparse = 0; + } + + if (vga_console_membase) { + /* usable vga ... make tty0 the preferred default console */ + if (!strstr(*cmdline_p, "console=")) + add_preferred_console("tty", 0, NULL); + } else { + printk(KERN_DEBUG "SGI: Disabling VGA console\n"); + if (!strstr(*cmdline_p, "console=")) + add_preferred_console("ttySG", 0, NULL); +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#else + conswitchp = NULL; +#endif /* CONFIG_DUMMY_CONSOLE */ + } +#endif /* def(CONFIG_VT) && def(CONFIG_VGA_CONSOLE) */ + + MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY; + + /* + * Build the tables for managing cnodes. + */ + build_cnode_tables(); + + status = + ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, + &drift); + if (status != 0 || ticks_per_sec < 100000) { + printk(KERN_WARNING + "unable to determine platform RTC clock frequency, guessing.\n"); + /* PROM gives wrong value for clock freq. 
so guess */ + sn_rtc_cycles_per_second = 1000000000000UL / 30000UL; + } else + sn_rtc_cycles_per_second = ticks_per_sec; + + platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR; + + printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF); + + /* + * we set the default root device to /dev/hda + * to make simulation easy + */ + ROOT_DEV = Root_HDA1; + + /* + * Create the PDAs and NODEPDAs for all the cpus. + */ + sn_init_pdas(cmdline_p); + + ia64_mark_idle = &snidle; + + /* + * For the bootcpu, we do this here. All other cpus will make the + * call as part of cpu_init in slave cpu initialization. + */ + sn_cpu_init(); + +#ifdef CONFIG_SMP + init_smp_config(); +#endif + screen_info = sn_screen_info; + + sn_timer_init(); + + /* + * set pm_power_off to a SAL call to allow + * sn machines to power off. The SAL call can be replaced + * by an ACPI interface call when ACPI is fully implemented + * for sn. + */ + pm_power_off = ia64_sn_power_down; + current->thread.flags |= IA64_THREAD_MIGRATION; +} + +/** + * sn_init_pdas - setup node data areas + * + * One time setup for Node Data Area. Called by sn_setup(). + */ +static void __init sn_init_pdas(char **cmdline_p) +{ + cnodeid_t cnode; + + /* + * Allocate & initialize the nodepda for each node. + */ + for_each_online_node(cnode) { + nodepdaindr[cnode] = + alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t)); + memset(nodepdaindr[cnode]->phys_cpuid, -1, + sizeof(nodepdaindr[cnode]->phys_cpuid)); + spin_lock_init(&nodepdaindr[cnode]->ptc_lock); + } + + /* + * Allocate & initialize nodepda for TIOs. For now, put them on node 0. + */ + for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) + nodepdaindr[cnode] = + alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t)); + + /* + * Now copy the array of nodepda pointers to each nodepda. 
+ */ + for (cnode = 0; cnode < num_cnodes; cnode++) + memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr, + sizeof(nodepdaindr)); + + /* + * Set up IO related platform-dependent nodepda fields. + * The following routine actually sets up the hubinfo struct + * in nodepda. + */ + for_each_online_node(cnode) { + bte_init_node(nodepdaindr[cnode], cnode); + } + + /* + * Initialize the per node hubdev. This includes IO Nodes and + * headless/memless nodes. + */ + for (cnode = 0; cnode < num_cnodes; cnode++) { + hubdev_init_node(nodepdaindr[cnode], cnode); + } +} + +/** + * sn_cpu_init - initialize per-cpu data areas + * @cpuid: cpuid of the caller + * + * Called during cpu initialization on each cpu as it starts. + * Currently, initializes the per-cpu data area for SNIA. + * Also sets up a few fields in the nodepda. Also known as + * platform_cpu_init() by the ia64 machvec code. + */ +void sn_cpu_init(void) +{ + int cpuid; + int cpuphyid; + int nasid; + int subnode; + int slice; + int cnode; + int i; + static int wars_have_been_checked, set_cpu0_number; + + cpuid = smp_processor_id(); + if (cpuid == 0 && IS_MEDUSA()) { + if (ia64_sn_is_fake_prom()) + sn_prom_type = 2; + else + sn_prom_type = 1; + printk(KERN_INFO "Running on medusa with %s PROM\n", + (sn_prom_type == 1) ? "real" : "fake"); + } + + memset(pda, 0, sizeof(*pda)); + if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2, + &sn_hub_info->nasid_bitmask, + &sn_hub_info->nasid_shift, + &sn_system_size, &sn_sharing_domain_size, + &sn_partition_id, &sn_coherency_id, + &sn_region_size)) + BUG(); + sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2; + + /* + * Don't check status. The SAL call is not supported on all PROMs + * but a failure is harmless. + * Architecturally, cpu_init is always called twice on cpu 0. We + * should set cpu_number on cpu 0 once. 
+ */ + if (cpuid == 0) { + if (!set_cpu0_number) { + (void) ia64_sn_set_cpu_number(cpuid); + set_cpu0_number = 1; + } + } else + (void) ia64_sn_set_cpu_number(cpuid); + + /* + * The boot cpu makes this call again after platform initialization is + * complete. + */ + if (nodepdaindr[0] == NULL) + return; + + for (i = 0; i < MAX_PROM_FEATURE_SETS; i++) + if (ia64_sn_get_prom_feature_set(i, &sn_prom_features[i]) != 0) + break; + + cpuphyid = get_sapicid(); + + if (ia64_sn_get_sapic_info(cpuphyid, &nasid, &subnode, &slice)) + BUG(); + + for (i=0; i < MAX_NUMNODES; i++) { + if (nodepdaindr[i]) { + nodepdaindr[i]->phys_cpuid[cpuid].nasid = nasid; + nodepdaindr[i]->phys_cpuid[cpuid].slice = slice; + nodepdaindr[i]->phys_cpuid[cpuid].subnode = subnode; + } + } + + cnode = nasid_to_cnodeid(nasid); + + __this_cpu_write(__sn_nodepda, nodepdaindr[cnode]); + + pda->led_address = + (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT)); + pda->led_state = LED_ALWAYS_SET; + pda->hb_count = HZ / 2; + pda->hb_state = 0; + pda->idle_flag = 0; + + if (cpuid != 0) { + /* copy cpu 0's sn_cnodeid_to_nasid table to this cpu's */ + memcpy(sn_cnodeid_to_nasid, + (&per_cpu(__sn_cnodeid_to_nasid, 0)), + sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); + } + + /* + * Check for WARs. + * Only needs to be done once, on BSP. + * Has to be done after loop above, because it uses this cpu's + * sn_cnodeid_to_nasid table which was just initialized if this + * isn't cpu 0. + * Has to be done before assignment below. + */ + if (!wars_have_been_checked) { + sn_check_for_wars(); + wars_have_been_checked = 1; + } + sn_hub_info->shub_1_1_found = shub_1_1_found; + + /* + * Set up addresses of PIO/MEM write status registers. + */ + { + u64 pio1[] = {SH1_PIO_WRITE_STATUS_0, 0, SH1_PIO_WRITE_STATUS_1, 0}; + u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_2, + SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3}; + u64 *pio; + pio = is_shub1() ? 
pio1 : pio2; + pda->pio_write_status_addr = + (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]); + pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0; + } + + /* + * WAR addresses for SHUB 1.x. + */ + if (local_node_data->active_cpu_count++ == 0 && is_shub1()) { + int buddy_nasid; + buddy_nasid = + cnodeid_to_nasid(numa_node_id() == + num_online_nodes() - 1 ? 0 : numa_node_id() + 1); + pda->pio_shub_war_cam_addr = + (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, + SH1_PI_CAM_CONTROL); + } +} + +/* + * Build tables for converting between NASIDs and cnodes. + */ +static inline int __init board_needs_cnode(int type) +{ + return (type == KLTYPE_SNIA || type == KLTYPE_TIO); +} + +void __init build_cnode_tables(void) +{ + int nasid; + int node; + lboard_t *brd; + + memset(physical_node_map, -1, sizeof(physical_node_map)); + memset(sn_cnodeid_to_nasid, -1, + sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); + + /* + * First populate the tables with C/M bricks. This ensures that + * cnode == node for all C & M bricks. + */ + for_each_online_node(node) { + nasid = pxm_to_nasid(node_to_pxm(node)); + sn_cnodeid_to_nasid[node] = nasid; + physical_node_map[nasid] = node; + } + + /* + * num_cnodes is total number of C/M/TIO bricks. Because of the 256 node + * limit on the number of nodes, we can't use the generic node numbers + * for this. Note that num_cnodes is incremented below as TIOs or + * headless/memoryless nodes are discovered. 
+ */ + num_cnodes = num_online_nodes(); + + /* fakeprom does not support klgraph */ + if (IS_RUNNING_ON_FAKE_PROM()) + return; + + /* Find TIOs & headless/memoryless nodes and add them to the tables */ + for_each_online_node(node) { + kl_config_hdr_t *klgraph_header; + nasid = cnodeid_to_nasid(node); + klgraph_header = ia64_sn_get_klconfig_addr(nasid); + BUG_ON(klgraph_header == NULL); + brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info); + while (brd) { + if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) { + sn_cnodeid_to_nasid[num_cnodes] = brd->brd_nasid; + physical_node_map[brd->brd_nasid] = num_cnodes++; + } + brd = find_lboard_next(brd); + } + } +} + +int +nasid_slice_to_cpuid(int nasid, int slice) +{ + long cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (cpuid_to_nasid(cpu) == nasid && + cpuid_to_slice(cpu) == slice) + return cpu; + + return -1; +} + +int sn_prom_feature_available(int id) +{ + if (id >= BITS_PER_LONG * MAX_PROM_FEATURE_SETS) + return 0; + return test_bit(id, sn_prom_features); +} + +void +sn_kernel_launch_event(void) +{ + /* ignore status until we understand possible failure, if any*/ + if (ia64_sn_kernel_launch_event()) + printk(KERN_ERR "KEXEC is not supported in this PROM, Please update the PROM.\n"); +} +EXPORT_SYMBOL(sn_prom_feature_available); + diff --git a/kernel/arch/ia64/sn/kernel/sn2/Makefile b/kernel/arch/ia64/sn/kernel/sn2/Makefile new file mode 100644 index 000000000..3d09108d4 --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/sn2/Makefile @@ -0,0 +1,15 @@ +# arch/ia64/sn/kernel/sn2/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1999,2001-2002 Silicon Graphics, Inc. All rights reserved. 
+# +# sn2 specific kernel files +# + +ccflags-y := -Iarch/ia64/sn/include + +obj-y += cache.o io.o ptc_deadlock.o sn2_smp.o sn_proc_fs.o \ + prominfo_proc.o timer.o timer_interrupt.o sn_hwperf.o diff --git a/kernel/arch/ia64/sn/kernel/sn2/cache.c b/kernel/arch/ia64/sn/kernel/sn2/cache.c new file mode 100644 index 000000000..2862cb330 --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/sn2/cache.c @@ -0,0 +1,41 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001-2003, 2006 Silicon Graphics, Inc. All rights reserved. + * + */ +#include +#include +#include + +/** + * sn_flush_all_caches - flush a range of address from all caches (incl. L4) + * @flush_addr: identity mapped region 7 address to start flushing + * @bytes: number of bytes to flush + * + * Flush a range of addresses from all caches including L4. + * All addresses fully or partially contained within + * @flush_addr to @flush_addr + @bytes are flushed + * from all caches. + */ +void +sn_flush_all_caches(long flush_addr, long bytes) +{ + unsigned long addr = flush_addr; + + /* SHub1 requires a cached address */ + if (is_shub1() && (addr & RGN_BITS) == RGN_BASE(RGN_UNCACHED)) + addr = (addr - RGN_BASE(RGN_UNCACHED)) + RGN_BASE(RGN_KERNEL); + + flush_icache_range(addr, addr + bytes); + /* + * The last call may have returned before the caches + * were actually flushed, so we call it again to make + * sure. + */ + flush_icache_range(addr, addr + bytes); + mb(); +} +EXPORT_SYMBOL(sn_flush_all_caches); diff --git a/kernel/arch/ia64/sn/kernel/sn2/io.c b/kernel/arch/ia64/sn/kernel/sn2/io.c new file mode 100644 index 000000000..a12c0586d --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/sn2/io.c @@ -0,0 +1,101 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. 
See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved. + * + * The generic kernel requires function pointers to these routines, so + * we wrap the inlines from asm/ia64/sn/sn2/io.h here. + */ + +#include + +#ifdef CONFIG_IA64_GENERIC + +#undef __sn_inb +#undef __sn_inw +#undef __sn_inl +#undef __sn_outb +#undef __sn_outw +#undef __sn_outl +#undef __sn_readb +#undef __sn_readw +#undef __sn_readl +#undef __sn_readq +#undef __sn_readb_relaxed +#undef __sn_readw_relaxed +#undef __sn_readl_relaxed +#undef __sn_readq_relaxed + +unsigned int __sn_inb(unsigned long port) +{ + return ___sn_inb(port); +} + +unsigned int __sn_inw(unsigned long port) +{ + return ___sn_inw(port); +} + +unsigned int __sn_inl(unsigned long port) +{ + return ___sn_inl(port); +} + +void __sn_outb(unsigned char val, unsigned long port) +{ + ___sn_outb(val, port); +} + +void __sn_outw(unsigned short val, unsigned long port) +{ + ___sn_outw(val, port); +} + +void __sn_outl(unsigned int val, unsigned long port) +{ + ___sn_outl(val, port); +} + +unsigned char __sn_readb(void __iomem *addr) +{ + return ___sn_readb(addr); +} + +unsigned short __sn_readw(void __iomem *addr) +{ + return ___sn_readw(addr); +} + +unsigned int __sn_readl(void __iomem *addr) +{ + return ___sn_readl(addr); +} + +unsigned long __sn_readq(void __iomem *addr) +{ + return ___sn_readq(addr); +} + +unsigned char __sn_readb_relaxed(void __iomem *addr) +{ + return ___sn_readb_relaxed(addr); +} + +unsigned short __sn_readw_relaxed(void __iomem *addr) +{ + return ___sn_readw_relaxed(addr); +} + +unsigned int __sn_readl_relaxed(void __iomem *addr) +{ + return ___sn_readl_relaxed(addr); +} + +unsigned long __sn_readq_relaxed(void __iomem *addr) +{ + return ___sn_readq_relaxed(addr); +} + +#endif diff --git a/kernel/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/kernel/arch/ia64/sn/kernel/sn2/prominfo_proc.c new file mode 100644 index 
000000000..ec4de2b09 --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/sn2/prominfo_proc.c @@ -0,0 +1,231 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999,2001-2004, 2006 Silicon Graphics, Inc. All Rights Reserved. + * + * Module to export the system's Firmware Interface Tables, including + * PROM revision numbers and banners, in /proc + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("PROM version reporting for /proc"); +MODULE_AUTHOR("Chad Talbott"); +MODULE_LICENSE("GPL"); + +/* Standard Intel FIT entry types */ +#define FIT_ENTRY_FIT_HEADER 0x00 /* FIT header entry */ +#define FIT_ENTRY_PAL_B 0x01 /* PAL_B entry */ +/* Entries 0x02 through 0x0D reserved by Intel */ +#define FIT_ENTRY_PAL_A_PROC 0x0E /* Processor-specific PAL_A entry */ +#define FIT_ENTRY_PAL_A 0x0F /* PAL_A entry, same as... */ +#define FIT_ENTRY_PAL_A_GEN 0x0F /* ...Generic PAL_A entry */ +#define FIT_ENTRY_UNUSED 0x7F /* Unused (reserved by Intel?) */ +/* OEM-defined entries range from 0x10 to 0x7E. 
*/ +#define FIT_ENTRY_SAL_A 0x10 /* SAL_A entry */ +#define FIT_ENTRY_SAL_B 0x11 /* SAL_B entry */ +#define FIT_ENTRY_SALRUNTIME 0x12 /* SAL runtime entry */ +#define FIT_ENTRY_EFI 0x1F /* EFI entry */ +#define FIT_ENTRY_FPSWA 0x20 /* embedded fpswa entry */ +#define FIT_ENTRY_VMLINUX 0x21 /* embedded vmlinux entry */ + +#define FIT_MAJOR_SHIFT (32 + 8) +#define FIT_MAJOR_MASK ((1 << 8) - 1) +#define FIT_MINOR_SHIFT 32 +#define FIT_MINOR_MASK ((1 << 8) - 1) + +#define FIT_MAJOR(q) \ + ((unsigned) ((q) >> FIT_MAJOR_SHIFT) & FIT_MAJOR_MASK) +#define FIT_MINOR(q) \ + ((unsigned) ((q) >> FIT_MINOR_SHIFT) & FIT_MINOR_MASK) + +#define FIT_TYPE_SHIFT (32 + 16) +#define FIT_TYPE_MASK ((1 << 7) - 1) + +#define FIT_TYPE(q) \ + ((unsigned) ((q) >> FIT_TYPE_SHIFT) & FIT_TYPE_MASK) + +struct fit_type_map_t { + unsigned char type; + const char *name; +}; + +static const struct fit_type_map_t fit_entry_types[] = { + {FIT_ENTRY_FIT_HEADER, "FIT Header"}, + {FIT_ENTRY_PAL_A_GEN, "Generic PAL_A"}, + {FIT_ENTRY_PAL_A_PROC, "Processor-specific PAL_A"}, + {FIT_ENTRY_PAL_A, "PAL_A"}, + {FIT_ENTRY_PAL_B, "PAL_B"}, + {FIT_ENTRY_SAL_A, "SAL_A"}, + {FIT_ENTRY_SAL_B, "SAL_B"}, + {FIT_ENTRY_SALRUNTIME, "SAL runtime"}, + {FIT_ENTRY_EFI, "EFI"}, + {FIT_ENTRY_VMLINUX, "Embedded Linux"}, + {FIT_ENTRY_FPSWA, "Embedded FPSWA"}, + {FIT_ENTRY_UNUSED, "Unused"}, + {0xff, "Error"}, +}; + +static const char *fit_type_name(unsigned char type) +{ + struct fit_type_map_t const *mapp; + + for (mapp = fit_entry_types; mapp->type != 0xff; mapp++) + if (type == mapp->type) + return mapp->name; + + if ((type > FIT_ENTRY_PAL_A) && (type < FIT_ENTRY_UNUSED)) + return "OEM type"; + if ((type > FIT_ENTRY_PAL_B) && (type < FIT_ENTRY_PAL_A)) + return "Reserved"; + + return "Unknown type"; +} + +static int +get_fit_entry(unsigned long nasid, int index, unsigned long *fentry, + char *banner, int banlen) +{ + return ia64_sn_get_fit_compt(nasid, index, fentry, banner, banlen); +} + + +/* + * These two routines display 
the FIT table for each node. + */ +static void dump_fit_entry(struct seq_file *m, unsigned long *fentry) +{ + unsigned type; + + type = FIT_TYPE(fentry[1]); + seq_printf(m, "%02x %-25s %x.%02x %016lx %u\n", + type, + fit_type_name(type), + FIT_MAJOR(fentry[1]), FIT_MINOR(fentry[1]), + fentry[0], + /* mult by sixteen to get size in bytes */ + (unsigned)(fentry[1] & 0xffffff) * 16); +} + + +/* + * We assume that the fit table will be small enough that we can print + * the whole thing into one page. (This is true for our default 16kB + * pages -- each entry is about 60 chars wide when printed.) I read + * somewhere that the maximum size of the FIT is 128 entries, so we're + * OK except for 4kB pages (and no one is going to do that on SN + * anyway). + */ +static int proc_fit_show(struct seq_file *m, void *v) +{ + unsigned long nasid = (unsigned long)m->private; + unsigned long fentry[2]; + int index; + + for (index=0;;index++) { + BUG_ON(index * 60 > PAGE_SIZE); + if (get_fit_entry(nasid, index, fentry, NULL, 0)) + break; + dump_fit_entry(m, fentry); + } + return 0; +} + +static int proc_fit_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_fit_show, PDE_DATA(inode)); +} + +static const struct file_operations proc_fit_fops = { + .open = proc_fit_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int proc_version_show(struct seq_file *m, void *v) +{ + unsigned long nasid = (unsigned long)m->private; + unsigned long fentry[2]; + char banner[128]; + int index; + + for (index = 0; ; index++) { + if (get_fit_entry(nasid, index, fentry, banner, + sizeof(banner))) + return 0; + if (FIT_TYPE(fentry[1]) == FIT_ENTRY_SAL_A) + break; + } + + seq_printf(m, "%x.%02x\n", FIT_MAJOR(fentry[1]), FIT_MINOR(fentry[1])); + + if (banner[0]) + seq_printf(m, "%s\n", banner); + return 0; +} + +static int proc_version_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_version_show, 
PDE_DATA(inode)); +} + +static const struct file_operations proc_version_fops = { + .open = proc_version_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* module entry points */ +int __init prominfo_init(void); +void __exit prominfo_exit(void); + +module_init(prominfo_init); +module_exit(prominfo_exit); + +#define NODE_NAME_LEN 11 + +int __init prominfo_init(void) +{ + struct proc_dir_entry *sgi_prominfo_entry; + cnodeid_t cnodeid; + + if (!ia64_platform_is("sn2")) + return 0; + + sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL); + if (!sgi_prominfo_entry) + return -ENOMEM; + + for_each_online_node(cnodeid) { + struct proc_dir_entry *dir; + unsigned long nasid; + char name[NODE_NAME_LEN]; + + sprintf(name, "node%d", cnodeid); + dir = proc_mkdir(name, sgi_prominfo_entry); + if (!dir) + continue; + nasid = cnodeid_to_nasid(cnodeid); + proc_create_data("fit", 0, dir, + &proc_fit_fops, (void *)nasid); + proc_create_data("version", 0, dir, + &proc_version_fops, (void *)nasid); + } + return 0; +} + +void __exit prominfo_exit(void) +{ + remove_proc_subtree("sgi_prominfo", NULL); +} diff --git a/kernel/arch/ia64/sn/kernel/sn2/ptc_deadlock.S b/kernel/arch/ia64/sn/kernel/sn2/ptc_deadlock.S new file mode 100644 index 000000000..bebbcc4f8 --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/sn2/ptc_deadlock.S @@ -0,0 +1,92 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. 
+ */ + +#include +#include + +#define DEADLOCKBIT SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_SHFT +#define WRITECOUNTMASK SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK +#define ALIAS_OFFSET 8 + + + .global sn2_ptc_deadlock_recovery_core + .proc sn2_ptc_deadlock_recovery_core + +sn2_ptc_deadlock_recovery_core: + .regstk 6,0,0,0 + + ptc0 = in0 + data0 = in1 + ptc1 = in2 + data1 = in3 + piowc = in4 + zeroval = in5 + piowcphy = r30 + psrsave = r2 + scr1 = r16 + scr2 = r17 + mask = r18 + + + extr.u piowcphy=piowc,0,61;; // Convert piowc to uncached physical address + dep piowcphy=-1,piowcphy,63,1 + movl mask=WRITECOUNTMASK + mov r8=r0 + +1: + cmp.ne p8,p9=r0,ptc1 // Test for shub type (ptc1 non-null on shub1) + // p8 = 1 if shub1, p9 = 1 if shub2 + + add scr2=ALIAS_OFFSET,piowc // Address of WRITE_STATUS alias register + mov scr1=7;; // Clear DEADLOCK, WRITE_ERROR, MULTI_WRITE_ERROR +(p8) st8.rel [scr2]=scr1;; +(p9) ld8.acq scr1=[scr2];; + +5: ld8.acq scr1=[piowc];; // Wait for PIOs to complete. + hint @pause + and scr2=scr1,mask;; // mask of writecount bits + cmp.ne p6,p0=zeroval,scr2 +(p6) br.cond.sptk 5b + + + + ////////////// BEGIN PHYSICAL MODE //////////////////// + mov psrsave=psr // Disable IC (no PMIs) + rsm psr.i | psr.dt | psr.ic;; + srlz.i;; + + st8.rel [ptc0]=data0 // Write PTC0 & wait for completion. + +5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete. + hint @pause + and scr2=scr1,mask;; // mask of writecount bits + cmp.ne p6,p0=zeroval,scr2 +(p6) br.cond.sptk 5b;; + + tbit.nz p8,p7=scr1,DEADLOCKBIT;;// Test for DEADLOCK +(p7) cmp.ne p7,p0=r0,ptc1;; // Test for non-null ptc1 + +(p7) st8.rel [ptc1]=data1;; // Now write PTC1. + +5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete. 
+ hint @pause + and scr2=scr1,mask;; // mask of writecount bits + cmp.ne p6,p0=zeroval,scr2 +(p6) br.cond.sptk 5b + + tbit.nz p8,p0=scr1,DEADLOCKBIT;;// Test for DEADLOCK + + mov psr.l=psrsave;; // Reenable IC + srlz.i;; + ////////////// END PHYSICAL MODE //////////////////// + +(p8) add r8=1,r8 +(p8) br.cond.spnt 1b;; // Repeat if DEADLOCK occurred. + + br.ret.sptk rp + .endp sn2_ptc_deadlock_recovery_core diff --git a/kernel/arch/ia64/sn/kernel/sn2/sn2_smp.c b/kernel/arch/ia64/sn/kernel/sn2/sn2_smp.c new file mode 100644 index 000000000..f9c8d9fc5 --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -0,0 +1,572 @@ +/* + * SN2 Platform specific SMP Support + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(struct ptc_stats, ptcstats); +DECLARE_PER_CPU(struct ptc_stats, ptcstats); + +static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); + +/* 0 = old algorithm (no IPI flushes), 1 = ipi deadlock flush, 2 = ipi instead of SHUB ptc, >2 = always ipi */ +static int sn2_flush_opt = 0; + +extern unsigned long +sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long); +void +sn2_ptc_deadlock_recovery(short *, short, short, int, + volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long); + +/* + * Note: some of the following is captured here to make debugging easier + * (the macros make more
sense if you see the debug patch - not posted) + */ +#define sn2_ptctest 0 +#define local_node_uses_ptc_ga(sh1) ((sh1) ? 1 : 0) +#define max_active_pio(sh1) ((sh1) ? 32 : 7) +#define reset_max_active_on_deadlock() 1 +#define PTC_LOCK(sh1) ((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock) + +struct ptc_stats { + unsigned long ptc_l; + unsigned long change_rid; + unsigned long shub_ptc_flushes; + unsigned long nodes_flushed; + unsigned long deadlocks; + unsigned long deadlocks2; + unsigned long lock_itc_clocks; + unsigned long shub_itc_clocks; + unsigned long shub_itc_clocks_max; + unsigned long shub_ptc_flushes_not_my_mm; + unsigned long shub_ipi_flushes; + unsigned long shub_ipi_flushes_itc_clocks; +}; + +#define sn2_ptctest 0 + +static inline unsigned long wait_piowc(void) +{ + volatile unsigned long *piows; + unsigned long zeroval, ws; + + piows = pda->pio_write_status_addr; + zeroval = pda->pio_write_status_val; + do { + cpu_relax(); + } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval); + return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0; +} + +/** + * sn_migrate - SN-specific task migration actions + * @task: Task being migrated to new CPU + * + * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order. + * Context switching user threads which have memory-mapped MMIO may cause + * PIOs to issue from separate CPUs, thus the PIO writes must be drained + * from the previous CPU's Shub before execution resumes on the new CPU. 
+ */ +void sn_migrate(struct task_struct *task) +{ + pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu); + volatile unsigned long *adr = last_pda->pio_write_status_addr; + unsigned long val = last_pda->pio_write_status_val; + + /* Drain PIO writes from old CPU's Shub */ + while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) + != val)) + cpu_relax(); +} + +void sn_tlb_migrate_finish(struct mm_struct *mm) +{ + /* flush_tlb_mm is inefficient if more than 1 users of mm */ + if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1) + flush_tlb_mm(mm); +} + +static void +sn2_ipi_flush_all_tlb(struct mm_struct *mm) +{ + unsigned long itc; + + itc = ia64_get_itc(); + smp_flush_tlb_cpumask(*mm_cpumask(mm)); + itc = ia64_get_itc() - itc; + __this_cpu_add(ptcstats.shub_ipi_flushes_itc_clocks, itc); + __this_cpu_inc(ptcstats.shub_ipi_flushes); +} + +/** + * sn2_global_tlb_purge - globally purge translation cache of virtual address range + * @mm: mm_struct containing virtual address range + * @start: start of virtual address range + * @end: end of virtual address range + * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc)) + * + * Purges the translation caches of all processors of the given virtual address + * range. + * + * Note: + * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context. + * - cpu_vm_mask is converted into a nodemask of the nodes containing the + * cpus in cpu_vm_mask. + * - if only one bit is set in cpu_vm_mask & it is the current cpu & the + * process is purging its own virtual address range, then only the + * local TLB needs to be flushed. This flushing can be done using + * ptc.l. This is the common case & avoids the global spinlock. + * - if multiple cpus have loaded the context, then flushing has to be + * done with ptc.g/MMRs under protection of the global ptc_lock. 
+ */ + +void +sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned long nbits) +{ + int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid; + int mymm = (mm == current->active_mm && mm == current->mm); + int use_cpu_ptcga; + volatile unsigned long *ptc0, *ptc1; + unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; + short nasids[MAX_NUMNODES], nix; + nodemask_t nodes_flushed; + int active, max_active, deadlock, flush_opt = sn2_flush_opt; + + if (flush_opt > 2) { + sn2_ipi_flush_all_tlb(mm); + return; + } + + nodes_clear(nodes_flushed); + i = 0; + + for_each_cpu(cpu, mm_cpumask(mm)) { + cnode = cpu_to_node(cpu); + node_set(cnode, nodes_flushed); + lcpu = cpu; + i++; + } + + if (i == 0) + return; + + preempt_disable(); + + if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) { + do { + ia64_ptcl(start, nbits << 2); + start += (1UL << nbits); + } while (start < end); + ia64_srlz_i(); + __this_cpu_inc(ptcstats.ptc_l); + preempt_enable(); + return; + } + + if (atomic_read(&mm->mm_users) == 1 && mymm) { + flush_tlb_mm(mm); + __this_cpu_inc(ptcstats.change_rid); + preempt_enable(); + return; + } + + if (flush_opt == 2) { + sn2_ipi_flush_all_tlb(mm); + preempt_enable(); + return; + } + + itc = ia64_get_itc(); + nix = 0; + for_each_node_mask(cnode, nodes_flushed) + nasids[nix++] = cnodeid_to_nasid(cnode); + + rr_value = (mm->context << 3) | REGION_NUMBER(start); + + shub1 = is_shub1(); + if (shub1) { + data0 = (1UL << SH1_PTC_0_A_SHFT) | + (nbits << SH1_PTC_0_PS_SHFT) | + (rr_value << SH1_PTC_0_RID_SHFT) | + (1UL << SH1_PTC_0_START_SHFT); + ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0); + ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1); + } else { + data0 = (1UL << SH2_PTC_A_SHFT) | + (nbits << SH2_PTC_PS_SHFT) | + (1UL << SH2_PTC_START_SHFT); + ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + + (rr_value << SH2_PTC_RID_SHFT)); + ptc1 = NULL; + } + + + mynasid = get_nasid(); + use_cpu_ptcga = 
local_node_uses_ptc_ga(shub1); + max_active = max_active_pio(shub1); + + itc = ia64_get_itc(); + spin_lock_irqsave(PTC_LOCK(shub1), flags); + itc2 = ia64_get_itc(); + + __this_cpu_add(ptcstats.lock_itc_clocks, itc2 - itc); + __this_cpu_inc(ptcstats.shub_ptc_flushes); + __this_cpu_add(ptcstats.nodes_flushed, nix); + if (!mymm) + __this_cpu_inc(ptcstats.shub_ptc_flushes_not_my_mm); + + if (use_cpu_ptcga && !mymm) { + old_rr = ia64_get_rr(start); + ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8)); + ia64_srlz_d(); + } + + wait_piowc(); + do { + if (shub1) + data1 = start | (1UL << SH1_PTC_1_START_SHFT); + else + data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); + deadlock = 0; + active = 0; + for (ibegin = 0, i = 0; i < nix; i++) { + nasid = nasids[i]; + if (use_cpu_ptcga && unlikely(nasid == mynasid)) { + ia64_ptcga(start, nbits << 2); + ia64_srlz_i(); + } else { + ptc0 = CHANGE_NASID(nasid, ptc0); + if (ptc1) + ptc1 = CHANGE_NASID(nasid, ptc1); + pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1); + active++; + } + if (active >= max_active || i == (nix - 1)) { + if ((deadlock = wait_piowc())) { + if (flush_opt == 1) + goto done; + sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1); + if (reset_max_active_on_deadlock()) + max_active = 1; + } + active = 0; + ibegin = i + 1; + } + } + start += (1UL << nbits); + } while (start < end); + +done: + itc2 = ia64_get_itc() - itc2; + __this_cpu_add(ptcstats.shub_itc_clocks, itc2); + if (itc2 > __this_cpu_read(ptcstats.shub_itc_clocks_max)) + __this_cpu_write(ptcstats.shub_itc_clocks_max, itc2); + + if (old_rr) { + ia64_set_rr(start, old_rr); + ia64_srlz_d(); + } + + spin_unlock_irqrestore(PTC_LOCK(shub1), flags); + + if (flush_opt == 1 && deadlock) { + __this_cpu_inc(ptcstats.deadlocks); + sn2_ipi_flush_all_tlb(mm); + } + + preempt_enable(); +} + +/* + * sn2_ptc_deadlock_recovery + * + * Recover from PTC deadlocks conditions. 
Recovery requires stepping thru each
 * TLB flush transaction. The recovery sequence is somewhat tricky & is
 * coded in assembly language.
 */

void
sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
			  volatile unsigned long *ptc0, unsigned long data0,
			  volatile unsigned long *ptc1, unsigned long data1)
{
	unsigned long *status_reg;
	unsigned long idle_val;
	short idx;

	__this_cpu_inc(ptcstats.deadlocks);

	status_reg = (unsigned long *) pda->pio_write_status_addr;
	idle_val = pda->pio_write_status_val;

	/* Replay the purge one node at a time over nasids[ib..ie] (inclusive). */
	for (idx = ib; idx <= ie; idx++) {
		short target = nasids[idx];
		unsigned long repeats;

		/* skip our own node when it does not go through the MMRs */
		if (local_node_uses_ptc_ga(is_shub1()) && target == mynasid)
			continue;

		ptc0 = CHANGE_NASID(target, ptc0);
		if (ptc1)
			ptc1 = CHANGE_NASID(target, ptc1);

		repeats = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1,
							 status_reg, idle_val);
		__this_cpu_add(ptcstats.deadlocks2, repeats);
	}
}

/**
 * sn_send_IPI_phys - send an IPI to a Nasid and slice
 * @nasid: nasid to receive the interrupt (may be outside partition)
 * @physid: physical cpuid to receive the interrupt.
+ * @vector: command to send + * @delivery_mode: delivery mechanism + * + * Sends an IPI (interprocessor interrupt) to the processor specified by + * @physid + * + * @delivery_mode can be one of the following + * + * %IA64_IPI_DM_INT - pend an interrupt + * %IA64_IPI_DM_PMI - pend a PMI + * %IA64_IPI_DM_NMI - pend an NMI + * %IA64_IPI_DM_INIT - pend an INIT interrupt + */ +void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode) +{ + long val; + unsigned long flags = 0; + volatile long *p; + + p = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT); + val = (1UL << SH_IPI_INT_SEND_SHFT) | + (physid << SH_IPI_INT_PID_SHFT) | + ((long)delivery_mode << SH_IPI_INT_TYPE_SHFT) | + ((long)vector << SH_IPI_INT_IDX_SHFT) | + (0x000feeUL << SH_IPI_INT_BASE_SHFT); + + mb(); + if (enable_shub_wars_1_1()) { + spin_lock_irqsave(&sn2_global_ptc_lock, flags); + } + pio_phys_write_mmr(p, val); + if (enable_shub_wars_1_1()) { + wait_piowc(); + spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); + } + +} + +EXPORT_SYMBOL(sn_send_IPI_phys); + +/** + * sn2_send_IPI - send an IPI to a processor + * @cpuid: target of the IPI + * @vector: command to send + * @delivery_mode: delivery mechanism + * @redirect: redirect the IPI? + * + * Sends an IPI (InterProcessor Interrupt) to the processor specified by + * @cpuid. 
@vector specifies the command to send, while @delivery_mode can + * be one of the following + * + * %IA64_IPI_DM_INT - pend an interrupt + * %IA64_IPI_DM_PMI - pend a PMI + * %IA64_IPI_DM_NMI - pend an NMI + * %IA64_IPI_DM_INIT - pend an INIT interrupt + */ +void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect) +{ + long physid; + int nasid; + + physid = cpu_physical_id(cpuid); + nasid = cpuid_to_nasid(cpuid); + + /* the following is used only when starting cpus at boot time */ + if (unlikely(nasid == -1)) + ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL); + + sn_send_IPI_phys(nasid, physid, vector, delivery_mode); +} + +#ifdef CONFIG_HOTPLUG_CPU +/** + * sn_cpu_disable_allowed - Determine if a CPU can be disabled. + * @cpu - CPU that is requested to be disabled. + * + * CPU disable is only allowed on SHub2 systems running with a PROM + * that supports CPU disable. It is not permitted to disable the boot processor. + */ +bool sn_cpu_disable_allowed(int cpu) +{ + if (is_shub2() && sn_prom_feature_available(PRF_CPU_DISABLE_SUPPORT)) { + if (cpu != 0) + return true; + else + printk(KERN_WARNING + "Disabling the boot processor is not allowed.\n"); + + } else + printk(KERN_WARNING + "CPU disable is not supported on this system.\n"); + + return false; +} +#endif /* CONFIG_HOTPLUG_CPU */ + +#ifdef CONFIG_PROC_FS + +#define PTC_BASENAME "sgi_sn/ptc_statistics" + +static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset) +{ + if (*offset < nr_cpu_ids) + return offset; + return NULL; +} + +static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset) +{ + (*offset)++; + if (*offset < nr_cpu_ids) + return offset; + return NULL; +} + +static void sn2_ptc_seq_stop(struct seq_file *file, void *data) +{ +} + +static int sn2_ptc_seq_show(struct seq_file *file, void *data) +{ + struct ptc_stats *stat; + int cpu; + + cpu = *(loff_t *) data; + + if (!cpu) { + seq_printf(file, + "# cpu ptc_l newrid ptc_flushes nodes_flushed 
deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2 ipi_fluches ipi_nsec\n"); + seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt); + } + + if (cpu < nr_cpu_ids && cpu_online(cpu)) { + stat = &per_cpu(ptcstats, cpu); + seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, + stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, + stat->deadlocks, + 1000 * stat->lock_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec, + 1000 * stat->shub_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec, + 1000 * stat->shub_itc_clocks_max / per_cpu(ia64_cpu_info, cpu).cyc_per_usec, + stat->shub_ptc_flushes_not_my_mm, + stat->deadlocks2, + stat->shub_ipi_flushes, + 1000 * stat->shub_ipi_flushes_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec); + } + return 0; +} + +static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, size_t count, loff_t *data) +{ + int cpu; + char optstr[64]; + + if (count == 0 || count > sizeof(optstr)) + return -EINVAL; + if (copy_from_user(optstr, user, count)) + return -EFAULT; + optstr[count - 1] = '\0'; + sn2_flush_opt = simple_strtoul(optstr, NULL, 0); + + for_each_online_cpu(cpu) + memset(&per_cpu(ptcstats, cpu), 0, sizeof(struct ptc_stats)); + + return count; +} + +static const struct seq_operations sn2_ptc_seq_ops = { + .start = sn2_ptc_seq_start, + .next = sn2_ptc_seq_next, + .stop = sn2_ptc_seq_stop, + .show = sn2_ptc_seq_show +}; + +static int sn2_ptc_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &sn2_ptc_seq_ops); +} + +static const struct file_operations proc_sn2_ptc_operations = { + .open = sn2_ptc_proc_open, + .read = seq_read, + .write = sn2_ptc_proc_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static struct proc_dir_entry *proc_sn2_ptc; + +static int __init sn2_ptc_init(void) +{ + if (!ia64_platform_is("sn2")) + return 0; + + proc_sn2_ptc = proc_create(PTC_BASENAME, 0444, + 
NULL, &proc_sn2_ptc_operations); + if (!proc_sn2_ptc) { + printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME); + return -EINVAL; + } + spin_lock_init(&sn2_global_ptc_lock); + return 0; +} + +static void __exit sn2_ptc_exit(void) +{ + remove_proc_entry(PTC_BASENAME, NULL); +} + +module_init(sn2_ptc_init); +module_exit(sn2_ptc_exit); +#endif /* CONFIG_PROC_FS */ + diff --git a/kernel/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/kernel/arch/ia64/sn/kernel/sn2/sn_hwperf.c new file mode 100644 index 000000000..b9992571c --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -0,0 +1,1003 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved. + * + * SGI Altix topology and hardware performance monitoring API. + * Mark Goodwin . + * + * Creates /proc/sgi_sn/sn_topology (read-only) to export + * info about Altix nodes, routers, CPUs and NumaLink + * interconnection/topology. + * + * Also creates a dynamic misc device named "sn_hwperf" + * that supports an ioctl interface to call down into SAL + * to discover hw objects, topology and to read/write + * memory mapped registers, e.g. for performance monitoring. + * The "sn_hwperf" device is registered only after the procfs + * file is first opened, i.e. only if/when it's needed. 
+ * + * This API is used by SGI Performance Co-Pilot and other + * tools, see http://oss.sgi.com/projects/pcp + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void *sn_hwperf_salheap = NULL; +static int sn_hwperf_obj_cnt = 0; +static nasid_t sn_hwperf_master_nasid = INVALID_NASID; +static int sn_hwperf_init(void); +static DEFINE_MUTEX(sn_hwperf_init_mutex); + +#define cnode_possible(n) ((n) < num_cnodes) + +static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret) +{ + int e; + u64 sz; + struct sn_hwperf_object_info *objbuf = NULL; + + if ((e = sn_hwperf_init()) < 0) { + printk(KERN_ERR "sn_hwperf_init failed: err %d\n", e); + goto out; + } + + sz = sn_hwperf_obj_cnt * sizeof(struct sn_hwperf_object_info); + objbuf = vmalloc(sz); + if (objbuf == NULL) { + printk("sn_hwperf_enum_objects: vmalloc(%d) failed\n", (int)sz); + e = -ENOMEM; + goto out; + } + + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, SN_HWPERF_ENUM_OBJECTS, + 0, sz, (u64) objbuf, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + vfree(objbuf); + } + +out: + *nobj = sn_hwperf_obj_cnt; + *ret = objbuf; + return e; +} + +static int sn_hwperf_location_to_bpos(char *location, + int *rack, int *bay, int *slot, int *slab) +{ + char type; + + /* first scan for an old style geoid string */ + if (sscanf(location, "%03d%c%02d#%d", + rack, &type, bay, slab) == 4) + *slot = 0; + else /* scan for a new bladed geoid string */ + if (sscanf(location, "%03d%c%02d^%02d#%d", + rack, &type, bay, slot, slab) != 5) + return -1; + /* success */ + return 0; +} + +static int sn_hwperf_geoid_to_cnode(char *location) +{ + int cnode; + geoid_t geoid; + moduleid_t module_id; + int rack, bay, slot, slab; + int this_rack, this_bay, this_slot, this_slab; + + if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, 
&slab)) + return -1; + + /* + * FIXME: replace with cleaner for_each_XXX macro which addresses + * both compute and IO nodes once ACPI3.0 is available. + */ + for (cnode = 0; cnode < num_cnodes; cnode++) { + geoid = cnodeid_get_geoid(cnode); + module_id = geo_module(geoid); + this_rack = MODULE_GET_RACK(module_id); + this_bay = MODULE_GET_BPOS(module_id); + this_slot = geo_slot(geoid); + this_slab = geo_slab(geoid); + if (rack == this_rack && bay == this_bay && + slot == this_slot && slab == this_slab) { + break; + } + } + + return cnode_possible(cnode) ? cnode : -1; +} + +static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj) +{ + if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) + BUG(); + if (SN_HWPERF_FOREIGN(obj)) + return -1; + return sn_hwperf_geoid_to_cnode(obj->location); +} + +static int sn_hwperf_generic_ordinal(struct sn_hwperf_object_info *obj, + struct sn_hwperf_object_info *objs) +{ + int ordinal; + struct sn_hwperf_object_info *p; + + for (ordinal=0, p=objs; p != obj; p++) { + if (SN_HWPERF_FOREIGN(p)) + continue; + if (SN_HWPERF_SAME_OBJTYPE(p, obj)) + ordinal++; + } + + return ordinal; +} + +static const char *slabname_node = "node"; /* SHub asic */ +static const char *slabname_ionode = "ionode"; /* TIO asic */ +static const char *slabname_router = "router"; /* NL3R or NL4R */ +static const char *slabname_other = "other"; /* unknown asic */ + +static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj, + struct sn_hwperf_object_info *objs, int *ordinal) +{ + int isnode; + const char *slabname = slabname_other; + + if ((isnode = SN_HWPERF_IS_NODE(obj)) || SN_HWPERF_IS_IONODE(obj)) { + slabname = isnode ? 
slabname_node : slabname_ionode; + *ordinal = sn_hwperf_obj_to_cnode(obj); + } + else { + *ordinal = sn_hwperf_generic_ordinal(obj, objs); + if (SN_HWPERF_IS_ROUTER(obj)) + slabname = slabname_router; + } + + return slabname; +} + +static void print_pci_topology(struct seq_file *s) +{ + char *p; + size_t sz; + int e; + + for (sz = PAGE_SIZE; sz < 16 * PAGE_SIZE; sz += PAGE_SIZE) { + if (!(p = kmalloc(sz, GFP_KERNEL))) + break; + e = ia64_sn_ioif_get_pci_topology(__pa(p), sz); + if (e == SALRET_OK) + seq_puts(s, p); + kfree(p); + if (e == SALRET_OK || e == SALRET_NOT_IMPLEMENTED) + break; + } +} + +static inline int sn_hwperf_has_cpus(cnodeid_t node) +{ + return node < MAX_NUMNODES && node_online(node) && nr_cpus_node(node); +} + +static inline int sn_hwperf_has_mem(cnodeid_t node) +{ + return node < MAX_NUMNODES && node_online(node) && NODE_DATA(node)->node_present_pages; +} + +static struct sn_hwperf_object_info * +sn_hwperf_findobj_id(struct sn_hwperf_object_info *objbuf, + int nobj, int id) +{ + int i; + struct sn_hwperf_object_info *p = objbuf; + + for (i=0; i < nobj; i++, p++) { + if (p->id == id) + return p; + } + + return NULL; + +} + +static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objbuf, + int nobj, cnodeid_t node, cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node) +{ + int e; + struct sn_hwperf_object_info *nodeobj = NULL; + struct sn_hwperf_object_info *op; + struct sn_hwperf_object_info *dest; + struct sn_hwperf_object_info *router; + struct sn_hwperf_port_info ptdata[16]; + int sz, i, j; + cnodeid_t c; + int found_mem = 0; + int found_cpu = 0; + + if (!cnode_possible(node)) + return -EINVAL; + + if (sn_hwperf_has_cpus(node)) { + if (near_cpu_node) + *near_cpu_node = node; + found_cpu++; + } + + if (sn_hwperf_has_mem(node)) { + if (near_mem_node) + *near_mem_node = node; + found_mem++; + } + + if (found_cpu && found_mem) + return 0; /* trivially successful */ + + /* find the argument node object */ + for (i=0, op=objbuf; i 
< nobj; i++, op++) { + if (!SN_HWPERF_IS_NODE(op) && !SN_HWPERF_IS_IONODE(op)) + continue; + if (node == sn_hwperf_obj_to_cnode(op)) { + nodeobj = op; + break; + } + } + if (!nodeobj) { + e = -ENOENT; + goto err; + } + + /* get it's interconnect topology */ + sz = op->ports * sizeof(struct sn_hwperf_port_info); + BUG_ON(sz > sizeof(ptdata)); + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, nodeobj->id, sz, + (u64)&ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto err; + } + + /* find nearest node with cpus and nearest memory */ + for (router=NULL, j=0; j < op->ports; j++) { + dest = sn_hwperf_findobj_id(objbuf, nobj, ptdata[j].conn_id); + if (dest && SN_HWPERF_IS_ROUTER(dest)) + router = dest; + if (!dest || SN_HWPERF_FOREIGN(dest) || + !SN_HWPERF_IS_NODE(dest) || SN_HWPERF_IS_IONODE(dest)) { + continue; + } + c = sn_hwperf_obj_to_cnode(dest); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + } + + if (router && (!found_cpu || !found_mem)) { + /* search for a node connected to the same router */ + sz = router->ports * sizeof(struct sn_hwperf_port_info); + BUG_ON(sz > sizeof(ptdata)); + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, router->id, sz, + (u64)&ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto err; + } + for (j=0; j < router->ports; j++) { + dest = sn_hwperf_findobj_id(objbuf, nobj, + ptdata[j].conn_id); + if (!dest || dest->id == node || + SN_HWPERF_FOREIGN(dest) || + !SN_HWPERF_IS_NODE(dest) || + SN_HWPERF_IS_IONODE(dest)) { + continue; + } + c = sn_hwperf_obj_to_cnode(dest); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (found_cpu 
&& found_mem) + break; + } + } + + if (!found_cpu || !found_mem) { + /* resort to _any_ node with CPUs and memory */ + for (i=0, op=objbuf; i < nobj; i++, op++) { + if (SN_HWPERF_FOREIGN(op) || + SN_HWPERF_IS_IONODE(op) || + !SN_HWPERF_IS_NODE(op)) { + continue; + } + c = sn_hwperf_obj_to_cnode(op); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (found_cpu && found_mem) + break; + } + } + + if (!found_cpu || !found_mem) + e = -ENODATA; + +err: + return e; +} + + +static int sn_topology_show(struct seq_file *s, void *d) +{ + int sz; + int pt; + int e = 0; + int i; + int j; + const char *slabname; + int ordinal; + char slice; + struct cpuinfo_ia64 *c; + struct sn_hwperf_port_info *ptdata; + struct sn_hwperf_object_info *p; + struct sn_hwperf_object_info *obj = d; /* this object */ + struct sn_hwperf_object_info *objs = s->private; /* all objects */ + u8 shubtype; + u8 system_size; + u8 sharing_size; + u8 partid; + u8 coher; + u8 nasid_shift; + u8 region_size; + u16 nasid_mask; + int nasid_msb; + + if (obj == objs) { + seq_printf(s, "# sn_topology version 2\n"); + seq_printf(s, "# objtype ordinal location partition" + " [attribute value [, ...]]\n"); + + if (ia64_sn_get_sn_info(0, + &shubtype, &nasid_mask, &nasid_shift, &system_size, + &sharing_size, &partid, &coher, &region_size)) + BUG(); + for (nasid_msb=63; nasid_msb > 0; nasid_msb--) { + if (((u64)nasid_mask << nasid_shift) & (1ULL << nasid_msb)) + break; + } + seq_printf(s, "partition %u %s local " + "shubtype %s, " + "nasid_mask 0x%016llx, " + "nasid_bits %d:%d, " + "system_size %d, " + "sharing_size %d, " + "coherency_domain %d, " + "region_size %d\n", + + partid, utsname()->nodename, + shubtype ?
"shub2" : "shub1", + (u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift, + system_size, sharing_size, coher, region_size); + + print_pci_topology(s); + } + + if (SN_HWPERF_FOREIGN(obj)) { + /* private in another partition: not interesting */ + return 0; + } + + for (i = 0; i < SN_HWPERF_MAXSTRING && obj->name[i]; i++) { + if (obj->name[i] == ' ') + obj->name[i] = '_'; + } + + slabname = sn_hwperf_get_slabname(obj, objs, &ordinal); + seq_printf(s, "%s %d %s %s asic %s", slabname, ordinal, obj->location, + obj->sn_hwp_this_part ? "local" : "shared", obj->name); + + if (ordinal < 0 || (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj))) + seq_putc(s, '\n'); + else { + cnodeid_t near_mem = -1; + cnodeid_t near_cpu = -1; + + seq_printf(s, ", nasid 0x%x", cnodeid_to_nasid(ordinal)); + + if (sn_hwperf_get_nearest_node_objdata(objs, sn_hwperf_obj_cnt, + ordinal, &near_mem, &near_cpu) == 0) { + seq_printf(s, ", near_mem_nodeid %d, near_cpu_nodeid %d", + near_mem, near_cpu); + } + + if (!SN_HWPERF_IS_IONODE(obj)) { + for_each_online_node(i) { + seq_printf(s, i ? ":%d" : ", dist %d", + node_distance(ordinal, i)); + } + } + + seq_putc(s, '\n'); + + /* + * CPUs on this node, if any + */ + if (!SN_HWPERF_IS_IONODE(obj)) { + for_each_cpu_and(i, cpu_online_mask, + cpumask_of_node(ordinal)) { + slice = 'a' + cpuid_to_slice(i); + c = cpu_data(i); + seq_printf(s, "cpu %d %s%c local" + " freq %luMHz, arch ia64", + i, obj->location, slice, + c->proc_freq / 1000000); + for_each_online_cpu(j) { + seq_printf(s, j ? 
":%d" : ", dist %d", + node_distance( + cpu_to_node(i), + cpu_to_node(j))); + } + seq_putc(s, '\n'); + } + } + } + + if (obj->ports) { + /* + * numalink ports + */ + sz = obj->ports * sizeof(struct sn_hwperf_port_info); + if ((ptdata = kmalloc(sz, GFP_KERNEL)) == NULL) + return -ENOMEM; + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, obj->id, sz, + (u64) ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) + return -EINVAL; + for (ordinal=0, p=objs; p != obj; p++) { + if (!SN_HWPERF_FOREIGN(p)) + ordinal += p->ports; + } + for (pt = 0; pt < obj->ports; pt++) { + for (p = objs, i = 0; i < sn_hwperf_obj_cnt; i++, p++) { + if (ptdata[pt].conn_id == p->id) { + break; + } + } + seq_printf(s, "numalink %d %s-%d", + ordinal+pt, obj->location, ptdata[pt].port); + + if (i >= sn_hwperf_obj_cnt) { + /* no connection */ + seq_puts(s, " local endpoint disconnected" + ", protocol unknown\n"); + continue; + } + + if (obj->sn_hwp_this_part && p->sn_hwp_this_part) + /* both ends local to this partition */ + seq_puts(s, " local"); + else if (SN_HWPERF_FOREIGN(p)) + /* both ends of the link in foreign partiton */ + seq_puts(s, " foreign"); + else + /* link straddles a partition */ + seq_puts(s, " shared"); + + /* + * Unlikely, but strictly should query the LLP config + * registers because an NL4R can be configured to run + * NL3 protocol, even when not talking to an NL3 router. + * Ditto for node-node. + */ + seq_printf(s, " endpoint %s-%d, protocol %s\n", + p->location, ptdata[pt].conn_port, + (SN_HWPERF_IS_NL3ROUTER(obj) || + SN_HWPERF_IS_NL3ROUTER(p)) ? 
"LLP3" : "LLP4"); + } + kfree(ptdata); + } + + return 0; +} + +static void *sn_topology_start(struct seq_file *s, loff_t * pos) +{ + struct sn_hwperf_object_info *objs = s->private; + + if (*pos < sn_hwperf_obj_cnt) + return (void *)(objs + *pos); + + return NULL; +} + +static void *sn_topology_next(struct seq_file *s, void *v, loff_t * pos) +{ + ++*pos; + return sn_topology_start(s, pos); +} + +static void sn_topology_stop(struct seq_file *m, void *v) +{ + return; +} + +/* + * /proc/sgi_sn/sn_topology, read-only using seq_file + */ +static const struct seq_operations sn_topology_seq_ops = { + .start = sn_topology_start, + .next = sn_topology_next, + .stop = sn_topology_stop, + .show = sn_topology_show +}; + +struct sn_hwperf_op_info { + u64 op; + struct sn_hwperf_ioctl_args *a; + void *p; + int *v0; + int ret; +}; + +static void sn_hwperf_call_sal(void *info) +{ + struct sn_hwperf_op_info *op_info = info; + int r; + + r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op_info->op, + op_info->a->arg, op_info->a->sz, + (u64) op_info->p, 0, 0, op_info->v0); + op_info->ret = r; +} + +static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info) +{ + u32 cpu; + u32 use_ipi; + int r = 0; + cpumask_t save_allowed; + + cpu = (op_info->a->arg & SN_HWPERF_ARG_CPU_MASK) >> 32; + use_ipi = op_info->a->arg & SN_HWPERF_ARG_USE_IPI_MASK; + op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK; + + if (cpu != SN_HWPERF_ARG_ANY_CPU) { + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { + r = -EINVAL; + goto out; + } + } + + if (cpu == SN_HWPERF_ARG_ANY_CPU) { + /* don't care which cpu */ + sn_hwperf_call_sal(op_info); + } else if (cpu == get_cpu()) { + /* already on correct cpu */ + sn_hwperf_call_sal(op_info); + put_cpu(); + } else { + put_cpu(); + if (use_ipi) { + /* use an interprocessor interrupt to call SAL */ + smp_call_function_single(cpu, sn_hwperf_call_sal, + op_info, 1); + } + else { + /* migrate the task before calling SAL */ + save_allowed = current->cpus_allowed; + 
set_cpus_allowed_ptr(current, cpumask_of(cpu)); + sn_hwperf_call_sal(op_info); + set_cpus_allowed_ptr(current, &save_allowed); + } + } + r = op_info->ret; + +out: + return r; +} + +/* map SAL hwperf error code to system error code */ +static int sn_hwperf_map_err(int hwperf_err) +{ + int e; + + switch(hwperf_err) { + case SN_HWPERF_OP_OK: + e = 0; + break; + + case SN_HWPERF_OP_NOMEM: + e = -ENOMEM; + break; + + case SN_HWPERF_OP_NO_PERM: + e = -EPERM; + break; + + case SN_HWPERF_OP_IO_ERROR: + e = -EIO; + break; + + case SN_HWPERF_OP_BUSY: + e = -EBUSY; + break; + + case SN_HWPERF_OP_RECONFIGURE: + e = -EAGAIN; + break; + + case SN_HWPERF_OP_INVAL: + default: + e = -EINVAL; + break; + } + + return e; +} + +/* + * ioctl for "sn_hwperf" misc device + */ +static long sn_hwperf_ioctl(struct file *fp, u32 op, unsigned long arg) +{ + struct sn_hwperf_ioctl_args a; + struct cpuinfo_ia64 *cdata; + struct sn_hwperf_object_info *objs; + struct sn_hwperf_object_info *cpuobj; + struct sn_hwperf_op_info op_info; + void *p = NULL; + int nobj; + char slice; + int node; + int r; + int v0; + int i; + int j; + + /* only user requests are allowed here */ + if ((op & SN_HWPERF_OP_MASK) < 10) { + r = -EINVAL; + goto error; + } + r = copy_from_user(&a, (const void __user *)arg, + sizeof(struct sn_hwperf_ioctl_args)); + if (r != 0) { + r = -EFAULT; + goto error; + } + + /* + * Allocate memory to hold a kernel copy of the user buffer. The + * buffer contents are either copied in or out (or both) of user + * space depending on the flags encoded in the requested operation. 
+ */ + if (a.ptr) { + p = vmalloc(a.sz); + if (!p) { + r = -ENOMEM; + goto error; + } + } + + if (op & SN_HWPERF_OP_MEM_COPYIN) { + r = copy_from_user(p, (const void __user *)a.ptr, a.sz); + if (r != 0) { + r = -EFAULT; + goto error; + } + } + + switch (op) { + case SN_HWPERF_GET_CPU_INFO: + if (a.sz == sizeof(u64)) { + /* special case to get size needed */ + *(u64 *) p = (u64) num_online_cpus() * + sizeof(struct sn_hwperf_object_info); + } else + if (a.sz < num_online_cpus() * sizeof(struct sn_hwperf_object_info)) { + r = -ENOMEM; + goto error; + } else + if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) { + int cpuobj_index = 0; + + memset(p, 0, a.sz); + for (i = 0; i < nobj; i++) { + if (!SN_HWPERF_IS_NODE(objs + i)) + continue; + node = sn_hwperf_obj_to_cnode(objs + i); + for_each_online_cpu(j) { + if (node != cpu_to_node(j)) + continue; + cpuobj = (struct sn_hwperf_object_info *) p + cpuobj_index++; + slice = 'a' + cpuid_to_slice(j); + cdata = cpu_data(j); + cpuobj->id = j; + snprintf(cpuobj->name, + sizeof(cpuobj->name), + "CPU %luMHz %s", + cdata->proc_freq / 1000000, + cdata->vendor); + snprintf(cpuobj->location, + sizeof(cpuobj->location), + "%s%c", objs[i].location, + slice); + } + } + + vfree(objs); + } + break; + + case SN_HWPERF_GET_NODE_NASID: + if (a.sz != sizeof(u64) || + (node = a.arg) < 0 || !cnode_possible(node)) { + r = -EINVAL; + goto error; + } + *(u64 *)p = (u64)cnodeid_to_nasid(node); + break; + + case SN_HWPERF_GET_OBJ_NODE: + i = a.arg; + if (a.sz != sizeof(u64) || i < 0) { + r = -EINVAL; + goto error; + } + if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) { + if (i >= nobj) { + r = -EINVAL; + vfree(objs); + goto error; + } + if (objs[i].id != a.arg) { + for (i = 0; i < nobj; i++) { + if (objs[i].id == a.arg) + break; + } + } + if (i == nobj) { + r = -EINVAL; + vfree(objs); + goto error; + } + + if (!SN_HWPERF_IS_NODE(objs + i) && + !SN_HWPERF_IS_IONODE(objs + i)) { + r = -ENOENT; + vfree(objs); + goto error; + } + + *(u64 *)p = 
(u64)sn_hwperf_obj_to_cnode(objs + i); + vfree(objs); + } + break; + + case SN_HWPERF_GET_MMRS: + case SN_HWPERF_SET_MMRS: + case SN_HWPERF_OBJECT_DISTANCE: + op_info.p = p; + op_info.a = &a; + op_info.v0 = &v0; + op_info.op = op; + r = sn_hwperf_op_cpu(&op_info); + if (r) { + r = sn_hwperf_map_err(r); + a.v0 = v0; + goto error; + } + break; + + default: + /* all other ops are a direct SAL call */ + r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op, + a.arg, a.sz, (u64) p, 0, 0, &v0); + if (r) { + r = sn_hwperf_map_err(r); + goto error; + } + a.v0 = v0; + break; + } + + if (op & SN_HWPERF_OP_MEM_COPYOUT) { + r = copy_to_user((void __user *)a.ptr, p, a.sz); + if (r != 0) { + r = -EFAULT; + goto error; + } + } + +error: + vfree(p); + + return r; +} + +static const struct file_operations sn_hwperf_fops = { + .unlocked_ioctl = sn_hwperf_ioctl, + .llseek = noop_llseek, +}; + +static struct miscdevice sn_hwperf_dev = { + MISC_DYNAMIC_MINOR, + "sn_hwperf", + &sn_hwperf_fops +}; + +static int sn_hwperf_init(void) +{ + u64 v; + int salr; + int e = 0; + + /* single threaded, once-only initialization */ + mutex_lock(&sn_hwperf_init_mutex); + + if (sn_hwperf_salheap) { + mutex_unlock(&sn_hwperf_init_mutex); + return e; + } + + /* + * The PROM code needs a fixed reference node. For convenience the + * same node as the console I/O is used. + */ + sn_hwperf_master_nasid = (nasid_t) ia64_sn_get_console_nasid(); + + /* + * Request the needed size and install the PROM scratch area. + * The PROM keeps various tracking bits in this memory area. 
+ */ + salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + (u64) SN_HWPERF_GET_HEAPSIZE, 0, + (u64) sizeof(u64), (u64) &v, 0, 0, NULL); + if (salr != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto out; + } + + if ((sn_hwperf_salheap = vmalloc(v)) == NULL) { + e = -ENOMEM; + goto out; + } + salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_INSTALL_HEAP, 0, v, + (u64) sn_hwperf_salheap, 0, 0, NULL); + if (salr != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto out; + } + + salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_OBJECT_COUNT, 0, + sizeof(u64), (u64) &v, 0, 0, NULL); + if (salr != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto out; + } + sn_hwperf_obj_cnt = (int)v; + +out: + if (e < 0 && sn_hwperf_salheap) { + vfree(sn_hwperf_salheap); + sn_hwperf_salheap = NULL; + sn_hwperf_obj_cnt = 0; + } + mutex_unlock(&sn_hwperf_init_mutex); + return e; +} + +int sn_topology_open(struct inode *inode, struct file *file) +{ + int e; + struct seq_file *seq; + struct sn_hwperf_object_info *objbuf; + int nobj; + + if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) { + e = seq_open(file, &sn_topology_seq_ops); + seq = file->private_data; + seq->private = objbuf; + } + + return e; +} + +int sn_topology_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + + vfree(seq->private); + return seq_release(inode, file); +} + +int sn_hwperf_get_nearest_node(cnodeid_t node, + cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node) +{ + int e; + int nobj; + struct sn_hwperf_object_info *objbuf; + + if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) { + e = sn_hwperf_get_nearest_node_objdata(objbuf, nobj, + node, near_mem_node, near_cpu_node); + vfree(objbuf); + } + + return e; +} + +static int sn_hwperf_misc_register_init(void) +{ + int e; + + if (!ia64_platform_is("sn2")) + return 0; + + sn_hwperf_init(); + + /* + * Register a dynamic misc device for hwperf ioctls. 
/*
 * partition_id_show - seq_file show routine for the "partition_id" entry.
 * @s: seq_file to print into
 * @p: unused
 *
 * Prints sn_partition_id as a decimal number followed by a newline.
 * Always returns 0.
 */
static int partition_id_show(struct seq_file *s, void *p)
{
	seq_printf(s, "%d\n", sn_partition_id);
	return 0;
}
*file) +{ + return single_open(file, coherence_id_show, NULL); +} + +/* /proc/sgi_sn/sn_topology uses seq_file, see sn_hwperf.c */ +extern int sn_topology_open(struct inode *, struct file *); +extern int sn_topology_release(struct inode *, struct file *); + +static const struct file_operations proc_partition_id_fops = { + .open = partition_id_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations proc_system_sn_fops = { + .open = system_serial_number_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations proc_license_id_fops = { + .open = licenseID_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations proc_coherence_id_fops = { + .open = coherence_id_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations proc_sn_topo_fops = { + .open = sn_topology_open, + .read = seq_read, + .llseek = seq_lseek, + .release = sn_topology_release, +}; + +void register_sn_procfs(void) +{ + static struct proc_dir_entry *sgi_proc_dir = NULL; + + BUG_ON(sgi_proc_dir != NULL); + if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL))) + return; + + proc_create("partition_id", 0444, sgi_proc_dir, + &proc_partition_id_fops); + proc_create("system_serial_number", 0444, sgi_proc_dir, + &proc_system_sn_fops); + proc_create("licenseID", 0444, sgi_proc_dir, &proc_license_id_fops); + proc_create("coherence_id", 0444, sgi_proc_dir, + &proc_coherence_id_fops); + proc_create("sn_topology", 0444, sgi_proc_dir, &proc_sn_topo_fops); +} + +#endif /* CONFIG_PROC_FS */ diff --git a/kernel/arch/ia64/sn/kernel/sn2/timer.c b/kernel/arch/ia64/sn/kernel/sn2/timer.c new file mode 100644 index 000000000..abab8f99e --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/sn2/timer.c @@ -0,0 +1,60 @@ +/* + * linux/arch/ia64/sn/kernel/sn2/timer.c + * + * Copyright (C) 
2003 Silicon Graphics, Inc. + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger : updated for new timer-interpolation infrastructure + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +extern unsigned long sn_rtc_cycles_per_second; + +static cycle_t read_sn2(struct clocksource *cs) +{ + return (cycle_t)readq(RTC_COUNTER_ADDR); +} + +static struct clocksource clocksource_sn2 = { + .name = "sn2_rtc", + .rating = 450, + .read = read_sn2, + .mask = (1LL << 55) - 1, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +/* + * sn udelay uses the RTC instead of the ITC because the ITC is not + * synchronized across all CPUs, and the thread may migrate to another CPU + * if preemption is enabled. + */ +static void +ia64_sn_udelay (unsigned long usecs) +{ + unsigned long start = rtc_time(); + unsigned long end = start + + usecs * sn_rtc_cycles_per_second / 1000000; + + while (time_before((unsigned long)rtc_time(), end)) + cpu_relax(); +} + +void __init sn_timer_init(void) +{ + clocksource_sn2.archdata.fsys_mmio = RTC_COUNTER_ADDR; + clocksource_register_hz(&clocksource_sn2, sn_rtc_cycles_per_second); + + ia64_udelay = &ia64_sn_udelay; +} diff --git a/kernel/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/kernel/arch/ia64/sn/kernel/sn2/timer_interrupt.c new file mode 100644 index 000000000..103d6ea8e --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/sn2/timer_interrupt.c @@ -0,0 +1,60 @@ +/* + * + * + * Copyright (c) 2005, 2006 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/NoticeExplan + */ + +#include +#include +#include + +extern void sn_lb_int_war_check(void); +extern irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs); + +#define SN_LB_INT_WAR_INTERVAL 100 + +void sn_timer_interrupt(int irq, void *dev_id) +{ + /* LED blinking */ + if (!pda->hb_count--) { + pda->hb_count = HZ / 2; + set_led_bits(pda->hb_state ^= + LED_CPU_HEARTBEAT, LED_CPU_HEARTBEAT); + } + + if (is_shub1()) { + if (enable_shub_wars_1_1()) { + /* Bugfix code for SHUB 1.1 */ + if (pda->pio_shub_war_cam_addr) + *pda->pio_shub_war_cam_addr = 0x8000000000000010UL; + } + if (pda->sn_lb_int_war_ticks == 0) + sn_lb_int_war_check(); + pda->sn_lb_int_war_ticks++; + if (pda->sn_lb_int_war_ticks >= SN_LB_INT_WAR_INTERVAL) + pda->sn_lb_int_war_ticks = 0; + } +} diff --git a/kernel/arch/ia64/sn/kernel/tiocx.c b/kernel/arch/ia64/sn/kernel/tiocx.c new file mode 100644 index 000000000..e35f6485c --- /dev/null +++ b/kernel/arch/ia64/sn/kernel/tiocx.c @@ -0,0 +1,569 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2005 Silicon Graphics, Inc. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "tio.h" +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" + +#define CX_DEV_NONE 0 +#define DEVICE_NAME "tiocx" +#define WIDGET_ID 0 +#define TIOCX_DEBUG 0 + +#if TIOCX_DEBUG +#define DBG(fmt...) printk(KERN_ALERT fmt) +#else +#define DBG(fmt...) +#endif + +struct device_attribute dev_attr_cxdev_control; + +/** + * tiocx_match - Try to match driver id list with device. + * @dev: device pointer + * @drv: driver pointer + * + * Returns 1 if match, 0 otherwise. + */ +static int tiocx_match(struct device *dev, struct device_driver *drv) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + struct cx_drv *cx_drv = to_cx_driver(drv); + const struct cx_device_id *ids = cx_drv->id_table; + + if (!ids) + return 0; + + while (ids->part_num) { + if (ids->part_num == cx_dev->cx_id.part_num) + return 1; + ids++; + } + return 0; + +} + +static int tiocx_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return -ENODEV; +} + +static void tiocx_bus_release(struct device *dev) +{ + kfree(to_cx_dev(dev)); +} + +/** + * cx_device_match - Find cx_device in the id table. + * @ids: id table from driver + * @cx_device: part/mfg id for the device + * + */ +static const struct cx_device_id *cx_device_match(const struct cx_device_id + *ids, + struct cx_dev *cx_device) +{ + /* + * NOTES: We may want to check for CX_ANY_ID too. + * Do we want to match against nasid too? + * CX_DEV_NONE == 0, if the driver tries to register for + * part/mfg == 0 we should return no-match (NULL) here. + */ + while (ids->part_num && ids->mfg_num) { + if (ids->part_num == cx_device->cx_id.part_num && + ids->mfg_num == cx_device->cx_id.mfg_num) + return ids; + ids++; + } + + return NULL; +} + +/** + * cx_device_probe - Look for matching device. + * Call driver probe routine if found. 
+ * @cx_driver: driver table (cx_drv struct) from driver + * @cx_device: part/mfg id for the device + */ +static int cx_device_probe(struct device *dev) +{ + const struct cx_device_id *id; + struct cx_drv *cx_drv = to_cx_driver(dev->driver); + struct cx_dev *cx_dev = to_cx_dev(dev); + int error = 0; + + if (!cx_dev->driver && cx_drv->probe) { + id = cx_device_match(cx_drv->id_table, cx_dev); + if (id) { + if ((error = cx_drv->probe(cx_dev, id)) < 0) + return error; + else + cx_dev->driver = cx_drv; + } + } + + return error; +} + +/** + * cx_driver_remove - Remove driver from device struct. + * @dev: device + */ +static int cx_driver_remove(struct device *dev) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + struct cx_drv *cx_drv = cx_dev->driver; + if (cx_drv->remove) + cx_drv->remove(cx_dev); + cx_dev->driver = NULL; + return 0; +} + +struct bus_type tiocx_bus_type = { + .name = "tiocx", + .match = tiocx_match, + .uevent = tiocx_uevent, + .probe = cx_device_probe, + .remove = cx_driver_remove, +}; + +/** + * cx_driver_register - Register the driver. + * @cx_driver: driver table (cx_drv struct) from driver + * + * Called from the driver init routine to register a driver. + * The cx_drv struct contains the driver name, a pointer to + * a table of part/mfg numbers and a pointer to the driver's + * probe/attach routine. + */ +int cx_driver_register(struct cx_drv *cx_driver) +{ + cx_driver->driver.name = cx_driver->name; + cx_driver->driver.bus = &tiocx_bus_type; + + return driver_register(&cx_driver->driver); +} + +/** + * cx_driver_unregister - Unregister the driver. + * @cx_driver: driver table (cx_drv struct) from driver + */ +int cx_driver_unregister(struct cx_drv *cx_driver) +{ + driver_unregister(&cx_driver->driver); + return 0; +} + +/** + * cx_device_register - Register a device. 
+ * @nasid: device's nasid + * @part_num: device's part number + * @mfg_num: device's manufacturer number + * @hubdev: hub info associated with this device + * @bt: board type of the device + * + */ +int +cx_device_register(nasid_t nasid, int part_num, int mfg_num, + struct hubdev_info *hubdev, int bt) +{ + struct cx_dev *cx_dev; + int r; + + cx_dev = kzalloc(sizeof(struct cx_dev), GFP_KERNEL); + DBG("cx_dev= 0x%p\n", cx_dev); + if (cx_dev == NULL) + return -ENOMEM; + + cx_dev->cx_id.part_num = part_num; + cx_dev->cx_id.mfg_num = mfg_num; + cx_dev->cx_id.nasid = nasid; + cx_dev->hubdev = hubdev; + cx_dev->bt = bt; + + cx_dev->dev.parent = NULL; + cx_dev->dev.bus = &tiocx_bus_type; + cx_dev->dev.release = tiocx_bus_release; + dev_set_name(&cx_dev->dev, "%d", cx_dev->cx_id.nasid); + r = device_register(&cx_dev->dev); + if (r) { + kfree(cx_dev); + return r; + } + get_device(&cx_dev->dev); + + device_create_file(&cx_dev->dev, &dev_attr_cxdev_control); + + return 0; +} + +/** + * cx_device_unregister - Unregister a device. + * @cx_dev: part/mfg id for the device + */ +int cx_device_unregister(struct cx_dev *cx_dev) +{ + put_device(&cx_dev->dev); + device_unregister(&cx_dev->dev); + return 0; +} + +/** + * cx_device_reload - Reload the device. + * @nasid: device's nasid + * @part_num: device's part number + * @mfg_num: device's manufacturer number + * + * Remove the device associated with 'nasid' from device list and then + * call device-register with the given part/mfg numbers. 
+ */ +static int cx_device_reload(struct cx_dev *cx_dev) +{ + cx_device_unregister(cx_dev); + return cx_device_register(cx_dev->cx_id.nasid, cx_dev->cx_id.part_num, + cx_dev->cx_id.mfg_num, cx_dev->hubdev, + cx_dev->bt); +} + +static inline u64 tiocx_intr_alloc(nasid_t nasid, int widget, + u64 sn_irq_info, + int req_irq, nasid_t req_nasid, + int req_slice) +{ + struct ia64_sal_retval rv; + rv.status = 0; + rv.v0 = 0; + + ia64_sal_oemcall_nolock(&rv, SN_SAL_IOIF_INTERRUPT, + SAL_INTR_ALLOC, nasid, + widget, sn_irq_info, req_irq, + req_nasid, req_slice); + return rv.status; +} + +static inline void tiocx_intr_free(nasid_t nasid, int widget, + struct sn_irq_info *sn_irq_info) +{ + struct ia64_sal_retval rv; + rv.status = 0; + rv.v0 = 0; + + ia64_sal_oemcall_nolock(&rv, SN_SAL_IOIF_INTERRUPT, + SAL_INTR_FREE, nasid, + widget, sn_irq_info->irq_irq, + sn_irq_info->irq_cookie, 0, 0); +} + +struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq, + nasid_t req_nasid, int slice) +{ + struct sn_irq_info *sn_irq_info; + int status; + int sn_irq_size = sizeof(struct sn_irq_info); + + if ((nasid & 1) == 0) + return NULL; + + sn_irq_info = kzalloc(sn_irq_size, GFP_KERNEL); + if (sn_irq_info == NULL) + return NULL; + + status = tiocx_intr_alloc(nasid, widget, __pa(sn_irq_info), irq, + req_nasid, slice); + if (status) { + kfree(sn_irq_info); + return NULL; + } else { + return sn_irq_info; + } +} + +void tiocx_irq_free(struct sn_irq_info *sn_irq_info) +{ + u64 bridge = (u64) sn_irq_info->irq_bridge; + nasid_t nasid = NASID_GET(bridge); + int widget; + + if (nasid & 1) { + widget = TIO_SWIN_WIDGETNUM(bridge); + tiocx_intr_free(nasid, widget, sn_irq_info); + kfree(sn_irq_info); + } +} + +u64 tiocx_dma_addr(u64 addr) +{ + return PHYS_TO_TIODMA(addr); +} + +u64 tiocx_swin_base(int nasid) +{ + return TIO_SWIN_BASE(nasid, TIOCX_CORELET); +} + +EXPORT_SYMBOL(cx_driver_register); +EXPORT_SYMBOL(cx_driver_unregister); +EXPORT_SYMBOL(cx_device_register); 
+EXPORT_SYMBOL(cx_device_unregister); +EXPORT_SYMBOL(tiocx_irq_alloc); +EXPORT_SYMBOL(tiocx_irq_free); +EXPORT_SYMBOL(tiocx_bus_type); +EXPORT_SYMBOL(tiocx_dma_addr); +EXPORT_SYMBOL(tiocx_swin_base); + +static void tio_conveyor_set(nasid_t nasid, int enable_flag) +{ + u64 ice_frz; + u64 disable_cb = (1ull << 61); + + if (!(nasid & 1)) + return; + + ice_frz = REMOTE_HUB_L(nasid, TIO_ICE_FRZ_CFG); + if (enable_flag) { + if (!(ice_frz & disable_cb)) /* already enabled */ + return; + ice_frz &= ~disable_cb; + } else { + if (ice_frz & disable_cb) /* already disabled */ + return; + ice_frz |= disable_cb; + } + DBG(KERN_ALERT "TIO_ICE_FRZ_CFG= 0x%lx\n", ice_frz); + REMOTE_HUB_S(nasid, TIO_ICE_FRZ_CFG, ice_frz); +} + +#define tio_conveyor_enable(nasid) tio_conveyor_set(nasid, 1) +#define tio_conveyor_disable(nasid) tio_conveyor_set(nasid, 0) + +static void tio_corelet_reset(nasid_t nasid, int corelet) +{ + if (!(nasid & 1)) + return; + + REMOTE_HUB_S(nasid, TIO_ICE_PMI_TX_CFG, 1 << corelet); + udelay(2000); + REMOTE_HUB_S(nasid, TIO_ICE_PMI_TX_CFG, 0); + udelay(2000); +} + +static int is_fpga_tio(int nasid, int *bt) +{ + u16 uninitialized_var(ioboard_type); /* GCC be quiet */ + long rc; + + rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard_type); + if (rc) { + printk(KERN_WARNING "ia64_sn_sysctl_ioboard_get failed: %ld\n", + rc); + return 0; + } + + switch (ioboard_type) { + case L1_BRICKTYPE_SA: + case L1_BRICKTYPE_ATHENA: + case L1_BOARDTYPE_DAYTONA: + *bt = ioboard_type; + return 1; + } + + return 0; +} + +static int bitstream_loaded(nasid_t nasid) +{ + u64 cx_credits; + + cx_credits = REMOTE_HUB_L(nasid, TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3); + cx_credits &= TIO_ICE_PMI_TX_DYN_CREDIT_STAT_CB3_CREDIT_CNT_MASK; + DBG("cx_credits= 0x%lx\n", cx_credits); + + return (cx_credits == 0xf) ? 
1 : 0; +} + +static int tiocx_reload(struct cx_dev *cx_dev) +{ + int part_num = CX_DEV_NONE; + int mfg_num = CX_DEV_NONE; + nasid_t nasid = cx_dev->cx_id.nasid; + + if (bitstream_loaded(nasid)) { + u64 cx_id; + int rv; + + rv = ia64_sn_sysctl_tio_clock_reset(nasid); + if (rv) { + printk(KERN_ALERT "CX port JTAG reset failed.\n"); + } else { + cx_id = *(volatile u64 *) + (TIO_SWIN_BASE(nasid, TIOCX_CORELET) + + WIDGET_ID); + part_num = XWIDGET_PART_NUM(cx_id); + mfg_num = XWIDGET_MFG_NUM(cx_id); + DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num); + /* just ignore it if it's a CE */ + if (part_num == TIO_CE_ASIC_PARTNUM) + return 0; + } + } + + cx_dev->cx_id.part_num = part_num; + cx_dev->cx_id.mfg_num = mfg_num; + + /* + * Delete old device and register the new one. It's ok if + * part_num/mfg_num == CX_DEV_NONE. We want to register + * devices in the table even if a bitstream isn't loaded. + * That allows use to see that a bitstream isn't loaded via + * TIOCX_IOCTL_DEV_LIST. + */ + return cx_device_reload(cx_dev); +} + +static ssize_t show_cxdev_control(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + + return sprintf(buf, "0x%x 0x%x 0x%x 0x%x\n", + cx_dev->cx_id.nasid, + cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num, + cx_dev->bt); +} + +static ssize_t store_cxdev_control(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) +{ + int n; + struct cx_dev *cx_dev = to_cx_dev(dev); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (count <= 0) + return 0; + + n = simple_strtoul(buf, NULL, 0); + + switch (n) { + case 1: + tio_corelet_reset(cx_dev->cx_id.nasid, TIOCX_CORELET); + tiocx_reload(cx_dev); + break; + case 2: + tiocx_reload(cx_dev); + break; + case 3: + tio_corelet_reset(cx_dev->cx_id.nasid, TIOCX_CORELET); + break; + default: + break; + } + + return count; +} + +DEVICE_ATTR(cxdev_control, 0644, show_cxdev_control, store_cxdev_control); + +static int __init 
tiocx_init(void) +{ + cnodeid_t cnodeid; + int found_tiocx_device = 0; + int err; + + if (!ia64_platform_is("sn2")) + return 0; + + err = bus_register(&tiocx_bus_type); + if (err) + return err; + + for (cnodeid = 0; cnodeid < num_cnodes; cnodeid++) { + nasid_t nasid; + int bt; + + nasid = cnodeid_to_nasid(cnodeid); + + if ((nasid & 0x1) && is_fpga_tio(nasid, &bt)) { + struct hubdev_info *hubdev; + struct xwidget_info *widgetp; + + DBG("Found TIO at nasid 0x%x\n", nasid); + + hubdev = + (struct hubdev_info *)(NODEPDA(cnodeid)->pdinfo); + + widgetp = &hubdev->hdi_xwidget_info[TIOCX_CORELET]; + + /* The CE hangs off of the CX port but is not an FPGA */ + if (widgetp->xwi_hwid.part_num == TIO_CE_ASIC_PARTNUM) + continue; + + tio_corelet_reset(nasid, TIOCX_CORELET); + tio_conveyor_enable(nasid); + + if (cx_device_register + (nasid, widgetp->xwi_hwid.part_num, + widgetp->xwi_hwid.mfg_num, hubdev, bt) < 0) + return -ENXIO; + else + found_tiocx_device++; + } + } + + /* It's ok if we find zero devices. */ + DBG("found_tiocx_device= %d\n", found_tiocx_device); + + return 0; +} + +static int cx_remove_device(struct device * dev, void * data) +{ + struct cx_dev *cx_dev = to_cx_dev(dev); + device_remove_file(dev, &dev_attr_cxdev_control); + cx_device_unregister(cx_dev); + return 0; +} + +static void __exit tiocx_exit(void) +{ + DBG("tiocx_exit\n"); + + /* + * Unregister devices. 
+ */ + bus_for_each_dev(&tiocx_bus_type, NULL, NULL, cx_remove_device); + bus_unregister(&tiocx_bus_type); +} + +fs_initcall(tiocx_init); +module_exit(tiocx_exit); + +/************************************************************************ + * Module licensing and description + ************************************************************************/ +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Bruce Losure "); +MODULE_DESCRIPTION("TIOCX module"); +MODULE_SUPPORTED_DEVICE(DEVICE_NAME); diff --git a/kernel/arch/ia64/sn/pci/Makefile b/kernel/arch/ia64/sn/pci/Makefile new file mode 100644 index 000000000..df2a90145 --- /dev/null +++ b/kernel/arch/ia64/sn/pci/Makefile @@ -0,0 +1,12 @@ +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. +# +# Makefile for the sn pci general routines. + +ccflags-y := -Iarch/ia64/sn/include + +obj-y := pci_dma.o tioca_provider.o tioce_provider.o pcibr/ diff --git a/kernel/arch/ia64/sn/pci/pci_dma.c b/kernel/arch/ia64/sn/pci/pci_dma.c new file mode 100644 index 000000000..d0853e8e8 --- /dev/null +++ b/kernel/arch/ia64/sn/pci/pci_dma.c @@ -0,0 +1,487 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000,2002-2005 Silicon Graphics, Inc. All rights reserved. + * + * Routines for PCI DMA mapping. See Documentation/DMA-API.txt for + * a description of how these routines should be used. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg))) +#define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG)) + +/** + * sn_dma_supported - test a DMA mask + * @dev: device to test + * @mask: DMA mask to test + * + * Return whether the given PCI device DMA address mask can be supported + * properly. For example, if your device can only drive the low 24-bits + * during PCI bus mastering, then you would pass 0x00ffffff as the mask to + * this function. Of course, SN only supports devices that have 32 or more + * address bits when using the PMU. + */ +static int sn_dma_supported(struct device *dev, u64 mask) +{ + BUG_ON(!dev_is_pci(dev)); + + if (mask < 0x7fffffff) + return 0; + return 1; +} + +/** + * sn_dma_set_mask - set the DMA mask + * @dev: device to set + * @dma_mask: new mask + * + * Set @dev's DMA mask if the hw supports it. + */ +int sn_dma_set_mask(struct device *dev, u64 dma_mask) +{ + BUG_ON(!dev_is_pci(dev)); + + if (!sn_dma_supported(dev, dma_mask)) + return 0; + + *dev->dma_mask = dma_mask; + return 1; +} +EXPORT_SYMBOL(sn_dma_set_mask); + +/** + * sn_dma_alloc_coherent - allocate memory for coherent DMA + * @dev: device to allocate for + * @size: size of the region + * @dma_handle: DMA (bus) address + * @flags: memory allocation flags + * + * dma_alloc_coherent() returns a pointer to a memory region suitable for + * coherent DMA traffic to/from a PCI device. On SN platforms, this means + * that @dma_handle will have the %PCIIO_DMA_CMD flag set. + * + * This interface is usually used for "command" streams (e.g. the command + * queue for a SCSI controller). See Documentation/DMA-API.txt for + * more information. 
+ */ +static void *sn_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t * dma_handle, gfp_t flags, + struct dma_attrs *attrs) +{ + void *cpuaddr; + unsigned long phys_addr; + int node; + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + + BUG_ON(!dev_is_pci(dev)); + + /* + * Allocate the memory. + */ + node = pcibus_to_node(pdev->bus); + if (likely(node >=0)) { + struct page *p = alloc_pages_exact_node(node, + flags, get_order(size)); + + if (likely(p)) + cpuaddr = page_address(p); + else + return NULL; + } else + cpuaddr = (void *)__get_free_pages(flags, get_order(size)); + + if (unlikely(!cpuaddr)) + return NULL; + + memset(cpuaddr, 0x0, size); + + /* physical addr. of the memory we just got */ + phys_addr = __pa(cpuaddr); + + /* + * 64 bit address translations should never fail. + * 32 bit translations can fail if there are insufficient mapping + * resources. + */ + + *dma_handle = provider->dma_map_consistent(pdev, phys_addr, size, + SN_DMA_ADDR_PHYS); + if (!*dma_handle) { + printk(KERN_ERR "%s: out of ATEs\n", __func__); + free_pages((unsigned long)cpuaddr, get_order(size)); + return NULL; + } + + return cpuaddr; +} + +/** + * sn_pci_free_coherent - free memory associated with coherent DMAable region + * @dev: device to free for + * @size: size to free + * @cpu_addr: kernel virtual address to free + * @dma_handle: DMA address associated with this region + * + * Frees the memory allocated by dma_alloc_coherent(), potentially unmapping + * any associated IOMMU mappings. 
+ */ +static void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, struct dma_attrs *attrs) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + + BUG_ON(!dev_is_pci(dev)); + + provider->dma_unmap(pdev, dma_handle, 0); + free_pages((unsigned long)cpu_addr, get_order(size)); +} + +/** + * sn_dma_map_single_attrs - map a single page for DMA + * @dev: device to map for + * @cpu_addr: kernel virtual address of the region to map + * @size: size of the region + * @direction: DMA direction + * @attrs: optional dma attributes + * + * Map the region pointed to by @cpu_addr for DMA and return the + * DMA address. + * + * We map this to the one step pcibr_dmamap_trans interface rather than + * the two step pcibr_dmamap_alloc/pcibr_dmamap_addr because we have + * no way of saving the dmamap handle from the alloc to later free + * (which is pretty much unacceptable). + * + * mappings with the DMA_ATTR_WRITE_BARRIER get mapped with + * dma_map_consistent() so that writes force a flush of pending DMA. + * (See "SGI Altix Architecture Considerations for Linux Device Drivers", + * Document Number: 007-4763-001) + * + * TODO: simplify our interface; + * figure out how to save dmamap handle so can use two step. 
+ */ +static dma_addr_t sn_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + void *cpu_addr = page_address(page) + offset; + dma_addr_t dma_addr; + unsigned long phys_addr; + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + int dmabarr; + + dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs); + + BUG_ON(!dev_is_pci(dev)); + + phys_addr = __pa(cpu_addr); + if (dmabarr) + dma_addr = provider->dma_map_consistent(pdev, phys_addr, + size, SN_DMA_ADDR_PHYS); + else + dma_addr = provider->dma_map(pdev, phys_addr, size, + SN_DMA_ADDR_PHYS); + + if (!dma_addr) { + printk(KERN_ERR "%s: out of ATEs\n", __func__); + return 0; + } + return dma_addr; +} + +/** + * sn_dma_unmap_single_attrs - unamp a DMA mapped page + * @dev: device to sync + * @dma_addr: DMA address to sync + * @size: size of region + * @direction: DMA direction + * @attrs: optional dma attributes + * + * This routine is supposed to sync the DMA region specified + * by @dma_handle into the coherence domain. On SN, we're always cache + * coherent, so we just need to free any ATEs associated with this mapping. + */ +static void sn_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + + BUG_ON(!dev_is_pci(dev)); + + provider->dma_unmap(pdev, dma_addr, dir); +} + +/** + * sn_dma_unmap_sg - unmap a DMA scatterlist + * @dev: device to unmap + * @sg: scatterlist to unmap + * @nhwentries: number of scatterlist entries + * @direction: DMA direction + * @attrs: optional dma attributes + * + * Unmap a set of streaming mode DMA translations. 
+ */ +static void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nhwentries, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + int i; + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + struct scatterlist *sg; + + BUG_ON(!dev_is_pci(dev)); + + for_each_sg(sgl, sg, nhwentries, i) { + provider->dma_unmap(pdev, sg->dma_address, dir); + sg->dma_address = (dma_addr_t) NULL; + sg->dma_length = 0; + } +} + +/** + * sn_dma_map_sg - map a scatterlist for DMA + * @dev: device to map for + * @sg: scatterlist to map + * @nhwentries: number of entries + * @direction: direction of the DMA transaction + * @attrs: optional dma attributes + * + * mappings with the DMA_ATTR_WRITE_BARRIER get mapped with + * dma_map_consistent() so that writes force a flush of pending DMA. + * (See "SGI Altix Architecture Considerations for Linux Device Drivers", + * Document Number: 007-4763-001) + * + * Maps each entry of @sg for DMA. + */ +static int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, + int nhwentries, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + unsigned long phys_addr; + struct scatterlist *saved_sg = sgl, *sg; + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + int i; + int dmabarr; + + dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs); + + BUG_ON(!dev_is_pci(dev)); + + /* + * Setup a DMA address for each entry in the scatterlist. + */ + for_each_sg(sgl, sg, nhwentries, i) { + dma_addr_t dma_addr; + phys_addr = SG_ENT_PHYS_ADDRESS(sg); + if (dmabarr) + dma_addr = provider->dma_map_consistent(pdev, + phys_addr, + sg->length, + SN_DMA_ADDR_PHYS); + else + dma_addr = provider->dma_map(pdev, phys_addr, + sg->length, + SN_DMA_ADDR_PHYS); + + sg->dma_address = dma_addr; + if (!sg->dma_address) { + printk(KERN_ERR "%s: out of ATEs\n", __func__); + + /* + * Free any successfully allocated entries. 
+ */ + if (i > 0) + sn_dma_unmap_sg(dev, saved_sg, i, dir, attrs); + return 0; + } + + sg->dma_length = sg->length; + } + + return nhwentries; +} + +static void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir) +{ + BUG_ON(!dev_is_pci(dev)); +} + +static void sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, + enum dma_data_direction dir) +{ + BUG_ON(!dev_is_pci(dev)); +} + +static void sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + BUG_ON(!dev_is_pci(dev)); +} + +static void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + BUG_ON(!dev_is_pci(dev)); +} + +static int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return 0; +} + +u64 sn_dma_get_required_mask(struct device *dev) +{ + return DMA_BIT_MASK(64); +} +EXPORT_SYMBOL_GPL(sn_dma_get_required_mask); + +char *sn_pci_get_legacy_mem(struct pci_bus *bus) +{ + if (!SN_PCIBUS_BUSSOFT(bus)) + return ERR_PTR(-ENODEV); + + return (char *)(SN_PCIBUS_BUSSOFT(bus)->bs_legacy_mem | __IA64_UNCACHED_OFFSET); +} + +int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size) +{ + unsigned long addr; + int ret; + struct ia64_sal_retval isrv; + + /* + * First, try the SN_SAL_IOIF_PCI_SAFE SAL call which can work + * around hw issues at the pci bus level. SGI proms older than + * 4.10 don't implement this. + */ + + SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE, + pci_domain_nr(bus), bus->number, + 0, /* io */ + 0, /* read */ + port, size, __pa(val)); + + if (isrv.status == 0) + return size; + + /* + * If the above failed, retry using the SAL_PROBE call which should + * be present in all proms (but which cannot work round PCI chipset + * bugs). This code is retained for compatibility with old + * pre-4.10 proms, and should be removed at some point in the future. 
+ */ + + if (!SN_PCIBUS_BUSSOFT(bus)) + return -ENODEV; + + addr = SN_PCIBUS_BUSSOFT(bus)->bs_legacy_io | __IA64_UNCACHED_OFFSET; + addr += port; + + ret = ia64_sn_probe_mem(addr, (long)size, (void *)val); + + if (ret == 2) + return -EINVAL; + + if (ret == 1) + *val = -1; + + return size; +} + +int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) +{ + int ret = size; + unsigned long paddr; + unsigned long *addr; + struct ia64_sal_retval isrv; + + /* + * First, try the SN_SAL_IOIF_PCI_SAFE SAL call which can work + * around hw issues at the pci bus level. SGI proms older than + * 4.10 don't implement this. + */ + + SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE, + pci_domain_nr(bus), bus->number, + 0, /* io */ + 1, /* write */ + port, size, __pa(&val)); + + if (isrv.status == 0) + return size; + + /* + * If the above failed, retry using the SAL_PROBE call which should + * be present in all proms (but which cannot work round PCI chipset + * bugs). This code is retained for compatibility with old + * pre-4.10 proms, and should be removed at some point in the future. 
+ */ + + if (!SN_PCIBUS_BUSSOFT(bus)) { + ret = -ENODEV; + goto out; + } + + /* Put the phys addr in uncached space */ + paddr = SN_PCIBUS_BUSSOFT(bus)->bs_legacy_io | __IA64_UNCACHED_OFFSET; + paddr += port; + addr = (unsigned long *)paddr; + + switch (size) { + case 1: + *(volatile u8 *)(addr) = (u8)(val); + break; + case 2: + *(volatile u16 *)(addr) = (u16)(val); + break; + case 4: + *(volatile u32 *)(addr) = (u32)(val); + break; + default: + ret = -EINVAL; + break; + } + out: + return ret; +} + +static struct dma_map_ops sn_dma_ops = { + .alloc = sn_dma_alloc_coherent, + .free = sn_dma_free_coherent, + .map_page = sn_dma_map_page, + .unmap_page = sn_dma_unmap_page, + .map_sg = sn_dma_map_sg, + .unmap_sg = sn_dma_unmap_sg, + .sync_single_for_cpu = sn_dma_sync_single_for_cpu, + .sync_sg_for_cpu = sn_dma_sync_sg_for_cpu, + .sync_single_for_device = sn_dma_sync_single_for_device, + .sync_sg_for_device = sn_dma_sync_sg_for_device, + .mapping_error = sn_dma_mapping_error, + .dma_supported = sn_dma_supported, +}; + +void sn_dma_init(void) +{ + dma_ops = &sn_dma_ops; +} diff --git a/kernel/arch/ia64/sn/pci/pcibr/Makefile b/kernel/arch/ia64/sn/pci/pcibr/Makefile new file mode 100644 index 000000000..396bcae36 --- /dev/null +++ b/kernel/arch/ia64/sn/pci/pcibr/Makefile @@ -0,0 +1,13 @@ +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2002-2004 Silicon Graphics, Inc. All Rights Reserved. +# +# Makefile for the sn2 io routines. 
+ +ccflags-y := -Iarch/ia64/sn/include + +obj-y += pcibr_dma.o pcibr_reg.o \ + pcibr_ate.o pcibr_provider.o diff --git a/kernel/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/kernel/arch/ia64/sn/pci/pcibr/pcibr_ate.c new file mode 100644 index 000000000..5bc34eac9 --- /dev/null +++ b/kernel/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -0,0 +1,177 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001-2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include + +int pcibr_invalidate_ate; /* by default don't invalidate ATE on free */ + +/* + * mark_ate: Mark the ate as either free or inuse. + */ +static void mark_ate(struct ate_resource *ate_resource, int start, int number, + u64 value) +{ + u64 *ate = ate_resource->ate; + int index; + int length = 0; + + for (index = start; length < number; index++, length++) + ate[index] = value; +} + +/* + * find_free_ate: Find the first free ate index starting from the given + * index for the desired consecutive count. + */ +static int find_free_ate(struct ate_resource *ate_resource, int start, + int count) +{ + u64 *ate = ate_resource->ate; + int index; + int start_free; + + for (index = start; index < ate_resource->num_ate;) { + if (!ate[index]) { + int i; + int free; + free = 0; + start_free = index; /* Found start free ate */ + for (i = start_free; i < ate_resource->num_ate; i++) { + if (!ate[i]) { /* This is free */ + if (++free == count) + return start_free; + } else { + index = i + 1; + break; + } + } + if (i >= ate_resource->num_ate) + return -1; + } else + index++; /* Try next ate */ + } + + return -1; +} + +/* + * free_ate_resource: Free the requested number of ATEs. 
+ */ +static inline void free_ate_resource(struct ate_resource *ate_resource, + int start) +{ + mark_ate(ate_resource, start, ate_resource->ate[start], 0); + if ((ate_resource->lowest_free_index > start) || + (ate_resource->lowest_free_index < 0)) + ate_resource->lowest_free_index = start; +} + +/* + * alloc_ate_resource: Allocate the requested number of ATEs. + */ +static inline int alloc_ate_resource(struct ate_resource *ate_resource, + int ate_needed) +{ + int start_index; + + /* + * Check for ate exhaustion. + */ + if (ate_resource->lowest_free_index < 0) + return -1; + + /* + * Find the required number of free consecutive ates. + */ + start_index = + find_free_ate(ate_resource, ate_resource->lowest_free_index, + ate_needed); + if (start_index >= 0) + mark_ate(ate_resource, start_index, ate_needed, ate_needed); + + ate_resource->lowest_free_index = + find_free_ate(ate_resource, ate_resource->lowest_free_index, 1); + + return start_index; +} + +/* + * Allocate "count" contiguous Bridge Address Translation Entries + * on the specified bridge to be used for PCI to XTALK mappings. + * Indices in rm map range from 1..num_entries. Indices returned + * to caller range from 0..num_entries-1. + * + * Return the start index on success, -1 on failure. + */ +int pcibr_ate_alloc(struct pcibus_info *pcibus_info, int count) +{ + int status; + unsigned long flags; + + spin_lock_irqsave(&pcibus_info->pbi_lock, flags); + status = alloc_ate_resource(&pcibus_info->pbi_int_ate_resource, count); + spin_unlock_irqrestore(&pcibus_info->pbi_lock, flags); + + return status; +} + +/* + * Setup an Address Translation Entry as specified. Use either the Bridge + * internal maps or the external map RAM, as appropriate. 
+ */ +static inline u64 __iomem *pcibr_ate_addr(struct pcibus_info *pcibus_info, + int ate_index) +{ + if (ate_index < pcibus_info->pbi_int_ate_size) { + return pcireg_int_ate_addr(pcibus_info, ate_index); + } + panic("pcibr_ate_addr: invalid ate_index 0x%x", ate_index); +} + +/* + * Update the ate. + */ +void inline +ate_write(struct pcibus_info *pcibus_info, int ate_index, int count, + volatile u64 ate) +{ + while (count-- > 0) { + if (ate_index < pcibus_info->pbi_int_ate_size) { + pcireg_int_ate_set(pcibus_info, ate_index, ate); + } else { + panic("ate_write: invalid ate_index 0x%x", ate_index); + } + ate_index++; + ate += IOPGSIZE; + } + + pcireg_tflush_get(pcibus_info); /* wait until Bridge PIO complete */ +} + +void pcibr_ate_free(struct pcibus_info *pcibus_info, int index) +{ + + volatile u64 ate; + int count; + unsigned long flags; + + if (pcibr_invalidate_ate) { + /* For debugging purposes, clear the valid bit in the ATE */ + ate = *pcibr_ate_addr(pcibus_info, index); + count = pcibus_info->pbi_int_ate_resource.ate[index]; + ate_write(pcibus_info, index, count, (ate & ~PCI32_ATE_V)); + } + + spin_lock_irqsave(&pcibus_info->pbi_lock, flags); + free_ate_resource(&pcibus_info->pbi_int_ate_resource, index); + spin_unlock_irqrestore(&pcibus_info->pbi_lock, flags); +} diff --git a/kernel/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/kernel/arch/ia64/sn/pci/pcibr/pcibr_dma.c new file mode 100644 index 000000000..1e863b277 --- /dev/null +++ b/kernel/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -0,0 +1,413 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001-2005 Silicon Graphics, Inc. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "tio.h" +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" + +extern int sn_ioif_inited; + +/* ===================================================================== + * DMA MANAGEMENT + * + * The Bridge ASIC provides three methods of doing DMA: via a "direct map" + * register available in 32-bit PCI space (which selects a contiguous 2G + * address space on some other widget), via "direct" addressing via 64-bit + * PCI space (all destination information comes from the PCI address, + * including transfer attributes), and via a "mapped" region that allows + * a bunch of different small mappings to be established with the PMU. + * + * For efficiency, we most prefer to use the 32bit direct mapping facility, + * since it requires no resource allocations. The advantage of using the + * PMU over the 64-bit direct is that single-cycle PCI addressing can be + * used; the advantage of using 64-bit direct over PMU addressing is that + * we do not have to allocate entries in the PMU. + */ + +static dma_addr_t +pcibr_dmamap_ate32(struct pcidev_info *info, + u64 paddr, size_t req_size, u64 flags, int dma_flags) +{ + + struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info; + struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info-> + pdi_pcibus_info; + u8 internal_device = (PCI_SLOT(pcidev_info->pdi_host_pcidev_info-> + pdi_linux_pcidev->devfn)) - 1; + int ate_count; + int ate_index; + u64 ate_flags = flags | PCI32_ATE_V; + u64 ate; + u64 pci_addr; + u64 xio_addr; + u64 offset; + + /* PIC in PCI-X mode does not supports 32bit PageMap mode */ + if (IS_PIC_SOFT(pcibus_info) && IS_PCIX(pcibus_info)) { + return 0; + } + + /* Calculate the number of ATEs needed. 
*/ + if (!(MINIMAL_ATE_FLAG(paddr, req_size))) { + ate_count = IOPG((IOPGSIZE - 1) /* worst case start offset */ + +req_size /* max mapping bytes */ + - 1) + 1; /* round UP */ + } else { /* assume requested target is page aligned */ + ate_count = IOPG(req_size /* max mapping bytes */ + - 1) + 1; /* round UP */ + } + + /* Get the number of ATEs required. */ + ate_index = pcibr_ate_alloc(pcibus_info, ate_count); + if (ate_index < 0) + return 0; + + /* In PCI-X mode, Prefetch not supported */ + if (IS_PCIX(pcibus_info)) + ate_flags &= ~(PCI32_ATE_PREF); + + if (SN_DMA_ADDRTYPE(dma_flags == SN_DMA_ADDR_PHYS)) + xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr); + else + xio_addr = paddr; + + offset = IOPGOFF(xio_addr); + ate = ate_flags | (xio_addr - offset); + + /* If PIC, put the targetid in the ATE */ + if (IS_PIC_SOFT(pcibus_info)) { + ate |= (pcibus_info->pbi_hub_xid << PIC_ATE_TARGETID_SHFT); + } + + /* + * If we're mapping for MSI, set the MSI bit in the ATE. If it's a + * TIOCP based pci bus, we also need to set the PIO bit in the ATE. + */ + if (dma_flags & SN_DMA_MSI) { + ate |= PCI32_ATE_MSI; + if (IS_TIOCP_SOFT(pcibus_info)) + ate |= PCI32_ATE_PIO; + } + + ate_write(pcibus_info, ate_index, ate_count, ate); + + /* + * Set up the DMA mapped Address. + */ + pci_addr = PCI32_MAPPED_BASE + offset + IOPGSIZE * ate_index; + + /* + * If swap was set in device in pcibr_endian_set() + * we need to turn swapping on. + */ + if (pcibus_info->pbi_devreg[internal_device] & PCIBR_DEV_SWAP_DIR) + ATE_SWAP_ON(pci_addr); + + + return pci_addr; +} + +static dma_addr_t +pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr, + u64 dma_attributes, int dma_flags) +{ + struct pcibus_info *pcibus_info = (struct pcibus_info *) + ((info->pdi_host_pcidev_info)->pdi_pcibus_info); + u64 pci_addr; + + /* Translate to Crosstalk View of Physical Address */ + if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS) + pci_addr = IS_PIC_SOFT(pcibus_info) ? 
+ PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr); + else + pci_addr = paddr; + pci_addr |= dma_attributes; + + /* Handle Bus mode */ + if (IS_PCIX(pcibus_info)) + pci_addr &= ~PCI64_ATTR_PREF; + + /* Handle Bridge Chipset differences */ + if (IS_PIC_SOFT(pcibus_info)) { + pci_addr |= + ((u64) pcibus_info-> + pbi_hub_xid << PIC_PCI64_ATTR_TARG_SHFT); + } else + pci_addr |= (dma_flags & SN_DMA_MSI) ? + TIOCP_PCI64_CMDTYPE_MSI : + TIOCP_PCI64_CMDTYPE_MEM; + + /* If PCI mode, func zero uses VCHAN0, every other func uses VCHAN1 */ + if (!IS_PCIX(pcibus_info) && PCI_FUNC(info->pdi_linux_pcidev->devfn)) + pci_addr |= PCI64_ATTR_VIRTUAL; + + return pci_addr; +} + +static dma_addr_t +pcibr_dmatrans_direct32(struct pcidev_info * info, + u64 paddr, size_t req_size, u64 flags, int dma_flags) +{ + struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info; + struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info-> + pdi_pcibus_info; + u64 xio_addr; + + u64 xio_base; + u64 offset; + u64 endoff; + + if (IS_PCIX(pcibus_info)) { + return 0; + } + + if (dma_flags & SN_DMA_MSI) + return 0; + + if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS) + xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) : + PHYS_TO_TIODMA(paddr); + else + xio_addr = paddr; + + xio_base = pcibus_info->pbi_dir_xbase; + offset = xio_addr - xio_base; + endoff = req_size + offset; + if ((req_size > (1ULL << 31)) || /* Too Big */ + (xio_addr < xio_base) || /* Out of range for mappings */ + (endoff > (1ULL << 31))) { /* Too Big */ + return 0; + } + + return PCI32_DIRECT_BASE | offset; +} + +/* + * Wrapper routine for freeing DMA maps + * DMA mappings for Direct 64 and 32 do not have any DMA maps. 
+ */ +void +pcibr_dma_unmap(struct pci_dev *hwdev, dma_addr_t dma_handle, int direction) +{ + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev); + struct pcibus_info *pcibus_info = + (struct pcibus_info *)pcidev_info->pdi_pcibus_info; + + if (IS_PCI32_MAPPED(dma_handle)) { + int ate_index; + + ate_index = + IOPG((ATE_SWAP_OFF(dma_handle) - PCI32_MAPPED_BASE)); + pcibr_ate_free(pcibus_info, ate_index); + } +} + +/* + * On SN systems there is a race condition between a PIO read response and + * DMA's. In rare cases, the read response may beat the DMA, causing the + * driver to think that data in memory is complete and meaningful. This code + * eliminates that race. This routine is called by the PIO read routines + * after doing the read. For PIC this routine then forces a fake interrupt + * on another line, which is logically associated with the slot that the PIO + * is addressed to. It then spins while watching the memory location that + * the interrupt is targeted to. When the interrupt response arrives, we + * are sure that the DMA has landed in memory and it is safe for the driver + * to proceed. For TIOCP use the Device(x) Write Request Buffer Flush + * Bridge register since it ensures the data has entered the coherence domain, + * unlike the PIC Device(x) Write Request Buffer Flush register. 
+ */ + +void sn_dma_flush(u64 addr) +{ + nasid_t nasid; + int is_tio; + int wid_num; + int i, j; + unsigned long flags; + u64 itte; + struct hubdev_info *hubinfo; + struct sn_flush_device_kernel *p; + struct sn_flush_device_common *common; + struct sn_flush_nasid_entry *flush_nasid_list; + + if (!sn_ioif_inited) + return; + + nasid = NASID_GET(addr); + if (-1 == nasid_to_cnodeid(nasid)) + return; + + hubinfo = (NODEPDA(nasid_to_cnodeid(nasid)))->pdinfo; + + BUG_ON(!hubinfo); + + flush_nasid_list = &hubinfo->hdi_flush_nasid_list; + if (flush_nasid_list->widget_p == NULL) + return; + + is_tio = (nasid & 1); + if (is_tio) { + int itte_index; + + if (TIO_HWIN(addr)) + itte_index = 0; + else if (TIO_BWIN_WINDOWNUM(addr)) + itte_index = TIO_BWIN_WINDOWNUM(addr); + else + itte_index = -1; + + if (itte_index >= 0) { + itte = flush_nasid_list->iio_itte[itte_index]; + if (! TIO_ITTE_VALID(itte)) + return; + wid_num = TIO_ITTE_WIDGET(itte); + } else + wid_num = TIO_SWIN_WIDGETNUM(addr); + } else { + if (BWIN_WINDOWNUM(addr)) { + itte = flush_nasid_list->iio_itte[BWIN_WINDOWNUM(addr)]; + wid_num = IIO_ITTE_WIDGET(itte); + } else + wid_num = SWIN_WIDGETNUM(addr); + } + if (flush_nasid_list->widget_p[wid_num] == NULL) + return; + p = &flush_nasid_list->widget_p[wid_num][0]; + + /* find a matching BAR */ + for (i = 0; i < DEV_PER_WIDGET; i++,p++) { + common = p->common; + for (j = 0; j < PCI_ROM_RESOURCE; j++) { + if (common->sfdl_bar_list[j].start == 0) + break; + if (addr >= common->sfdl_bar_list[j].start + && addr <= common->sfdl_bar_list[j].end) + break; + } + if (j < PCI_ROM_RESOURCE && common->sfdl_bar_list[j].start != 0) + break; + } + + /* if no matching BAR, return without doing anything. */ + if (i == DEV_PER_WIDGET) + return; + + /* + * For TIOCP use the Device(x) Write Request Buffer Flush Bridge + * register since it ensures the data has entered the coherence + * domain, unlike PIC. 
+ */ + if (is_tio) { + /* + * Note: devices behind TIOCE should never be matched in the + * above code, and so the following code is PIC/CP centric. + * If CE ever needs the sn_dma_flush mechanism, we will have + * to account for that here and in tioce_bus_fixup(). + */ + u32 tio_id = HUB_L(TIO_IOSPACE_ADDR(nasid, TIO_NODE_ID)); + u32 revnum = XWIDGET_PART_REV_NUM(tio_id); + + /* TIOCP BRINGUP WAR (PV907516): Don't write buffer flush reg */ + if ((1 << XWIDGET_PART_REV_NUM_REV(revnum)) & PV907516) { + return; + } else { + pcireg_wrb_flush_get(common->sfdl_pcibus_info, + (common->sfdl_slot - 1)); + } + } else { + spin_lock_irqsave(&p->sfdl_flush_lock, flags); + *common->sfdl_flush_addr = 0; + + /* force an interrupt. */ + *(volatile u32 *)(common->sfdl_force_int_addr) = 1; + + /* wait for the interrupt to come back. */ + while (*(common->sfdl_flush_addr) != 0x10f) + cpu_relax(); + + /* okay, everything is synched up. */ + spin_unlock_irqrestore(&p->sfdl_flush_lock, flags); + } + return; +} + +/* + * DMA interfaces. Called from pci_dma.c routines. + */ + +dma_addr_t +pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size, int dma_flags) +{ + dma_addr_t dma_handle; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev); + + /* SN cannot support DMA addresses smaller than 32 bits. */ + if (hwdev->dma_mask < 0x7fffffff) { + return 0; + } + + if (hwdev->dma_mask == ~0UL) { + /* + * Handle the most common case: 64 bit cards. This + * call should always succeed. + */ + + dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr, + PCI64_ATTR_PREF, dma_flags); + } else { + /* Handle 32-63 bit cards via direct mapping */ + dma_handle = pcibr_dmatrans_direct32(pcidev_info, phys_addr, + size, 0, dma_flags); + if (!dma_handle) { + /* + * It is a 32 bit card and we cannot do direct mapping, + * so we use an ATE. 
+ */ + + dma_handle = pcibr_dmamap_ate32(pcidev_info, phys_addr, + size, PCI32_ATE_PREF, + dma_flags); + } + } + + return dma_handle; +} + +dma_addr_t +pcibr_dma_map_consistent(struct pci_dev * hwdev, unsigned long phys_addr, + size_t size, int dma_flags) +{ + dma_addr_t dma_handle; + struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev); + + if (hwdev->dev.coherent_dma_mask == ~0UL) { + dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr, + PCI64_ATTR_BAR, dma_flags); + } else { + dma_handle = (dma_addr_t) pcibr_dmamap_ate32(pcidev_info, + phys_addr, size, + PCI32_ATE_BAR, dma_flags); + } + + return dma_handle; +} + +EXPORT_SYMBOL(sn_dma_flush); diff --git a/kernel/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/kernel/arch/ia64/sn/pci/pcibr/pcibr_provider.c new file mode 100644 index 000000000..8dbbef4a4 --- /dev/null +++ b/kernel/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -0,0 +1,265 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001-2004, 2006 Silicon Graphics, Inc. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" + +int +sal_pcibr_slot_enable(struct pcibus_info *soft, int device, void *resp, + char **ssdt) +{ + struct ia64_sal_retval ret_stuff; + u64 busnum; + u64 segment; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + segment = soft->pbi_buscommon.bs_persist_segment; + busnum = soft->pbi_buscommon.bs_persist_busnum; + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_ENABLE, segment, + busnum, (u64) device, (u64) resp, (u64)ia64_tpa(ssdt), + 0, 0); + + return (int)ret_stuff.v0; +} + +int +sal_pcibr_slot_disable(struct pcibus_info *soft, int device, int action, + void *resp) +{ + struct ia64_sal_retval ret_stuff; + u64 busnum; + u64 segment; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + segment = soft->pbi_buscommon.bs_persist_segment; + busnum = soft->pbi_buscommon.bs_persist_busnum; + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_DISABLE, + segment, busnum, (u64) device, (u64) action, + (u64) resp, 0, 0); + + return (int)ret_stuff.v0; +} + +static int sal_pcibr_error_interrupt(struct pcibus_info *soft) +{ + struct ia64_sal_retval ret_stuff; + u64 busnum; + int segment; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + segment = soft->pbi_buscommon.bs_persist_segment; + busnum = soft->pbi_buscommon.bs_persist_busnum; + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_ERROR_INTERRUPT, + (u64) segment, (u64) busnum, 0, 0, 0, 0, 0); + + return (int)ret_stuff.v0; +} + +u16 sn_ioboard_to_pci_bus(struct pci_bus *pci_bus) +{ + long rc; + u16 uninitialized_var(ioboard); /* GCC be quiet */ + nasid_t nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base); + + rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard); + if (rc) { + printk(KERN_WARNING "ia64_sn_sysctl_ioboard_get failed: %ld\n", + rc); + return 0; + } + + return ioboard; +} + +/* + * PCI Bridge Error interrupt handler. 
Gets invoked whenever a PCI
+ * bridge sends an error interrupt.
+ *
+ * @arg is the struct pcibus_info registered with request_irq().  The error
+ * is handed to the PROM; a negative result from the PROM is treated as
+ * unrecoverable and panics the machine.
+ */
+static irqreturn_t
+pcibr_error_intr_handler(int irq, void *arg)
+{
+	struct pcibus_info *soft = arg;
+
+	if (sal_pcibr_error_interrupt(soft) < 0)
+		panic("pcibr_error_intr_handler(): Fatal Bridge Error");
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * pcibr_bus_fixup - build the kernel copy of a PIC/TIOCP bus soft struct
+ * @prom_bussoft: bus description handed over by the PROM
+ * @controller: unused here
+ *
+ * Duplicates the PROM structure, allocates the internal ATE map, remaps the
+ * bridge registers, hooks the shared bridge error interrupt, programs the
+ * bridge page-size control bit from PAGE_SIZE and records the new structure
+ * in the matching flush-device entries of the owning hub.
+ *
+ * Returns the kernel bus soft structure, or NULL if the bus is not a PCI
+ * bridge ASIC or a resource could not be obtained.
+ */
+void *
+pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
+{
+	int nasid, cnode, j;
+	struct hubdev_info *hubdev_info;
+	struct pcibus_info *soft;
+	struct sn_flush_device_kernel *sn_flush_device_kernel;
+	struct sn_flush_device_common *common;
+
+	if (! IS_PCI_BRIDGE_ASIC(prom_bussoft->bs_asic_type)) {
+		return NULL;
+	}
+
+	/*
+	 * Allocate kernel bus soft and copy from prom.
+	 */
+
+	soft = kmemdup(prom_bussoft, sizeof(struct pcibus_info), GFP_KERNEL);
+	if (!soft) {
+		return NULL;
+	}
+
+	/*
+	 * Setup the PMU ATE map.
+	 *
+	 * Every allocation that can fail is done before 'soft' is published
+	 * (as the dev_id of the error interrupt and in the flush-device
+	 * table below).  Failing after that point and freeing 'soft' would
+	 * leave those references dangling.
+	 */
+	soft->pbi_int_ate_resource.lowest_free_index = 0;
+	soft->pbi_int_ate_resource.ate =
+		kcalloc(soft->pbi_int_ate_size, sizeof(u64), GFP_KERNEL);
+	if (!soft->pbi_int_ate_resource.ate)
+		goto out_free_soft;
+
+	soft->pbi_buscommon.bs_base = (unsigned long)
+		ioremap(REGION_OFFSET(soft->pbi_buscommon.bs_base),
+			sizeof(struct pic));
+	if (!soft->pbi_buscommon.bs_base)
+		goto out_free_ate;
+
+	spin_lock_init(&soft->pbi_lock);
+
+	/*
+	 * register the bridge's error interrupt handler
+	 */
+	if (request_irq(SGI_PCIASIC_ERROR, pcibr_error_intr_handler,
+			IRQF_SHARED, "PCIBR error", (void *)(soft))) {
+		printk(KERN_WARNING
+		       "pcibr cannot allocate interrupt for error handler\n");
+	}
+	irq_set_handler(SGI_PCIASIC_ERROR, handle_level_irq);
+	sn_set_err_irq_affinity(SGI_PCIASIC_ERROR);
+
+	/*
+	 * Update the Bridge with the "kernel" pagesize
+	 */
+	if (PAGE_SIZE < 16384) {
+		pcireg_control_bit_clr(soft, PCIBR_CTRL_PAGE_SIZE);
+	} else {
+		pcireg_control_bit_set(soft, PCIBR_CTRL_PAGE_SIZE);
+	}
+
+	nasid = NASID_GET(soft->pbi_buscommon.bs_base);
+	cnode = nasid_to_cnodeid(nasid);
+	hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
+
+	/*
+	 * Point every in-use flush-device slot of this widget that carries
+	 * our persistent segment/bus number at the new bus soft.
+	 */
+	if (hubdev_info->hdi_flush_nasid_list.widget_p) {
+		sn_flush_device_kernel = hubdev_info->hdi_flush_nasid_list.
+		    widget_p[(int)soft->pbi_buscommon.bs_xid];
+		if (sn_flush_device_kernel) {
+			for (j = 0; j < DEV_PER_WIDGET;
+			     j++, sn_flush_device_kernel++) {
+				common = sn_flush_device_kernel->common;
+				if (common->sfdl_slot == -1)
+					continue;
+				if ((common->sfdl_persistent_segment ==
+				     soft->pbi_buscommon.bs_persist_segment) &&
+				     (common->sfdl_persistent_busnum ==
+				     soft->pbi_buscommon.bs_persist_busnum))
+					common->sfdl_pcibus_info =
+						soft;
+			}
+		}
+	}
+
+	return soft;
+
+out_free_ate:
+	kfree(soft->pbi_int_ate_resource.ate);
+out_free_soft:
+	kfree(soft);
+	return NULL;
+}
+
+/*
+ * Write the bridge's force-interrupt register for the pin recorded in
+ * @sn_irq_info.  Nothing is done when the irq has no bridge
+ * (irq_bridge is 0) or no PCI device info attached.
+ */
+void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info)
+{
+	struct pcidev_info *pcidev_info;
+	struct pcibus_info *pcibus_info;
+	int bit = sn_irq_info->irq_int_bit;
+
+	if (! sn_irq_info->irq_bridge)
+		return;
+
+	pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	if (pcidev_info) {
+		pcibus_info =
+		    (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info->
+			pdi_pcibus_info;
+		pcireg_force_intr_set(pcibus_info, bit);
+	}
+}
+
+/*
+ * Retarget the bridge interrupt described by @sn_irq_info at the xtalk
+ * address stored in it.  The pin is disabled while its host address
+ * register is rewritten, then re-enabled, and finally
+ * pcibr_force_interrupt() is called for the same irq.
+ */
+void pcibr_target_interrupt(struct sn_irq_info *sn_irq_info)
+{
+	struct pcidev_info *pcidev_info;
+	struct pcibus_info *pcibus_info;
+	int bit = sn_irq_info->irq_int_bit;
+	u64 xtalk_addr = sn_irq_info->irq_xtalkaddr;
+
+	pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+	if (pcidev_info) {
+		pcibus_info =
+		    (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info->
+			pdi_pcibus_info;
+
+		/* Disable the device's IRQ   */
+		pcireg_intr_enable_bit_clr(pcibus_info, (1 << bit));
+
+		/* Change the device's IRQ    */
+		pcireg_intr_addr_addr_set(pcibus_info, bit, xtalk_addr);
+
+		/* Re-enable the device's IRQ */
+		pcireg_intr_enable_bit_set(pcibus_info, (1 << bit));
+
+		pcibr_force_interrupt(sn_irq_info);
+	}
+}
+
+/*
+ * Provider entries for PIC/CP
+ */
+
+struct sn_pcibus_provider pcibr_provider = {
+	.dma_map = pcibr_dma_map,
+	.dma_map_consistent = pcibr_dma_map_consistent,
+	.dma_unmap = pcibr_dma_unmap,
+	.bus_fixup = pcibr_bus_fixup,
+	.force_interrupt = pcibr_force_interrupt,
+	.target_interrupt = pcibr_target_interrupt
+};
+
+/*
+ * Register pcibr_provider as the SN PCI provider for both the PIC and the
+ * TIOCP ASIC types.  Always returns 0.
+ */
+int
+pcibr_init_provider(void)
+{
+	sn_pci_provider[PCIIO_ASIC_TYPE_PIC] = &pcibr_provider;
+	sn_pci_provider[PCIIO_ASIC_TYPE_TIOCP] = &pcibr_provider;
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(sal_pcibr_slot_enable);
+EXPORT_SYMBOL_GPL(sal_pcibr_slot_disable);
+EXPORT_SYMBOL_GPL(sn_ioboard_to_pci_bus);
diff --git a/kernel/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/kernel/arch/ia64/sn/pci/pcibr/pcibr_reg.c
new file mode 100644
index 000000000..8b8bbd51d
--- /dev/null
+++ b/kernel/arch/ia64/sn/pci/pcibr/pcibr_reg.c
@@ -0,0 +1,285 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Overlay of the two bridge register layouts handled by this file.  Each
+ * accessor picks the member that matches pbi_bridge_type.
+ */
+union br_ptr {
+	struct tiocp tio;
+	struct pic pic;
+};
+
+/*
+ * Control Register Access -- Read/Write                            0000_0020
+ */
+
+/*
+ * Clear @bits in the bridge control register of @pcibus_info.  A NULL
+ * @pcibus_info is ignored; an unknown bridge type panics.
+ */
+void pcireg_control_bit_clr(struct pcibus_info *pcibus_info, u64 bits)
+{
+	union br_ptr __iomem *ptr;
+
+	/* Check before touching pcibus_info; bs_base lives inside it. */
+	if (!pcibus_info)
+		return;
+
+	ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		__sn_clrq_relaxed(&ptr->tio.cp_control, bits);
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		__sn_clrq_relaxed(&ptr->pic.p_wid_control, bits);
+		break;
+	default:
+		panic
+		    ("pcireg_control_bit_clr: unknown bridgetype bridge 0x%p",
+		     ptr);
+	}
+}
+
+/*
+ * Set @bits in the bridge control register of @pcibus_info.  A NULL
+ * @pcibus_info is ignored; an unknown bridge type panics.
+ */
+void pcireg_control_bit_set(struct pcibus_info *pcibus_info, u64 bits)
+{
+	union br_ptr __iomem *ptr;
+
+	/* Check before touching pcibus_info; bs_base lives inside it. */
+	if (!pcibus_info)
+		return;
+
+	ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		__sn_setq_relaxed(&ptr->tio.cp_control, bits);
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		__sn_setq_relaxed(&ptr->pic.p_wid_control, bits);
+		break;
+	default:
+		panic
+		    ("pcireg_control_bit_set: unknown bridgetype bridge 0x%p",
+		     ptr);
+	}
+}
+
+/*
+ * PCI/PCIX Target Flush Register Access -- Read Only		    0000_0050
+ */
+
+/*
+ * Read the target flush register of @pcibus_info.  The read is expected to
+ * yield zero; any other value panics.  Returns 0, also when @pcibus_info
+ * is NULL (no register is read in that case).
+ */
+u64 pcireg_tflush_get(struct pcibus_info *pcibus_info)
+{
+	union br_ptr __iomem *ptr;
+	u64 ret = 0;
+
+	/* Check before touching pcibus_info; bs_base lives inside it. */
+	if (!pcibus_info)
+		return 0;
+
+	ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		ret = __sn_readq_relaxed(&ptr->tio.cp_tflush);
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		ret = __sn_readq_relaxed(&ptr->pic.p_wid_tflush);
+		break;
+	default:
+		panic
+		    ("pcireg_tflush_get: unknown bridgetype bridge 0x%p",
+		     ptr);
+	}
+
+	/* Read of the Target Flush should always return zero */
+	if (ret != 0)
+		panic("pcireg_tflush_get:Target Flush failed\n");
+
+	return ret;
+}
+
+/*
+ * Interrupt Status Register Access --
Read Only		    0000_0100
+ */
+
+/*
+ * Return the interrupt status register of @pcibus_info, or 0 when
+ * @pcibus_info is NULL.  An unknown bridge type panics.
+ */
+u64 pcireg_intr_status_get(struct pcibus_info * pcibus_info)
+{
+	union br_ptr __iomem *ptr;
+	u64 ret = 0;
+
+	/* Check before touching pcibus_info; bs_base lives inside it. */
+	if (!pcibus_info)
+		return 0;
+
+	ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		ret = __sn_readq_relaxed(&ptr->tio.cp_int_status);
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		ret = __sn_readq_relaxed(&ptr->pic.p_int_status);
+		break;
+	default:
+		panic
+		    ("pcireg_intr_status_get: unknown bridgetype bridge 0x%p",
+		     ptr);
+	}
+	return ret;
+}
+
+/*
+ * Interrupt Enable Register Access -- Read/Write                   0000_0108
+ */
+
+/*
+ * Clear @bits in the interrupt enable register of @pcibus_info.  A NULL
+ * @pcibus_info is ignored; an unknown bridge type panics.
+ */
+void pcireg_intr_enable_bit_clr(struct pcibus_info *pcibus_info, u64 bits)
+{
+	union br_ptr __iomem *ptr;
+
+	if (!pcibus_info)
+		return;
+
+	ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		__sn_clrq_relaxed(&ptr->tio.cp_int_enable, bits);
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		__sn_clrq_relaxed(&ptr->pic.p_int_enable, bits);
+		break;
+	default:
+		panic
+		    ("pcireg_intr_enable_bit_clr: unknown bridgetype bridge 0x%p",
+		     ptr);
+	}
+}
+
+/*
+ * Set @bits in the interrupt enable register of @pcibus_info.  A NULL
+ * @pcibus_info is ignored; an unknown bridge type panics.
+ */
+void pcireg_intr_enable_bit_set(struct pcibus_info *pcibus_info, u64 bits)
+{
+	union br_ptr __iomem *ptr;
+
+	if (!pcibus_info)
+		return;
+
+	ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		__sn_setq_relaxed(&ptr->tio.cp_int_enable, bits);
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		__sn_setq_relaxed(&ptr->pic.p_int_enable, bits);
+		break;
+	default:
+		panic
+		    ("pcireg_intr_enable_bit_set: unknown bridgetype bridge 0x%p",
+		     ptr);
+	}
+}
+
+/*
+ * Intr Host Address Register (int_addr) -- Read/Write  0000_0130 - 0000_0168
+ */
+
+/*
+ * Replace the host-address field of interrupt address register @int_n with
+ * @addr.  The field is cleared first and then the masked new value is set,
+ * so bits outside the *_HOST_INTR_ADDR mask are left alone.  A NULL
+ * @pcibus_info is ignored; an unknown bridge type panics.
+ */
+void pcireg_intr_addr_addr_set(struct pcibus_info *pcibus_info, int int_n,
+			       u64 addr)
+{
+	union br_ptr __iomem *ptr;
+
+	if (!pcibus_info)
+		return;
+
+	ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		__sn_clrq_relaxed(&ptr->tio.cp_int_addr[int_n],
+				  TIOCP_HOST_INTR_ADDR);
+		__sn_setq_relaxed(&ptr->tio.cp_int_addr[int_n],
+				  (addr & TIOCP_HOST_INTR_ADDR));
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		__sn_clrq_relaxed(&ptr->pic.p_int_addr[int_n],
+				  PIC_HOST_INTR_ADDR);
+		__sn_setq_relaxed(&ptr->pic.p_int_addr[int_n],
+				  (addr & PIC_HOST_INTR_ADDR));
+		break;
+	default:
+		/* Message used to name a non-existent "_get" function. */
+		panic
+		    ("pcireg_intr_addr_addr_set: unknown bridgetype bridge 0x%p",
+		     ptr);
+	}
+}
+
+/*
+ * Force Interrupt Register Access -- Write Only	0000_01C0 - 0000_01F8
+ */
+
+/*
+ * Write 1 to force-interrupt register @int_n of @pcibus_info.  A NULL
+ * @pcibus_info is ignored; an unknown bridge type panics.
+ */
+void pcireg_force_intr_set(struct pcibus_info *pcibus_info, int int_n)
+{
+	union br_ptr __iomem *ptr;
+
+	if (!pcibus_info)
+		return;
+
+	ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		writeq(1, &ptr->tio.cp_force_pin[int_n]);
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		writeq(1, &ptr->pic.p_force_pin[int_n]);
+		break;
+	default:
+		panic
+		    ("pcireg_force_intr_set: unknown bridgetype bridge 0x%p",
+		     ptr);
+	}
+}
+
+/*
+ * Device(x) Write Buffer Flush Reg Access -- Read Only 0000_0240 - 0000_0258
+ */
+
+/*
+ * Read the write-buffer flush register for @device and return its value
+ * (0 when @pcibus_info is NULL).  An unknown bridge type panics.
+ */
+u64 pcireg_wrb_flush_get(struct pcibus_info *pcibus_info, int device)
+{
+	union br_ptr __iomem *ptr;
+	u64 ret = 0;
+
+	if (!pcibus_info)
+		return 0;
+
+	ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		ret =
+		    __sn_readq_relaxed(&ptr->tio.cp_wr_req_buf[device]);
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		ret =
+		    __sn_readq_relaxed(&ptr->pic.p_wr_req_buf[device]);
+		break;
+	default:
+		panic("pcireg_wrb_flush_get: unknown bridgetype bridge 0x%p", ptr);
+	}
+
+	/* Read of the Write Buffer Flush should always return zero */
+	return ret;
+}
+
+/*
+ * Store @val into internal ATE RAM slot @ate_index of @pcibus_info.  A NULL
+ * @pcibus_info is ignored; an unknown bridge type panics.
+ */
+void pcireg_int_ate_set(struct pcibus_info *pcibus_info, int ate_index,
+			u64 val)
+{
+	union br_ptr __iomem *ptr;
+
+	if (!pcibus_info)
+		return;
+
+	ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		writeq(val, &ptr->tio.cp_int_ate_ram[ate_index]);
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		writeq(val, &ptr->pic.p_int_ate_ram[ate_index]);
+		break;
+	default:
+		panic
+		    ("pcireg_int_ate_set: unknown bridgetype bridge 0x%p",
+		     ptr);
+	}
+}
+
+/*
+ * Return the MMIO address of internal ATE RAM slot @ate_index of
+ * @pcibus_info, or NULL when @pcibus_info is NULL.  An unknown bridge type
+ * panics.
+ */
+u64 __iomem *pcireg_int_ate_addr(struct pcibus_info *pcibus_info, int ate_index)
+{
+	union br_ptr __iomem *ptr;
+	u64 __iomem *ret = NULL;
+
+	if (!pcibus_info)
+		return NULL;
+
+	ptr = (union br_ptr __iomem *)pcibus_info->pbi_buscommon.bs_base;
+
+	switch (pcibus_info->pbi_bridge_type) {
+	case PCIBR_BRIDGETYPE_TIOCP:
+		ret = &ptr->tio.cp_int_ate_ram[ate_index];
+		break;
+	case PCIBR_BRIDGETYPE_PIC:
+		ret = &ptr->pic.p_int_ate_ram[ate_index];
+		break;
+	default:
+		panic
+		    ("pcireg_int_ate_addr: unknown bridgetype bridge 0x%p",
+		     ptr);
+	}
+	return ret;
+}
diff --git a/kernel/arch/ia64/sn/pci/tioca_provider.c b/kernel/arch/ia64/sn/pci/tioca_provider.c
new file mode 100644
index 000000000..a70b11fd5
--- /dev/null
+++ b/kernel/arch/ia64/sn/pci/tioca_provider.c
@@ -0,0 +1,677 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003-2005 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+u32 tioca_gart_found;
+EXPORT_SYMBOL(tioca_gart_found);	/* used by agp-sgi */
+
+LIST_HEAD(tioca_list);
+EXPORT_SYMBOL(tioca_list);	/* used by agp-sgi */
+
+static int tioca_gart_init(struct tioca_kernel *);
+
+/**
+ * tioca_gart_init - Initialize SGI TIOCA GART
+ * @tioca_kern: ptr to the kernel-private struct identifying the CA
+ *
+ * If the indicated tioca has devices present, initialize its associated
+ * GART MMR's and kernel memory.
+ */
+static int
+tioca_gart_init(struct tioca_kernel *tioca_kern)
+{
+	u64 ap_reg;
+	u64 offset;
+	struct page *tmp;
+	struct tioca_common *tioca_common;
+	struct tioca __iomem *ca_base;
+
+	tioca_common = tioca_kern->ca_common;
+	ca_base = (struct tioca __iomem *)tioca_common->ca_common.bs_base;
+
+	/* Nothing to set up when no device sits behind this CA. */
+	if (list_empty(tioca_kern->ca_devices))
+		return 0;
+
+	ap_reg = 0;
+
+	/*
+	 * Validate aperature size
+	 */
+
+	switch (CA_APERATURE_SIZE >> 20) {
+	case 4:
+		ap_reg |= (0x3ff << CA_GART_AP_SIZE_SHFT);	/* 4MB */
+		break;
+	case 8:
+		ap_reg |= (0x3fe << CA_GART_AP_SIZE_SHFT);	/* 8MB */
+		break;
+	case 16:
+		ap_reg |= (0x3fc << CA_GART_AP_SIZE_SHFT);	/* 16MB */
+		break;
+	case 32:
+		ap_reg |= (0x3f8 << CA_GART_AP_SIZE_SHFT);	/* 32 MB */
+		break;
+	case 64:
+		ap_reg |= (0x3f0 << CA_GART_AP_SIZE_SHFT);	/* 64 MB */
+		break;
+	case 128:
+		ap_reg |= (0x3e0 << CA_GART_AP_SIZE_SHFT);	/* 128 MB */
+		break;
+	case 256:
+		ap_reg |= (0x3c0 << CA_GART_AP_SIZE_SHFT);	/* 256 MB */
+		break;
+	case 512:
+		ap_reg |= (0x380 << CA_GART_AP_SIZE_SHFT);	/* 512 MB */
+		break;
+	case 1024:
+		ap_reg |= (0x300 << CA_GART_AP_SIZE_SHFT);	/* 1GB */
+		break;
+	case 2048:
+		ap_reg |= (0x200 << CA_GART_AP_SIZE_SHFT);	/* 2GB */
+		break;
+	case 4096:
+		ap_reg |= (0x000 << CA_GART_AP_SIZE_SHFT);	/* 4 GB */
+		break;
+	default:
+		printk(KERN_ERR "%s: Invalid CA_APERATURE_SIZE "
+		       "0x%lx\n", __func__, (ulong) CA_APERATURE_SIZE);
+		return -1;
+	}
+
+	/*
+	 * Set up other aperature parameters
+	 */
+
+	if (PAGE_SIZE >= 16384) {
+		tioca_kern->ca_ap_pagesize = 16384;
+		ap_reg |= CA_GART_PAGE_SIZE;
+	} else {
+		tioca_kern->ca_ap_pagesize = 4096;
+	}
+
+	tioca_kern->ca_ap_size = CA_APERATURE_SIZE;
+	tioca_kern->ca_ap_bus_base = CA_APERATURE_BASE;
+	tioca_kern->ca_gart_entries =
+	    tioca_kern->ca_ap_size / tioca_kern->ca_ap_pagesize;
+
+	ap_reg |= (CA_GART_AP_ENB_AGP | CA_GART_AP_ENB_PCI);
+	ap_reg |= tioca_kern->ca_ap_bus_base;
+
+	/*
+	 * Allocate and set up the GART
+	 */
+
+	tioca_kern->ca_gart_size = tioca_kern->ca_gart_entries * sizeof(u64);
+	tmp =
+	    alloc_pages_node(tioca_kern->ca_closest_node,
+			     GFP_KERNEL | __GFP_ZERO,
+			     get_order(tioca_kern->ca_gart_size));
+
+	if (!tmp) {
+		printk(KERN_ERR "%s: Could not allocate "
+		       "%llu bytes (order %d) for GART\n",
+		       __func__,
+		       tioca_kern->ca_gart_size,
+		       get_order(tioca_kern->ca_gart_size));
+		return -ENOMEM;
+	}
+
+	tioca_kern->ca_gart = page_address(tmp);
+	tioca_kern->ca_gart_coretalk_addr =
+	    PHYS_TO_TIODMA(virt_to_phys(tioca_kern->ca_gart));
+
+	/*
+	 * Compute PCI/AGP convenience fields
+	 */
+
+	offset = CA_PCI32_MAPPED_BASE - CA_APERATURE_BASE;
+	tioca_kern->ca_pciap_base = CA_PCI32_MAPPED_BASE;
+	tioca_kern->ca_pciap_size = CA_PCI32_MAPPED_SIZE;
+	tioca_kern->ca_pcigart_start = offset / tioca_kern->ca_ap_pagesize;
+	tioca_kern->ca_pcigart_base =
+	    tioca_kern->ca_gart_coretalk_addr + offset;
+	tioca_kern->ca_pcigart =
+	    &tioca_kern->ca_gart[tioca_kern->ca_pcigart_start];
+	tioca_kern->ca_pcigart_entries =
+	    tioca_kern->ca_pciap_size / tioca_kern->ca_ap_pagesize;
+	/*
+	 * One bit per PCI GART entry.  The map is handed to the bitmap
+	 * helpers, which work on whole unsigned longs, so size it in longs
+	 * rather than as entries / 8 bytes.
+	 */
+	tioca_kern->ca_pcigart_pagemap =
+	    kcalloc(BITS_TO_LONGS(tioca_kern->ca_pcigart_entries),
+		    sizeof(unsigned long), GFP_KERNEL);
+	if (!tioca_kern->ca_pcigart_pagemap) {
+		free_pages((unsigned long)tioca_kern->ca_gart,
+			   get_order(tioca_kern->ca_gart_size));
+		return -ENOMEM;
+	}
+
+	offset = CA_AGP_MAPPED_BASE - CA_APERATURE_BASE;
+	tioca_kern->ca_gfxap_base = CA_AGP_MAPPED_BASE;
+	tioca_kern->ca_gfxap_size = CA_AGP_MAPPED_SIZE;
+	tioca_kern->ca_gfxgart_start = offset / tioca_kern->ca_ap_pagesize;
+	tioca_kern->ca_gfxgart_base =
+	    tioca_kern->ca_gart_coretalk_addr + offset;
+	tioca_kern->ca_gfxgart =
+	    &tioca_kern->ca_gart[tioca_kern->ca_gfxgart_start];
+	tioca_kern->ca_gfxgart_entries =
+	    tioca_kern->ca_gfxap_size / tioca_kern->ca_ap_pagesize;
+
+	/*
+	 * various control settings:
+	 *      use agp op-combining
+	 *      use GET semantics to fetch memory
+	 *      participate in coherency domain
+	 *      DISABLE GART PREFETCHING due to hw bug tracked in SGI PV930029
+	 */
+
+	__sn_setq_relaxed(&ca_base->ca_control1,
+			CA_AGPDMA_OP_ENB_COMBDELAY);	/* PV895469 ? */
+	__sn_clrq_relaxed(&ca_base->ca_control2, CA_GART_MEM_PARAM);
+	__sn_setq_relaxed(&ca_base->ca_control2,
+			(0x2ull << CA_GART_MEM_PARAM_SHFT));
+	tioca_kern->ca_gart_iscoherent = 1;
+	__sn_clrq_relaxed(&ca_base->ca_control2,
+			(CA_GART_WR_PREFETCH_ENB | CA_GART_RD_PREFETCH_ENB));
+
+	/*
+	 * Unmask GART fetch error interrupts.  Clear residual errors first.
+	 */
+
+	writeq(CA_GART_FETCH_ERR, &ca_base->ca_int_status_alias);
+	writeq(CA_GART_FETCH_ERR, &ca_base->ca_mult_error_alias);
+	__sn_clrq_relaxed(&ca_base->ca_int_mask, CA_GART_FETCH_ERR);
+
+	/*
+	 * Program the aperature and gart registers in TIOCA
+	 */
+
+	writeq(ap_reg, &ca_base->ca_gart_aperature);
+	writeq(tioca_kern->ca_gart_coretalk_addr|1, &ca_base->ca_gart_ptr_table);
+
+	return 0;
+}
+
+/**
+ * tioca_fastwrite_enable - enable AGP FW for a tioca and its functions
+ * @tioca_kern: structure representing the CA
+ *
+ * Given a CA, scan all attached functions making sure they all support
+ * FastWrite.  If so, enable FastWrite for all functions and the CA itself.
+ */
+
+void
+tioca_fastwrite_enable(struct tioca_kernel *tioca_kern)
+{
+	int cap_ptr;
+	u32 reg;
+	struct tioca __iomem *tioca_base;
+	struct pci_dev *pdev;
+	struct tioca_common *common;
+
+	common = tioca_kern->ca_common;
+
+	/*
+	 * Scan all vga controllers on this bus making sure they all
+	 * support FW.  If not, return.
+	 */
+
+	list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) {
+		if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8))
+			continue;
+
+		cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP);
+		if (!cap_ptr)
+			return;	/* no AGP CAP means no FW */
+
+		pci_read_config_dword(pdev, cap_ptr + PCI_AGP_STATUS, &reg);
+		if (!(reg & PCI_AGP_STATUS_FW))
+			return;	/* function doesn't support FW */
+	}
+
+	/*
+	 * Set fw for all vga fn's
+	 */
+
+	list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) {
+		if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8))
+			continue;
+
+		cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP);
+		pci_read_config_dword(pdev, cap_ptr + PCI_AGP_COMMAND, &reg);
+		reg |= PCI_AGP_COMMAND_FW;
+		pci_write_config_dword(pdev, cap_ptr + PCI_AGP_COMMAND, reg);
+	}
+
+	/*
+	 * Set ca's fw to match
+	 */
+
+	tioca_base = (struct tioca __iomem*)common->ca_common.bs_base;
+	__sn_setq_relaxed(&tioca_base->ca_control1, CA_AGP_FW_ENABLE);
+}
+
+EXPORT_SYMBOL(tioca_fastwrite_enable);	/* used by agp-sgi */
+
+/**
+ * tioca_dma_d64 - create a DMA mapping using 64-bit direct mode
+ * @paddr: system physical address
+ *
+ * Map @paddr into 64-bit CA bus space.  No device context is necessary.
+ * Bits 53:0 come from the coretalk address.
We just need to mask in the
+ * following optional bits of the 64-bit pci address:
+ *
+ * 63:60 - Coretalk Packet Type -  0x1 for Mem Get/Put (coherent)
+ *                                 0x2 for PIO (non-coherent)
+ *                                 We will always use 0x1
+ * 55:55 - Swap bytes		   Currently unused
+ *
+ * A coretalk address of zero, or one with any bit above bit 53 set, is
+ * treated as a bug.
+ */
+static u64
+tioca_dma_d64(unsigned long paddr)
+{
+	dma_addr_t ct_addr = PHYS_TO_TIODMA(paddr);
+
+	BUG_ON(!ct_addr);
+	BUG_ON(ct_addr >> 54);
+
+	/* Packet type 0x1 in the top nibble: cache coherent memory op */
+	ct_addr |= (1UL << 60);
+
+	return ct_addr;
+}
+
+/**
+ * tioca_dma_d48 - create a DMA mapping using 48-bit direct mode
+ * @pdev: linux pci_dev representing the function
+ * @paddr: system physical address
+ *
+ * Map @paddr into 64-bit bus space of the CA associated with @pdev.
+ *
+ * The CA agp 48 bit direct address falls out as follows:
+ *
+ * When direct mapping AGP addresses, the 48 bit AGP address is
+ * constructed as follows:
+ *
+ * [47:40] - Low 8 bits of the page Node ID extracted from coretalk
+ *              address [47:40].  The upper 8 node bits are fixed
+ *              and come from the xxx register bits [5:0]
+ * [39:38] - Chiplet ID extracted from coretalk address [39:38]
+ * [37:00] - node offset extracted from coretalk address [37:00]
+ *
+ * Since the node id in general will be non-zero, and the chiplet id
+ * will always be non-zero, it follows that the device must support
+ * a dma mask of at least 0xffffffffff (40 bits) to target node 0
+ * and in general should be 0xffffffffffff (48 bits) to target nodes
+ * up to 255.  Nodes above 255 need the support of the xxx register,
+ * and so a given CA can only directly target nodes in the range
+ * xxx - xxx+255.
+ */
+static u64
+tioca_dma_d48(struct pci_dev *pdev, u64 paddr)
+{
+	struct pcidev_info *info = SN_PCIDEV_INFO(pdev);
+	struct tioca_common *common =
+	    (struct tioca_common *)info->pdi_pcibus_info;
+	struct tioca __iomem *regs =
+	    (struct tioca __iomem *)common->ca_common.bs_base;
+	u64 ct_addr = PHYS_TO_TIODMA(paddr);
+	u64 extn;
+	u32 node_upper;
+
+	if (!ct_addr)
+		return 0;
+
+	/*
+	 * Everything above bit 47 cannot travel in the 48-bit bus address;
+	 * it has to match what the CA's extension register supplies.
+	 *
+	 * NOTE(review): the limit below accepts 64 itself.  If the
+	 * extension field is only 6 bits wide (see "bits [5:0]" above) the
+	 * largest encodable value would be 63 -- confirm against the TIOCA
+	 * register spec before changing.
+	 */
+	node_upper = ct_addr >> 48;
+	if (node_upper > 64) {
+		printk(KERN_ERR "%s: coretalk addr 0x%p node id out "
+		       "of range\n", __func__, (void *)ct_addr);
+		return 0;
+	}
+
+	extn = __sn_readq_relaxed(&regs->ca_agp_dma_addr_extn);
+	if (node_upper != (extn >> CA_AGP_DMA_NODE_ID_SHFT)) {
+		printk(KERN_ERR "%s: coretalk upper node (%u) "
+		       "mismatch with ca_agp_dma_addr_extn (%llu)\n",
+		       __func__,
+		       node_upper, (extn >> CA_AGP_DMA_NODE_ID_SHFT));
+		return 0;
+	}
+
+	/* Low 48 bits of the coretalk address are the bus address. */
+	return (dma_addr_t) (ct_addr & 0xffffffffffffUL);
+}
+
+/**
+ * tioca_dma_mapped - create a DMA mapping using a CA GART
+ * @pdev: linux pci_dev representing the function
+ * @paddr: host physical address to map
+ * @req_size: len (bytes) to map
+ *
+ * Map @paddr into CA address space using the GART mechanism.  The mapped
+ * dma_addr_t is guaranteed to be contiguous in CA bus space.
+ *
+ * Returns the bus address (including the sub-page offset of @paddr), or 0
+ * if the coretalk translation, the bookkeeping allocation or the search
+ * for free GART entries fails.
+ */
+static dma_addr_t
+tioca_dma_mapped(struct pci_dev *pdev, unsigned long paddr, size_t req_size)
+{
+	int ps, ps_shift, entry, entries, mapsize;
+	u64 xio_addr, end_xio_addr;
+	struct tioca_common *tioca_common;
+	struct tioca_kernel *tioca_kern;
+	dma_addr_t bus_addr = 0;
+	struct tioca_dmamap *ca_dmamap;
+	void *map;
+	unsigned long flags;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);
+
+	tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
+	tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private;
+
+	xio_addr = PHYS_TO_TIODMA(paddr);
+	if (!xio_addr)
+		return 0;
+
+	spin_lock_irqsave(&tioca_kern->ca_lock, flags);
+
+	/*
+	 * allocate a map struct
+	 */
+
+	ca_dmamap = kzalloc(sizeof(struct tioca_dmamap), GFP_ATOMIC);
+	if (!ca_dmamap)
+		goto map_return;
+
+	/*
+	 * Locate free entries that can hold req_size.  Account for
+	 * unaligned start/length when allocating.
+	 */
+
+	ps = tioca_kern->ca_ap_pagesize;	/* will be power of 2 */
+	ps_shift = ffs(ps) - 1;
+	end_xio_addr = xio_addr + req_size - 1;	/* last byte, inclusive */
+
+	entries = (end_xio_addr >> ps_shift) - (xio_addr >> ps_shift) + 1;
+
+	map = tioca_kern->ca_pcigart_pagemap;
+	mapsize = tioca_kern->ca_pcigart_entries;
+
+	entry = bitmap_find_next_zero_area(map, mapsize, 0, entries, 0);
+	if (entry >= mapsize) {
+		kfree(ca_dmamap);
+		goto map_return;
+	}
+
+	bitmap_set(map, entry, entries);
+
+	bus_addr = tioca_kern->ca_pciap_base + (entry * ps);
+
+	ca_dmamap->cad_gart_size = entries;
+	ca_dmamap->cad_gart_entry = entry;
+
+	if (xio_addr % ps) {
+		tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr);
+		bus_addr += xio_addr & (ps - 1);
+		xio_addr &= ~(ps - 1);
+		xio_addr += ps;
+		entry++;
+	}
+
+	/*
+	 * end_xio_addr is the last byte of the request, so the comparison
+	 * must be inclusive: a final page that holds only its first byte
+	 * (e.g. a one byte, page aligned request) still needs its entry.
+	 */
+	while (xio_addr <= end_xio_addr) {
+		tioca_kern->ca_pcigart[entry] = tioca_paddr_to_gart(xio_addr);
+		xio_addr += ps;
+		entry++;
+	}
+
+	/*
+	 * Record the handle exactly as it is returned to the caller (i.e.
+	 * with the sub-page offset), because that is the value
+	 * tioca_dma_unmap() is later asked to look up.
+	 */
+	ca_dmamap->cad_dma_addr = bus_addr;
+	list_add(&ca_dmamap->cad_list, &tioca_kern->ca_dmamaps);
+
+	tioca_tlbflush(tioca_kern);
+
+map_return:
+	spin_unlock_irqrestore(&tioca_kern->ca_lock, flags);
+	return bus_addr;
+}
+
+/**
+ * tioca_dma_unmap - release CA mapping resources
+ * @pdev: linux pci_dev representing the function
+ * @bus_addr: bus address returned by an earlier tioca_dma_map
+ * @dir: mapping direction (unused)
+ *
+ * Locate mapping resources associated with @bus_addr and release them.
+ * For mappings created using the direct modes (64 or 48) there are no
+ * resources to release.
+ */
+static void
+tioca_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
+{
+	int i, entry;
+	struct tioca_common *tioca_common;
+	struct tioca_kernel *tioca_kern;
+	struct tioca_dmamap *map = NULL;
+	struct tioca_dmamap *cur;
+	struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(pdev);
+	unsigned long flags;
+
+	tioca_common = (struct tioca_common *)pcidev_info->pdi_pcibus_info;
+	tioca_kern = (struct tioca_kernel *)tioca_common->ca_kernel_private;
+
+	/* return straight away if this isn't a mapped address */
+
+	if (bus_addr < tioca_kern->ca_pciap_base ||
+	    bus_addr >= (tioca_kern->ca_pciap_base + tioca_kern->ca_pciap_size))
+		return;
+
+	spin_lock_irqsave(&tioca_kern->ca_lock, flags);
+
+	/*
+	 * The list cursor is never NULL once the loop has run to the end,
+	 * so the match is kept in a separate pointer that stays NULL when
+	 * nothing was found.
+	 */
+	list_for_each_entry(cur, &tioca_kern->ca_dmamaps, cad_list) {
+		if (cur->cad_dma_addr == bus_addr) {
+			map = cur;
+			break;
+		}
+	}
+
+	BUG_ON(map == NULL);
+
+	entry = map->cad_gart_entry;
+
+	for (i = 0; i < map->cad_gart_size; i++, entry++) {
+		clear_bit(entry, tioca_kern->ca_pcigart_pagemap);
+		tioca_kern->ca_pcigart[entry] = 0;
+	}
+	tioca_tlbflush(tioca_kern);
+
+	list_del(&map->cad_list);
+	spin_unlock_irqrestore(&tioca_kern->ca_lock, flags);
+	kfree(map);
+}
+
+/**
+ * tioca_dma_map - map pages for PCI DMA
+ * @pdev: linux pci_dev representing the function
+ * @paddr: host physical address to map
+ * @byte_count: bytes to map
+ *
+ * This is the main wrapper for mapping host physical pages to CA PCI space.
+ * The mapping mode used is based on the devices dma_mask.  As a last resort
+ * use the GART mapped mode.
+ */
+static u64
+tioca_dma_map(struct pci_dev *pdev, unsigned long paddr, size_t byte_count, int dma_flags)
+{
+	u64 handle = 0;
+
+	/* MSI mappings are not supported for now. */
+	if (dma_flags & SN_DMA_MSI)
+		return 0;
+
+	/*
+	 * Cards that are 64 or 48 bit addressable get a direct mapping.
+	 * 32 bit direct is so restrictive w.r.t. where the memory resides
+	 * that it is not used even though CA has some support.
+	 */
+	if (pdev->dma_mask == ~0UL)
+		handle = tioca_dma_d64(paddr);
+	else if (pdev->dma_mask == 0xffffffffffffUL)
+		handle = tioca_dma_d48(pdev, paddr);
+
+	/* No direct mapping possible: fall back to the PCI part of the GART */
+	if (!handle)
+		handle = tioca_dma_mapped(pdev, paddr, byte_count);
+
+	return handle;
+}
+
+/**
+ * tioca_error_intr_handler - SGI TIO CA error interrupt handler
+ * @irq: unused
+ * @arg: pointer to tioca_common struct for the given CA
+ *
+ * Forwards the error to the SGI prom through the
+ * SN_SAL_IOIF_ERROR_INTERRUPT SAL call, identifying the CA by its
+ * persistent segment and bus number.  The SAL result is not examined.
+ */
+static irqreturn_t
+tioca_error_intr_handler(int irq, void *arg)
+{
+	struct tioca_common *ca = arg;
+	struct ia64_sal_retval sal_ret;
+	u64 seg = ca->ca_common.bs_persist_segment;
+	u64 bus = ca->ca_common.bs_persist_busnum;
+
+	sal_ret.status = 0;
+	sal_ret.v0 = 0;
+
+	SAL_CALL_NOLOCK(sal_ret,
+			(u64) SN_SAL_IOIF_ERROR_INTERRUPT,
+			seg, bus, 0, 0, 0, 0, 0);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * tioca_bus_fixup - perform final PCI fixup for a TIO CA bus
+ * @prom_bussoft: Common prom/kernel struct representing the bus
+ *
+ * Replicates the tioca_common pointed to by @prom_bussoft in kernel
+ * space.  Allocates and initializes a kernel-only area for a given CA,
+ * and sets up an irq for handling CA error interrupts.
+ *
+ * On successful setup, returns the kernel version of tioca_common back to
+ * the caller.
+ *
+ * Returns NULL if the prom is too old, an allocation or the register
+ * mapping fails, or GART initialization fails; everything acquired up to
+ * that point is released again.
+ */
+static void *
+tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
+{
+	struct tioca_common *tioca_common;
+	struct tioca_kernel *tioca_kern;
+	struct pci_bus *bus;
+
+	/* sanity check prom rev */
+
+	if (is_shub1() && sn_sal_rev() < 0x0406) {
+		printk
+		    (KERN_ERR "%s: SGI prom rev 4.06 or greater required "
+		     "for tioca support\n", __func__);
+		return NULL;
+	}
+
+	/*
+	 * Allocate kernel bus soft and copy from prom.
+	 */
+
+	tioca_common = kmemdup(prom_bussoft, sizeof(struct tioca_common),
+			       GFP_KERNEL);
+	if (!tioca_common)
+		return NULL;
+
+	/*
+	 * NOTE(review): the mapping length is the size of the software
+	 * struct, not of the register block (struct tioca) that bs_base is
+	 * later cast to -- confirm whether sizeof(struct tioca) was meant.
+	 */
+	tioca_common->ca_common.bs_base = (unsigned long)
+		ioremap(REGION_OFFSET(tioca_common->ca_common.bs_base),
+			sizeof(struct tioca_common));
+	if (!tioca_common->ca_common.bs_base)
+		goto out_free_common;
+
+	/* init kernel-private area */
+
+	tioca_kern = kzalloc(sizeof(struct tioca_kernel), GFP_KERNEL);
+	if (!tioca_kern)
+		goto out_unmap;
+
+	tioca_kern->ca_common = tioca_common;
+	spin_lock_init(&tioca_kern->ca_lock);
+	INIT_LIST_HEAD(&tioca_kern->ca_dmamaps);
+	tioca_kern->ca_closest_node =
+	    nasid_to_cnodeid(tioca_common->ca_closest_nasid);
+	tioca_common->ca_kernel_private = (u64) tioca_kern;
+
+	bus = pci_find_bus(tioca_common->ca_common.bs_persist_segment,
+		tioca_common->ca_common.bs_persist_busnum);
+	BUG_ON(!bus);
+	tioca_kern->ca_devices = &bus->devices;
+
+	/* init GART */
+
+	if (tioca_gart_init(tioca_kern) < 0)
+		goto out_free_kern;
+
+	tioca_gart_found++;
+	list_add(&tioca_kern->ca_list, &tioca_list);
+
+	/* A missing error irq is not fatal; the bus is still usable. */
+	if (request_irq(SGI_TIOCA_ERROR,
+			tioca_error_intr_handler,
+			IRQF_SHARED, "TIOCA error", (void *)tioca_common))
+		printk(KERN_WARNING
+		       "%s: Unable to get irq %d. "
+		       "Error interrupts won't be routed for TIOCA bus %d\n",
+		       __func__, SGI_TIOCA_ERROR,
+		       (int)tioca_common->ca_common.bs_persist_busnum);
+
+	irq_set_handler(SGI_TIOCA_ERROR, handle_level_irq);
+	sn_set_err_irq_affinity(SGI_TIOCA_ERROR);
+
+	/* Setup locality information */
+	controller->node = tioca_kern->ca_closest_node;
+	return tioca_common;
+
+out_free_kern:
+	kfree(tioca_kern);
+out_unmap:
+	iounmap((void __iomem *)tioca_common->ca_common.bs_base);
+out_free_common:
+	kfree(tioca_common);
+	return NULL;
+}
+
+/*
+ * SN PCI provider operations for TIO CA.  The same mapping routine serves
+ * both streaming and consistent mappings; interrupt forcing/retargeting is
+ * not provided.
+ */
+static struct sn_pcibus_provider tioca_pci_interfaces = {
+	.dma_map = tioca_dma_map,
+	.dma_map_consistent = tioca_dma_map,
+	.dma_unmap = tioca_dma_unmap,
+	.bus_fixup = tioca_bus_fixup,
+	.force_interrupt = NULL,
+	.target_interrupt = NULL
+};
+
+/**
+ * tioca_init_provider - init SN PCI provider ops for TIO CA
+ *
+ * Always returns 0.
+ */
+int
+tioca_init_provider(void)
+{
+	sn_pci_provider[PCIIO_ASIC_TYPE_TIOCA] = &tioca_pci_interfaces;
+	return 0;
+}
diff --git a/kernel/arch/ia64/sn/pci/tioce_provider.c b/kernel/arch/ia64/sn/pci/tioce_provider.c
new file mode 100644
index 000000000..46d3df4b0
--- /dev/null
+++ b/kernel/arch/ia64/sn/pci/tioce_provider.c
@@ -0,0 +1,1062 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003-2006 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * 1/26/2006
+ *
+ * WAR for SGI PV 944642. For revA TIOCE, need to use the following recipe
+ * (taken from the above PV) before and after accessing tioce internal MMR's
+ * to avoid tioce lockups.
+ *
+ * The recipe as taken from the PV:
+ *
+ *	if(mmr address < 0x45000) {
+ *		if(mmr address == 0 or 0x80)
+ *			mmr wrt or read address 0xc0
+ *		else if(mmr address == 0x148 or 0x200)
+ *			mmr wrt or read address 0x28
+ *		else
+ *			mmr wrt or read address 0x158
+ *
+ *		do desired mmr access (rd or wrt)
+ *
+ *		if(mmr address == 0x100)
+ *			mmr wrt or read address 0x38
+ *		mmr wrt or read address 0xb050
+ *	} else
+ *		do desired mmr access
+ *
+ * According to hw, we can use reads instead of writes to the above address
+ *
+ * Note this WAR can only be used for accessing internal MMR's in the
+ * TIOCE Coretalk Address Range 0x0 - 0x07ff_ffff. This includes the
+ * "Local CE Registers and Memories" and "PCI Compatible Config Space" address
+ * spaces from table 2-1 of the "CE Programmer's Reference Overview" document.
+ *
+ * All registers defined in struct tioce will meet that criteria.
+ */
+
+/*
+ * First half of the recipe: the dummy read issued before the real access
+ * to @mmr_addr.  Does nothing unless the CE is rev A and the register
+ * offset is below 0x45000.
+ */
+static inline void
+tioce_mmr_war_pre(struct tioce_kernel *kern, void __iomem *mmr_addr)
+{
+	u64 base;
+	u64 offset;
+	u64 war_offset;
+
+	if (kern->ce_common->ce_rev != TIOCE_REV_A)
+		return;
+
+	base = kern->ce_common->ce_pcibus.bs_base;
+	offset = (unsigned long)mmr_addr - base;
+	if (offset >= 0x45000)
+		return;
+
+	switch (offset) {
+	case 0:
+	case 0x80:
+		war_offset = 0xc0;
+		break;
+	case 0x148:
+	case 0x200:
+		war_offset = 0x28;
+		break;
+	default:
+		war_offset = 0x158;
+		break;
+	}
+
+	readq_relaxed((void __iomem *)(base + war_offset));
+}
+
+/*
+ * Second half of the recipe: the dummy read(s) issued after the real
+ * access to @mmr_addr.  Does nothing unless the CE is rev A and the
+ * register offset is below 0x45000.
+ */
+static inline void
+tioce_mmr_war_post(struct tioce_kernel *kern, void __iomem *mmr_addr)
+{
+	u64 base;
+	u64 offset;
+
+	if (kern->ce_common->ce_rev != TIOCE_REV_A)
+		return;
+
+	base = kern->ce_common->ce_pcibus.bs_base;
+	offset = (unsigned long)mmr_addr - base;
+	if (offset >= 0x45000)
+		return;
+
+	/* Offset 0x100 needs an extra read of 0x38 ahead of the common one */
+	if (offset == 0x100)
+		readq_relaxed((void __iomem *)(base + 0x38));
+	readq_relaxed((void __iomem *)(base + 0xb050));
+}
+
+/* load mmr contents into a
variable */ +#define tioce_mmr_load(kern, mmrp, varp) do {\ + tioce_mmr_war_pre(kern, mmrp); \ + *(varp) = readq_relaxed(mmrp); \ + tioce_mmr_war_post(kern, mmrp); \ +} while (0) + +/* store variable contents into mmr; varp is parenthesized so any pointer expression works */ +#define tioce_mmr_store(kern, mmrp, varp) do {\ + tioce_mmr_war_pre(kern, mmrp); \ + writeq(*(varp), mmrp); \ + tioce_mmr_war_post(kern, mmrp); \ +} while (0) + +/* store immediate value into mmr */ +#define tioce_mmr_storei(kern, mmrp, val) do {\ + tioce_mmr_war_pre(kern, mmrp); \ + writeq(val, mmrp); \ + tioce_mmr_war_post(kern, mmrp); \ +} while (0) + +/* set bits (immediate value) in mmr: read-modify-write; the scratch name avoids capturing a caller's "tmp" */ +#define tioce_mmr_seti(kern, mmrp, bits) do {\ + u64 tioce_mmr_tmp_; \ + tioce_mmr_load(kern, mmrp, &tioce_mmr_tmp_); \ + tioce_mmr_tmp_ |= (bits); \ + tioce_mmr_store(kern, mmrp, &tioce_mmr_tmp_); \ +} while (0) + +/* clear bits (immediate value) in mmr: read-modify-write; the scratch name avoids capturing a caller's "tmp" */ +#define tioce_mmr_clri(kern, mmrp, bits) do { \ + u64 tioce_mmr_tmp_; \ + tioce_mmr_load(kern, mmrp, &tioce_mmr_tmp_); \ + tioce_mmr_tmp_ &= ~(bits); \ + tioce_mmr_store(kern, mmrp, &tioce_mmr_tmp_); \ +} while (0) + +/* + * Bus address ranges for the 5 flavors of TIOCE DMA + */ + +#define TIOCE_D64_MIN 0x8000000000000000UL +#define TIOCE_D64_MAX 0xffffffffffffffffUL +#define TIOCE_D64_ADDR(a) ((a) >= TIOCE_D64_MIN) + +#define TIOCE_D32_MIN 0x0000000080000000UL +#define TIOCE_D32_MAX 0x00000000ffffffffUL +#define TIOCE_D32_ADDR(a) ((a) >= TIOCE_D32_MIN && (a) <= TIOCE_D32_MAX) + +#define TIOCE_M32_MIN 0x0000000000000000UL +#define TIOCE_M32_MAX 0x000000007fffffffUL +#define TIOCE_M32_ADDR(a) ((a) >= TIOCE_M32_MIN && (a) <= TIOCE_M32_MAX) + +#define TIOCE_M40_MIN 0x0000004000000000UL +#define TIOCE_M40_MAX 0x0000007fffffffffUL +#define TIOCE_M40_ADDR(a) ((a) >= TIOCE_M40_MIN && (a) <= TIOCE_M40_MAX) + +#define TIOCE_M40S_MIN 0x0000008000000000UL +#define TIOCE_M40S_MAX 0x000000ffffffffffUL +#define TIOCE_M40S_ADDR(a) ((a) >= TIOCE_M40S_MIN && (a) <= TIOCE_M40S_MAX) + +/* + * ATE manipulation macros.
+ */ + +#define ATE_PAGESHIFT(ps) (__ffs(ps)) +#define ATE_PAGEMASK(ps) ((ps)-1) + +#define ATE_PAGE(x, ps) ((x) >> ATE_PAGESHIFT(ps)) +#define ATE_NPAGES(start, len, pagesize) \ + (ATE_PAGE((start)+(len)-1, pagesize) - ATE_PAGE(start, pagesize) + 1) + +#define ATE_VALID(ate) ((ate) & (1UL << 63)) +#define ATE_MAKE(addr, ps, msi) \ + (((addr) & ~ATE_PAGEMASK(ps)) | (1UL << 63) | ((msi)?(1UL << 62):0)) + +/* + * Flavors of ate-based mapping supported by tioce_alloc_map() + */ + +#define TIOCE_ATE_M32 1 +#define TIOCE_ATE_M40 2 +#define TIOCE_ATE_M40S 3 + +#define KB(x) ((u64)(x) << 10) +#define MB(x) ((u64)(x) << 20) +#define GB(x) ((u64)(x) << 30) + +/** + * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode + * @ct_addr: system coretalk address + * @dma_flags: SN_DMA_* flags; only SN_DMA_MSI is examined here + * + * Map @ct_addr into 64-bit CE bus space. No device context is necessary + * and no CE mappings are consumed. + * + * Bits 53:0 come from the coretalk address. The remaining bits are set as + * follows: + * + * 63 - must be 1 to indicate d64 mode to CE hardware + * 62 - barrier bit ... controlled with tioce_dma_barrier() + * 61 - msi bit ... specified through dma_flags + * 60:54 - reserved, MBZ + * + * Returns @ct_addr with bit 63 set, plus bit 61 when SN_DMA_MSI is requested. + * @ct_addr is OR-ed in unmasked, so the caller must keep bits 63:54 clear. + */ +static u64 +tioce_dma_d64(unsigned long ct_addr, int dma_flags) +{ + u64 bus_addr; + + bus_addr = ct_addr | (1UL << 63); + if (dma_flags & SN_DMA_MSI) + bus_addr |= (1UL << 61); + + return bus_addr; +} + +/** + * pcidev_to_tioce - return misc ce related pointers given a pci_dev + * @pci_dev: pci device context + * @base: ptr to store struct tioce_mmr * for the CE holding this device + * @kernel: ptr to store struct tioce_kernel * for the CE holding this device + * @port: ptr to store the CE port number that this device is on + * + * Return pointers to various CE-related structures for the CE upstream of + * @pci_dev.
+ */ +static inline void +pcidev_to_tioce(struct pci_dev *pdev, struct tioce __iomem **base, + struct tioce_kernel **kernel, int *port) +{ + struct pcidev_info *pcidev_info; + struct tioce_common *ce_common; + struct tioce_kernel *ce_kernel; + + pcidev_info = SN_PCIDEV_INFO(pdev); + ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; + ce_kernel = (struct tioce_kernel *)ce_common->ce_kernel_private; + + if (base) + *base = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base; + if (kernel) + *kernel = ce_kernel; + + /* + * we use port as a zero-based value internally, even though the + * documentation is 1-based. Buses numbered below ce_port1_secondary + * map to port 0, everything else to port 1. + */ + if (port) + *port = + (pdev->bus->number < ce_kernel->ce_port1_secondary) ? 0 : 1; +} + +/** + * tioce_alloc_map - Given a coretalk address, map it to pcie bus address + * space using one of the various ATE-based address modes. + * @ce_kern: tioce context + * @type: map mode to use (TIOCE_ATE_M32, TIOCE_ATE_M40 or TIOCE_ATE_M40S) + * @port: 0-based port that the requesting device is downstream of; only + * read for TIOCE_ATE_M40S + * @ct_addr: the coretalk address to map + * @len: number of bytes to map + * @dma_flags: SN_DMA_* flags; SN_DMA_MSI is only honoured in TIOCE_ATE_M32 mode + * + * Given the addressing type, set up various parameters that define the + * ATE pool to use. Search for a contiguous block of entries to cover the + * length, and if enough resources exist, fill in the ATEs and construct a + * tioce_dmamap struct to track the mapping. + * + * Returns the bus address corresponding to @ct_addr, or 0 when @type is + * unknown, MSI is requested in a mode that cannot provide it, no run of free + * ATEs is large enough, or the tioce_dmamap allocation fails. + */ +static u64 +tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, + u64 ct_addr, int len, int dma_flags) +{ + int i; + int j; + int first; + int last; + int entries; + int nates; + u64 pagesize; + int msi_capable, msi_wanted; + u64 *ate_shadow; + u64 __iomem *ate_reg; + u64 addr; + struct tioce __iomem *ce_mmr; + u64 bus_base; + struct tioce_dmamap *map; + + ce_mmr = (struct tioce __iomem *)ce_kern->ce_common->ce_pcibus.bs_base; + + switch (type) { + case TIOCE_ATE_M32: + /* + * The first 64 entries of the ate3240 pool are dedicated to + * super-page (TIOCE_ATE_M40S) mode.
+ */ + first = 64; + entries = TIOCE_NUM_M3240_ATES - 64; + ate_shadow = ce_kern->ce_ate3240_shadow; + ate_reg = ce_mmr->ce_ure_ate3240; + pagesize = ce_kern->ce_ate3240_pagesize; + bus_base = TIOCE_M32_MIN; + msi_capable = 1; + break; + case TIOCE_ATE_M40: + first = 0; + entries = TIOCE_NUM_M40_ATES; + ate_shadow = ce_kern->ce_ate40_shadow; + ate_reg = ce_mmr->ce_ure_ate40; + pagesize = MB(64); + bus_base = TIOCE_M40_MIN; + msi_capable = 0; + break; + case TIOCE_ATE_M40S: + /* + * ate3240 entries 0-31 are dedicated to port1 super-page + * mappings. ate3240 entries 32-63 are dedicated to port2. + */ + first = port * 32; + entries = 32; + ate_shadow = ce_kern->ce_ate3240_shadow; + ate_reg = ce_mmr->ce_ure_ate3240; + pagesize = GB(16); + bus_base = TIOCE_M40S_MIN; + msi_capable = 0; + break; + default: + return 0; + } + + msi_wanted = dma_flags & SN_DMA_MSI; + if (msi_wanted && !msi_capable) + return 0; + + nates = ATE_NPAGES(ct_addr, len, pagesize); + if (nates > entries) + return 0; + + /* first-fit scan of the shadow table for nates consecutive free ATEs; on success i is the first index of the run, on failure i ends up > last */ last = first + entries - nates; + for (i = first; i <= last; i++) { + if (ATE_VALID(ate_shadow[i])) + continue; + + for (j = i; j < i + nates; j++) + if (ATE_VALID(ate_shadow[j])) + break; + + if (j >= i + nates) + break; + } + + if (i > last) + return 0; + + /* GFP_ATOMIC: tioce_do_dma_map() calls this with ce_lock held and interrupts disabled */ map = kzalloc(sizeof(struct tioce_dmamap), GFP_ATOMIC); + if (!map) + return 0; + + /* program each ATE in the run, keeping the shadow copy and the hardware register in step */ addr = ct_addr; + for (j = 0; j < nates; j++) { + u64 ate; + + ate = ATE_MAKE(addr, pagesize, msi_wanted); + ate_shadow[i + j] = ate; + tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate); + addr += pagesize; + } + + map->refcnt = 1; + map->nbytes = nates * pagesize; + map->ct_start = ct_addr & ~ATE_PAGEMASK(pagesize); + /* NOTE(review): for TIOCE_ATE_M40S on port 1, i starts at 32, so bus_base + i * GB(16) is 0x10000000000 or more, above TIOCE_M40S_MAX -- confirm whether the offset should be relative to first */ map->pci_start = bus_base + (i * pagesize); + map->ate_hw = &ate_reg[i]; + map->ate_shadow = &ate_shadow[i]; + map->ate_count = nates; + + list_add(&map->ce_dmamap_list, &ce_kern->ce_dmamap_list); + + return (map->pci_start + (ct_addr - map->ct_start)); +} + +/** + * tioce_dma_d32 - create a DMA mapping using 32-bit direct mode + *
@pdev: linux pci_dev representing the function + * @ct_addr: system coretalk address + * @dma_flags: SN_DMA_* flags; a request with SN_DMA_MSI is refused + * + * Map @ct_addr into 32-bit bus space of the CE associated with @pdev. + * + * The bits of @ct_addr above the low 30 are programmed into the port's + * ce_ure_dir_map register by the first user; later users can share the + * window only when their upper bits match the shadow copy. + * + * Returns the bus address, or 0 when the mapping cannot be made. + */ +static u64 +tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr, int dma_flags) +{ + int dma_ok; + int port; + struct tioce __iomem *ce_mmr; + struct tioce_kernel *ce_kern; + u64 ct_upper; + u64 ct_lower; + dma_addr_t bus_addr; + + if (dma_flags & SN_DMA_MSI) + return 0; + + ct_upper = ct_addr & ~0x3fffffffUL; + ct_lower = ct_addr & 0x3fffffffUL; + + pcidev_to_tioce(pdev, &ce_mmr, &ce_kern, &port); + + if (ce_kern->ce_port[port].dirmap_refcnt == 0) { + u64 tmp; + + ce_kern->ce_port[port].dirmap_shadow = ct_upper; + tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port], + ct_upper); + /* + * Read the register back through the MMR accessor: a plain + * dereference of an __iomem pointer is not volatile, so the + * compiler may drop it, and it would skip the Rev A workaround. + */ + tioce_mmr_load(ce_kern, &ce_mmr->ce_ure_dir_map[port], + &tmp); + dma_ok = 1; + } else + dma_ok = (ce_kern->ce_port[port].dirmap_shadow == ct_upper); + + if (dma_ok) { + ce_kern->ce_port[port].dirmap_refcnt++; + bus_addr = TIOCE_D32_MIN + ct_lower; + } else + bus_addr = 0; + + return bus_addr; +} + +/** + * tioce_dma_barrier - swizzle a TIOCE bus address to include or exclude + * the barrier bit. + * @bus_addr: bus address to swizzle + * @on: non-zero to set the barrier bit, zero to clear it + * + * Given a TIOCE bus address, set the appropriate bit to indicate barrier + * attributes. M40/M40S addresses are returned unchanged; D64 addresses use + * bit 62 and all other addresses use bit 30. + */ +static u64 +tioce_dma_barrier(u64 bus_addr, int on) +{ + u64 barrier_bit; + + /* barrier not supported in M40/M40S mode */ + if (TIOCE_M40_ADDR(bus_addr) || TIOCE_M40S_ADDR(bus_addr)) + return bus_addr; + + if (TIOCE_D64_ADDR(bus_addr)) + barrier_bit = (1UL << 62); + else /* must be m32 or d32 */ + barrier_bit = (1UL << 30); + + return (on) ? (bus_addr | barrier_bit) : (bus_addr & ~barrier_bit); +} + +/** + * tioce_dma_unmap - release CE mapping resources + * @pdev: linux pci_dev representing the function + * @bus_addr: bus address returned by an earlier tioce_dma_map + * @dir: mapping direction (unused) + * + * Locate mapping resources associated with @bus_addr and release them.
+ * For mappings created using the direct modes there are no resources + * to release. + */ +void +tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) +{ + int i; + int port; + struct tioce_kernel *ce_kern; + struct tioce __iomem *ce_mmr; + unsigned long flags; + + bus_addr = tioce_dma_barrier(bus_addr, 0); + pcidev_to_tioce(pdev, &ce_mmr, &ce_kern, &port); + + /* nothing to do for D64 */ + + if (TIOCE_D64_ADDR(bus_addr)) + return; + + spin_lock_irqsave(&ce_kern->ce_lock, flags); + + if (TIOCE_D32_ADDR(bus_addr)) { + /* D32: drop one user of the port's direct-map window; the last user clears the shadow and the register */ if (--ce_kern->ce_port[port].dirmap_refcnt == 0) { + ce_kern->ce_port[port].dirmap_shadow = 0; + tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port], + 0); + } + } else { + struct tioce_dmamap *map; + + /* ATE-based mapping: find the tioce_dmamap whose bus range contains bus_addr */ list_for_each_entry(map, &ce_kern->ce_dmamap_list, + ce_dmamap_list) { + u64 last; + + last = map->pci_start + map->nbytes - 1; + if (bus_addr >= map->pci_start && bus_addr <= last) + break; + } + + /* the loop ran to completion (cursor is back at the list head), so nothing matched */ if (&map->ce_dmamap_list == &ce_kern->ce_dmamap_list) { + printk(KERN_WARNING + "%s: %s - no map found for bus_addr 0x%llx\n", + __func__, pci_name(pdev), bus_addr); + } else if (--map->refcnt == 0) { + for (i = 0; i < map->ate_count; i++) { + map->ate_shadow[i] = 0; + tioce_mmr_storei(ce_kern, &map->ate_hw[i], 0); + } + + list_del(&map->ce_dmamap_list); + kfree(map); + } + } + + spin_unlock_irqrestore(&ce_kern->ce_lock, flags); +} + +/** + * tioce_do_dma_map - map pages for PCI DMA + * @pdev: linux pci_dev representing the function + * @paddr: host physical address to map; taken as a coretalk address + * instead when SN_DMA_ADDRTYPE(@dma_flags) is not SN_DMA_ADDR_PHYS + * @byte_count: bytes to map + * @barrier: non-zero for a consistent mapping: coherent_dma_mask is used and + * the barrier bit is set in the returned address + * @dma_flags: SN_DMA_* flags (address type, MSI) + * + * This is the main wrapper for mapping host physical pages to CE PCI space. + * The mapping mode used is based on the device's dma_mask. + * + * Returns the bus address, or 0 if no mapping mode could satisfy the request. + */ +static u64 +tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, + int barrier, int dma_flags) +{ + unsigned long flags; + u64 ct_addr; + u64 mapaddr = 0; + struct tioce_kernel *ce_kern; + struct tioce_dmamap *map; + int port; + u64 dma_mask; + + dma_mask = (barrier) ?
pdev->dev.coherent_dma_mask : pdev->dma_mask; + + /* cards must be able to address at least 31 bits */ + if (dma_mask < 0x7fffffffUL) + return 0; + + if (SN_DMA_ADDRTYPE(dma_flags) == SN_DMA_ADDR_PHYS) + ct_addr = PHYS_TO_TIODMA(paddr); + else + ct_addr = paddr; + + /* + * If the device can generate 64 bit addresses, create a D64 map. + * D64 needs no CE resources, so ce_lock is not taken on this path. + */ + if (dma_mask == ~0UL) { + mapaddr = tioce_dma_d64(ct_addr, dma_flags); + if (mapaddr) + goto dma_map_done; + } + + pcidev_to_tioce(pdev, NULL, &ce_kern, &port); + + spin_lock_irqsave(&ce_kern->ce_lock, flags); + + /* + * D64 didn't work ... See if we have an existing map that covers + * this address range. Must account for the device's dma_mask here + * since an existing map might have been done in a mode using more pci + * address bits than this device can support. + * + * NOTE(review): only map->pci_start is compared with dma_mask, not the + * last bus address of the request, and neither barrier nor SN_DMA_MSI + * is considered when a map is reused -- confirm this is intended. + */ + list_for_each_entry(map, &ce_kern->ce_dmamap_list, ce_dmamap_list) { + u64 last; + + last = map->ct_start + map->nbytes - 1; + if (ct_addr >= map->ct_start && + ct_addr + byte_count - 1 <= last && + map->pci_start <= dma_mask) { + map->refcnt++; + mapaddr = map->pci_start + (ct_addr - map->ct_start); + break; + } + } + + /* + * If we don't have a map yet, and the card can generate 40 + * bit addresses, try the M40/M40S modes. Note these modes do not + * support a barrier bit, so if we need a consistent map these + * won't work. + */ + if (!mapaddr && !barrier && dma_mask >= 0xffffffffffUL) { + /* + * We have two options for 40-bit mappings: 16GB "super" ATEs + * and 64MB "regular" ATEs. We'll try both if needed for a + * given mapping but which one we try first depends on the + * size. For requests >64MB, prefer to use a super page with + * regular as the fallback. Otherwise, try in the reverse order.
+ */ + + /* NOTE(review): byte_count is a size_t but tioce_alloc_map() takes its length as int -- very large requests would be truncated */ if (byte_count > MB(64)) { + mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40S, + port, ct_addr, byte_count, + dma_flags); + if (!mapaddr) + mapaddr = + tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1, + ct_addr, byte_count, + dma_flags); + } else { + mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1, + ct_addr, byte_count, + dma_flags); + if (!mapaddr) + mapaddr = + tioce_alloc_map(ce_kern, TIOCE_ATE_M40S, + port, ct_addr, byte_count, + dma_flags); + } + } + + /* + * 32-bit direct is the next mode to try + */ + if (!mapaddr && dma_mask >= 0xffffffffUL) + mapaddr = tioce_dma_d32(pdev, ct_addr, dma_flags); + + /* + * Last resort, try 32-bit ATE-based map. + */ + if (!mapaddr) + mapaddr = + tioce_alloc_map(ce_kern, TIOCE_ATE_M32, -1, ct_addr, + byte_count, dma_flags); + + spin_unlock_irqrestore(&ce_kern->ce_lock, flags); + +dma_map_done: + /* reached with ce_lock released, both on the D64 shortcut and on the normal path */ if (mapaddr && barrier) + mapaddr = tioce_dma_barrier(mapaddr, 1); + + return mapaddr; +} + +/** + * tioce_dma - standard pci dma map interface + * @pdev: pci device requesting the map + * @paddr: system physical address to map into pci space + * @byte_count: # bytes to map + * @dma_flags: SN_DMA_* flags, passed through to tioce_do_dma_map() + * + * Simply call tioce_do_dma_map() to create a map with the barrier bit clear + * in the address. + * + * Returns whatever tioce_do_dma_map() returns: the bus address, or 0. + */ +static u64 +tioce_dma(struct pci_dev *pdev, unsigned long paddr, size_t byte_count, int dma_flags) +{ + return tioce_do_dma_map(pdev, paddr, byte_count, 0, dma_flags); +} + +/** + * tioce_dma_consistent - consistent pci dma map interface + * @pdev: pci device requesting the map + * @paddr: system physical address to map into pci space + * @byte_count: # bytes to map + * + * Simply call tioce_do_dma_map() to create a map with the barrier bit set + * in the address.
+ */ +static u64 +tioce_dma_consistent(struct pci_dev *pdev, unsigned long paddr, size_t byte_count, int dma_flags) +{ + /* barrier argument is 1: request a consistent mapping */ return tioce_do_dma_map(pdev, paddr, byte_count, 1, dma_flags); +} + +/** + * tioce_error_intr_handler - SGI TIO CE error interrupt handler + * @irq: unused + * @arg: pointer to tioce_common struct for the given CE + * + * Handle a CE error interrupt. Simply a wrapper around a SAL call which + * defers processing to the SGI prom. + * + * Panics when the SAL call hands back a non-zero v0; otherwise returns + * IRQ_HANDLED. + */ +static irqreturn_t +tioce_error_intr_handler(int irq, void *arg) +{ + struct tioce_common *soft = arg; + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + /* the CE is identified to SAL by its persistent segment and bus number */ SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_ERROR_INTERRUPT, + soft->ce_pcibus.bs_persist_segment, + soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0); + + if (ret_stuff.v0) + panic("tioce_error_intr_handler: Fatal TIOCE error"); + + return IRQ_HANDLED; +} + +/** + * tioce_reserve_m32 - reserve M32 ATEs for the indicated address range + * @tioce_kernel: TIOCE context to reserve ATEs for + * @base: starting bus address to reserve + * @limit: last bus address to reserve + * + * If base/limit falls within the range of bus space mapped through the + * M32 space, reserve the resources corresponding to the range.
+ */ +static void +tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit) +{ + int ate_index, last_ate, ps; + struct tioce __iomem *ce_mmr; + + ce_mmr = (struct tioce __iomem *)ce_kern->ce_common->ce_pcibus.bs_base; + ps = ce_kern->ce_ate3240_pagesize; + /* the ATE index is the bus address divided by the M32 page size */ ate_index = ATE_PAGE(base, ps); + last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1; + + /* clamp to entries 64 .. TIOCE_NUM_M3240_ATES - 1; an empty range falls through the loop below */ if (ate_index < 64) + ate_index = 64; + + if (last_ate >= TIOCE_NUM_M3240_ATES) + last_ate = TIOCE_NUM_M3240_ATES - 1; + + while (ate_index <= last_ate) { + u64 ate; + + /* valid bit set, msi bit clear; 0xdeadbeef is presumably just a recognisable dummy address */ ate = ATE_MAKE(0xdeadbeef, ps, 0); + ce_kern->ce_ate3240_shadow[ate_index] = ate; + tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index], + ate); + ate_index++; + } +} + +/** + * tioce_kern_init - init kernel structures related to a given TIOCE + * @tioce_common: ptr to a cached tioce_common struct that originated in prom + * + * Allocates the tioce_kernel, links it with @tioce_common in both directions, + * records the secondary bus number used to tell the two ports apart, sets the + * ATE page size, clears all ATEs and reserves those covering the bridges' + * memory windows. + * + * Returns the new tioce_kernel, or NULL if the allocation fails. + */ +static struct tioce_kernel * +tioce_kern_init(struct tioce_common *tioce_common) +{ + int i; + int ps; + int dev; + u32 tmp; + unsigned int seg, bus; + struct tioce __iomem *tioce_mmr; + struct tioce_kernel *tioce_kern; + + tioce_kern = kzalloc(sizeof(struct tioce_kernel), GFP_KERNEL); + if (!tioce_kern) { + return NULL; + } + + tioce_kern->ce_common = tioce_common; + spin_lock_init(&tioce_kern->ce_lock); + INIT_LIST_HEAD(&tioce_kern->ce_dmamap_list); + tioce_common->ce_kernel_private = (u64) tioce_kern; + + /* + * Determine the secondary bus number of the port2 logical PPB. + * This is used to decide whether a given pci device resides on + * port1 or port2. Note: We don't have enough plumbing set up + * here to use pci_read_config_xxx() so use raw_pci_read(). + */ + + seg = tioce_common->ce_pcibus.bs_persist_segment; + bus = tioce_common->ce_pcibus.bs_persist_busnum; + + raw_pci_read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp); + tioce_kern->ce_port1_secondary = (u8) tmp; + + /* + * Set PMU pagesize to the largest size available, and zero out + * the ATEs.
+ */ + + tioce_mmr = (struct tioce __iomem *)tioce_common->ce_pcibus.bs_base; + /* clear the page-size field, then select 256K pages; the software copy of the page size is kept in ce_ate3240_pagesize */ tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map, + CE_URE_PAGESIZE_MASK); + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map, + CE_URE_256K_PAGESIZE); + ps = tioce_kern->ce_ate3240_pagesize = KB(256); /* NOTE(review): ps is assigned here but not read again in this function */ + + /* zero both ATE tables, shadow copy and hardware register together */ for (i = 0; i < TIOCE_NUM_M40_ATES; i++) { + tioce_kern->ce_ate40_shadow[i] = 0; + tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate40[i], 0); + } + + for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) { + tioce_kern->ce_ate3240_shadow[i] = 0; + tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate3240[i], 0); + } + + /* + * Reserve ATEs corresponding to reserved address ranges. These + * include: + * + * Memory space covered by each PPB mem base/limit register + * Memory space covered by each PPB prefetch base/limit register + * + * These bus ranges are for pio (downstream) traffic only, and so + * cannot be used for DMA. + */ + + for (dev = 1; dev <= 2; dev++) { + u64 base, limit; + + /* mem base/limit: each 16-bit register value is shifted left by 16; the low 20 bits of the limit are filled with ones */ + + raw_pci_read(seg, bus, PCI_DEVFN(dev, 0), + PCI_MEMORY_BASE, 2, &tmp); + base = (u64)tmp << 16; + + raw_pci_read(seg, bus, PCI_DEVFN(dev, 0), + PCI_MEMORY_LIMIT, 2, &tmp); + limit = (u64)tmp << 16; + limit |= 0xfffffUL; + + if (base < limit) + tioce_reserve_m32(tioce_kern, base, limit); + + /* + * prefetch mem base/limit. The tioce ppb's have 64-bit + * decoders, so read the upper portions w/o checking the + * attributes.
+ */ + + /* low part of the prefetch base: masked 16-bit register value shifted left by 16 */ raw_pci_read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_MEMORY_BASE, 2, &tmp); + base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16; + + /* upper 32 bits of the prefetch base */ raw_pci_read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_BASE_UPPER32, 4, &tmp); + base |= (u64)tmp << 32; + + /* prefetch limit, assembled the same way, with the low 20 bits filled with ones */ raw_pci_read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_MEMORY_LIMIT, 2, &tmp); + + limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16; + limit |= 0xfffffUL; + + raw_pci_read(seg, bus, PCI_DEVFN(dev, 0), + PCI_PREF_LIMIT_UPPER32, 4, &tmp); + limit |= (u64)tmp << 32; + + /* only a non-empty window whose base lies in M32 bus space needs ATEs reserved */ if ((base < limit) && TIOCE_M32_ADDR(base)) + tioce_reserve_m32(tioce_kern, base, limit); + } + + return tioce_kern; +} + +/** + * tioce_force_interrupt - implement altix force_interrupt() backend for CE + * @sn_irq_info: sn asic irq that we need an interrupt generated for + * + * Given an sn_irq_info struct, set the proper bit in ce_adm_force_int to + * force a secondary interrupt to be generated. This is to work around an + * asic issue where there is a small window of opportunity for a legacy device + * interrupt to be lost.
+ */ +static void +tioce_force_interrupt(struct sn_irq_info *sn_irq_info) +{ + struct pcidev_info *pcidev_info; + struct tioce_common *ce_common; + struct tioce_kernel *ce_kern; + struct tioce __iomem *ce_mmr; + u64 force_int_val; + + /* nothing to do unless the irq has a bridge, the bridge is a TIOCE, and pci info is attached */ if (!sn_irq_info->irq_bridge) + return; + + if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_TIOCE) + return; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (!pcidev_info) + return; + + ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; + ce_mmr = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base; + ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private; + + /* + * TIOCE Rev A workaround (PV 945826), force an interrupt by writing + * the TIO_INTx register directly (1/26/2006) + * + * The write is only issued when ce_adm_int_status shows this irq's + * bit set; Rev A never reaches the ce_adm_force_int path below. + */ + if (ce_common->ce_rev == TIOCE_REV_A) { + u64 int_bit_mask = (1ULL << sn_irq_info->irq_int_bit); + u64 status; + + tioce_mmr_load(ce_kern, &ce_mmr->ce_adm_int_status, &status); + if (status & int_bit_mask) { + u64 force_irq = (1 << 8) | sn_irq_info->irq_irq; + u64 ctalk = sn_irq_info->irq_xtalkaddr; + u64 nasid, offset; + + /* split the irq's xtalk address into nasid and node offset to form the store target */ nasid = (ctalk & CTALK_NASID_MASK) >> CTALK_NASID_SHFT; + offset = (ctalk & CTALK_NODE_OFFSET); + HUB_S(TIO_IOSPACE_ADDR(nasid, offset), force_irq); + } + + return; + } + + /* + * irq_int_bit is originally set up by prom, and holds the interrupt + * bit shift (not mask) as defined by the bit definitions in the + * ce_adm_int mmr. These shifts are not the same for the + * ce_adm_force_int register, so do an explicit mapping here to make + * things clearer.
+ */ + + /* translate the ce_adm_int shift for this irq into the matching ce_adm_force_int bit */ switch (sn_irq_info->irq_int_bit) { + case CE_ADM_INT_PCIE_PORT1_DEV_A_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_A_SHFT; + break; + case CE_ADM_INT_PCIE_PORT1_DEV_B_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_B_SHFT; + break; + case CE_ADM_INT_PCIE_PORT1_DEV_C_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_C_SHFT; + break; + case CE_ADM_INT_PCIE_PORT1_DEV_D_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_D_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_A_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_A_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_B_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_B_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_C_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_C_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_D_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_D_SHFT; + break; + default: /* any other interrupt bit has no force-int mapping here: do nothing */ + return; + } + tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_force_int, force_int_val); +} + +/** + * tioce_target_interrupt - implement set_irq_affinity for tioce resident + * functions. Note: only applies to line interrupts, not MSI's. + * + * @sn_irq_info: SN IRQ context + * + * Given an sn_irq_info, set the associated CE device's interrupt destination + * register. Since the interrupt destination registers are on a per-ce-slot + * basis, this will retarget line interrupts for all functions downstream of + * the slot.
+ */ +static void +tioce_target_interrupt(struct sn_irq_info *sn_irq_info) +{ + struct pcidev_info *pcidev_info; + struct tioce_common *ce_common; + struct tioce_kernel *ce_kern; + struct tioce __iomem *ce_mmr; + int bit; + u64 vector; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (!pcidev_info) + return; + + ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; + ce_mmr = (struct tioce __iomem *)ce_common->ce_pcibus.bs_base; + ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private; + + bit = sn_irq_info->irq_int_bit; + + /* set the irq's bit in ce_adm_int_mask, rewrite its destination (vector plus xtalk address), then clear the mask bit again */ tioce_mmr_seti(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit)); + vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT; + vector |= sn_irq_info->irq_xtalkaddr; + tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_int_dest[bit], vector); + tioce_mmr_clri(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit)); + + /* presumably re-raises an interrupt that arrived while the bit was set in the mask -- TODO confirm */ tioce_force_interrupt(sn_irq_info); +} + +/** + * tioce_bus_fixup - perform final PCI fixup for a TIO CE bus + * @prom_bussoft: Common prom/kernel struct representing the bus + * @controller: pci controller for the bus; not referenced by this function + * + * Replicates the tioce_common pointed to by @prom_bussoft in kernel + * space. Allocates and initializes a kernel-only area for a given CE, + * and sets up an irq for handling CE error interrupts. + * + * On successful setup, returns the kernel version of tioce_common back to + * the caller. Returns NULL if an allocation fails. + */ +static void * +tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) +{ + struct tioce_common *tioce_common; + struct tioce_kernel *tioce_kern; + struct tioce __iomem *tioce_mmr; + + /* + * Allocate kernel bus soft and copy from prom.
+ */ + + tioce_common = kzalloc(sizeof(struct tioce_common), GFP_KERNEL); + if (!tioce_common) + return NULL; + + memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common)); + tioce_common->ce_pcibus.bs_base = (unsigned long) + ioremap(REGION_OFFSET(tioce_common->ce_pcibus.bs_base), + sizeof(struct tioce_common)); + + tioce_kern = tioce_kern_init(tioce_common); + if (tioce_kern == NULL) { + kfree(tioce_common); + return NULL; + } + + /* + * Clear out any transient errors before registering the error + * interrupt handler. + */ + + tioce_mmr = (struct tioce __iomem *)tioce_common->ce_pcibus.bs_base; + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL); + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias, + ~0ULL); + tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, 0ULL); + + if (request_irq(SGI_PCIASIC_ERROR, + tioce_error_intr_handler, + IRQF_SHARED, "TIOCE error", (void *)tioce_common)) + printk(KERN_WARNING + "%s: Unable to get irq %d. 
" + "Error interrupts won't be routed for " + "TIOCE bus %04x:%02x\n", + __func__, SGI_PCIASIC_ERROR, + tioce_common->ce_pcibus.bs_persist_segment, + tioce_common->ce_pcibus.bs_persist_busnum); + + irq_set_handler(SGI_PCIASIC_ERROR, handle_level_irq); + sn_set_err_irq_affinity(SGI_PCIASIC_ERROR); + return tioce_common; +} + +static struct sn_pcibus_provider tioce_pci_interfaces = { + .dma_map = tioce_dma, + .dma_map_consistent = tioce_dma_consistent, + .dma_unmap = tioce_dma_unmap, + .bus_fixup = tioce_bus_fixup, + .force_interrupt = tioce_force_interrupt, + .target_interrupt = tioce_target_interrupt +}; + +/** + * tioce_init_provider - init SN PCI provider ops for TIO CE + */ +int +tioce_init_provider(void) +{ + sn_pci_provider[PCIIO_ASIC_TYPE_TIOCE] = &tioce_pci_interfaces; + return 0; +} diff --git a/kernel/arch/ia64/uv/Makefile b/kernel/arch/ia64/uv/Makefile new file mode 100644 index 000000000..aa9f91947 --- /dev/null +++ b/kernel/arch/ia64/uv/Makefile @@ -0,0 +1,12 @@ +# arch/ia64/uv/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2008 Silicon Graphics, Inc. All Rights Reserved. +# +# Makefile for the sn uv subplatform +# + +obj-y += kernel/ diff --git a/kernel/arch/ia64/uv/kernel/Makefile b/kernel/arch/ia64/uv/kernel/Makefile new file mode 100644 index 000000000..124e441d3 --- /dev/null +++ b/kernel/arch/ia64/uv/kernel/Makefile @@ -0,0 +1,13 @@ +# arch/ia64/uv/kernel/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2008 Silicon Graphics, Inc. All Rights Reserved. 
+# + +ccflags-y := -Iarch/ia64/sn/include + +obj-y += setup.o +obj-$(CONFIG_IA64_GENERIC) += machvec.o diff --git a/kernel/arch/ia64/uv/kernel/machvec.c b/kernel/arch/ia64/uv/kernel/machvec.c new file mode 100644 index 000000000..50737a9dc --- /dev/null +++ b/kernel/arch/ia64/uv/kernel/machvec.c @@ -0,0 +1,11 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +#define MACHVEC_PLATFORM_NAME uv +#define MACHVEC_PLATFORM_HEADER <asm/machvec_uv.h> +#include <asm/machvec_init.h> diff --git a/kernel/arch/ia64/uv/kernel/setup.c b/kernel/arch/ia64/uv/kernel/setup.c new file mode 100644 index 000000000..f1490657b --- /dev/null +++ b/kernel/arch/ia64/uv/kernel/setup.c @@ -0,0 +1,116 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV Core Functions + * + * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
+ */ + +#include <linux/module.h> +#include <linux/percpu.h> +#include <asm/sn/simulator.h> +#include <asm/uv/uv_mmrs.h> +#include <asm/uv/uv_hub.h> + +DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); +EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); + +#ifdef CONFIG_IA64_SGI_UV +int sn_prom_type; +long sn_partition_id; +EXPORT_SYMBOL(sn_partition_id); +long sn_coherency_id; +EXPORT_SYMBOL_GPL(sn_coherency_id); +long sn_region_size; +EXPORT_SYMBOL(sn_region_size); +#endif + +struct redir_addr { + unsigned long redirect; /* redirect-config MMR address */ + unsigned long alias; /* matching alias-overlay-config MMR address */ +}; + +#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT + +static __initdata struct redir_addr redir_addrs[] = { + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG}, + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG}, + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG}, +}; + +static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) /* find the alias overlay whose base is 0; report its redirect base and size */ +{ + union uvh_si_alias0_overlay_config_u alias; + union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; + int i; + + for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { + alias.v = uv_read_local_mmr(redir_addrs[i].alias); + if (alias.s.base == 0) { + *size = (1UL << alias.s.m_alias); + redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); + *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; + return; + } + } + BUG(); /* none of the alias overlays has base 0 */ +} + +void __init uv_setup(char **cmdline_p) /* fill in the per-cpu uv_hub_info; cmdline_p is not used */ +{ + union uvh_si_addr_map_config_u m_n_config; + union uvh_node_id_u node_id; + unsigned long gnode_upper; + int nid, cpu, m_val, n_val; + unsigned long mmr_base, lowmem_redir_base, lowmem_redir_size; + + if (IS_MEDUSA()) { + lowmem_redir_base = 0; + lowmem_redir_size = 0; + node_id.v = 0; + m_n_config.s.m_skt = 37; + m_n_config.s.n_skt = 0; + mmr_base = 0; +#if 0 + /* Need BIOS calls - TBD */ + if (!ia64_sn_is_fake_prom()) + sn_prom_type = 1; + else +#endif + sn_prom_type = 2; + printk(KERN_INFO "Running on medusa with %s PROM\n", + (sn_prom_type == 1) ?
"real" : "fake"); + } else { + get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); + node_id.v = uv_read_local_mmr(UVH_NODE_ID); + m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); + mmr_base = + uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & + ~UV_MMR_ENABLE; + } + + m_val = m_n_config.s.m_skt; + n_val = m_n_config.s.n_skt; + printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); + + gnode_upper = (((unsigned long)node_id.s.node_id) & + ~((1 << n_val) - 1)) << m_val; + + for_each_present_cpu(cpu) { + nid = cpu_to_node(cpu); + uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; + uv_cpu_hub_info(cpu)->lowmem_remap_top = + lowmem_redir_base + lowmem_redir_size; + uv_cpu_hub_info(cpu)->m_val = m_val; + uv_cpu_hub_info(cpu)->n_val = n_val; + uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) -1; + uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; /* 1UL: m_val alone is 37 on medusa, too wide for an int shift */ + uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; + uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; + uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ + printk(KERN_DEBUG "UV cpu %d, nid %d\n", cpu, nid); + } +} + -- cgit 1.2.3-korg